Skip to content
  1. Jun 04, 2016
  2. May 25, 2016
  3. May 24, 2016
  4. May 21, 2016
    • Zhaoxiu Zeng's avatar
      lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b
      Zhaoxiu Zeng authored
      
      
      The binary GCD algorithm is based on the following facts:
      	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
      	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
      	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
      
      Even on x86 machines with reasonable division hardware, the binary
      algorithm runs about 25% faster (80% the execution time) than the
      division-based Euclidian algorithm.
      
      On platforms like Alpha and ARMv6 where division is a function call to
      emulation code, it's even more significant.
      
      There are two variants of the code here, depending on whether a fast
      __ffs (find least significant set bit) instruction is available.  This
      allows the unpredictable branches in the bit-at-a-time shifting loop to
      be eliminated.
      
      If fast __ffs is not available, the "even/odd" GCD variant is used.
      
      I use the following code to benchmark:
      
      	#include <stdio.h>
      	#include <stdlib.h>
      	#include <stdint.h>
      	#include <string.h>
      	#include <time.h>
      	#include <unistd.h>
      
      	#define swap(a, b) \
      		do { \
      			a ^= b; \
      			b ^= a; \
      			a ^= b; \
      		} while (0)
      
      	unsigned long gcd0(unsigned long a, unsigned long b)
      	{
      		unsigned long r;
      
      		if (a < b) {
      			swap(a, b);
      		}
      
      		if (b == 0)
      			return a;
      
      		while ((r = a % b) != 0) {
      			a = b;
      			b = r;
      		}
      
      		return b;
      	}
      
      	unsigned long gcd1(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd2(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	unsigned long gcd3(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      		if (b == 1)
      			return r & -r;
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == 1)
      				return r & -r;
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd4(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      		if (b == r)
      			return r;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == r)
      				return r;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
      		gcd0, gcd1, gcd2, gcd3, gcd4,
      	};
      
      	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))
      
      	#if defined(__x86_64__)
      
      	#define rdtscll(val) do { \
      		unsigned long __a,__d; \
      		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
      		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
      	} while(0)
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		unsigned long long start, end;
      		unsigned long long ret;
      		unsigned long gcd_res;
      
      		rdtscll(start);
      		gcd_res = gcd(a, b);
      		rdtscll(end);
      
      		if (end >= start)
      			ret = end - start;
      		else
      			ret = ~0ULL - start + 1 + end;
      
      		*res = gcd_res;
      		return ret;
      	}
      
      	#else
      
      	static inline struct timespec read_time(void)
      	{
      		struct timespec time;
      		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
      		return time;
      	}
      
      	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
      	{
      		struct timespec temp;
      
      		if ((end.tv_nsec - start.tv_nsec) < 0) {
      			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
      			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
      		} else {
      			temp.tv_sec = end.tv_sec - start.tv_sec;
      			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
      		}
      
      		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
      	}
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		struct timespec start, end;
      		unsigned long gcd_res;
      
      		start = read_time();
      		gcd_res = gcd(a, b);
      		end = read_time();
      
      		*res = gcd_res;
      		return diff_time(start, end);
      	}
      
      	#endif
      
      	static inline unsigned long get_rand()
      	{
      		if (sizeof(long) == 8)
      			return (unsigned long)rand() << 32 | rand();
      		else
      			return rand();
      	}
      
      	int main(int argc, char **argv)
      	{
      		unsigned int seed = time(0);
      		int loops = 100;
      		int repeats = 1000;
      		unsigned long (*res)[TEST_ENTRIES];
      		unsigned long long elapsed[TEST_ENTRIES];
      		int i, j, k;
      
      		for (;;) {
      			int opt = getopt(argc, argv, "n:r:s:");
      			/* End condition always first */
      			if (opt == -1)
      				break;
      
      			switch (opt) {
      			case 'n':
      				loops = atoi(optarg);
      				break;
      			case 'r':
      				repeats = atoi(optarg);
      				break;
      			case 's':
      				seed = strtoul(optarg, NULL, 10);
      				break;
      			default:
      				/* You won't actually get here. */
      				break;
      			}
      		}
      
      		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
      		memset(elapsed, 0, sizeof(elapsed));
      
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			/* Do we have args? */
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			unsigned long long min_elapsed[TEST_ENTRIES];
      			for (k = 0; k < repeats; k++) {
      				for (i = 0; i < TEST_ENTRIES; i++) {
      					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
      					if (k == 0 || min_elapsed[i] > tmp)
      						min_elapsed[i] = tmp;
      				}
      			}
      			for (i = 0; i < TEST_ENTRIES; i++)
      				elapsed[i] += min_elapsed[i];
      		}
      
      		for (i = 0; i < TEST_ENTRIES; i++)
      			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);
      
      		k = 0;
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			for (i = 1; i < TEST_ENTRIES; i++) {
      				if (res[j][i] != res[j][0])
      					break;
      			}
      			if (i < TEST_ENTRIES) {
      				if (k == 0) {
      					k = 1;
      					fprintf(stderr, "Error:\n");
      				}
      				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
      				for (i = 0; i < TEST_ENTRIES; i++)
      					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
      			}
      		}
      
      		if (k == 0)
      			fprintf(stderr, "PASS\n");
      
      		free(res);
      
      		return 0;
      	}
      
      Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
      
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 10174
        gcd1: elapsed 2120
        gcd2: elapsed 2902
        gcd3: elapsed 2039
        gcd4: elapsed 2812
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9309
        gcd1: elapsed 2280
        gcd2: elapsed 2822
        gcd3: elapsed 2217
        gcd4: elapsed 2710
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9589
        gcd1: elapsed 2098
        gcd2: elapsed 2815
        gcd3: elapsed 2030
        gcd4: elapsed 2718
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9914
        gcd1: elapsed 2309
        gcd2: elapsed 2779
        gcd3: elapsed 2228
        gcd4: elapsed 2709
        PASS
      
      [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
      Signed-off-by: default avatarZhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
      Signed-off-by: default avatarGeorge Spelvin <linux@horizon.com>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      fff7fb0b
    • Petr Mladek's avatar
      printk/nmi: generic solution for safe printk in NMI · 42a0bb3f
      Petr Mladek authored
      printk() takes some locks and could not be used a safe way in NMI
      context.
      
      The chance of a deadlock is real especially when printing stacks from
      all CPUs.  This particular problem has been addressed on x86 by the
      commit a9edc880 ("x86/nmi: Perform a safe NMI stack trace on all
      CPUs").
      
      The patchset brings two big advantages.  First, it makes the NMI
      backtraces safe on all architectures for free.  Second, it makes all NMI
      messages almost safe on all architectures (the temporary buffer is
      limited.  We still should keep the number of messages in NMI context at
      minimum).
      
      Note that there already are several messages printed in NMI context:
      WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE
      handlers.  These are not easy to avoid.
      
      This patch reuses most of the code and makes it generic.  It is useful
      for all messages and architectures that support NMI.
      
      The alternative printk_func is set when entering and is reseted when
      leaving NMI context.  It queues IRQ work to copy the messages into the
      main ring buffer in a safe context.
      
      __printk_nmi_flush() copies all available messages and reset the buffer.
      Then we could use a simple cmpxchg operations to get synchronized with
      writers.  There is also used a spinlock to get synchronized with other
      flushers.
      
      We do not longer use seq_buf because it depends on external lock.  It
      would be hard to make all supported operations safe for a lockless use.
      It would be confusing and error prone to make only some operations safe.
      
      The code is put into separate printk/nmi.c as suggested by Steven
      Rostedt.  It needs a per-CPU buffer and is compiled only on
      architectures that call nmi_enter().  This is achieved by the new
      HAVE_NMI Kconfig flag.
      
      The are MN10300 and Xtensa architectures.  We need to clean up NMI
      handling there first.  Let's do it separately.
      
      The patch is heavily based on the draft from Peter Zijlstra, see
      
        https://lkml.org/lkml/2015/6/10/327
      
      
      
      [arnd@arndb.de: printk-nmi: use %zu format string for size_t]
      [akpm@linux-foundation.org: min_t->min - all types are size_t here]
      Signed-off-by: default avatarPetr Mladek <pmladek@suse.com>
      Suggested-by: default avatarPeter Zijlstra <peterz@infradead.org>
      Suggested-by: default avatarSteven Rostedt <rostedt@goodmis.org>
      Cc: Jan Kara <jack@suse.cz>
      Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>	[arm part]
      Cc: Daniel Thompson <daniel.thompson@linaro.org>
      Cc: Jiri Kosina <jkosina@suse.com>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: David Miller <davem@davemloft.net>
      Cc: Daniel Thompson <daniel.thompson@linaro.org>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      42a0bb3f
    • Jiri Slaby's avatar
      exit_thread: accept a task parameter to be exited · e6464694
      Jiri Slaby authored
      
      
      We need to call exit_thread from copy_process in a fail path.  So make it
      accept task_struct as a parameter.
      
      [v2]
      * s390: exit_thread_runtime_instr doesn't make sense to be called for
        non-current tasks.
      * arm: fix the comment in vfp_thread_copy
      * change 'me' to 'tsk' for task_struct
      * now we can change only archs that actually have exit_thread
      
      [akpm@linux-foundation.org: coding-style fixes]
      Signed-off-by: default avatarJiri Slaby <jslaby@suse.cz>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: "H. Peter Anvin" <hpa@zytor.com>
      Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
      Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Catalin Marinas <catalin.marinas@arm.com>
      Cc: Chen Liqin <liqin.linux@gmail.com>
      Cc: Chris Metcalf <cmetcalf@mellanox.com>
      Cc: Chris Zankel <chris@zankel.net>
      Cc: David Howells <dhowells@redhat.com>
      Cc: Fenghua Yu <fenghua.yu@intel.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
      Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
      Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Helge Deller <deller@gmx.de>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: Jeff Dike <jdike@addtoit.com>
      Cc: Jesper Nilsson <jesper.nilsson@axis.com>
      Cc: Jiri Slaby <jslaby@suse.cz>
      Cc: Jonas Bonn <jonas@southpole.se>
      Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
      Cc: Lennox Wu <lennox.wu@gmail.com>
      Cc: Ley Foon Tan <lftan@altera.com>
      Cc: Mark Salter <msalter@redhat.com>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Matt Turner <mattst88@gmail.com>
      Cc: Max Filippov <jcmvbkbc@gmail.com>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Michal Simek <monstr@monstr.eu>
      Cc: Mikael Starvik <starvik@axis.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Rich Felker <dalias@libc.org>
      Cc: Richard Henderson <rth@twiddle.net>
      Cc: Richard Kuo <rkuo@codeaurora.org>
      Cc: Richard Weinberger <richard@nod.at>
      Cc: Russell King <linux@arm.linux.org.uk>
      Cc: Steven Miao <realmz6@gmail.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Tony Luck <tony.luck@intel.com>
      Cc: Vineet Gupta <vgupta@synopsys.com>
      Cc: Will Deacon <will.deacon@arm.com>
      Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      e6464694
    • Jiri Slaby's avatar
      exit_thread: remove empty bodies · 5f56a5df
      Jiri Slaby authored
      
      
      Define HAVE_EXIT_THREAD for archs which want to do something in
      exit_thread. For others, let's define exit_thread as an empty inline.
      
      This is a cleanup before we change the prototype of exit_thread to
      accept a task parameter.
      
      [akpm@linux-foundation.org: fix mips]
      Signed-off-by: default avatarJiri Slaby <jslaby@suse.cz>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: "H. Peter Anvin" <hpa@zytor.com>
      Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
      Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Catalin Marinas <catalin.marinas@arm.com>
      Cc: Chen Liqin <liqin.linux@gmail.com>
      Cc: Chris Metcalf <cmetcalf@mellanox.com>
      Cc: Chris Zankel <chris@zankel.net>
      Cc: David Howells <dhowells@redhat.com>
      Cc: Fenghua Yu <fenghua.yu@intel.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
      Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
      Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Helge Deller <deller@gmx.de>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: Jeff Dike <jdike@addtoit.com>
      Cc: Jesper Nilsson <jesper.nilsson@axis.com>
      Cc: Jiri Slaby <jslaby@suse.cz>
      Cc: Jonas Bonn <jonas@southpole.se>
      Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
      Cc: Lennox Wu <lennox.wu@gmail.com>
      Cc: Ley Foon Tan <lftan@altera.com>
      Cc: Mark Salter <msalter@redhat.com>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Matt Turner <mattst88@gmail.com>
      Cc: Max Filippov <jcmvbkbc@gmail.com>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Michal Simek <monstr@monstr.eu>
      Cc: Mikael Starvik <starvik@axis.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Rich Felker <dalias@libc.org>
      Cc: Richard Henderson <rth@twiddle.net>
      Cc: Richard Kuo <rkuo@codeaurora.org>
      Cc: Richard Weinberger <richard@nod.at>
      Cc: Russell King <linux@arm.linux.org.uk>
      Cc: Steven Miao <realmz6@gmail.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Tony Luck <tony.luck@intel.com>
      Cc: Vineet Gupta <vgupta@synopsys.com>
      Cc: Will Deacon <will.deacon@arm.com>
      Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      5f56a5df
  5. May 20, 2016
  6. May 18, 2016
    • Arnd Bergmann's avatar
      ARM: exynos: don't select keyboard driver · 8a6f71cc
      Arnd Bergmann authored
      
      
      The samsung-keypad driver is implicitly selected by ARCH_EXYNOS4 (why?),
      but this fails if CONFIG_INPUT is a loadable module:
      
      drivers/input/built-in.o: In function `samsung_keypad_remove':
      drivers/input/keyboard/samsung-keypad.c:461: undefined reference to `input_unregister_device'
      drivers/input/built-in.o: In function `samsung_keypad_irq':
      drivers/input/keyboard/samsung-keypad.c:137: undefined reference to `input_event'
      drivers/input/built-in.o: In function `samsung_keypad_irq':
      include/linux/input.h:389: undefined reference to `input_event'
      drivers/input/built-in.o: In function `samsung_keypad_probe':
      drivers/input/keyboard/samsung-keypad.c:358: undefined reference to `devm_input_allocate_device'
      drivers/input/built-in.o:(.debug_addr+0x34): undefined reference to `input_set_capability'
      
      This removes the 'select' as suggested by Krzysztof Kozlowski and
      instead enables the driver from the defconfig files.
      
      The problem does not happen on mainline kernels, as we don't normally
      build built-in input drivers when CONFIG_INPUT=m, but I am experimenting
      with a patch to change this, and the samsung keypad driver showed up
      as one example that was silently broken before.
      
      Signed-off-by: default avatarArnd Bergmann <arnd@arndb.de>
      Link: https://lkml.org/lkml/2016/2/14/55
      
      
      Signed-off-by: default avatarKrzysztof Kozlowski <k.kozlowski@samsung.com>
      8a6f71cc
    • Arnd Bergmann's avatar
      ARM: samsung: improve static dma_mask definition · 4cb54493
      Arnd Bergmann authored
      
      
      When no DMA master devices are part of the kernel configuration,
      we get a warning about the unused dma mask definition:
      
      arch/arm/plat-samsung/devs.c:71:12: error: 'samsung_device_dma_mask' defined but not used [-Werror=unused-variable]
       static u64 samsung_device_dma_mask = DMA_BIT_MASK(32);
      
      We could simply mark this as __maybe_unused to shut up that warning,
      but a nicer solution seems to be to have a separate mask for each
      device. The advantage is that a driver that happens to call
      dma_set_mask() on one device doesn't implicitly change the mask
      for the other devices as well. This is more of a theoretical
      problem, as obviously nothing does it for the devices in this
      file (or they would have always been broken), but it feels
      cleaner that way.
      
      The definition works by creating an array in place so we can take
      the address of it and let the compiler generate a hidden symbol
      for it at compile time.
      
      Signed-off-by: default avatarArnd Bergmann <arnd@arndb.de>
      Signed-off-by: default avatarKrzysztof Kozlowski <k.kozlowski@samsung.com>
      4cb54493
  7. May 17, 2016
  8. May 13, 2016
    • Christian Borntraeger's avatar
      KVM: halt_polling: provide a way to qualify wakeups during poll · 3491caf2
      Christian Borntraeger authored
      
      
      Some wakeups should not be considered a sucessful poll. For example on
      s390 I/O interrupts are usually floating, which means that _ALL_ CPUs
      would be considered runnable - letting all vCPUs poll all the time for
      transactional like workload, even if one vCPU would be enough.
      This can result in huge CPU usage for large guests.
      This patch lets architectures provide a way to qualify wakeups if they
      should be considered a good/bad wakeups in regard to polls.
      
      For s390 the implementation will fence of halt polling for anything but
      known good, single vCPU events. The s390 implementation for floating
      interrupts does a wakeup for one vCPU, but the interrupt will be delivered
      by whatever CPU checks first for a pending interrupt. We prefer the
      woken up CPU by marking the poll of this CPU as "good" poll.
      This code will also mark several other wakeup reasons like IPI or
      expired timers as "good". This will of course also mark some events as
      not sucessful. As  KVM on z runs always as a 2nd level hypervisor,
      we prefer to not poll, unless we are really sure, though.
      
      This patch successfully limits the CPU usage for cases like uperf 1byte
      transactional ping pong workload or wakeup heavy workload like OLTP
      while still providing a proper speedup.
      
      This also introduced a new vcpu stat "halt_poll_no_tuning" that marks
      wakeups that are considered not good for polling.
      
      Signed-off-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Acked-by: Radim Krčmář <rkrcmar@redhat.com> (for an earlier version)
      Cc: David Matlack <dmatlack@google.com>
      Cc: Wanpeng Li <kernellwp@gmail.com>
      [Rename config symbol. - Paolo]
      Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
      3491caf2
    • Steven Rostedt's avatar
      ARM: Hide finish_arch_post_lock_switch() from modules · ef0491ea
      Steven Rostedt authored
      The introduction of switch_mm_irqs_off() brought back an old bug
      regarding the use of preempt_enable_no_resched:
      
      As part of:
      
        62b94a08
      
       ("sched/preempt: Take away preempt_enable_no_resched() from modules")
      
      the definition of preempt_enable_no_resched() is only available in
      built-in code, not in loadable modules, so we can't generally use
      it from header files.
      
      However, the ARM version of finish_arch_post_lock_switch()
      calls preempt_enable_no_resched() and is defined as a static
      inline function in asm/mmu_context.h. This in turn means we cannot
      include asm/mmu_context.h from modules.
      
      With today's tip tree, asm/mmu_context.h gets included from
      linux/mmu_context.h, which is normally the exact pattern one would
      expect, but unfortunately, linux/mmu_context.h can be included from
      the vhost driver that is a loadable module, now causing this compile
      time error with modular configs:
      
        In file included from ../include/linux/mmu_context.h:4:0,
                         from ../drivers/vhost/vhost.c:18:
        ../arch/arm/include/asm/mmu_context.h: In function 'finish_arch_post_lock_switch':
        ../arch/arm/include/asm/mmu_context.h:88:3: error: implicit declaration of function 'preempt_enable_no_resched' [-Werror=implicit-function-declaration]
           preempt_enable_no_resched();
      
      Andy already tried to fix the bug by including linux/preempt.h
      from asm/mmu_context.h, but that didn't help. Arnd suggested reordering
      the header files, which wasn't popular, so let's use this
      workaround instead:
      
      The finish_arch_post_lock_switch() definition is now also hidden
      inside of #ifdef MODULE, so we don't see anything referencing
      preempt_enable_no_resched() from a header file. I've built a
      few hundred randconfig kernels with this, and did not see any
      new problems.
      
      Tested-by: default avatarGuenter Roeck <linux@roeck-us.net>
      Signed-off-by: default avatarSteven Rostedt <rostedt@goodmis.org>
      Signed-off-by: default avatarArnd Bergmann <arnd@arndb.de>
      Acked-by: default avatarRussell King <rmk+kernel@arm.linux.org.uk>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andy Lutomirski <luto@amacapital.net>
      Cc: Andy Lutomirski <luto@kernel.org>
      Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
      Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Borislav Petkov <bp@suse.de>
      Cc: Frederic Weisbecker <fweisbec@gmail.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Mel Gorman <mgorman@techsingularity.net>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Russell King - ARM Linux <linux@armlinux.org.uk>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Vince Weaver <vincent.weaver@maine.edu>
      Cc: linux-arm-kernel@lists.infradead.org
      Fixes: f98db601 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
      Link: http://lkml.kernel.org/r/1463146234-161304-1-git-send-email-arnd@arndb.de
      
      
      Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
      ef0491ea
  9. May 11, 2016
    • Boris Brezillon's avatar
      ARM: dts: at91: sam9x5: Fix the memory range assigned to the PMC · aab0a4c8
      Boris Brezillon authored
      
      
      The memory range assigned to the PMC (Power Management Controller) was
      not including the PMC_PCR register which are used to control peripheral
      clocks.
      
      This was working fine thanks to the page granularity of ioremap(), but
      started to fail when we switched to syscon/regmap, because regmap is
      making sure that all accesses are falling into the reserved range.
      
      Signed-off-by: default avatarBoris Brezillon <boris.brezillon@free-electrons.com>
      Reported-by: default avatarRichard Genoud <richard.genoud@gmail.com>
      Tested-by: default avatarRichard Genoud <richard.genoud@gmail.com>
      Fixes: 863a81c3
      
       ("clk: at91: make use of syscon to share PMC registers in several drivers")
      Cc: <stable@vger.kernel.org>
      Signed-off-by: default avatarNicolas Ferre <nicolas.ferre@atmel.com>
      aab0a4c8
    • Arnd Bergmann's avatar
      ARM: aspeed: adapt defconfigs for new CONFIG_PRINTK_TIME · 0ef659a3
      Arnd Bergmann authored
      Commit 94ea9c7a579f ("lib, switch CONFIG_PRINTK_TIME to int") changes
      the type of CONFIG_PRINTK_TIME and adapts all existing defconfig files,
      while we add two new defconfig files with d0bc3483
      
       ("arm/configs:
      Add Aspeed defconfig"), with the old type.
      
      This changes the two new defconfig files to match the other ones.
      As a result, we get harmless warnings for the arm-soc defconfig
      branch by itself, but everything will work when 4.7-rc1 is out.
      
      Signed-off-by: default avatarArnd Bergmann <arnd@arndb.de>
      0ef659a3
    • Vladimir Zapolskiy's avatar
      irqchip: Add LPC32xx interrupt controller driver · 8cb17b5e
      Vladimir Zapolskiy authored
      The change adds improved support of NXP LPC32xx MIC, SIC1 and SIC2
      interrupt controllers.
      
      This is a list of new features in comparison to the legacy driver:
      * irq types are taken from device tree settings, no more need to
        hardcode them,
      * old driver is based on irq_domain_add_legacy, which causes problems
        with handling MIC hardware interrupt 0 produced by SIC1,
      * there is one driver for MIC, SIC1 and SIC2, no more need to handle
        them separately, e.g. have two separate handlers for SIC1 and SIC2,
      * the driver does not have any dependencies on hardcoded register
        offsets,
      * the driver is much simpler for maintenance,
      * SPARSE_IRQS option is supported.
      
      Legacy LPC32xx interrupt controller driver was broken since commit
      76ba59f8 ("genirq: Add irq_domain-aware core IRQ handler"), which
      requires a private interrupt handler, otherwise any SIC1 generated
      interrupt (mapped to MIC hwirq 0) breaks the kernel with the message
      "unexpected IRQ trap at vector 00".
      
      The change disables compilation of a legacy driver found at
      arch/arm/mach-lpc32xx/irq.c, the file will be removed in a separate
      commit.
      
      Fixes: 76ba59f8
      
       ("genirq: Add irq_domain-aware core IRQ handler")
      Tested-by: default avatarSylvain Lemieux <slemieux.tyco@gmail.com>
      Signed-off-by: default avatarVladimir Zapolskiy <vz@mleia.com>
      Signed-off-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
      8cb17b5e
  10. May 10, 2016
  11. May 09, 2016