From e51ea5442996261d4bc3a5b934d27cc0ce6a991c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 15:10:46 -0400 Subject: [PATCH 001/839] sparc32: don't bother with lookup_fault() in __bzero() Signed-off-by: Al Viro --- arch/sparc/lib/memset.S | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index b89d42b29e344..725041c5224a8 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -19,7 +19,7 @@ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba 30f; \ +99: retl; \ a, b, %o0; \ .section __ex_table,ALLOC; \ .align 4; \ @@ -194,24 +194,15 @@ __memset_end: 1: sll %g2, 3, %g2 add %o3, %o1, %o0 - b 30f + retl sub %o0, %g2, %o0 21: mov 8, %o0 and %o1, 7, %o1 sub %o0, %g2, %o0 sll %o0, 3, %o0 - b 30f + retl add %o0, %o1, %o0 -30: -/* %o4 is faulting address, %o5 is %pc where fault occurred */ - save %sp, -104, %sp - mov %i5, %o0 - mov %i7, %o1 - call lookup_fault - mov %i4, %o2 - ret - restore .globl __bzero_end __bzero_end: -- GitLab From 5f99d33810b00666825784342868240e5790c704 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 15:12:34 -0400 Subject: [PATCH 002/839] sparc32: kill lookup_fault() No callers left. As the result we can kill * lookup_fault() itself * the kludge in do_sparc_fault() for passing the arguments for eventual lookup_fault() into exception handler and labels used by it * the last of magical exception table entries (in __clear_user()) Signed-off-by: Al Viro --- arch/sparc/include/asm/uaccess_32.h | 5 --- arch/sparc/lib/memset.S | 3 -- arch/sparc/mm/fault_32.c | 56 ----------------------------- arch/sparc/mm/mm_32.h | 2 -- 4 files changed, 66 deletions(-) diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 0a2d3ebc4bb86..54a3ba7426476 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -252,12 +252,7 @@ static inline unsigned long __clear_user(void __user *addr, unsigned long size) unsigned long ret; __asm__ __volatile__ ( - ".section __ex_table,#alloc\n\t" - ".align 4\n\t" - ".word 1f,3\n\t" - ".previous\n\t" "mov %2, %%o1\n" - "1:\n\t" "call __bzero\n\t" " mov %1, %%o0\n\t" "mov %%o0, %0\n" diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 725041c5224a8..d79550e3e63f4 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -68,8 +68,6 @@ __bzero_begin: .globl memset EXPORT_SYMBOL(__bzero) EXPORT_SYMBOL(memset) - .globl __memset_start, __memset_end -__memset_start: memset: mov %o0, %g1 mov 1, %g4 @@ -181,7 +179,6 @@ __bzero: 5: retl clr %o0 -__memset_end: .section .fixup,#alloc,#execinstr .align 4 diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 40ce087dfecf2..290869fd6b6ad 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -54,54 +54,6 @@ static void __noreturn unhandled_fault(unsigned long address, die_if_kernel("Oops", regs); } -asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, - unsigned long address) -{ - struct pt_regs regs; - unsigned long g2; - unsigned int insn; - int i; - - i = search_extables_range(ret_pc, &g2); - switch (i) { - case 3: - /* load & store will be handled by fixup */ - return 3; - - case 1: - /* store will be handled by fixup, load will bump out */ - /* for _to_ macros */ - insn = *((unsigned int *) pc); - if ((insn >> 21) & 1) - return 1; - break; - - case 2: - /* load will be handled by fixup, store will bump out */ - /* for _from_ macros */ - insn = *((unsigned int *) pc); - if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15) - return 2; - break; - - default: - break; - } - - memset(®s, 0, sizeof(regs)); - regs.pc = pc; - regs.npc = pc + 4; - __asm__ __volatile__( - "rd %%psr, %0\n\t" - "nop\n\t" - "nop\n\t" - "nop\n" : "=r" (regs.psr)); - unhandled_fault(address, current, ®s); - - /* Not reached */ - return 0; -} - static inline void show_signal_msg(struct pt_regs *regs, int sig, int code, unsigned long address, struct task_struct *tsk) @@ -286,20 +238,12 @@ no_context: fixup = search_extables_range(regs->pc, &g2); /* Values below 10 are reserved for other things */ if (fixup > 10) { - extern const unsigned int __memset_start[]; - extern const unsigned int __memset_end[]; - #ifdef DEBUG_EXCEPTIONS printk("Exception: PC<%08lx> faddr<%08lx>\n", regs->pc, address); printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", regs->pc, fixup, g2); #endif - if ((regs->pc >= (unsigned long)__memset_start && - regs->pc < (unsigned long)__memset_end)) { - regs->u_regs[UREG_I4] = address; - regs->u_regs[UREG_I5] = regs->pc; - } regs->u_regs[UREG_G2] = g2; regs->pc = fixup; regs->npc = regs->pc + 4; diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h index ce750a99eea96..ee55f10806343 100644 --- a/arch/sparc/mm/mm_32.h +++ b/arch/sparc/mm/mm_32.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* fault_32.c - visible as they are called from assembler */ -asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, - unsigned long address); asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long address); -- GitLab From df06c27ebd86af2b4c43f698c3d38b781dbc722d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 17:27:56 -0400 Subject: [PATCH 003/839] sparc32: switch __bzero() away from range exception table entries Signed-off-by: Al Viro --- arch/sparc/lib/memset.S | 72 ++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index d79550e3e63f4..eaff68213fdf5 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -27,35 +27,44 @@ .text; \ .align 4 -#define EXT(start,end,handler) \ +#define STORE(source, base, offset, n) \ +98: std source, [base + offset + n]; \ + .section .fixup,ALLOC,EXECINSTR; \ + .align 4; \ +99: ba 30f; \ + sub %o3, n - offset, %o3; \ .section __ex_table,ALLOC; \ .align 4; \ - .word start, 0, end, handler; \ + .word 98b, 99b; \ .text; \ - .align 4 + .align 4; + +#define STORE_LAST(source, base, offset, n) \ + EX(std source, [base - offset - n], \ + add %o1, offset + n); /* Please don't change these macros, unless you change the logic * in the .fixup section below as well. * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */ -#define ZERO_BIG_BLOCK(base, offset, source) \ - std source, [base + offset + 0x00]; \ - std source, [base + offset + 0x08]; \ - std source, [base + offset + 0x10]; \ - std source, [base + offset + 0x18]; \ - std source, [base + offset + 0x20]; \ - std source, [base + offset + 0x28]; \ - std source, [base + offset + 0x30]; \ - std source, [base + offset + 0x38]; +#define ZERO_BIG_BLOCK(base, offset, source) \ + STORE(source, base, offset, 0x00); \ + STORE(source, base, offset, 0x08); \ + STORE(source, base, offset, 0x10); \ + STORE(source, base, offset, 0x18); \ + STORE(source, base, offset, 0x20); \ + STORE(source, base, offset, 0x28); \ + STORE(source, base, offset, 0x30); \ + STORE(source, base, offset, 0x38); #define ZERO_LAST_BLOCKS(base, offset, source) \ - std source, [base - offset - 0x38]; \ - std source, [base - offset - 0x30]; \ - std source, [base - offset - 0x28]; \ - std source, [base - offset - 0x20]; \ - std source, [base - offset - 0x18]; \ - std source, [base - offset - 0x10]; \ - std source, [base - offset - 0x08]; \ - std source, [base - offset - 0x00]; + STORE_LAST(source, base, offset, 0x38); \ + STORE_LAST(source, base, offset, 0x30); \ + STORE_LAST(source, base, offset, 0x28); \ + STORE_LAST(source, base, offset, 0x20); \ + STORE_LAST(source, base, offset, 0x18); \ + STORE_LAST(source, base, offset, 0x10); \ + STORE_LAST(source, base, offset, 0x08); \ + STORE_LAST(source, base, offset, 0x00); .text .align 4 @@ -120,8 +129,6 @@ __bzero: ZERO_BIG_BLOCK(%o0, 0x00, %g2) subcc %o3, 128, %o3 ZERO_BIG_BLOCK(%o0, 0x40, %g2) -11: - EXT(10b, 11b, 20f) bne 10b add %o0, 128, %o0 @@ -136,7 +143,6 @@ __bzero: jmp %o4 add %o0, %o2, %o0 -12: ZERO_LAST_BLOCKS(%o0, 0x48, %g2) ZERO_LAST_BLOCKS(%o0, 0x08, %g2) 13: @@ -182,24 +188,10 @@ __bzero: .section .fixup,#alloc,#execinstr .align 4 -20: - cmp %g2, 8 - bleu 1f - and %o1, 0x7f, %o1 - sub %g2, 9, %g2 - add %o3, 64, %o3 -1: - sll %g2, 3, %g2 - add %o3, %o1, %o0 - retl - sub %o0, %g2, %o0 -21: - mov 8, %o0 - and %o1, 7, %o1 - sub %o0, %g2, %o0 - sll %o0, 3, %o0 +30: + and %o1, 0x7f, %o1 retl - add %o0, %o1, %o0 + add %o3, %o1, %o0 .globl __bzero_end __bzero_end: -- GitLab From cfd5fa7021a54b8ed9bad16246b6b34851fd48d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 14 Jul 2020 17:33:55 -0400 Subject: [PATCH 004/839] sparc32: get rid of range exception table entries in checksum_32.S trivial - we don't even look at instruction offsets in the handler Signed-off-by: Al Viro --- arch/sparc/lib/checksum_32.S | 64 +++++++++++++++--------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 7488d130faf73..781e39b3c009f 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -155,13 +155,6 @@ cpout: retl ! get outta here .text; \ .align 4 -#define EXT(start,end) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, cc_fault; \ - .text; \ - .align 4 - /* This aligned version executes typically in 8.5 superscalar cycles, this * is the best I can do. I say 8.5 because the final add will pair with * the next ldd in the main unrolled loop. Thus the pipe is always full. @@ -169,20 +162,20 @@ cpout: retl ! get outta here * please check the fixup code below as well. */ #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ addxcc t0, sum, sum; \ - ldd [src + off + 0x10], t4; \ + EX(ldd [src + off + 0x10], t4); \ addxcc t1, sum, sum; \ - ldd [src + off + 0x18], t6; \ + EX(ldd [src + off + 0x18], t6); \ addxcc t2, sum, sum; \ - std t0, [dst + off + 0x00]; \ + EX(std t0, [dst + off + 0x00]); \ addxcc t3, sum, sum; \ - std t2, [dst + off + 0x08]; \ + EX(std t2, [dst + off + 0x08]); \ addxcc t4, sum, sum; \ - std t4, [dst + off + 0x10]; \ + EX(std t4, [dst + off + 0x10]); \ addxcc t5, sum, sum; \ - std t6, [dst + off + 0x18]; \ + EX(std t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ addxcc t7, sum, sum; @@ -191,39 +184,39 @@ cpout: retl ! get outta here * Viking MXCC into streaming mode. Ho hum... */ #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [src + off + 0x00], t0; \ - ldd [src + off + 0x08], t2; \ - ldd [src + off + 0x10], t4; \ - ldd [src + off + 0x18], t6; \ - st t0, [dst + off + 0x00]; \ + EX(ldd [src + off + 0x00], t0); \ + EX(ldd [src + off + 0x08], t2); \ + EX(ldd [src + off + 0x10], t4); \ + EX(ldd [src + off + 0x18], t6); \ + EX(st t0, [dst + off + 0x00]); \ addxcc t0, sum, sum; \ - st t1, [dst + off + 0x04]; \ + EX(st t1, [dst + off + 0x04]); \ addxcc t1, sum, sum; \ - st t2, [dst + off + 0x08]; \ + EX(st t2, [dst + off + 0x08]); \ addxcc t2, sum, sum; \ - st t3, [dst + off + 0x0c]; \ + EX(st t3, [dst + off + 0x0c]); \ addxcc t3, sum, sum; \ - st t4, [dst + off + 0x10]; \ + EX(st t4, [dst + off + 0x10]); \ addxcc t4, sum, sum; \ - st t5, [dst + off + 0x14]; \ + EX(st t5, [dst + off + 0x14]); \ addxcc t5, sum, sum; \ - st t6, [dst + off + 0x18]; \ + EX(st t6, [dst + off + 0x18]); \ addxcc t6, sum, sum; \ - st t7, [dst + off + 0x1c]; \ + EX(st t7, [dst + off + 0x1c]); \ addxcc t7, sum, sum; /* Yuck, 6 superscalar cycles... */ #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \ - ldd [src - off - 0x08], t0; \ - ldd [src - off - 0x00], t2; \ + EX(ldd [src - off - 0x08], t0); \ + EX(ldd [src - off - 0x00], t2); \ addxcc t0, sum, sum; \ - st t0, [dst - off - 0x08]; \ + EX(st t0, [dst - off - 0x08]); \ addxcc t1, sum, sum; \ - st t1, [dst - off - 0x04]; \ + EX(st t1, [dst - off - 0x04]); \ addxcc t2, sum, sum; \ - st t2, [dst - off - 0x00]; \ + EX(st t2, [dst - off - 0x00]); \ addxcc t3, sum, sum; \ - st t3, [dst - off + 0x04]; + EX(st t3, [dst - off + 0x04]); /* Handle the end cruft code out of band for better cache patterns. */ cc_end_cruft: @@ -331,7 +324,6 @@ __csum_partial_copy_sparc_generic: CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -10: EXT(5b, 10b) ! note for exception handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? @@ -356,8 +348,7 @@ cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5) CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5) -12: EXT(cctbl, 12b) ! note for exception table handling - addx %g0, %g7, %g7 +12: addx %g0, %g7, %g7 andcc %o3, 0xf, %g0 ! check for low bits set ccte: bne cc_end_cruft ! something left, handle it out of band andcc %o3, 8, %g0 ! begin checks for that code @@ -367,7 +358,6 @@ ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3) -11: EXT(ccdbl, 11b) ! note for exception table handling sub %g1, 128, %g1 ! detract from length addx %g0, %g7, %g7 ! add in last carry bit andcc %g1, 0xffffff80, %g0 ! more to csum? -- GitLab From c4da8e0dc6f7cec80f32af080cadf47c1753a2ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jul 2020 10:19:37 -0400 Subject: [PATCH 005/839] sparc32: switch copy_user.S away from range exception table entries Those were the last range exception table entries, which will allow to get rid of a lot of weirdness. Emits the same code into .text. Signed-off-by: Al Viro --- arch/sparc/lib/copy_user.S | 315 +++++++++++++------------------------ 1 file changed, 112 insertions(+), 203 deletions(-) diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S index dc72f2b970b7b..954572c78539a 100644 --- a/arch/sparc/lib/copy_user.S +++ b/arch/sparc/lib/copy_user.S @@ -21,98 +21,134 @@ /* Work around cpp -rob */ #define ALLOC #alloc #define EXECINSTR #execinstr + +#define EX_ENTRY(l1, l2) \ + .section __ex_table,ALLOC; \ + .align 4; \ + .word l1, l2; \ + .text; + #define EX(x,y,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ -99: ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 +99: retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EX2(x,y,c,d,e,a,b) \ 98: x,y; \ .section .fixup,ALLOC,EXECINSTR; \ .align 4; \ 99: c, d, e; \ - ba fixupretl; \ - a, b, %g3; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4 + retl; \ + a, b, %o0; \ + EX_ENTRY(98b, 99b) #define EXO2(x,y) \ 98: x, y; \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word 98b, 97f; \ - .text; \ - .align 4 + EX_ENTRY(98b, 97f) -#define EXT(start,end,handler) \ - .section __ex_table,ALLOC; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4 +#define LD(insn, src, offset, reg, label) \ +98: insn [%src + (offset)], %reg; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) -/* Please do not change following macros unless you change logic used - * in .fixup at the end of this file as well - */ +#define ST(insn, dst, offset, reg, label) \ +98: insn %reg, [%dst + (offset)]; \ + .section .fixup,ALLOC,EXECINSTR; \ +99: ba label; \ + mov offset, %g5; \ + EX_ENTRY(98b, 99b) /* Both these macros have to start with exactly the same insn */ +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd [%src + (offset) + 0x18], %t6; \ - st %t0, [%dst + (offset) + 0x00]; \ - st %t1, [%dst + (offset) + 0x04]; \ - st %t2, [%dst + (offset) + 0x08]; \ - st %t3, [%dst + (offset) + 0x0c]; \ - st %t4, [%dst + (offset) + 0x10]; \ - st %t5, [%dst + (offset) + 0x14]; \ - st %t6, [%dst + (offset) + 0x18]; \ - st %t7, [%dst + (offset) + 0x1c]; - + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \ + ST(st, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(st, dst, offset + 0x04, t1, bigchunk_fault) \ + ST(st, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(st, dst, offset + 0x0c, t3, bigchunk_fault) \ + ST(st, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(st, dst, offset + 0x14, t5, bigchunk_fault) \ + ST(st, dst, offset + 0x18, t6, bigchunk_fault) \ + ST(st, dst, offset + 0x1c, t7, bigchunk_fault) + +/* left: g7 + (g1 % 128) - offset */ #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + (offset) + 0x00], %t0; \ - ldd [%src + (offset) + 0x08], %t2; \ - ldd [%src + (offset) + 0x10], %t4; \ - ldd [%src + (offset) + 0x18], %t6; \ - std %t0, [%dst + (offset) + 0x00]; \ - std %t2, [%dst + (offset) + 0x08]; \ - std %t4, [%dst + (offset) + 0x10]; \ - std %t6, [%dst + (offset) + 0x18]; + LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \ + LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \ + LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \ + LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \ + ST(std, dst, offset + 0x00, t0, bigchunk_fault) \ + ST(std, dst, offset + 0x08, t2, bigchunk_fault) \ + ST(std, dst, offset + 0x10, t4, bigchunk_fault) \ + ST(std, dst, offset + 0x18, t6, bigchunk_fault) + .section .fixup,#alloc,#execinstr +bigchunk_fault: + sub %g7, %g5, %o0 + and %g1, 127, %g1 + retl + add %o0, %g1, %o0 + +/* left: offset + 16 + (g1 % 16) */ #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src - (offset) - 0x10], %t0; \ - ldd [%src - (offset) - 0x08], %t2; \ - st %t0, [%dst - (offset) - 0x10]; \ - st %t1, [%dst - (offset) - 0x0c]; \ - st %t2, [%dst - (offset) - 0x08]; \ - st %t3, [%dst - (offset) - 0x04]; + LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault) \ + LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x10), t0, lastchunk_fault) \ + ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault) \ + ST(st, dst, -(offset + 0x08), t2, lastchunk_fault) \ + ST(st, dst, -(offset + 0x04), t3, lastchunk_fault) -#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduh [%src + (offset) + 0x00], %t0; \ - lduh [%src + (offset) + 0x02], %t1; \ - lduh [%src + (offset) + 0x04], %t2; \ - lduh [%src + (offset) + 0x06], %t3; \ - sth %t0, [%dst + (offset) + 0x00]; \ - sth %t1, [%dst + (offset) + 0x02]; \ - sth %t2, [%dst + (offset) + 0x04]; \ - sth %t3, [%dst + (offset) + 0x06]; + .section .fixup,#alloc,#execinstr +lastchunk_fault: + and %g1, 15, %g1 + retl + sub %g1, %g5, %o0 +/* left: o3 + (o2 % 16) - offset */ +#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ + LD(lduh, src, offset + 0x00, t0, halfchunk_fault) \ + LD(lduh, src, offset + 0x02, t1, halfchunk_fault) \ + LD(lduh, src, offset + 0x04, t2, halfchunk_fault) \ + LD(lduh, src, offset + 0x06, t3, halfchunk_fault) \ + ST(sth, dst, offset + 0x00, t0, halfchunk_fault) \ + ST(sth, dst, offset + 0x02, t1, halfchunk_fault) \ + ST(sth, dst, offset + 0x04, t2, halfchunk_fault) \ + ST(sth, dst, offset + 0x06, t3, halfchunk_fault) + +/* left: o3 + (o2 % 16) + offset + 2 */ #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src - (offset) - 0x02], %t0; \ - ldub [%src - (offset) - 0x01], %t1; \ - stb %t0, [%dst - (offset) - 0x02]; \ - stb %t1, [%dst - (offset) - 0x01]; + LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault) \ + ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault) + + .section .fixup,#alloc,#execinstr +halfchunk_fault: + and %o2, 15, %o2 + sub %o3, %g5, %o3 + retl + add %o2, %o3, %o0 + +/* left: offset + 2 + (o2 % 2) */ +#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \ + LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault) \ + LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault) \ + ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault) + + .section .fixup,#alloc,#execinstr +last_shortchunk_fault: + and %o2, 1, %o2 + retl + sub %o2, %g5, %o0 .text .align 4 @@ -182,8 +218,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -80: - EXT(5b, 80b, 50f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne 5b @@ -201,7 +235,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */ jmpl %o5 + %lo(copy_user_table_end), %g0 add %o0, %g7, %o0 -copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) @@ -210,7 +243,6 @@ copy_user_table: MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) copy_user_table_end: - EXT(copy_user_table, copy_user_table_end, 51f) be copy_user_last7 andcc %g1, 4, %g0 @@ -250,8 +282,6 @@ ldd_std: MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -81: - EXT(ldd_std, 81b, 52f) subcc %g7, 128, %g7 add %o1, 128, %o1 bne ldd_std @@ -290,8 +320,6 @@ cannot_optimize: 10: MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5) MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5) -82: - EXT(10b, 82b, 53f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne 10b @@ -308,8 +336,6 @@ byte_chunk: MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3) MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3) -83: - EXT(byte_chunk, 83b, 54f) subcc %o3, 0x10, %o3 add %o1, 0x10, %o1 bne byte_chunk @@ -325,16 +351,14 @@ short_end: add %o1, %o3, %o1 jmpl %o5 + %lo(short_table_end), %g0 andcc %o2, 1, %g0 -84: - MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3) + MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3) short_table_end: - EXT(84b, short_table_end, 55f) be 1f nop EX(ldub [%o1], %g2, add %g0, 1) @@ -363,123 +387,8 @@ short_aligned_end: .section .fixup,#alloc,#execinstr .align 4 97: - mov %o2, %g3 -fixupretl: retl - mov %g3, %o0 - -/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */ -50: -/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK - * happens. This is derived from the amount ldd reads, st stores, etc. - * x = g2 % 12; - * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4); - * o0 += (g2 / 12) * 32; - */ - cmp %g2, 12 - add %o0, %g7, %o0 - bcs 1f - cmp %g2, 24 - bcs 2f - cmp %g2, 36 - bcs 3f - nop - sub %g2, 12, %g2 - sub %g7, 32, %g7 -3: sub %g2, 12, %g2 - sub %g7, 32, %g7 -2: sub %g2, 12, %g2 - sub %g7, 32, %g7 -1: cmp %g2, 4 - bcs,a 60f - clr %g2 - sub %g2, 4, %g2 - sll %g2, 2, %g2 -60: and %g1, 0x7f, %g3 - sub %o0, %g7, %o0 - add %g3, %g7, %g3 - ba fixupretl - sub %g3, %g2, %g3 -51: -/* i = 41 - g2; j = i % 6; - * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16; - * o0 -= (i / 6) * 16 + 16; - */ - neg %g2 - and %g1, 0xf, %g1 - add %g2, 41, %g2 - add %o0, %g1, %o0 -1: cmp %g2, 6 - bcs,a 2f - cmp %g2, 4 - add %g1, 16, %g1 - b 1b - sub %g2, 6, %g2 -2: bcc,a 2f - mov 16, %g2 - inc %g2 - sll %g2, 2, %g2 -2: add %g1, %g2, %g3 - ba fixupretl - sub %o0, %g3, %o0 -52: -/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0; - o0 += (g2 / 8) * 32 */ - andn %g2, 7, %g4 - add %o0, %g7, %o0 - andcc %g2, 4, %g0 - and %g2, 3, %g2 - sll %g4, 2, %g4 - sll %g2, 3, %g2 - bne 60b - sub %g7, %g4, %g7 - ba 60b - clr %g2 -53: -/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0; - o0 += (g2 & 8) */ - and %g2, 3, %g4 - andcc %g2, 4, %g0 - and %g2, 8, %g2 - sll %g4, 1, %g4 - be 1f - add %o0, %g2, %o0 - add %g2, %g4, %g2 -1: and %o2, 0xf, %g3 - add %g3, %o3, %g3 - ba fixupretl - sub %g3, %g2, %g3 -54: -/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0; - o0 += (g2 / 4) * 2 */ - srl %g2, 2, %o4 - and %g2, 1, %o5 - srl %g2, 1, %g2 - add %o4, %o4, %o4 - and %o5, %g2, %o5 - and %o2, 0xf, %o2 - add %o0, %o4, %o0 - sub %o3, %o5, %o3 - sub %o2, %o4, %o2 - ba fixupretl - add %o2, %o3, %g3 -55: -/* i = 27 - g2; - g3 = (o2 & 1) + i / 4 * 2 + !(i & 3); - o0 -= i / 4 * 2 + 1 */ - neg %g2 - and %o2, 1, %o2 - add %g2, 27, %g2 - srl %g2, 2, %o5 - andcc %g2, 3, %g0 - mov 1, %g2 - add %o5, %o5, %o5 - be,a 1f - clr %g2 -1: add %g2, %o5, %g3 - sub %o0, %g3, %o0 - ba fixupretl - add %g3, %o2, %g3 + mov %o2, %o0 .globl __copy_user_end __copy_user_end: -- GitLab From b4edf06c8aaae30ef926bd6853df6e59a7579ee9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jul 2020 14:05:36 -0400 Subject: [PATCH 006/839] sparc32: switch to generic extables Signed-off-by: Al Viro --- arch/sparc/include/asm/elf_64.h | 1 - .../include/asm/{extable_64.h => extable.h} | 4 +- arch/sparc/include/asm/uaccess.h | 3 + arch/sparc/include/asm/uaccess_32.h | 33 ------ arch/sparc/include/asm/uaccess_64.h | 1 - arch/sparc/kernel/unaligned_32.c | 10 +- arch/sparc/mm/Makefile | 2 +- arch/sparc/mm/extable.c | 107 ------------------ arch/sparc/mm/fault_32.c | 26 ++--- lib/extable.c | 5 - 10 files changed, 22 insertions(+), 170 deletions(-) rename arch/sparc/include/asm/{extable_64.h => extable.h} (92%) delete mode 100644 arch/sparc/mm/extable.c diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7e078bc73ef56..8fb09eec8c3e7 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable.h similarity index 92% rename from arch/sparc/include/asm/extable_64.h rename to arch/sparc/include/asm/extable.h index 5a0171907b7e5..554a9dc376fc8 100644 --- a/arch/sparc/include/asm/extable_64.h +++ b/arch/sparc/include/asm/extable.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_EXTABLE64_H -#define __ASM_EXTABLE64_H +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h index dd85bc2c2cad5..390094200fc44 100644 --- a/arch/sparc/include/asm/uaccess.h +++ b/arch/sparc/include/asm/uaccess.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_UACCESS_H #define ___ASM_SPARC_UACCESS_H + +#include + #if defined(__sparc__) && defined(__arch64__) #include #else diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 54a3ba7426476..4a12346bb69c3 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -13,9 +13,6 @@ #include -#define ARCH_HAS_SORT_EXTABLE -#define ARCH_HAS_SEARCH_EXTABLE - /* Sparc is not segmented, however we need to be able to fool access_ok() * when doing system calls from kernel mode legitimately. * @@ -40,36 +37,6 @@ #define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size))) #define access_ok(addr, size) __access_ok((unsigned long)(addr), size) -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - * - * There is a special way how to put a range of potentially faulting - * insns (like twenty ldd/std's with now intervening other instructions) - * You specify address of first in insn and 0 in fixup and in the next - * exception_table_entry you specify last potentially faulting insn + 1 - * and in fixup the routine which should handle the fault. - * That fixup code will get - * (faulting_insn_address - first_insn_in_the_range_address)/4 - * in %g2 (ie. index of the faulting instruction in the range). - */ - -struct exception_table_entry -{ - unsigned long insn, fixup; -}; - -/* Returns 0 if exception not found and fixup otherwise. */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2); - /* Uh, these should become the main single-value transfer routines.. * They automatically use the right size if we just have the right * pointer type.. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 698cf69f74e99..30eb4c6414d1b 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -10,7 +10,6 @@ #include #include #include -#include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 83db94c0b4318..1d7c3eaabdd4c 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -213,10 +214,10 @@ static inline int ok_for_kernel(unsigned int insn) static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) { - unsigned long g2 = regs->u_regs [UREG_G2]; - unsigned long fixup = search_extables_range(regs->pc, &g2); + const struct exception_table_entry *entry; - if (!fixup) { + entry = search_exception_tables(regs->pc); + if (!entry) { unsigned long address = compute_effective_address(regs, insn); if(address < PAGE_SIZE) { printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler"); @@ -232,9 +233,8 @@ static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn) die_if_kernel("Oops", regs); /* Not reached */ } - regs->pc = fixup; + regs->pc = entry->fixup; regs->npc = regs->pc + 4; - regs->u_regs [UREG_G2] = g2; } asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 68db1f859b028..871354aa3c002 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -8,7 +8,7 @@ ccflags-y := -Werror obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o -obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o +obj-$(CONFIG_SPARC32) += srmmu.o iommu.o io-unit.o obj-$(CONFIG_SPARC32) += srmmu_access.o obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o obj-$(CONFIG_SPARC32) += leon_mm.o diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c deleted file mode 100644 index 241b406418733..0000000000000 --- a/arch/sparc/mm/extable.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/sparc/mm/extable.c - */ - -#include -#include -#include - -void sort_extable(struct exception_table_entry *start, - struct exception_table_entry *finish) -{ -} - -/* Caller knows they are in a range if ret->fixup == 0 */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *base, - const size_t num, - unsigned long value) -{ - int i; - - /* Single insn entries are encoded as: - * word 1: insn address - * word 2: fixup code address - * - * Range entries are encoded as: - * word 1: first insn address - * word 2: 0 - * word 3: last insn address + 4 bytes - * word 4: fixup code address - * - * Deleted entries are encoded as: - * word 1: unused - * word 2: -1 - * - * See asm/uaccess.h for more details. - */ - - /* 1. Try to find an exact match. */ - for (i = 0; i < num; i++) { - if (base[i].fixup == 0) { - /* A range entry, skip both parts. */ - i++; - continue; - } - - /* A deleted entry; see trim_init_extable */ - if (base[i].fixup == -1) - continue; - - if (base[i].insn == value) - return &base[i]; - } - - /* 2. Try to find a range match. */ - for (i = 0; i < (num - 1); i++) { - if (base[i].fixup) - continue; - - if (base[i].insn <= value && base[i + 1].insn > value) - return &base[i]; - - i++; - } - - return NULL; -} - -#ifdef CONFIG_MODULES -/* We could memmove them around; easier to mark the trimmed ones. */ -void trim_init_extable(struct module *m) -{ - unsigned int i; - bool range; - - for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { - range = m->extable[i].fixup == 0; - - if (within_module_init(m->extable[i].insn, m)) { - m->extable[i].fixup = -1; - if (range) - m->extable[i+1].fixup = -1; - } - if (range) - i++; - } -} -#endif /* CONFIG_MODULES */ - -/* Special extable search, which handles ranges. Returns fixup */ -unsigned long search_extables_range(unsigned long addr, unsigned long *g2) -{ - const struct exception_table_entry *entry; - - entry = search_exception_tables(addr); - if (!entry) - return 0; - - /* Inside range? Fix g2 and return correct fixup */ - if (!entry->fixup) { - *g2 = (addr - entry->insn) / 4; - return (entry + 1)->fixup; - } - - return entry->fixup; -} diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 290869fd6b6ad..de2031c2b2d79 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -114,8 +115,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, struct vm_area_struct *vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - unsigned int fixup; - unsigned long g2; int from_user = !(regs->psr & PSR_PS); int code; vm_fault_t fault; @@ -233,22 +232,19 @@ bad_area_nosemaphore: /* Is this in ex_table? */ no_context: - g2 = regs->u_regs[UREG_G2]; if (!from_user) { - fixup = search_extables_range(regs->pc, &g2); - /* Values below 10 are reserved for other things */ - if (fixup > 10) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->pc); #ifdef DEBUG_EXCEPTIONS - printk("Exception: PC<%08lx> faddr<%08lx>\n", - regs->pc, address); - printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", - regs->pc, fixup, g2); + printk("Exception: PC<%08lx> faddr<%08lx>\n", + regs->pc, address); + printk("EX_TABLE: insn<%08lx> fixup<%08x>\n", + regs->pc, entry->fixup); #endif - regs->u_regs[UREG_G2] = g2; - regs->pc = fixup; - regs->npc = regs->pc + 4; - return; - } + regs->pc = entry->fixup; + regs->npc = regs->pc + 4; + return; } unhandled_fault(address, tsk, regs); diff --git a/lib/extable.c b/lib/extable.c index c3e59caf7ffa1..9c9f40bd2b3d9 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -21,7 +21,6 @@ static inline unsigned long ex_to_insn(const struct exception_table_entry *x) } #endif -#ifndef ARCH_HAS_SORT_EXTABLE #ifndef ARCH_HAS_RELATIVE_EXTABLE #define swap_ex NULL #else @@ -88,9 +87,6 @@ void trim_init_extable(struct module *m) m->num_exentries--; } #endif /* CONFIG_MODULES */ -#endif /* !ARCH_HAS_SORT_EXTABLE */ - -#ifndef ARCH_HAS_SEARCH_EXTABLE static int cmp_ex_search(const void *key, const void *elt) { @@ -120,4 +116,3 @@ search_extable(const struct exception_table_entry *base, return bsearch(&value, base, num, sizeof(struct exception_table_entry), cmp_ex_search); } -#endif -- GitLab From d17b9ec777d86c590a77a404565be5d6005f2fe2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Aug 2020 17:50:40 -0400 Subject: [PATCH 007/839] sparc64: get rid of fake_swapper_regs no reason to have ->kregs of initial thread set up in a special way - we can keep them on stack, same as for every other thread. Signed-off-by: Al Viro --- arch/sparc/include/asm/thread_info_64.h | 1 + arch/sparc/kernel/head_64.S | 2 +- arch/sparc/kernel/setup_64.c | 4 ---- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 42cd4cd3892e3..8047a9caab2fc 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -118,6 +118,7 @@ struct thread_info { .task = &tsk, \ .current_ds = ASI_P, \ .preempt_count = INIT_PREEMPT_COUNT, \ + .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } /* how to get the thread information struct from C */ diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index c5ff2472b3d9d..72a5bdc833ea2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -706,7 +706,7 @@ tlb_fixup_done: wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 - sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 + sub %g1, (STACKFRAME_SZ + STACK_BIAS + TRACEREG_SZ), %g1 add %g6, %g1, %sp /* Set per-cpu pointer initially to zero, this makes diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index d87244197d5cb..48abee4eee29d 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -165,8 +165,6 @@ extern int root_mountflags; char reboot_command[COMMAND_LINE_SIZE]; -static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; - static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; @@ -661,8 +659,6 @@ void __init setup_arch(char **cmdline_p) rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; #endif - task_thread_info(&init_task)->kregs = &fake_swapper_regs; - #ifdef CONFIG_IP_PNP if (!ic_set_manually) { phandle chosen = prom_finddevice("/chosen"); -- GitLab From af7652500b4c43643a8531b82974e97b1248a03a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Aug 2020 18:05:02 -0400 Subject: [PATCH 008/839] sparc32: get rid of fake_swapper_regs no reason to have ->kregs of initial thread set up in a special way - we can keep them on stack, same as for every other thread. Signed-off-by: Al Viro --- arch/sparc/include/asm/processor_32.h | 1 + arch/sparc/kernel/head_32.S | 2 +- arch/sparc/kernel/process_32.c | 9 +-------- arch/sparc/kernel/setup_32.c | 3 --- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 3c4bc2189092d..d7b71d7bed1fc 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -60,6 +60,7 @@ struct thread_struct { #define INIT_THREAD { \ .flags = SPARC_FLAG_KTHREAD, \ .current_ds = KERNEL_DS, \ + .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } /* Do necessary setup to start up a newly executed thread. */ diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index be30c8d4cc737..6044b82b97675 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -515,7 +515,7 @@ continue_boot: /* I want a kernel stack NOW! */ set init_thread_union, %g1 - set (THREAD_SIZE - STACKFRAME_SZ), %g2 + set (THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ), %g2 add %g1, %g2, %sp mov 0, %fp /* And for good luck */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index a023637359154..97549e01f5401 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -218,14 +218,7 @@ void flush_thread(void) } /* This task is no longer a kernel thread. */ - if (current->thread.flags & SPARC_FLAG_KTHREAD) { - current->thread.flags &= ~SPARC_FLAG_KTHREAD; - - /* We must fixup kregs as well. */ - /* XXX This was not fixed for ti for a while, worked. Unused? */ - current->thread.kregs = (struct pt_regs *) - (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ)); - } + current->thread.flags &= ~SPARC_FLAG_KTHREAD; } static inline struct sparc_stackf __user * diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index eea43a1aef1b9..c8e0dd99f3700 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -266,7 +266,6 @@ static __init void leon_patch(void) } struct tt_entry *sparc_ttable; -static struct pt_regs fake_swapper_regs; /* Called from head_32.S - before we have setup anything * in the kernel. Be very careful with what you do here. @@ -363,8 +362,6 @@ void __init setup_arch(char **cmdline_p) (*(linux_dbvec->teach_debugger))(); } - init_task.thread.kregs = &fake_swapper_regs; - /* Run-time patch instructions to match the cpu model */ per_cpu_patch(); -- GitLab From 415ddc3b105616d6a4fec279ed7d87841cbfa3fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Aug 2020 10:55:25 -0400 Subject: [PATCH 009/839] sparc32: take ->thread.flags out it was used for two things - dealing with unusual ->kregs for kernel threads and "emulate unaligned userland accesses for this thread"; the former is killed off by the previous commit, the latter never had been set in the first place. Signed-off-by: Al Viro --- arch/sparc/include/asm/processor_32.h | 5 -- arch/sparc/kernel/process_32.c | 5 -- arch/sparc/kernel/unaligned_32.c | 96 +-------------------------- 3 files changed, 1 insertion(+), 105 deletions(-) diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index d7b71d7bed1fc..b6242f7771e9e 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -50,15 +50,10 @@ struct thread_struct { unsigned long fsr; unsigned long fpqdepth; struct fpq fpqueue[16]; - unsigned long flags; mm_segment_t current_ds; }; -#define SPARC_FLAG_KTHREAD 0x1 /* task is a kernel thread */ -#define SPARC_FLAG_UNALIGNED 0x2 /* is allowed to do unaligned accesses */ - #define INIT_THREAD { \ - .flags = SPARC_FLAG_KTHREAD, \ .current_ds = KERNEL_DS, \ .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \ } diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 97549e01f5401..f75caecff115c 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -216,9 +216,6 @@ void flush_thread(void) clear_thread_flag(TIF_USEDFPU); #endif } - - /* This task is no longer a kernel thread. */ - current->thread.flags &= ~SPARC_FLAG_KTHREAD; } static inline struct sparc_stackf __user * @@ -306,7 +303,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, extern int nwindows; unsigned long psr; memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ); - p->thread.flags |= SPARC_FLAG_KTHREAD; p->thread.current_ds = KERNEL_DS; ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8); childregs->u_regs[UREG_G1] = sp; /* function */ @@ -318,7 +314,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, } memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ); childregs->u_regs[UREG_FP] = sp; - p->thread.flags &= ~SPARC_FLAG_KTHREAD; p->thread.current_ds = USER_DS; ti->kpc = (((unsigned long) ret_from_fork) - 0x8); ti->kpsr = current->thread.fork_kpsr | PSR_PIL; diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 83db94c0b4318..82b60d09ea3cc 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -274,103 +274,9 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) } } -static inline int ok_for_user(struct pt_regs *regs, unsigned int insn, - enum direction dir) -{ - unsigned int reg; - int size = ((insn >> 19) & 3) == 3 ? 8 : 4; - - if ((regs->pc | regs->npc) & 3) - return 0; - - /* Must access_ok() in all the necessary places. */ -#define WINREG_ADDR(regnum) \ - ((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum))) - - reg = (insn >> 25) & 0x1f; - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - reg = (insn >> 14) & 0x1f; - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - if (!(insn & 0x2000)) { - reg = (insn & 0x1f); - if (reg >= 16) { - if (!access_ok(WINREG_ADDR(reg - 16), size)) - return -EFAULT; - } - } -#undef WINREG_ADDR - return 0; -} - -static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn) +asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) { send_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)safe_compute_effective_address(regs, insn), 0, current); } - -asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) -{ - enum direction dir; - - if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) || - (((insn >> 30) & 3) != 3)) - goto kill_user; - dir = decode_direction(insn); - if(!ok_for_user(regs, insn, dir)) { - goto kill_user; - } else { - int err, size = decode_access_size(insn); - unsigned long addr; - - if(floating_point_load_or_store_p(insn)) { - printk("User FPU load/store unaligned unsupported.\n"); - goto kill_user; - } - - addr = compute_effective_address(regs, insn); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); - switch(dir) { - case load: - err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), - regs), - size, (unsigned long *) addr, - decode_signedness(insn)); - break; - - case store: - err = do_int_store(((insn>>25)&0x1f), size, - (unsigned long *) addr, regs); - break; - - case both: - /* - * This was supported in 2.4. However, we question - * the value of SWAP instruction across word boundaries. - */ - printk("Unaligned SWAP unsupported.\n"); - err = -EFAULT; - break; - - default: - unaligned_panic("Impossible user unaligned trap."); - goto out; - } - if (err) - goto kill_user; - else - advance(regs); - goto out; - } - -kill_user: - user_mna_trap_fault(regs, insn); -out: - ; -} -- GitLab From 7d200b283aa049fcda0d43dd6e03e9e783d2799c Mon Sep 17 00:00:00 2001 From: Jonathan Albrieux Date: Wed, 13 Jan 2021 16:18:07 +0100 Subject: [PATCH 010/839] iio:adc:qcom-spmi-vadc: add default scale to LR_MUX2_BAT_ID channel Checking at both msm8909-pm8916.dtsi and msm8916.dtsi from downstream it is indicated that "batt_id" channel has to be scaled with the default function: chan@31 { label = "batt_id"; reg = <0x31>; qcom,decimation = <0>; qcom,pre-div-channel-scaling = <0>; qcom,calibration-type = "ratiometric"; qcom,scale-function = <0>; qcom,hw-settle-time = <0xb>; qcom,fast-avg-setup = <0>; }; Change LR_MUX2_BAT_ID scaling accordingly. Signed-off-by: Jonathan Albrieux Acked-by: Bjorn Andersson Fixes: 7c271eea7b8a ("iio: adc: spmi-vadc: Changes to support different scaling") Link: https://lore.kernel.org/r/20210113151808.4628-2-jonathan.albrieux@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-vadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-vadc.c b/drivers/iio/adc/qcom-spmi-vadc.c index b0388f8a69f42..7e7d408452eca 100644 --- a/drivers/iio/adc/qcom-spmi-vadc.c +++ b/drivers/iio/adc/qcom-spmi-vadc.c @@ -598,7 +598,7 @@ static const struct vadc_channels vadc_chans[] = { VADC_CHAN_NO_SCALE(P_MUX16_1_3, 1) VADC_CHAN_NO_SCALE(LR_MUX1_BAT_THERM, 0) - VADC_CHAN_NO_SCALE(LR_MUX2_BAT_ID, 0) + VADC_CHAN_VOLT(LR_MUX2_BAT_ID, 0, SCALE_DEFAULT) VADC_CHAN_NO_SCALE(LR_MUX3_XO_THERM, 0) VADC_CHAN_NO_SCALE(LR_MUX4_AMUX_THM1, 0) VADC_CHAN_NO_SCALE(LR_MUX5_AMUX_THM2, 0) -- GitLab From d68c592e02f6f49a88e705f13dfc1883432cf300 Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Sat, 30 Jan 2021 18:25:30 +0800 Subject: [PATCH 011/839] iio: hid-sensor-prox: Fix scale not correct issue Currently, the proxy sensor scale is zero because it just return the exponent directly. To fix this issue, this patch use hid_sensor_format_scale to process the scale first then return the output. Fixes: 39a3a0138f61 ("iio: hid-sensors: Added Proximity Sensor Driver") Signed-off-by: Ye Xiang Link: https://lore.kernel.org/r/20210130102530.31064-1-xiang.ye@intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 330cf359e0b81..e9e00ce0c6d4d 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -23,6 +23,9 @@ struct prox_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info prox_attr; u32 human_presence; + int scale_pre_decml; + int scale_post_decml; + int scale_precision; }; /* Channel definitions */ @@ -93,8 +96,9 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->prox_attr.units; - ret_type = IIO_VAL_INT; + *val = prox_state->scale_pre_decml; + *val2 = prox_state->scale_post_decml; + ret_type = prox_state->scale_precision; break; case IIO_CHAN_INFO_OFFSET: *val = hid_sensor_convert_exponent( @@ -234,6 +238,11 @@ static int prox_parse_report(struct platform_device *pdev, HID_USAGE_SENSOR_HUMAN_PRESENCE, &st->common_attributes.sensitivity); + st->scale_precision = hid_sensor_format_scale( + hsdev->usage, + &st->prox_attr, + &st->scale_pre_decml, &st->scale_post_decml); + return ret; } -- GitLab From f890987fac8153227258121740a9609668c427f3 Mon Sep 17 00:00:00 2001 From: Wilfried Wessner Date: Mon, 8 Feb 2021 15:27:05 +0100 Subject: [PATCH 012/839] iio: adc: ad7949: fix wrong ADC result due to incorrect bit mask Fixes a wrong bit mask used for the ADC's result, which was caused by an improper usage of the GENMASK() macro. The bits higher than ADC's resolution are undefined and if not masked out correctly, a wrong result can be given. The GENMASK() macro indexing is zero based, so the mask has to go from [resolution - 1 , 0]. Fixes: 7f40e0614317f ("iio:adc:ad7949: Add AD7949 ADC driver family") Signed-off-by: Wilfried Wessner Reviewed-by: Andy Shevchenko Reviewed-by: Charles-Antoine Couret Cc: Link: https://lore.kernel.org/r/20210208142705.GA51260@ubuntu Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7949.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c index 5d597e5050f68..1b4b3203e4285 100644 --- a/drivers/iio/adc/ad7949.c +++ b/drivers/iio/adc/ad7949.c @@ -91,7 +91,7 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val, int ret; int i; int bits_per_word = ad7949_adc->resolution; - int mask = GENMASK(ad7949_adc->resolution, 0); + int mask = GENMASK(ad7949_adc->resolution - 1, 0); struct spi_message msg; struct spi_transfer tx[] = { { -- GitLab From be24c65e9fa2486bb8ec98d9f592bdcf04bedd88 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 10 Feb 2021 12:50:44 +0200 Subject: [PATCH 013/839] iio: adc: adi-axi-adc: add proper Kconfig dependencies The ADI AXI ADC driver requires IO mem access and OF to work. This change adds these dependencies to the Kconfig symbol of the driver. This was also found via the lkp bot, as the devm_platform_ioremap_resource() symbol was not found at link-time on the S390 architecture. Fixes: ef04070692a21 ("iio: adc: adi-axi-adc: add support for AXI ADC IP core") Reported-by: kernel test robot Signed-off-by: Alexandru Ardelean Cc: Link: https://lore.kernel.org/r/20210210105044.48914-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 15587a1bc80d0..8d0be5b3029af 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -266,6 +266,8 @@ config ADI_AXI_ADC select IIO_BUFFER select IIO_BUFFER_HW_CONSUMER select IIO_BUFFER_DMAENGINE + depends on HAS_IOMEM + depends on OF help Say yes here to build support for Analog Devices Generic AXI ADC IP core. The IP core is used for interfacing with -- GitLab From 3b9ea7206d7e1fdd7419cbd10badd3b2c80d04b4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 14 Feb 2021 19:49:11 +0100 Subject: [PATCH 014/839] ath9k: fix transmitting to stations in dynamic SMPS mode When transmitting to a receiver in dynamic SMPS mode, all transmissions that use multiple spatial streams need to be sent using CTS-to-self or RTS/CTS to give the receiver's extra chains some time to wake up. This fixes the tx rate getting stuck at <= MCS7 for some clients, especially Intel ones, which make aggressive use of SMPS. Cc: stable@vger.kernel.org Reported-by: Martin Kennedy Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210214184911.96702-1-nbd@nbd.name --- drivers/net/wireless/ath/ath9k/ath9k.h | 3 ++- drivers/net/wireless/ath/ath9k/xmit.c | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 13b4f5f50f8aa..ef6f5ea06c1f5 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -177,7 +177,8 @@ struct ath_frame_info { s8 txq; u8 keyix; u8 rtscts_rate; - u8 retries : 7; + u8 retries : 6; + u8 dyn_smps : 1; u8 baw_tracked : 1; u8 tx_power; enum ath9k_key_type keytype:2; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index e60d4737fc6e4..5691bd6eb82c2 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1271,6 +1271,11 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf, is_40, is_sgi, is_sp); if (rix < 8 && (tx_info->flags & IEEE80211_TX_CTL_STBC)) info->rates[i].RateFlags |= ATH9K_RATESERIES_STBC; + if (rix >= 8 && fi->dyn_smps) { + info->rates[i].RateFlags |= + ATH9K_RATESERIES_RTS_CTS; + info->flags |= ATH9K_TXDESC_CTSENA; + } info->txpower[i] = ath_get_rate_txpower(sc, bf, rix, is_40, false); @@ -2114,6 +2119,7 @@ static void setup_frame_info(struct ieee80211_hw *hw, fi->keyix = an->ps_key; else fi->keyix = ATH9K_TXKEYIX_INVALID; + fi->dyn_smps = sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC; fi->keytype = keytype; fi->framelen = framelen; fi->tx_power = txpower; -- GitLab From ae064fc0e32a4d28389086d9f4b260a0c157cfee Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Feb 2021 14:51:18 +0100 Subject: [PATCH 015/839] mt76: fix tx skb error handling in mt76_dma_tx_queue_skb When running out of room in the tx queue after calling drv->tx_prepare_skb, the buffer list will already have been modified on MT7615 and newer drivers. This can leak a DMA mapping and will show up as swiotlb allocation failures on x86. Fix this by moving the queue length check further up. This is less accurate, since it can overestimate the needed room in the queue on MT7615 and newer, but the difference is small enough to not matter in practice. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210216135119.23809-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/dma.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 19098b852d0a8..abdc8d364361c 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -345,7 +345,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, }; struct ieee80211_hw *hw; int len, n = 0, ret = -ENOMEM; - struct mt76_queue_entry e; struct mt76_txwi_cache *t; struct sk_buff *iter; dma_addr_t addr; @@ -387,6 +386,11 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, } tx_info.nbuf = n; + if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) { + ret = -ENOMEM; + goto unmap; + } + dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size, DMA_TO_DEVICE); ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info); @@ -395,11 +399,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, if (ret < 0) goto unmap; - if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) { - ret = -ENOMEM; - goto unmap; - } - return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf, tx_info.info, tx_info.skb, t); @@ -419,9 +418,7 @@ free: } #endif - e.skb = tx_info.skb; - e.txwi = t; - dev->drv->tx_complete_skb(dev, &e); + dev_kfree_skb(tx_info.skb); mt76_put_txwi(dev, t); return ret; } -- GitLab From 94f0e6256c2ab6803c935634aa1f653174c94879 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 16 Feb 2021 14:51:19 +0100 Subject: [PATCH 016/839] mt76: mt7915: only modify tx buffer list after allocating tx token id Modifying the tx buffer list too early can leak DMA mappings Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210216135119.23809-2-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index eb889f8d6feaa..e5a258958ac91 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -967,11 +967,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } txp->nbuf = nbuf; - /* pass partial skb header to fw */ - tx_info->buf[1].len = MT_CT_PARSE_LEN; - tx_info->buf[1].skip_unmap = true; - tx_info->nbuf = MT_CT_DMA_BUF_NUM; - txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD | MT_CT_INFO_FROM_HOST); if (!key) @@ -1009,6 +1004,11 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, txp->rept_wds_wcid = cpu_to_le16(0x3ff); tx_info->skb = DMA_DUMMY_DATA; + /* pass partial skb header to fw */ + tx_info->buf[1].len = MT_CT_PARSE_LEN; + tx_info->buf[1].skip_unmap = true; + tx_info->nbuf = MT_CT_DMA_BUF_NUM; + return 0; } -- GitLab From a71266e454b5df10d019b06f5ebacd579f76be28 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Feb 2021 22:42:13 +0300 Subject: [PATCH 017/839] iio: adis16400: Fix an error code in adis16400_initial_setup() This is to silence a new Smatch warning: drivers/iio/imu/adis16400.c:492 adis16400_initial_setup() warn: sscanf doesn't return error codes If the condition "if (st->variant->flags & ADIS16400_HAS_SLOW_MODE) {" is false then we return 1 instead of returning 0 and probe will fail. Fixes: 72a868b38bdd ("iio: imu: check sscanf return value") Signed-off-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/YCwgFb3JVG6qrlQ+@mwanda Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16400.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c index 54af2ed664f6f..785a4ce606d89 100644 --- a/drivers/iio/imu/adis16400.c +++ b/drivers/iio/imu/adis16400.c @@ -462,8 +462,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev) if (ret) goto err_ret; - ret = sscanf(indio_dev->name, "adis%u\n", &device_id); - if (ret != 1) { + if (sscanf(indio_dev->name, "adis%u\n", &device_id) != 1) { ret = -EINVAL; goto err_ret; } -- GitLab From 121875b28e3bd7519a675bf8ea2c2e793452c2bd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 24 Jan 2021 19:50:34 +0000 Subject: [PATCH 018/839] iio:adc:stm32-adc: Add HAS_IOMEM dependency Seems that there are config combinations in which this driver gets enabled and hence selects the MFD, but with out HAS_IOMEM getting pulled in via some other route. MFD is entirely contained in an if HAS_IOMEM block, leading to the build issue in this bugzilla. https://bugzilla.kernel.org/show_bug.cgi?id=209889 Cc: Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20210124195034.22576-1-jic23@kernel.org --- drivers/iio/adc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 8d0be5b3029af..be1f73166a32b 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -925,6 +925,7 @@ config STM32_ADC_CORE depends on ARCH_STM32 || COMPILE_TEST depends on OF depends on REGULATOR + depends on HAS_IOMEM select IIO_BUFFER select MFD_STM32_TIMERS select IIO_STM32_TIMER_TRIGGER -- GitLab From 4f5434086d9223f20b3128a7dc78b35271e76655 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Dec 2020 02:17:00 +0100 Subject: [PATCH 019/839] iio: adc: ab8500-gpadc: Fix off by 10 to 3 Fix an off by three orders of magnitude error in the AB8500 GPADC driver. Luckily it showed up quite quickly when trying to make use of it. The processed reads were returning microvolts, microamperes and microcelsius instead of millivolts, milliamperes and millicelsius as advertised. Cc: stable@vger.kernel.org Fixes: 07063bbfa98e ("iio: adc: New driver for the AB8500 GPADC") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20201224011700.1059659-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ab8500-gpadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ab8500-gpadc.c b/drivers/iio/adc/ab8500-gpadc.c index 1bb987a4acbab..8d81505282dd3 100644 --- a/drivers/iio/adc/ab8500-gpadc.c +++ b/drivers/iio/adc/ab8500-gpadc.c @@ -918,7 +918,7 @@ static int ab8500_gpadc_read_raw(struct iio_dev *indio_dev, return processed; /* Return millivolt or milliamps or millicentigrades */ - *val = processed * 1000; + *val = processed; return IIO_VAL_INT; } -- GitLab From 4538c5ed0f7e892f1b643472e48146757d1e60c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Feb 2021 13:35:07 +0100 Subject: [PATCH 020/839] iwlwifi: avoid crash on unsupported debug collection If the opmode doesn't support debug collection (DVM) then don't crash, but just skip the callback. Fixes: d01293154c0a ("iwlwifi: dbg: add op_mode callback for collecting debug data.") Reported-by: Andy Lavr Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210219133506.ecabe285bc7d.I73d230d555c595fa2d9bf284f80078729fe18aa4@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h index 868da7e79a455..e6d2e09943170 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h @@ -205,6 +205,8 @@ static inline void iwl_op_mode_time_point(struct iwl_op_mode *op_mode, enum iwl_fw_ini_time_point tp_id, union iwl_dbg_tlv_tp_data *tp_data) { + if (!op_mode || !op_mode->ops || !op_mode->ops->time_point) + return; op_mode->ops->time_point(op_mode, tp_id, tp_data); } -- GitLab From b29dd96b905f3dd543f4ca729447286adf934dd6 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 16 Feb 2021 12:53:07 +0000 Subject: [PATCH 021/839] bpf, x86: Fix BPF_FETCH atomic and/or/xor with r0 as src This code generates a CMPXCHG loop in order to implement atomic_fetch bitwise operations. Because CMPXCHG is hard-coded to use rax (which holds the BPF r0 value), it saves the _real_ r0 value into the internal "ax" temporary register and restores it once the loop is complete. In the middle of the loop, the actual bitwise operation is performed using src_reg. The bug occurs when src_reg is r0: as described above, r0 has been clobbered and the real r0 value is in the ax register. Therefore, perform this operation on the ax register instead, when src_reg is r0. Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions") Signed-off-by: Brendan Jackman Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210216125307.1406237-1-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 10 +++++--- .../selftests/bpf/verifier/atomic_and.c | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79e7a0ec1da58..6926d0ca6c717 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1349,6 +1349,7 @@ st: if (is_imm8(insn->off)) insn->imm == (BPF_XOR | BPF_FETCH)) { u8 *branch_target; bool is64 = BPF_SIZE(insn->code) == BPF_DW; + u32 real_src_reg = src_reg; /* * Can't be implemented with a single x86 insn. @@ -1357,6 +1358,9 @@ st: if (is_imm8(insn->off)) /* Will need RAX as a CMPXCHG operand so save R0 */ emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); + if (src_reg == BPF_REG_0) + real_src_reg = BPF_REG_AX; + branch_target = prog; /* Load old value */ emit_ldx(&prog, BPF_SIZE(insn->code), @@ -1366,9 +1370,9 @@ st: if (is_imm8(insn->off)) * put the result in the AUX_REG. */ emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); - maybe_emit_mod(&prog, AUX_REG, src_reg, is64); + maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64); EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], - add_2reg(0xC0, AUX_REG, src_reg)); + add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ err = emit_atomic(&prog, BPF_CMPXCHG, dst_reg, AUX_REG, insn->off, @@ -1381,7 +1385,7 @@ st: if (is_imm8(insn->off)) */ EMIT2(X86_JNE, -(prog - branch_target) - 2); /* Return the pre-modification value */ - emit_mov_reg(&prog, is64, src_reg, BPF_REG_0); + emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0); /* Restore R0 after clobbering RAX */ emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); break; diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c index 1bdc8e6684f7c..fe4bb70eb9c57 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_and.c +++ b/tools/testing/selftests/bpf/verifier/atomic_and.c @@ -75,3 +75,26 @@ }, .result = ACCEPT, }, +{ + "BPF_ATOMIC_AND with fetch - r0 as source reg", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_0, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, -- GitLab From 33ccec5fd740d0d5b78b77846f76eb5b4feb4327 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Feb 2021 10:45:25 +0300 Subject: [PATCH 022/839] bpf: Fix a warning message in mark_ptr_not_null_reg() The WARN_ON() argument is a condition, not an error message. So this code will print a stack trace but will not print the warning message. Fix that and also change it to only WARN_ONCE(). Fixes: 4ddb74165ae5 ("bpf: Extract nullable reg type conversion into a helper function") Signed-off-by: Dan Carpenter Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/YCzJlV3hnF%2Ft1Pk4@mwanda --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1dda9d81f12c5..3d34ba492d461 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1120,7 +1120,7 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) reg->type = PTR_TO_RDWR_BUF; break; default: - WARN_ON("unknown nullable register type"); + WARN_ONCE(1, "unknown nullable register type"); } } -- GitLab From 53f523f3052ac16bbc7718032aa6b848f971d28c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 17 Feb 2021 16:16:47 -0800 Subject: [PATCH 023/839] bpf: Clear percpu pointers in bpf_prog_clone_free() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to bpf_prog_realloc(), bpf_prog_clone_create() also copies the percpu pointers, but the clone still shares them with the original prog, so we have to clear these two percpu pointers in bpf_prog_clone_free(). Otherwise we would get a double free: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 13 PID: 8140 Comm: kworker/13:247 Kdump: loaded Tainted: G                W    OE   5.11.0-rc4.bm.1-amd64+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 test_bpf: #1 TXA Workqueue: events bpf_prog_free_deferred RIP: 0010:percpu_ref_get_many.constprop.97+0x42/0xf0 Code: [...] RSP: 0018:ffffa6bce1f9bda0 EFLAGS: 00010002 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000021dfc7b RDX: ffffffffae2eeb90 RSI: 867f92637e338da5 RDI: 0000000000000046 RBP: ffffa6bce1f9bda8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000046 R11: 0000000000000000 R12: 0000000000000280 R13: 0000000000000000 R14: 0000000000000000 R15: ffff9b5f3ffdedc0 FS:    0000000000000000(0000) GS:ffff9b5f2fb40000(0000) knlGS:0000000000000000 CS:    0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000027c36c002 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace:     refill_obj_stock+0x5e/0xd0     free_percpu+0xee/0x550     __bpf_prog_free+0x4d/0x60     process_one_work+0x26a/0x590     worker_thread+0x3c/0x390     ? process_one_work+0x590/0x590     kthread+0x130/0x150     ? kthread_park+0x80/0x80     ret_from_fork+0x1f/0x30 This bug is 100% reproducible with test_kmod.sh. Fixes: 700d4796ef59 ("bpf: Optimize program stats") Fixes: ca06f55b9002 ("bpf: Add per-program recursion prevention mechanism") Reported-by: Jiang Wang Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210218001647.71631-1-xiyou.wangcong@gmail.com --- kernel/bpf/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0ae015ad1e056..aa1e64196d8d2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1118,6 +1118,8 @@ static void bpf_prog_clone_free(struct bpf_prog *fp) * clone is guaranteed to not be locked. */ fp->aux = NULL; + fp->stats = NULL; + fp->active = NULL; __bpf_prog_free(fp); } -- GitLab From 9aa940047ae81fa1806506556cde1efd0c39aef9 Mon Sep 17 00:00:00 2001 From: Sharvari Harisangam Date: Mon, 22 Feb 2021 18:19:44 +0530 Subject: [PATCH 024/839] MAINTAINERS: update for mwifiex driver maintainers Add Sharvari Harisangam to Maintainer list. Replace Ganapathi Bhat's email id in Maintainer list. Signed-off-by: Rakesh Parmar Signed-off-by: Sharvari Harisangam Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1613998184-20047-1-git-send-email-sharvari.harisangam@nxp.com --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 99335fd22c0a4..98fd98ebe6b2a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10685,7 +10685,8 @@ F: drivers/net/ethernet/marvell/mvpp2/ MARVELL MWIFIEX WIRELESS DRIVER M: Amitkumar Karwar -M: Ganapathi Bhat +M: Ganapathi Bhat +M: Sharvari Harisangam M: Xinming Hu L: linux-wireless@vger.kernel.org S: Maintained -- GitLab From ebb9d34e073dc965e9e1f0632a95dcb83736f166 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 21 Feb 2021 19:27:54 +0100 Subject: [PATCH 025/839] ath11k: qmi: use %pad to format dma_addr_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_ARCH_DMA_ADDR_T_64BIT=n: drivers/net/wireless/ath/ath11k/qmi.c: In function ‘ath11k_qmi_respond_fw_mem_request’: drivers/net/wireless/ath/ath11k/qmi.c:1690:8: warning: format ‘%llx’ expects argument of type ‘long long unsigned int’, but argument 5 has type ‘dma_addr_t’ {aka ‘unsigned int’} [-Wformat=] 1690 | "qmi req mem_seg[%d] 0x%llx %u %u\n", i, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1691 | ab->qmi.target_mem[i].paddr, | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | dma_addr_t {aka unsigned int} drivers/net/wireless/ath/ath11k/debug.h:64:30: note: in definition of macro ‘ath11k_dbg’ 64 | __ath11k_dbg(ar, dbg_mask, fmt, ##__VA_ARGS__); \ | ^~~ drivers/net/wireless/ath/ath11k/qmi.c:1690:34: note: format string is defined here 1690 | "qmi req mem_seg[%d] 0x%llx %u %u\n", i, | ~~~^ | | | long long unsigned int | %x Fixes: d5395a5486596308 ("ath11k: qmi: add debug message for allocated memory segment addresses and sizes") Signed-off-by: Geert Uytterhoeven Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210221182754.2071863-1-geert@linux-m68k.org --- drivers/net/wireless/ath/ath11k/qmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 1aca841cd147c..7968fe4eda22a 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1687,8 +1687,8 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) req->mem_seg[i].size = ab->qmi.target_mem[i].size; req->mem_seg[i].type = ab->qmi.target_mem[i].type; ath11k_dbg(ab, ATH11K_DBG_QMI, - "qmi req mem_seg[%d] 0x%llx %u %u\n", i, - ab->qmi.target_mem[i].paddr, + "qmi req mem_seg[%d] %pad %u %u\n", i, + &ab->qmi.target_mem[i].paddr, ab->qmi.target_mem[i].size, ab->qmi.target_mem[i].type); } -- GitLab From 77d7e87128d4dfb400df4208b2812160e999c165 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 22 Feb 2021 17:14:09 +0200 Subject: [PATCH 026/839] ath11k: fix AP mode for QCA6390 Commit c134d1f8c436 ("ath11k: Handle errors if peer creation fails") completely broke AP mode on QCA6390: kernel: [ 151.230734] ath11k_pci 0000:06:00.0: failed to create peer after vdev start delay: -22 wpa_supplicant[2307]: Failed to set beacon parameters wpa_supplicant[2307]: Interface initialization failed wpa_supplicant[2307]: wlan0: interface state UNINITIALIZED->DISABLED wpa_supplicant[2307]: wlan0: AP-DISABLED wpa_supplicant[2307]: wlan0: Unable to setup interface. wpa_supplicant[2307]: Failed to initialize AP interface This was because commit c134d1f8c436 ("ath11k: Handle errors if peer creation fails") added error handling for ath11k_peer_create(), which had been failing all along but was unnoticed due to the missing error handling. The actual bug was introduced already in commit aa44b2f3ecd4 ("ath11k: start vdev if a bss peer is already created"). ath11k_peer_create() was failing because for AP mode the peer is created already earlier op_add_interface() and we should skip creation here, but the check for modes was wrong. Fixing that makes AP mode work again. This shouldn't affect IPQ8074 nor QCN9074 as they have hw_params.vdev_start_delay disabled. Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Fixes: c134d1f8c436 ("ath11k: Handle errors if peer creation fails") Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1614006849-25764-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/ath/ath11k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index b391169576e27..faa2e678e63ef 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5450,8 +5450,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, } if (ab->hw_params.vdev_start_delay && - (arvif->vdev_type == WMI_VDEV_TYPE_AP || - arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) { + arvif->vdev_type != WMI_VDEV_TYPE_AP && + arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) { param.vdev_id = arvif->vdev_id; param.peer_type = WMI_PEER_TYPE_DEFAULT; param.peer_addr = ar->mac_addr; -- GitLab From f4eda8b6e4a5c7897c6bb992ed63a27061b371ef Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 23 Feb 2021 13:04:16 +0400 Subject: [PATCH 027/839] bpf: Drop imprecise log message Now it is possible for global function to have a pointer argument that points to something different than struct. Drop the irrelevant log message and keep the logic same. Fixes: e5069b9c23b3 ("bpf: Support pointers in global func args") Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210223090416.333943-1-me@ubique.spb.ru --- kernel/bpf/btf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2efeb5f4b3433..b1a76fe046cba 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4321,8 +4321,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, * is not supported yet. * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. */ - if (log->level & BPF_LOG_LEVEL) - bpf_log(log, "arg#%d type is not a struct\n", arg); return NULL; } tname = btf_name_by_offset(btf, t->name_off); -- GitLab From c41d81bfbb4579c3e583457e383dd63d026bf947 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 23 Feb 2021 12:22:11 +0400 Subject: [PATCH 028/839] selftests/bpf: Fix a compiler warning in global func test Add an explicit 'const void *' cast to pass program ctx pointer type into a global function that expects pointer to structure. warning: incompatible pointer types passing 'struct __sk_buff *' to parameter of type 'const struct S *' [-Wincompatible-pointer-types] return foo(skb); ^~~ progs/test_global_func11.c:10:36: note: passing argument to parameter 's' here __noinline int foo(const struct S *s) ^ Fixes: 8b08807d039a ("selftests/bpf: Add unit tests for pointers in global functions") Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210223082211.302596-1-me@ubique.spb.ru --- tools/testing/selftests/bpf/progs/test_global_func11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c index 28488047c8491..ef5277d982d92 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func11.c +++ b/tools/testing/selftests/bpf/progs/test_global_func11.c @@ -15,5 +15,5 @@ __noinline int foo(const struct S *s) SEC("cgroup_skb/ingress") int test_cls(struct __sk_buff *skb) { - return foo(skb); + return foo((const void *)skb); } -- GitLab From 41462c6e730ca0e63f5fed5a517052385d980c54 Mon Sep 17 00:00:00 2001 From: Kun-Chuan Hsieh Date: Wed, 24 Feb 2021 05:27:52 +0000 Subject: [PATCH 029/839] tools/resolve_btfids: Fix build error with older host toolchains Older libelf.h and glibc elf.h might not yet define the ELF compression types. Checking and defining SHF_COMPRESSED fix the build error when compiling with older toolchains. Also, the tool resolve_btfids is compiled with host toolchain. The host toolchain is more likely to be older than the cross compile toolchain. Fixes: 51f6463aacfb ("tools/resolve_btfids: Fix sections with wrong alignment") Signed-off-by: Kun-Chuan Hsieh Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20210224052752.5284-1-jetswayss@gmail.com --- tools/bpf/resolve_btfids/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 7409d7860aa6c..80d966cfcaa14 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */ +#ifndef SHF_COMPRESSED +#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */ +#endif + /* * The data of compressed section should be aligned to 4 * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld -- GitLab From a7c9c25a99bbdaff51da26b874d2faaa8fdd72b5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 23 Feb 2021 21:14:57 +0800 Subject: [PATCH 030/839] bpf: Remove blank line in bpf helper description comment Commit 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") added an extra blank line in bpf helper description. This will make bpf_helpers_doc.py stop building bpf_helper_defs.h immediately after bpf_check_mtu(), which will affect future added functions. Fixes: 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20210223131457.1378978-1-liuhangbin@gmail.com --- include/uapi/linux/bpf.h | 1 - tools/include/uapi/linux/bpf.h | 1 - 2 files changed, 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c24daa43bacc..79c893310492b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3850,7 +3850,6 @@ union bpf_attr { * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c24daa43bacc..79c893310492b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3850,7 +3850,6 @@ union bpf_attr { * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. -- GitLab From 557c223b643a35effec9654958d8edc62fd2603a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 24 Feb 2021 16:14:03 +0800 Subject: [PATCH 031/839] selftests/bpf: No need to drop the packet when there is no geneve opt In bpf geneve tunnel test we set geneve option on tx side. On rx side we only call bpf_skb_get_tunnel_opt(). Since commit 9c2e14b48119 ("ip_tunnels: Set tunnel option flag when tunnel metadata is present") geneve_rx() will not add TUNNEL_GENEVE_OPT flag if there is no geneve option, which cause bpf_skb_get_tunnel_opt() return ENOENT and _geneve_get_tunnel() in test_tunnel_kern.c drop the packet. As it should be valid that bpf_skb_get_tunnel_opt() return error when there is not tunnel option, there is no need to drop the packet and break all geneve rx traffic. Just set opt_class to 0 in this test and keep returning TC_ACT_OK. Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.") Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: William Tu Link: https://lore.kernel.org/bpf/20210224081403.1425474-1-liuhangbin@gmail.com --- tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index a621b58ab079d..9afe947cfae95 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb) } ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } + if (ret < 0) + gopt.opt_class = 0; bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4, gopt.opt_class); -- GitLab From f00bdce0455233a0b76dae6364442dca717a574c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Feb 2021 14:19:04 +0800 Subject: [PATCH 032/839] vdpa: set the virtqueue num during register This patch delay the queue number setting to vDPA device registering. This allows us to probe the virtqueue numbers between device allocation and registering. Reviewed-by: Stefano Garzarella Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210223061905.422659-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 5 ++--- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 ++-- drivers/vdpa/vdpa.c | 18 ++++++++++-------- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 2 +- include/linux/vdpa.h | 10 +++++----- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 7c8bbfcf6c3eb..d555a6a5d1baf 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -431,8 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops, - IFCVF_MAX_QUEUE_PAIRS * 2, NULL); + dev, &ifc_vdpa_ops, NULL); if (adapter == NULL) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return -ENOMEM; @@ -456,7 +455,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) vf->vring[i].irq = -EINVAL; - ret = vdpa_register_device(&adapter->vdpa); + ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2); if (ret) { IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); goto err; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 10e9b09932eb8..71397fdafa6a4 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - 2 * mlx5_vdpa_max_qps(max_vqs), NULL); + NULL); if (IS_ERR(ndev)) return PTR_ERR(ndev); @@ -2009,7 +2009,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, if (err) goto err_res; - err = vdpa_register_device(&mvdev->vdev); + err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); if (err) goto err_reg; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index da67f07e24fd4..5cffce67cab02 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -69,7 +69,6 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device - * @nvqs: number of virtqueues supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. * @@ -81,7 +80,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - int nvqs, size_t size, const char *name) + size_t size, const char *name) { struct vdpa_device *vdev; int err = -EINVAL; @@ -107,7 +106,6 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->index = err; vdev->config = config; vdev->features_valid = false; - vdev->nvqs = nvqs; if (name) err = dev_set_name(&vdev->dev, "%s", name); @@ -136,10 +134,12 @@ static int vdpa_name_match(struct device *dev, const void *data) return (strcmp(dev_name(&vdev->dev), data) == 0); } -static int __vdpa_register_device(struct vdpa_device *vdev) +static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs) { struct device *dev; + vdev->nvqs = nvqs; + lockdep_assert_held(&vdpa_dev_mutex); dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match); if (dev) { @@ -155,15 +155,16 @@ static int __vdpa_register_device(struct vdpa_device *vdev) * Caller must invoke this routine in the management device dev_add() * callback after setting up valid mgmtdev for this vdpa device. * @vdev: the vdpa device to be registered to vDPA bus + * @nvqs: number of virtqueues supported by this device * * Returns an error when fail to add device to vDPA bus */ -int _vdpa_register_device(struct vdpa_device *vdev) +int _vdpa_register_device(struct vdpa_device *vdev, int nvqs) { if (!vdev->mdev) return -EINVAL; - return __vdpa_register_device(vdev); + return __vdpa_register_device(vdev, nvqs); } EXPORT_SYMBOL_GPL(_vdpa_register_device); @@ -171,15 +172,16 @@ EXPORT_SYMBOL_GPL(_vdpa_register_device); * vdpa_register_device - register a vDPA device * Callers must have a succeed call of vdpa_alloc_device() before. * @vdev: the vdpa device to be registered to vDPA bus + * @nvqs: number of virtqueues supported by this device * * Returns an error when fail to add to vDPA bus */ -int vdpa_register_device(struct vdpa_device *vdev) +int vdpa_register_device(struct vdpa_device *vdev, int nvqs) { int err; mutex_lock(&vdpa_dev_mutex); - err = __vdpa_register_device(vdev); + err = __vdpa_register_device(vdev, nvqs); mutex_unlock(&vdpa_dev_mutex); return err; } diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d5942842432d6..5b6b2f87d40c6 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, - dev_attr->nvqs, dev_attr->name); + dev_attr->name); if (!vdpasim) goto err_alloc; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index d344c5b7c9149..702be74877d27 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -147,7 +147,7 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) if (IS_ERR(simdev)) return PTR_ERR(simdev); - ret = _vdpa_register_device(&simdev->vdpa); + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); if (ret) goto reg_err; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 4ab5494503a83..15fa085fab057 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -250,20 +250,20 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - int nvqs, size_t size, const char *name); + size_t size, const char *name); -#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, name) \ container_of(__vdpa_alloc_device( \ - parent, config, nvqs, \ + parent, config, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member)), name), \ dev_struct, member) -int vdpa_register_device(struct vdpa_device *vdev); +int vdpa_register_device(struct vdpa_device *vdev, int nvqs); void vdpa_unregister_device(struct vdpa_device *vdev); -int _vdpa_register_device(struct vdpa_device *vdev); +int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** -- GitLab From 62541e266703549550e77fd46138422dbdc881f1 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 25 Feb 2021 09:04:21 +0200 Subject: [PATCH 033/839] iwlwifi: pcie: fix iwl_so_trans_cfg link error when CONFIG_IWLMVM is disabled Randy reported an error on his randconfig builds: ERROR: modpost: "iwl_so_trans_cfg" [drivers/net/wireless/intel/iwlwifi/iwlwifi.ko] undefined! The problem was that when CONFIG_IWLMVM was disabled we were still accessing iwl_so_trans_cfg. Fix it by moving IS_ENABLED() check before the access. Reported-by: Randy Dunlap Fixes: 930be4e76f26 ("iwlwifi: add support for SnJ with Jf devices") Signed-off-by: Kalle Valo Acked-by: Luca Coelho Acked-by: Randy Dunlap # build-tested Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1614236661-20274-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 314fec4a89ad1..ffaf973dae948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1106,6 +1106,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } +#if IS_ENABLED(CONFIG_IWLMVM) + /* * Workaround for problematic SnJ device: sometimes when * certain RF modules are connected to SnJ, the device ID @@ -1116,7 +1118,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (CSR_HW_REV_TYPE(iwl_trans->hw_rev) == IWL_CFG_MAC_TYPE_SNJ) iwl_trans->trans_cfg = &iwl_so_trans_cfg; -#if IS_ENABLED(CONFIG_IWLMVM) /* * special-case 7265D, it has the same PCI IDs. * -- GitLab From fb5fabb192b22293b70bc3351696473c50746d90 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2021 15:59:15 +0100 Subject: [PATCH 034/839] mt76: mt7921: remove incorrect error handling Clang points out a mistake in the error handling in mt7921_mcu_tx_rate_report(), which tries to dereference a pointer that cannot be initialized because of the error that is being handled: drivers/net/wireless/mediatek/mt76/mt7921/mcu.c:409:3: warning: variable 'stats' is uninitialized when used here [-Wuninitialized] stats->tx_rate = rate; ^~~~~ drivers/net/wireless/mediatek/mt76/mt7921/mcu.c:401:32: note: initialize the variable 'stats' to silence this warning struct mt7921_sta_stats *stats; ^ Just remove the obviously incorrect line. Fixes: 1c099ab44727 ("mt76: mt7921: add MCU support") Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210225145953.404859-2-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index db125cd22b91a..b5cc72e7e81c3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -405,10 +405,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb, if (wlan_idx >= MT76_N_WCIDS) return; wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); - if (!wcid) { - stats->tx_rate = rate; + if (!wcid) return; - } msta = container_of(wcid, struct mt7921_sta, wcid); stats = &msta->stats; -- GitLab From d0bd52c591a1070c54dc428e926660eb4f981099 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 7 Feb 2021 12:48:31 +0100 Subject: [PATCH 035/839] mt76: dma: do not report truncated frames to mac80211 Commit b102f0c522cf6 ("mt76: fix array overflow on receiving too many fragments for a packet") fixes a possible OOB access but it introduces a memory leak since the pending frame is not released to page_frag_cache if the frag array of skb_shared_info is full. Commit 93a1d4791c10 ("mt76: dma: fix a possible memory leak in mt76_add_fragment()") fixes the issue but does not free the truncated skb that is forwarded to mac80211 layer. Fix the leftover issue discarding even truncated skbs. Fixes: 93a1d4791c10 ("mt76: dma: fix a possible memory leak in mt76_add_fragment()") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/a03166fcc8214644333c68674a781836e0f57576.1612697217.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/dma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index abdc8d364361c..2f27c43ad76df 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -512,13 +512,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, { struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; - if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { + if (nr_frags < ARRAY_SIZE(shinfo->frags)) { struct page *page = virt_to_head_page(data); int offset = data - page_address(page) + q->buf_offset; - skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, - q->buf_size); + skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size); } else { skb_free_frag(data); } @@ -527,7 +527,10 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, return; q->rx_head = NULL; - dev->drv->rx_skb(dev, q - dev->q_rx, skb); + if (nr_frags < ARRAY_SIZE(shinfo->frags)) + dev->drv->rx_skb(dev, q - dev->q_rx, skb); + else + dev_kfree_skb(skb); } static int -- GitLab From c490492f15f656340b35cb9e36b9bfdea3539e19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2021 15:21:27 +0100 Subject: [PATCH 036/839] mt76: mt7915: fix unused 'mode' variable clang points out a possible corner case in the mt7915_tm_set_tx_cont() function if called with invalid arguments: drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:593:2: warning: variable 'mode' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] default: ^~~~~~~ drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:597:13: note: uninitialized use occurs here rateval = mode << 6 | rate_idx; ^~~~ drivers/net/wireless/mediatek/mt76/mt7915/testmode.c:506:37: note: initialize the variable 'mode' to silence this warning u8 rate_idx = td->tx_rate_idx, mode; ^ Change it to return an error instead of continuing with invalid data here. Fixes: 3f0caa3cbf94 ("mt76: mt7915: add support for continuous tx in testmode") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7915/testmode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c index 7fb2170a95612..bd798df748ba5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c @@ -543,7 +543,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en) tx_cont->bw = CMD_CBW_20MHZ; break; default: - break; + return -EINVAL; } if (!en) { @@ -591,7 +591,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en) mode = MT_PHY_TYPE_HE_MU; break; default: - break; + return -EINVAL; } rateval = mode << 6 | rate_idx; -- GitLab From a3e860a83397bf761ec1128a3f0ba186445992c6 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 25 Feb 2021 17:01:10 +0800 Subject: [PATCH 037/839] net: stmmac: stop each tx channel independently If clear GMAC_CONFIG_TE bit, it would stop all tx channels, but users may only want to stop specific tx channel. Fixes: 48863ce5940f ("stmmac: add DMA support for GMAC 4.xx") Signed-off-by: Joakim Zhang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 0b4ee2dbb691d..71e50751ef2dc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -53,10 +53,6 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan) value &= ~DMA_CONTROL_ST; writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); - - value = readl(ioaddr + GMAC_CONFIG); - value &= ~GMAC_CONFIG_TE; - writel(value, ioaddr + GMAC_CONFIG); } void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan) -- GitLab From c511819d138de38e1637eedb645c207e09680d0f Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 25 Feb 2021 17:01:11 +0800 Subject: [PATCH 038/839] net: stmmac: fix watchdog timeout during suspend/resume stress test stmmac_xmit() call stmmac_tx_timer_arm() at the end to modify tx timer to do the transmission cleanup work. Imagine such a situation, stmmac enters suspend immediately after tx timer modified, it's expire callback stmmac_tx_clean() would not be invoked. This could affect BQL, since netdev_tx_sent_queue() has been called, but netdev_tx_completed_queue() have not been involved, as a result, dql_avail(&dev_queue->dql) finally always return a negative value. __dev_queue_xmit->__dev_xmit_skb->qdisc_run->__qdisc_run->qdisc_restart->dequeue_skb: if ((q->flags & TCQ_F_ONETXQUEUE) && netif_xmit_frozen_or_stopped(txq)) // __QUEUE_STATE_STACK_XOFF is set Net core will stop transmitting any more. Finillay, net watchdong would timeout. To fix this issue, we should call netdev_tx_reset_queue() in stmmac_resume(). Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx") Signed-off-by: Joakim Zhang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 26b971cd4da5a..12ed337a239b8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5257,6 +5257,8 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv) tx_q->cur_tx = 0; tx_q->dirty_tx = 0; tx_q->mss = 0; + + netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); } } -- GitLab From bfaf91ca848e758ed7be99b61fd936d03819fa56 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 25 Feb 2021 17:01:12 +0800 Subject: [PATCH 039/839] net: stmmac: fix dma physical address of descriptor when display ring Driver uses dma_alloc_coherent to allocate dma memory for descriptors, dma_alloc_coherent will return both the virtual address and physical address. AFAIK, virt_to_phys could not convert virtual address to physical address, for which memory is allocated by dma_alloc_coherent. dwmac4_display_ring() function is broken for various descriptor, it only support normal descriptor(struct dma_desc) now, this patch also extends to support all descriptor types. Signed-off-by: Joakim Zhang Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac4_descs.c | 50 +++++++++++++--- .../net/ethernet/stmicro/stmmac/enh_desc.c | 9 ++- drivers/net/ethernet/stmicro/stmmac/hwif.h | 3 +- .../net/ethernet/stmicro/stmmac/norm_desc.c | 9 ++- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 57 ++++++++++++------- 5 files changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index c6540b003b430..ee87811b0ca5e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -402,19 +402,53 @@ static void dwmac4_rd_set_tx_ic(struct dma_desc *p) p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION); } -static void dwmac4_display_ring(void *head, unsigned int size, bool rx) +static void dwmac4_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { - struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; int i; pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); - for (i = 0; i < size; i++) { - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), - le32_to_cpu(p->des0), le32_to_cpu(p->des1), - le32_to_cpu(p->des2), le32_to_cpu(p->des3)); - p++; + if (desc_size == sizeof(struct dma_desc)) { + struct dma_desc *p = (struct dma_desc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*p); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(p->des0), le32_to_cpu(p->des1), + le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + p++; + } + } else if (desc_size == sizeof(struct dma_extended_desc)) { + struct dma_extended_desc *extp = (struct dma_extended_desc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*extp); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(extp->basic.des0), le32_to_cpu(extp->basic.des1), + le32_to_cpu(extp->basic.des2), le32_to_cpu(extp->basic.des3), + le32_to_cpu(extp->des4), le32_to_cpu(extp->des5), + le32_to_cpu(extp->des6), le32_to_cpu(extp->des7)); + extp++; + } + } else if (desc_size == sizeof(struct dma_edesc)) { + struct dma_edesc *ep = (struct dma_edesc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*ep); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(ep->des4), le32_to_cpu(ep->des5), + le32_to_cpu(ep->des6), le32_to_cpu(ep->des7), + le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1), + le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3)); + ep++; + } + } else { + pr_err("unsupported descriptor!"); } } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index d02cec296f51e..6650edfab5bc4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -417,19 +417,22 @@ static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc, } } -static void enh_desc_display_ring(void *head, unsigned int size, bool rx) +static void enh_desc_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { struct dma_extended_desc *ep = (struct dma_extended_desc *)head; + dma_addr_t dma_addr; int i; pr_info("Extended %s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { u64 x; + dma_addr = dma_rx_phy + i * sizeof(*ep); x = *(u64 *)ep; - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(ep), + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, (unsigned int)x, (unsigned int)(x >> 32), ep->basic.des2, ep->basic.des3); ep++; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b40b2e0667bba..7417db31402fe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -78,7 +78,8 @@ struct stmmac_desc_ops { /* get rx timestamp status */ int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); /* Display ring */ - void (*display_ring)(void *head, unsigned int size, bool rx); + void (*display_ring)(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size); /* set MSS via context descriptor */ void (*set_mss)(struct dma_desc *p, unsigned int mss); /* get descriptor skbuff address */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index f083360e4ba67..98ef43f35802a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -269,19 +269,22 @@ static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats) return 1; } -static void ndesc_display_ring(void *head, unsigned int size, bool rx) +static void ndesc_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; int i; pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { u64 x; + dma_addr = dma_rx_phy + i * sizeof(*p); x = *(u64 *)p; - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x", - i, (unsigned int)virt_to_phys(p), + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x", + i, &dma_addr, (unsigned int)x, (unsigned int)(x >> 32), p->des2, p->des3); p++; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 12ed337a239b8..730f2d71578c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1133,6 +1133,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) static void stmmac_display_rx_rings(struct stmmac_priv *priv) { u32 rx_cnt = priv->plat->rx_queues_to_use; + unsigned int desc_size; void *head_rx; u32 queue; @@ -1142,19 +1143,24 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv) pr_info("\tRX Queue %u rings\n", queue); - if (priv->extend_desc) + if (priv->extend_desc) { head_rx = (void *)rx_q->dma_erx; - else + desc_size = sizeof(struct dma_extended_desc); + } else { head_rx = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } /* Display RX ring */ - stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true); + stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); } } static void stmmac_display_tx_rings(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; + unsigned int desc_size; void *head_tx; u32 queue; @@ -1164,14 +1170,19 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) pr_info("\tTX Queue %d rings\n", queue); - if (priv->extend_desc) + if (priv->extend_desc) { head_tx = (void *)tx_q->dma_etx; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_extended_desc); + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { head_tx = (void *)tx_q->dma_entx; - else + desc_size = sizeof(struct dma_edesc); + } else { head_tx = (void *)tx_q->dma_tx; + desc_size = sizeof(struct dma_desc); + } - stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false); + stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false, + tx_q->dma_tx_phy, desc_size); } } @@ -3736,18 +3747,23 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int count = 0, error = 0, len = 0; int status = 0, coe = priv->hw->rx_csum; unsigned int next_entry = rx_q->cur_rx; + unsigned int desc_size; struct sk_buff *skb = NULL; if (netif_msg_rx_status(priv)) { void *rx_head; netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); - if (priv->extend_desc) + if (priv->extend_desc) { rx_head = (void *)rx_q->dma_erx; - else + desc_size = sizeof(struct dma_extended_desc); + } else { rx_head = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } - stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true); + stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); } while (count < limit) { unsigned int buf1_len = 0, buf2_len = 0; @@ -4315,24 +4331,27 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr) static struct dentry *stmmac_fs_dir; static void sysfs_display_ring(void *head, int size, int extend_desc, - struct seq_file *seq) + struct seq_file *seq, dma_addr_t dma_phy_addr) { int i; struct dma_extended_desc *ep = (struct dma_extended_desc *)head; struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; for (i = 0; i < size; i++) { if (extend_desc) { - seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(ep), + dma_addr = dma_phy_addr + i * sizeof(*ep); + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1), le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3)); ep++; } else { - seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), + dma_addr = dma_phy_addr + i * sizeof(*p); + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, le32_to_cpu(p->des0), le32_to_cpu(p->des1), le32_to_cpu(p->des2), le32_to_cpu(p->des3)); p++; @@ -4360,11 +4379,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_erx, - priv->dma_rx_size, 1, seq); + priv->dma_rx_size, 1, seq, rx_q->dma_rx_phy); } else { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_rx, - priv->dma_rx_size, 0, seq); + priv->dma_rx_size, 0, seq, rx_q->dma_rx_phy); } } @@ -4376,11 +4395,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_etx, - priv->dma_tx_size, 1, seq); + priv->dma_tx_size, 1, seq, tx_q->dma_tx_phy); } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_tx, - priv->dma_tx_size, 0, seq); + priv->dma_tx_size, 0, seq, tx_q->dma_tx_phy); } } -- GitLab From 396e13e11577b614db77db0bbb6fca935b94eb1b Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 25 Feb 2021 17:01:13 +0800 Subject: [PATCH 040/839] net: stmmac: fix wrongly set buffer2 valid when sph unsupport In current driver, buffer2 available only when hardware supports split header. Wrongly set buffer2 valid in stmmac_rx_refill when refill buffer address. You can see that desc3 is 0x81000000 after initialization, but turn out to be 0x83000000 after refill. Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores") Signed-off-by: Joakim Zhang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 9 +++++++-- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2 +- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index ee87811b0ca5e..cbf4429fb1d23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -533,10 +533,15 @@ static void dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len) *len = le32_to_cpu(p->des2) & RDES2_HL; } -static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr) +static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool buf2_valid) { p->des2 = cpu_to_le32(lower_32_bits(addr)); - p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR); + p->des3 = cpu_to_le32(upper_32_bits(addr)); + + if (buf2_valid) + p->des3 |= cpu_to_le32(RDES3_BUFFER2_VALID_ADDR); + else + p->des3 &= cpu_to_le32(~RDES3_BUFFER2_VALID_ADDR); } static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 0aaf19ab56729..ccfb0102dde49 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -292,7 +292,7 @@ static void dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; } -static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr) +static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool is_valid) { p->des2 = cpu_to_le32(lower_32_bits(addr)); p->des3 = cpu_to_le32(upper_32_bits(addr)); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 7417db31402fe..979ac9fca23c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -92,7 +92,7 @@ struct stmmac_desc_ops { int (*get_rx_hash)(struct dma_desc *p, u32 *hash, enum pkt_hash_types *type); void (*get_rx_header_len)(struct dma_desc *p, unsigned int *len); - void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr); + void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr, bool buf2_valid); void (*set_sarc)(struct dma_desc *p, u32 sarc_type); void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag, u32 inner_type); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 730f2d71578c1..cd7709da09694 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1314,9 +1314,10 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, return -ENOMEM; buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr); + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); } else { buf->sec_page = NULL; + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); } buf->addr = page_pool_get_dma_addr(buf->page); @@ -3659,7 +3660,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) DMA_FROM_DEVICE); stmmac_set_desc_addr(priv, p, buf->addr); - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr); + if (priv->sph) + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); + else + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); stmmac_refill_desc3(priv, rx_q, p); rx_q->rx_count_frames++; -- GitLab From 9c63faaa931e443e7abbbee9de0169f1d4710546 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 25 Feb 2021 17:01:14 +0800 Subject: [PATCH 041/839] net: stmmac: re-init rx buffers when mac resume back During suspend/resume stress test, we found descriptor write back by DMA could exhibit unusual behavior, e.g.: 003 [0xc4310030]: 0x0 0x40 0x0 0xb5010040 We can see that desc3 write back is 0xb5010040, it is still ownd by DMA, so application would not recycle this buffer. It will trigger fatal bus error when DMA try to use this descriptor again. To fix this issue, we should re-init all rx buffers when mac resume back. Signed-off-by: Joakim Zhang Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index cd7709da09694..0eba44e9c1f82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1379,6 +1379,88 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) } } +/** + * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer. + * @priv: driver private structure + * Description: this function is called to re-allocate a receive buffer, perform + * the DMA mapping and init the descriptor. + */ +static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv) +{ + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + int i; + + for (queue = 0; queue < rx_count; queue++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; + + if (buf->page) { + page_pool_recycle_direct(rx_q->page_pool, buf->page); + buf->page = NULL; + } + + if (priv->sph && buf->sec_page) { + page_pool_recycle_direct(rx_q->page_pool, buf->sec_page); + buf->sec_page = NULL; + } + } + } + + for (queue = 0; queue < rx_count; queue++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; + struct dma_desc *p; + + if (priv->extend_desc) + p = &((rx_q->dma_erx + i)->basic); + else + p = rx_q->dma_rx + i; + + if (!buf->page) { + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) + goto err_reinit_rx_buffers; + + buf->addr = page_pool_get_dma_addr(buf->page); + } + + if (priv->sph && !buf->sec_page) { + buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->sec_page) + goto err_reinit_rx_buffers; + + buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); + } + + stmmac_set_desc_addr(priv, p, buf->addr); + if (priv->sph) + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); + else + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); + if (priv->dma_buf_sz == BUF_SIZE_16KiB) + stmmac_init_desc3(priv, p); + } + } + + return; + +err_reinit_rx_buffers: + do { + while (--i >= 0) + stmmac_free_rx_buffer(priv, queue, i); + + if (queue == 0) + break; + + i = priv->dma_rx_size; + } while (queue-- > 0); +} + /** * init_dma_rx_desc_rings - init the RX descriptor rings * @dev: net device structure @@ -5343,7 +5425,7 @@ int stmmac_resume(struct device *dev) mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); - + stmmac_reinit_rx_buffers(priv); stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); -- GitLab From 907310ceb27ee4259bedb6c1257f5d05ee44f3ce Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2021 15:38:31 +0100 Subject: [PATCH 042/839] net: mscc: ocelot: select NET_DEVLINK Without this option, the driver fails to link: ld.lld: error: undefined symbol: devlink_sb_register >>> referenced by ocelot_devlink.c >>> net/ethernet/mscc/ocelot_devlink.o:(ocelot_devlink_sb_register) in archive drivers/built-in.a >>> referenced by ocelot_devlink.c >>> net/ethernet/mscc/ocelot_devlink.o:(ocelot_devlink_sb_register) in archive drivers/built-in.a Fixes: f59fd9cab730 ("net: mscc: ocelot: configure watermarks using devlink-sb") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210225143910.3964364-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig index c0ede0ca7115e..05cb040c26775 100644 --- a/drivers/net/ethernet/mscc/Kconfig +++ b/drivers/net/ethernet/mscc/Kconfig @@ -13,6 +13,7 @@ if NET_VENDOR_MICROSEMI # Users should depend on NET_SWITCHDEV, HAS_IOMEM config MSCC_OCELOT_SWITCH_LIB + select NET_DEVLINK select REGMAP_MMIO select PACKING select PHYLIB -- GitLab From 01c2c1ad8f45e0c191bfd961dc41cd77df0d7a2f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2021 15:38:32 +0100 Subject: [PATCH 043/839] net: dsa: tag_ocelot_8021q: fix driver dependency When the ocelot driver code is in a library, the dsa tag code cannot be built-in: ld.lld: error: undefined symbol: ocelot_can_inject >>> referenced by tag_ocelot_8021q.c >>> dsa/tag_ocelot_8021q.o:(ocelot_xmit) in archive net/built-in.a ld.lld: error: undefined symbol: ocelot_port_inject_frame >>> referenced by tag_ocelot_8021q.c >>> dsa/tag_ocelot_8021q.o:(ocelot_xmit) in archive net/built-in.a Building the tag support only really makes sense for compile-testing when the driver is available, so add a Kconfig dependency that prevents the broken configuration while allowing COMPILE_TEST alternative when MSCC_OCELOT_SWITCH_LIB is disabled entirely. This case is handled through the #ifdef check in include/soc/mscc/ocelot.h. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Arnd Bergmann Acked-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210225143910.3964364-2-arnd@kernel.org Signed-off-by: Jakub Kicinski --- net/dsa/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 3589224c8da96..58b8fc82cd3c4 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -118,6 +118,8 @@ config NET_DSA_TAG_OCELOT config NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" + depends on MSCC_OCELOT_SWITCH_LIB || \ + (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) select NET_DSA_TAG_8021Q help Say Y or M if you want to enable support for tagging frames with a -- GitLab From 63c75c053b4160f7b90a418dcc4e5bcfac2fb6fc Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Fri, 26 Feb 2021 14:32:26 +0800 Subject: [PATCH 044/839] net: dsa: mt7530: don't build GPIO support if !GPIOLIB The new GPIO support may be optional at runtime, but it requires building against gpiolib: ERROR: modpost: "gpiochip_get_data" [drivers/net/dsa/mt7530.ko] undefined! ERROR: modpost: "devm_gpiochip_add_data_with_key" [drivers/net/dsa/mt7530.ko] undefined! Add #ifdef to exclude GPIO support if GPIOLIB is not enabled. Fixes: 429a0edeefd8 ("net: dsa: mt7530: MT7530 optional GPIO support") Reported-by: Arnd Bergmann Signed-off-by: DENG Qingfang Link: https://lore.kernel.org/r/20210226063226.8474-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c17de2bcf2fef..f06f5fa2f898c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1624,6 +1624,7 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port, } } +#ifdef CONFIG_GPIOLIB static inline u32 mt7530_gpio_to_bit(unsigned int offset) { @@ -1726,6 +1727,7 @@ mt7530_setup_gpio(struct mt7530_priv *priv) return devm_gpiochip_add_data(dev, gc, priv); } +#endif /* CONFIG_GPIOLIB */ static int mt7530_setup(struct dsa_switch *ds) @@ -1868,11 +1870,13 @@ mt7530_setup(struct dsa_switch *ds) } } +#ifdef CONFIG_GPIOLIB if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) { ret = mt7530_setup_gpio(priv); if (ret) return ret; } +#endif /* CONFIG_GPIOLIB */ mt7530_setup_port5(ds, interface); -- GitLab From 7f654157f0aefba04cd7f6297351c87b76b47b89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2021 15:57:27 +0100 Subject: [PATCH 045/839] net: phy: make mdio_bus_phy_suspend/resume as __maybe_unused When CONFIG_PM_SLEEP is disabled, the compiler warns about unused functions: drivers/net/phy/phy_device.c:273:12: error: unused function 'mdio_bus_phy_suspend' [-Werror,-Wunused-function] static int mdio_bus_phy_suspend(struct device *dev) drivers/net/phy/phy_device.c:293:12: error: unused function 'mdio_bus_phy_resume' [-Werror,-Wunused-function] static int mdio_bus_phy_resume(struct device *dev) The logic is intentional, so just mark these two as __maybe_unused and remove the incorrect #ifdef. Fixes: 4c0d2e96ba05 ("net: phy: consider that suspend2ram may cut off PHY power") Signed-off-by: Arnd Bergmann Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210225145748.404410-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ce495473cd5dc..cc38e326405a6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -230,7 +230,6 @@ static struct phy_driver genphy_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); -#ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; @@ -270,7 +269,7 @@ out: return !phydev->suspended; } -static int mdio_bus_phy_suspend(struct device *dev) +static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); @@ -290,7 +289,7 @@ static int mdio_bus_phy_suspend(struct device *dev) return phy_suspend(phydev); } -static int mdio_bus_phy_resume(struct device *dev) +static __maybe_unused int mdio_bus_phy_resume(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); int ret; @@ -316,7 +315,6 @@ no_resume: static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, mdio_bus_phy_resume); -#endif /* CONFIG_PM */ /** * phy_register_fixup - creates a new phy_fixup and adds it to the list -- GitLab From 6a4d7234ae9a3bb31181f348ade9bbdb55aeb5c5 Mon Sep 17 00:00:00 2001 From: Heiko Thiery Date: Thu, 25 Feb 2021 22:15:16 +0100 Subject: [PATCH 046/839] net: fec: ptp: avoid register access when ipg clock is disabled When accessing the timecounter register on an i.MX8MQ the kernel hangs. This is only the case when the interface is down. This can be reproduced by reading with 'phc_ctrl eth0 get'. Like described in the change in 91c0d987a9788dcc5fe26baafd73bf9242b68900 the igp clock is disabled when the interface is down and leads to a system hang. So we check if the ptp clock status before reading the timecounter register. Signed-off-by: Heiko Thiery Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20210225211514.9115-1-heiko.thiery@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 2e344aada4c60..1753807cbf97e 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -377,9 +377,16 @@ static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) u64 ns; unsigned long flags; + mutex_lock(&adapter->ptp_clk_mutex); + /* Check the ptp clock */ + if (!adapter->ptp_clk_on) { + mutex_unlock(&adapter->ptp_clk_mutex); + return -EINVAL; + } spin_lock_irqsave(&adapter->tmreg_lock, flags); ns = timecounter_read(&adapter->tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptp_clk_mutex); *ts = ns_to_timespec64(ns); -- GitLab From 2107d45f17bedd7dbf4178462da0ac223835a2a7 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Thu, 25 Feb 2021 15:26:28 -0800 Subject: [PATCH 047/839] tcp: Fix sign comparison bug in getsockopt(TCP_ZEROCOPY_RECEIVE) getsockopt(TCP_ZEROCOPY_RECEIVE) has a bug where we read a user-provided "len" field of type signed int, and then compare the value to the result of an "offsetofend" operation, which is unsigned. Negative values provided by the user will be promoted to large positive numbers; thus checking that len < offsetofend() will return false when the intention was that it return true. Note that while len is originally checked for negative values earlier on in do_tcp_getsockopt(), subsequent calls to get_user() re-read the value from userspace which may have changed in the meantime. Therefore, re-add the check for negative values after the call to get_user in the handler code for TCP_ZEROCOPY_RECEIVE. Fixes: c8856c051454 ("tcp-zerocopy: Return inq along with tcp receive zerocopy.") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Arjun Roy Link: https://lore.kernel.org/r/20210225232628.4033281-1-arjunroy.kdev@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a3422e42784ed..dfb6f286c1de9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4143,7 +4143,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - if (len < offsetofend(struct tcp_zerocopy_receive, length)) + if (len < 0 || + len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; if (unlikely(len > sizeof(zc))) { err = check_zeroed_user(optval + sizeof(zc), -- GitLab From edcbf5137f093b5502f5f6b97cce3cbadbde27aa Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 25 Feb 2021 18:57:19 +0200 Subject: [PATCH 048/839] selftests: forwarding: Fix race condition in mirror installation When mirroring to a gretap in hardware the device expects to be programmed with the egress port and all the encapsulating headers. This requires the driver to resolve the path the packet will take in the software data path and program the device accordingly. If the path cannot be resolved (in this case because of an unresolved neighbor), then mirror installation fails until the path is resolved. This results in a race that causes the test to sometimes fail. Fix this by setting the neighbor's state to permanent, so that it is always valid. Fixes: b5b029399fa6d ("selftests: forwarding: mirror_gre_bridge_1d_vlan: Add STP test") Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../net/forwarding/mirror_gre_bridge_1d_vlan.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index 197e769c2ed16..f8cda822c1cec 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -86,11 +86,20 @@ test_ip6gretap() test_gretap_stp() { + # Sometimes after mirror installation, the neighbor's state is not valid. + # The reason is that there is no SW datapath activity related to the + # neighbor for the remote GRE address. Therefore whether the corresponding + # neighbor will be valid is a matter of luck, and the test is thus racy. + # Set the neighbor's state to permanent, so it would be always valid. + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" } test_ip6gretap_stp() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } -- GitLab From ae9b24ddb69b4e31cda1b5e267a5a08a1db11717 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 25 Feb 2021 18:57:20 +0200 Subject: [PATCH 049/839] mlxsw: spectrum_ethtool: Add an external speed to PTYS register Currently, only external bits are added to the PTYS register, whereas there is one external bit that is wrongly marked as internal, and so was recently removed from the register. Add that bit to the PTYS register again, as this bit is no longer internal. Its removal resulted in '100000baseLR4_ER4/Full' link mode no longer being supported, causing a regression on some setups. Fixes: 5bf01b571cf4 ("mlxsw: spectrum_ethtool: Remove internal speeds from PTYS register") Signed-off-by: Danielle Ratson Reported-by: Eddie Shklaer Tested-by: Eddie Shklaer Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 16e2df6ef2f48..c4adc7f740d3e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4430,6 +4430,7 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index bd7f873f6290b..0bd64169bf812 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1169,6 +1169,11 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, .speed = SPEED_100000, }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, + }, }; #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 40e2e79d45179..131b2a53d261d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -613,7 +613,8 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, .speed = 100000, }, }; -- GitLab From dc860b88ce0a7ed9a048d5042cbb175daf60b657 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Feb 2021 18:57:21 +0200 Subject: [PATCH 050/839] mlxsw: spectrum_router: Ignore routes using a deleted nexthop object Routes are currently processed from a workqueue whereas nexthop objects are processed in system call context. This can result in the driver not finding a suitable nexthop group for a route and issuing a warning [1]. Fix this by ignoring such routes earlier in the process. The subsequent deletion notification will be ignored as well. [1] WARNING: CPU: 2 PID: 7754 at drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:4853 mlxsw_sp_router_fib_event_work+0x1112/0x1e00 [mlxsw_spectrum] [...] CPU: 2 PID: 7754 Comm: kworker/u8:0 Not tainted 5.11.0-rc6-cq-20210207-1 #16 Hardware name: Mellanox Technologies Ltd. MSN2100/SA001390, BIOS 5.6.5 05/24/2018 Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib_event_work [mlxsw_spectrum] RIP: 0010:mlxsw_sp_router_fib_event_work+0x1112/0x1e00 [mlxsw_spectrum] Fixes: cdd6cfc54c64 ("mlxsw: spectrum_router: Allow programming routes with nexthop objects") Signed-off-by: Ido Schimmel Reported-by: Alex Veber Tested-by: Alex Veber Reviewed-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9ce90841f92d8..eda99d82766a7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5951,6 +5951,10 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; + if (fen_info->fi->nh && + !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id)) + return 0; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len, @@ -6601,6 +6605,9 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; + if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id)) + return 0; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, &rt->fib6_dst.addr, sizeof(rt->fib6_dst.addr), -- GitLab From d20cd745218cde1b268bef5282095ec6c95a3ea2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 26 Feb 2021 04:43:09 -0500 Subject: [PATCH 051/839] bnxt_en: Fix race between firmware reset and driver remove. The driver's error recovery reset sequence can take many seconds to complete and only the critical sections are protected by rtnl_lock. A recent change has introduced a regression in this sequence. bnxt_remove_one() may be called while the recovery is in progress. Normally, unregister_netdev() would cause bnxt_close_nic() to be called and this would cause the error recovery to safely abort with the BNXT_STATE_ABORT_ERR flag set in bnxt_close_nic(). Recently, we added bnxt_reinit_after_abort() to allow the user to reopen the device after an aborted recovery. This causes the regression in the scenario described above because we would attempt to re-open even after the netdev has been unregistered. Fix it by checking the netdev reg_state in bnxt_reinit_after_abort() and abort if it is unregistered. Fixes: 6882c36cf82e ("bnxt_en: attempt to reinitialize after aborted reset") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a680fd9c68ea9..c55189c7bb36e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9890,6 +9890,9 @@ static int bnxt_reinit_after_abort(struct bnxt *bp) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return -EBUSY; + if (bp->dev->reg_state == NETREG_UNREGISTERED) + return -ENODEV; + rc = bnxt_fw_init_one(bp); if (!rc) { bnxt_clear_int_mode(bp); -- GitLab From 20d7d1c5c9b11e9f538ed4a2289be106de970d3e Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 26 Feb 2021 04:43:10 -0500 Subject: [PATCH 052/839] bnxt_en: reliably allocate IRQ table on reset to avoid crash The following trace excerpt corresponds with a NULL pointer dereference of 'bp->irq_tbl' in bnxt_setup_inta() on an Aarch64 system after many device resets: Unable to handle kernel NULL pointer dereference at ... 000000d ... pc : string+0x3c/0x80 lr : vsnprintf+0x294/0x7e0 sp : ffff00000f61ba70 pstate : 20000145 x29: ffff00000f61ba70 x28: 000000000000000d x27: ffff0000009c8b5a x26: ffff00000f61bb80 x25: ffff0000009c8b5a x24: 0000000000000012 x23: 00000000ffffffe0 x22: ffff000008990428 x21: ffff00000f61bb80 x20: 000000000000000d x19: 000000000000001f x18: 0000000000000000 x17: 0000000000000000 x16: ffff800b6d0fb400 x15: 0000000000000000 x14: ffff800b7fe31ae8 x13: 00001ed16472c920 x12: ffff000008c6b1c9 x11: ffff000008cf0580 x10: ffff00000f61bb80 x9 : 00000000ffffffd8 x8 : 000000000000000c x7 : ffff800b684b8000 x6 : 0000000000000000 x5 : 0000000000000065 x4 : 0000000000000001 x3 : ffff0a00ffffff04 x2 : 000000000000001f x1 : 0000000000000000 x0 : 000000000000000d Call trace: string+0x3c/0x80 vsnprintf+0x294/0x7e0 snprintf+0x44/0x50 __bnxt_open_nic+0x34c/0x928 [bnxt_en] bnxt_open+0xe8/0x238 [bnxt_en] __dev_open+0xbc/0x130 __dev_change_flags+0x12c/0x168 dev_change_flags+0x20/0x60 ... Ordinarily, a call to bnxt_setup_inta() (not in trace due to inlining) would not be expected on a system supporting MSIX at all. However, if bnxt_init_int_mode() does not end up being called after the call to bnxt_clear_int_mode() in bnxt_fw_reset_close(), then the driver will think that only INTA is supported and bp->irq_tbl will be NULL, causing the above crash. In the error recovery scenario, we call bnxt_clear_int_mode() in bnxt_fw_reset_close() early in the sequence. Ordinarily, we will call bnxt_init_int_mode() in bnxt_hwrm_if_change() after we reestablish communication with the firmware after reset. However, if the sequence has to abort before we call bnxt_init_int_mode() and if the user later attempts to re-open the device, then it will cause the crash above. We fix it in 2 ways: 1. Check for bp->irq_tbl in bnxt_setup_int_mode(). If it is NULL, call bnxt_init_init_mode(). 2. If we need to abort in bnxt_hwrm_if_change() and cannot complete the error recovery sequence, set the BNXT_STATE_ABORT_ERR flag. This will cause more drastic recovery at the next attempt to re-open the device, including a call to bnxt_init_int_mode(). Fixes: 3bc7d4a352ef ("bnxt_en: Add BNXT_STATE_IN_FW_RESET state.") Reviewed-by: Scott Branden Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c55189c7bb36e..b53a0d87371a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8556,10 +8556,18 @@ static void bnxt_setup_inta(struct bnxt *bp) bp->irq_tbl[0].handler = bnxt_inta; } +static int bnxt_init_int_mode(struct bnxt *bp); + static int bnxt_setup_int_mode(struct bnxt *bp) { int rc; + if (!bp->irq_tbl) { + rc = bnxt_init_int_mode(bp); + if (rc || !bp->irq_tbl) + return rc ?: -ENODEV; + } + if (bp->flags & BNXT_FLAG_USING_MSIX) bnxt_setup_msix(bp); else @@ -8744,7 +8752,7 @@ static int bnxt_init_inta(struct bnxt *bp) static int bnxt_init_int_mode(struct bnxt *bp) { - int rc = 0; + int rc = -ENODEV; if (bp->flags & BNXT_FLAG_MSIX_CAP) rc = bnxt_init_msix(bp); @@ -9514,7 +9522,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_func_drv_if_change_input req = {0}; - bool resc_reinit = false, fw_reset = false; + bool fw_reset = !bp->irq_tbl; + bool resc_reinit = false; int rc, retry = 0; u32 flags = 0; @@ -9557,6 +9566,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) { netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n"); + set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return -ENODEV; } if (resc_reinit || fw_reset) { -- GitLab From fae6f62e6a580b663ecf42c2120a0898deae9137 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 26 Feb 2021 10:29:31 +0900 Subject: [PATCH 053/839] counter: stm32-timer-cnt: Report count function when SLAVE_MODE_DISABLED When in SLAVE_MODE_DISABLED mode, the count still increases if the counter is enabled because an internal clock is used. This patch fixes the stm32_count_function_get() and stm32_count_function_set() functions to properly handle this behavior. Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Cc: Fabrice Gasnier Cc: Maxime Coquelin Cc: Alexandre Torgue Signed-off-by: William Breathitt Gray Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20210226012931.161429-1-vilhelm.gray@gmail.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 39 ++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index ef2a974a2f105..cd50dc12bd026 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -44,13 +44,14 @@ struct stm32_timer_cnt { * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges */ enum stm32_count_function { - STM32_COUNT_SLAVE_MODE_DISABLED = -1, + STM32_COUNT_SLAVE_MODE_DISABLED, STM32_COUNT_ENCODER_MODE_1, STM32_COUNT_ENCODER_MODE_2, STM32_COUNT_ENCODER_MODE_3, }; static enum counter_count_function stm32_count_functions[] = { + [STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_COUNT_FUNCTION_INCREASE, [STM32_COUNT_ENCODER_MODE_1] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_A, [STM32_COUNT_ENCODER_MODE_2] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_B, [STM32_COUNT_ENCODER_MODE_3] = COUNTER_COUNT_FUNCTION_QUADRATURE_X4, @@ -90,6 +91,9 @@ static int stm32_count_function_get(struct counter_device *counter, regmap_read(priv->regmap, TIM_SMCR, &smcr); switch (smcr & TIM_SMCR_SMS) { + case 0: + *function = STM32_COUNT_SLAVE_MODE_DISABLED; + return 0; case 1: *function = STM32_COUNT_ENCODER_MODE_1; return 0; @@ -99,9 +103,9 @@ static int stm32_count_function_get(struct counter_device *counter, case 3: *function = STM32_COUNT_ENCODER_MODE_3; return 0; + default: + return -EINVAL; } - - return -EINVAL; } static int stm32_count_function_set(struct counter_device *counter, @@ -112,6 +116,9 @@ static int stm32_count_function_set(struct counter_device *counter, u32 cr1, sms; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + sms = 0; + break; case STM32_COUNT_ENCODER_MODE_1: sms = 1; break; @@ -122,8 +129,7 @@ static int stm32_count_function_set(struct counter_device *counter, sms = 3; break; default: - sms = 0; - break; + return -EINVAL; } /* Store enable status */ @@ -274,31 +280,36 @@ static int stm32_action_get(struct counter_device *counter, size_t function; int err; - /* Default action mode (e.g. STM32_COUNT_SLAVE_MODE_DISABLED) */ - *action = STM32_SYNAPSE_ACTION_NONE; - err = stm32_count_function_get(counter, count, &function); if (err) - return 0; + return err; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + /* counts on internal clock when CEN=1 */ + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_1: /* counts up/down on TI1FP1 edge depending on TI2FP2 level */ if (synapse->signal->id == count->synapses[0].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_2: /* counts up/down on TI2FP2 edge depending on TI1FP1 level */ if (synapse->signal->id == count->synapses[1].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_3: /* counts up/down on both TI1FP1 and TI2FP2 edges */ *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + return 0; + default: + return -EINVAL; } - - return 0; } static const struct counter_ops stm32_timer_cnt_ops = { -- GitLab From c33cb0020ee6dd96cc9976d6085a7d8422f6dbed Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 22 Feb 2021 08:00:00 +0000 Subject: [PATCH 054/839] uapi: nfnetlink_cthelper.h: fix userspace compilation error Apparently, and could not be included into the same compilation unit because of a cut-and-paste typo in the former header. Fixes: 12f7a505331e6 ("netfilter: add user-space connection tracking helper infrastructure") Cc: # v3.6 Signed-off-by: Dmitry V. Levin Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_cthelper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nfnetlink_cthelper.h b/include/uapi/linux/netfilter/nfnetlink_cthelper.h index a13137afc4299..70af02092d16e 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cthelper.h +++ b/include/uapi/linux/netfilter/nfnetlink_cthelper.h @@ -5,7 +5,7 @@ #define NFCT_HELPER_STATUS_DISABLED 0 #define NFCT_HELPER_STATUS_ENABLED 1 -enum nfnl_acct_msg_types { +enum nfnl_cthelper_msg_types { NFNL_MSG_CTHELPER_NEW, NFNL_MSG_CTHELPER_GET, NFNL_MSG_CTHELPER_DEL, -- GitLab From c57ea2d7d81fbaa72c7d0ffbff61ade1039f4a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klemen=20Ko=C5=A1ir?= Date: Sat, 20 Feb 2021 18:29:26 +0900 Subject: [PATCH 055/839] netfilter: conntrack: Remove a double space in a log message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed an extra space in a log message and an extra blank line in code. Signed-off-by: Klemen Košir Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 118f415928aef..b055187235f87 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -219,7 +219,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) return NULL; pr_info("nf_conntrack: default automatic helper assignment " "has been turned off for security reasons and CT-based " - " firewall rule not found. Use the iptables CT target " + "firewall rule not found. Use the iptables CT target " "to attach helpers instead.\n"); net->ct.auto_assign_helper_warned = 1; return NULL; @@ -228,7 +228,6 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); } - int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { -- GitLab From 03a3ca37e4c6478e3a84f04c8429dd5889e107fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Feb 2021 17:23:19 +0100 Subject: [PATCH 056/839] netfilter: nf_nat: undo erroneous tcp edemux lookup Under extremely rare conditions TCP early demux will retrieve the wrong socket. 1. local machine establishes a connection to a remote server, S, on port p. This gives: laddr:lport -> S:p ... both in tcp and conntrack. 2. local machine establishes a connection to host H, on port p2. 2a. TCP stack choses same laddr:lport, so we have laddr:lport -> H:p2 from TCP point of view. 2b). There is a destination NAT rewrite in place, translating H:p2 to S:p. This results in following conntrack entries: I) laddr:lport -> S:p (origin) S:p -> laddr:lport (reply) II) laddr:lport -> H:p2 (origin) S:p -> laddr:lport2 (reply) NAT engine has rewritten laddr:lport to laddr:lport2 to map the reply packet to the correct origin. When server sends SYN/ACK to laddr:lport2, the PREROUTING hook will undo-the SNAT transformation, rewriting IP header to S:p -> laddr:lport This causes TCP early demux to associate the skb with the TCP socket of the first connection. The INPUT hook will then reverse the DNAT transformation, rewriting the IP header to H:p2 -> laddr:lport. Because packet ends up with the wrong socket, the new connection never completes: originator stays in SYN_SENT and conntrack entry remains in SYN_RECV until timeout, and responder retransmits SYN/ACK until it gives up. To resolve this, orphan the skb after the input rewrite: Because the source IP address changed, the socket must be incorrect. We can't move the DNAT undo to prerouting due to backwards compatibility, doing so will make iptables/nftables rules to no longer match the way they did. After orphan, the packet will be handed to the next protocol layer (tcp, udp, ...) and that will repeat the socket lookup just like as if early demux was disabled. Fixes: 41063e9dd1195 ("ipv4: Early TCP socket demux.") Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1427 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index e87b6bd6b3cdb..4731d21fc3ad8 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, } static unsigned int -nf_nat_ipv4_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; @@ -659,6 +659,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb, return ret; } +static unsigned int +nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + __be32 saddr = ip_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv4_fn(priv, skb, state); + + if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && + !inet_sk_transparent(sk)) + skb_orphan(skb); /* TCP edemux obtained wrong socket */ + + return ret; +} + static unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { - .hook = nf_nat_ipv4_in, + .hook = nf_nat_ipv4_pre_routing, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, @@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv4_fn, + .hook = nf_nat_ipv4_local_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, -- GitLab From 07b5a76e18925a595bfef44531dbf2f397bb5507 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Feb 2021 17:23:20 +0100 Subject: [PATCH 057/839] netfilter: conntrack: avoid misleading 'invalid' in log message The packet is not flagged as invalid: conntrack will accept it and its associated with the conntrack entry. This happens e.g. when receiving a retransmitted SYN in SYN_RECV state. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1d7e1c5955463..ec23330687a5e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -982,8 +982,10 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " - "state %s ", tcp_conntrack_names[old_state]); + nf_ct_l4proto_log_invalid(skb, ct, + "packet (index %d) in dir %d ignored, state %s", + index, dir, + tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or -- GitLab From c2c16ccba2f55d527dd145a5d8c038694b3b343f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Feb 2021 17:23:21 +0100 Subject: [PATCH 058/839] selftests: netfilter: test nat port clash resolution interaction with tcp early demux Convert Antonio Ojeas bug reproducer to a kselftest. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../selftests/netfilter/nf_nat_edemux.sh | 99 +++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nf_nat_edemux.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3006a8e5b41a1..3171069a6b461 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,7 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh \ + nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh LDLIBS = -lmnl diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh new file mode 100755 index 0000000000000..cfee3b65be0f7 --- /dev/null +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test NAT source port clash resolution +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" + +cleanup() +{ + ip netns del $ns1 + ip netns del $ns2 +} + +iperf3 -v > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iperf3" + exit $ksft_skip +fi + +iptables --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iptables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns1" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns1" + exit $ksft_skip +fi + +trap cleanup EXIT + +ip netns add $ns2 + +# Connect the namespaces using a veth pair +ip link add name veth2 type veth peer name veth1 +ip link set netns $ns1 dev veth1 +ip link set netns $ns2 dev veth2 + +ip netns exec $ns1 ip link set up dev lo +ip netns exec $ns1 ip link set up dev veth1 +ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1 + +ip netns exec $ns2 ip link set up dev lo +ip netns exec $ns2 ip link set up dev veth2 +ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2 + +# Create a server in one namespace +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 & +iperfs=$! + +# Restrict source port to just one so we don't have to exhaust +# all others. +ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000" + +# add a virtual IP using DNAT +ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 + +# ... and route it to the other namespace +ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 + +sleep 1 + +# add a persistent connection from the other namespace +ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null & + +sleep 1 + +# ip daddr:dport will be rewritten to 192.168.1.1 5201 +# NAT must reallocate source port 10000 because +# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use +echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null +ret=$? + +kill $iperfs + +# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +if [ $ret -eq 0 ]; then + echo "PASS: nc can connect via NAT'd address" +else + echo "FAIL: nc cannot connect via NAT'd address" + exit 1 +fi + +exit 0 -- GitLab From 8e24edddad152b998b37a7f583175137ed2e04a5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 27 Feb 2021 11:27:45 +0300 Subject: [PATCH 059/839] netfilter: x_tables: gpf inside xt_find_revision() nested target/match_revfn() calls work with xt[NFPROTO_UNSPEC] lists without taking xt[NFPROTO_UNSPEC].mutex. This can race with module unload and cause host to crash: general protection fault: 0000 [#1] Modules linked in: ... [last unloaded: xt_cluster] CPU: 0 PID: 542455 Comm: iptables RIP: 0010:[] [] strcmp+0x18/0x40 RDX: 0000000000000003 RSI: ffff9a5a5d9abe10 RDI: dead000000000111 R13: ffff9a5a5d9abe10 R14: ffff9a5a5d9abd8c R15: dead000000000100 (VvS: %R15 -- &xt_match, %RDI -- &xt_match.name, xt_cluster unregister match in xt[NFPROTO_UNSPEC].match list) Call Trace: [] match_revfn+0x54/0xc0 [] match_revfn+0xaf/0xc0 [] xt_find_revision+0x6e/0xf0 [] do_ipt_get_ctl+0x100/0x420 [ip_tables] [] nf_getsockopt+0x4f/0x70 [] ip_getsockopt+0xde/0x100 [] raw_getsockopt+0x25/0x50 [] sock_common_getsockopt+0x1a/0x20 [] SyS_getsockopt+0x7d/0xf0 [] system_call_fastpath+0x25/0x2a Fixes: 656caff20e1 ("netfilter 04/09: x_tables: fix match/target revision lookup") Signed-off-by: Vasily Averin Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index acce622582e3d..bce6ca203d462 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_match *m; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision > *bestp) @@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_target *t; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision > *bestp) @@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, { int have_rev, best = -1; - mutex_lock(&xt[af].mutex); if (target == 1) have_rev = target_revfn(af, name, revision, &best); else have_rev = match_revfn(af, name, revision, &best); - mutex_unlock(&xt[af].mutex); /* Nothing at all? Return 0 to try loading module. */ if (best == -1) { -- GitLab From a4fc088ad4ff4a99d01978aa41065132b574b4b2 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Thu, 25 Feb 2021 13:51:02 +0100 Subject: [PATCH 060/839] ethtool: fix the check logic of at least one channel for RX/TX The command "ethtool -L combined 0" may clean the RX/TX channel count and skip the error path, since the attrs tb[ETHTOOL_A_CHANNELS_RX_COUNT] and tb[ETHTOOL_A_CHANNELS_TX_COUNT] are NULL in this case when recent ethtool is used. Tested using ethtool v5.10. Fixes: 7be92514b99c ("ethtool: check if there is at least one channel for TX/RX in the core") Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20210225125102.23989-1-simon.horman@netronome.com Signed-off-by: Jakub Kicinski --- net/ethtool/channels.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 25a9e566ef5cd..6a070dc8e4b0d 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) struct ethtool_channels channels = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; - const struct nlattr *err_attr; + u32 err_attr, max_rx_in_use = 0; const struct ethtool_ops *ops; struct net_device *dev; - u32 max_rx_in_use = 0; int ret; ret = ethnl_parse_header_dev_get(&req_info, @@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) /* ensure new channel counts are within limits */ if (channels.rx_count > channels.max_rx) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (channels.tx_count > channels.max_tx) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else if (channels.other_count > channels.max_other) - err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT; else if (channels.combined_count > channels.max_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; else - err_attr = NULL; + err_attr = 0; if (err_attr) { ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], "requested channel count exceeds maximum"); goto out_ops; } /* ensure there is at least one RX and one TX channel */ if (!channels.combined_count && !channels.rx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (!channels.combined_count && !channels.tx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else - err_attr = NULL; + err_attr = 0; if (err_attr) { if (mod_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured"); + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], + "requested channel counts would result in no RX or TX channel being configured"); goto out_ops; } -- GitLab From d313d16bbaea0f11a2e98f04a6c678b43c208915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 26 Feb 2021 14:20:38 +0100 Subject: [PATCH 061/839] net: broadcom: bcm4908_enet: enable RX after processing packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When receiving a lot of packets hardware may run out of free descriptiors and stop RX ring. Enable it every time after handling received packets. Fixes: 4feffeadbcb2 ("net: broadcom: bcm4908enet: add BCM4908 controller driver") Signed-off-by: Rafał Miłecki Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20210226132038.29849-1-zajec5@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcm4908_enet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 0b70e9e0ddad5..98cf82dea3e4a 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -592,6 +592,9 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight) bcm4908_enet_intrs_on(enet); } + /* Hardware could disable ring if it run out of descriptors */ + bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring); + return handled; } -- GitLab From 89e5c58fc1e2857ccdaae506fb8bc5fed57ee063 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Feb 2021 22:22:48 +0100 Subject: [PATCH 062/839] net: Fix gro aggregation for udp encaps with zero csum We noticed a GRO issue for UDP-based encaps such as vxlan/geneve when the csum for the UDP header itself is 0. In that case, GRO aggregation does not take place on the phys dev, but instead is deferred to the vxlan/geneve driver (see trace below). The reason is essentially that GRO aggregation bails out in udp_gro_receive() for such case when drivers marked the skb with CHECKSUM_UNNECESSARY (ice, i40e, others) where for non-zero csums 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion") promotes those skbs to CHECKSUM_COMPLETE and napi context has csum_valid set. This is however not the case for zero UDP csum (here: csum_cnt is still 0 and csum_valid continues to be false). At the same time 57c67ff4bd92 ("udp: additional GRO support") added matches on !uh->check ^ !uh2->check as part to determine candidates for aggregation, so it certainly is expected to handle zero csums in udp_gro_receive(). The purpose of the check added via 662880f44203 ("net: Allow GRO to use and set levels of checksum unnecessary") seems to catch bad csum and stop aggregation right away. One way to fix aggregation in the zero case is to only perform the !csum_valid check in udp_gro_receive() if uh->check is infact non-zero. Before: [...] swapper 0 [008] 731.946506: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100400 len=1500 (1) swapper 0 [008] 731.946507: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100200 len=1500 swapper 0 [008] 731.946507: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101100 len=1500 swapper 0 [008] 731.946508: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101700 len=1500 swapper 0 [008] 731.946508: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101b00 len=1500 swapper 0 [008] 731.946508: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100600 len=1500 swapper 0 [008] 731.946508: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100f00 len=1500 swapper 0 [008] 731.946509: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100a00 len=1500 swapper 0 [008] 731.946516: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100500 len=1500 swapper 0 [008] 731.946516: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100700 len=1500 swapper 0 [008] 731.946516: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101d00 len=1500 (2) swapper 0 [008] 731.946517: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101000 len=1500 swapper 0 [008] 731.946517: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101c00 len=1500 swapper 0 [008] 731.946517: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101400 len=1500 swapper 0 [008] 731.946518: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100e00 len=1500 swapper 0 [008] 731.946518: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497101600 len=1500 swapper 0 [008] 731.946521: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff966497100800 len=774 swapper 0 [008] 731.946530: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff966497100400 len=14032 (1) swapper 0 [008] 731.946530: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff966497101d00 len=9112 (2) [...] # netperf -H 10.55.10.4 -t TCP_STREAM -l 20 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.55.10.4 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 20.01 13129.24 After: [...] swapper 0 [026] 521.862641: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff93ab0d479000 len=11286 (1) swapper 0 [026] 521.862643: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d479000 len=11236 (1) swapper 0 [026] 521.862650: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff93ab0d478500 len=2898 (2) swapper 0 [026] 521.862650: net:netif_receive_skb: dev=enp10s0f0 skbaddr=0xffff93ab0d479f00 len=8490 (3) swapper 0 [026] 521.862653: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d478500 len=2848 (2) swapper 0 [026] 521.862653: net:netif_receive_skb: dev=test_vxlan skbaddr=0xffff93ab0d479f00 len=8440 (3) [...] # netperf -H 10.55.10.4 -t TCP_STREAM -l 20 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.55.10.4 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 20.01 24576.53 Fixes: 57c67ff4bd92 ("udp: additional GRO support") Fixes: 662880f44203 ("net: Allow GRO to use and set levels of checksum unnecessary") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Jesse Brandeburg Cc: Tom Herbert Acked-by: Willem de Bruijn Acked-by: John Fastabend Link: https://lore.kernel.org/r/20210226212248.8300-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b76c48efd37ee..c5b4b586570fe 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -526,7 +526,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, } if (!sk || NAPI_GRO_CB(skb)->encap_mark || - (skb->ip_summed != CHECKSUM_PARTIAL && + (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid) || !udp_sk(sk)->gro_receive) -- GitLab From ae85ddda0f1b341b2d25f5a5e0eff1d42b6ef3df Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sat, 27 Feb 2021 15:24:51 +0800 Subject: [PATCH 063/839] net: hns3: fix error mask definition of flow director Currently, some bit filed definitions of flow director TCAM configuration command are incorrect. Since the wrong MSB is always 0, and these fields are assgined in order, so it still works. Fix it by redefine them. Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index ff52a65b4cffb..057dda7354924 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1053,16 +1053,16 @@ struct hclge_fd_tcam_config_3_cmd { #define HCLGE_FD_AD_DROP_B 0 #define HCLGE_FD_AD_DIRECT_QID_B 1 #define HCLGE_FD_AD_QID_S 2 -#define HCLGE_FD_AD_QID_M GENMASK(12, 2) +#define HCLGE_FD_AD_QID_M GENMASK(11, 2) #define HCLGE_FD_AD_USE_COUNTER_B 12 #define HCLGE_FD_AD_COUNTER_NUM_S 13 #define HCLGE_FD_AD_COUNTER_NUM_M GENMASK(20, 13) #define HCLGE_FD_AD_NXT_STEP_B 20 #define HCLGE_FD_AD_NXT_KEY_S 21 -#define HCLGE_FD_AD_NXT_KEY_M GENMASK(26, 21) +#define HCLGE_FD_AD_NXT_KEY_M GENMASK(25, 21) #define HCLGE_FD_AD_WR_RULE_ID_B 0 #define HCLGE_FD_AD_RULE_ID_S 1 -#define HCLGE_FD_AD_RULE_ID_M GENMASK(13, 1) +#define HCLGE_FD_AD_RULE_ID_M GENMASK(12, 1) #define HCLGE_FD_AD_TC_OVRD_B 16 #define HCLGE_FD_AD_TC_SIZE_S 17 #define HCLGE_FD_AD_TC_SIZE_M GENMASK(20, 17) -- GitLab From c75ec148a316e8cf52274d16b9b422703b96f5ce Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sat, 27 Feb 2021 15:24:52 +0800 Subject: [PATCH 064/839] net: hns3: fix query vlan mask value error for flow director Currently, the driver returns VLAN_VID_MASK for vlan mask field, when get flow director rule information for rule doesn't use vlan. It may cause the vlan mask value display as 0xf000 in this case, like below: estuary:/$ ethtool -u eth1 50 RX rings available Total 1 rules Filter: 2 Rule Type: TCP over IPv4 Src IP addr: 0.0.0.0 mask: 255.255.255.255 Dest IP addr: 0.0.0.0 mask: 255.255.255.255 TOS: 0x0 mask: 0xff Src port: 0 mask: 0xffff Dest port: 0 mask: 0xffff VLAN EtherType: 0x0 mask: 0xffff VLAN: 0x0 mask: 0xf000 User-defined: 0x1234 mask: 0x0 Action: Direct to queue 3 Fix it by return 0. Fixes: 05c2314fe6a8 ("net: hns3: Add support for rule query of flow director") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 34b744df67096..932cfd1fb7e93 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6330,8 +6330,7 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1); fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? - cpu_to_be16(VLAN_VID_MASK) : - cpu_to_be16(rule->tuples_mask.vlan_tag1); + 0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); } if (fs->flow_type & FLOW_MAC_EXT) { -- GitLab From b36fc875bcdee56865c444a2cdae17d354a6d5f5 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Sat, 27 Feb 2021 15:24:53 +0800 Subject: [PATCH 065/839] net: hns3: fix bug when calculating the TCAM table info The function hclge_fd_convert_tuple() is used to convert tuples and tuples mask to TCAM x and y. But it misuses the source mac as source mac mask when convert INNER_SRC_MAC, which may cause the flow director rule works unexpectedly. So fix it. Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 932cfd1fb7e93..e3f81c7e0ce74 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5245,9 +5245,9 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, case BIT(INNER_SRC_MAC): for (i = 0; i < ETH_ALEN; i++) { calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples.src_mac[i]); + rule->tuples_mask.src_mac[i]); calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples.src_mac[i]); + rule->tuples_mask.src_mac[i]); } return true; -- GitLab From eead089311f4d935ab5d1d8fbb0c42ad44699ada Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 Feb 2021 23:30:58 +1100 Subject: [PATCH 066/839] powerpc/4xx: Fix build errors from mfdcr() lkp reported a build error in fsp2.o: CC arch/powerpc/platforms/44x/fsp2.o {standard input}:577: Error: unsupported relocation against base Which comes from: pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0)); Where our mfdcr() macro is stringifying "base + PLB4OPB_GESR0", and passing that to the assembler, which obviously doesn't work. The mfdcr() macro already checks that the argument is constant using __builtin_constant_p(), and if not calls the out-of-line version of mfdcr(). But in this case GCC is smart enough to notice that "base + PLB4OPB_GESR0" will be constant, even though it's not something we can immediately stringify into a register number. Segher pointed out that passing the register number to the inline asm as a constant would be better, and in fact it fixes the build error, presumably because it gives GCC a chance to resolve the value. While we're at it, change mtdcr() similarly. Reported-by: kernel test robot Suggested-by: Segher Boessenkool Signed-off-by: Michael Ellerman Acked-by: Feng Tang Link: https://lore.kernel.org/r/20210218123058.748882-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/dcr-native.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 7141ccea8c94e..a92059964579b 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mfdcr(rn) \ ({unsigned int rval; \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mfdcr %0," __stringify(rn) \ - : "=r" (rval)); \ + asm volatile("mfdcr %0, %1" : "=r" (rval) \ + : "n" (rn)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ rval = mfdcrx(rn); \ else \ @@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mtdcr(rn, v) \ do { \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mtdcr " __stringify(rn) ",%0" \ - : : "r" (v)); \ + asm volatile("mtdcr %0, %1" \ + : : "n" (rn), "r" (v)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ mtdcrx(rn, v); \ else \ -- GitLab From f9619d5e5174867536b7e558683bc4408eab833f Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 15 Feb 2021 10:45:06 +0100 Subject: [PATCH 067/839] powerpc/pseries: Don't enforce MSI affinity with kdump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on the number of online CPUs in the original kernel, it is likely for CPU #0 to be offline in a kdump kernel. The associated IRQs in the affinity mappings provided by irq_create_affinity_masks() are thus not started by irq_startup(), as per-design with managed IRQs. This can be a problem with multi-queue block devices driven by blk-mq : such a non-started IRQ is very likely paired with the single queue enforced by blk-mq during kdump (see blk_mq_alloc_tag_set()). This causes the device to remain silent and likely hangs the guest at some point. This is a regression caused by commit 9ea69a55b3b9 ("powerpc/pseries: Pass MSI affinity to irq_create_mapping()"). Note that this only happens with the XIVE interrupt controller because XICS has a workaround to bypass affinity, which is activated during kdump with the "noirqdistrib" kernel parameter. The issue comes from a combination of factors: - discrepancy between the number of queues detected by the multi-queue block driver, that was used to create the MSI vectors, and the single queue mode enforced later on by blk-mq because of kdump (i.e. keeping all queues fixes the issue) - CPU#0 offline (i.e. kdump always succeed with CPU#0) Given that I couldn't reproduce on x86, which seems to always have CPU#0 online even during kdump, I'm not sure where this should be fixed. Hence going for another approach : fine-grained affinity is for performance and we don't really care about that during kdump. Simply revert to the previous working behavior of ignoring affinity masks in this case only. Fixes: 9ea69a55b3b9 ("powerpc/pseries: Pass MSI affinity to irq_create_mapping()") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kurz Reviewed-by: Laurent Vivier Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210215094506.1196119-1-groug@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index b3ac2455faadc..637300330507f 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include #include #include #include @@ -458,8 +459,28 @@ again: return hwirq; } - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); -- GitLab From c119565a15a628efdfa51352f9f6c5186e506a1c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 1 Feb 2021 06:29:50 +0000 Subject: [PATCH 068/839] powerpc/603: Fix protection of user pages mapped with PROT_NONE On book3s/32, page protection is defined by the PP bits in the PTE which provide the following protection depending on the access keys defined in the matching segment register: - PP 00 means RW with key 0 and N/A with key 1. - PP 01 means RW with key 0 and RO with key 1. - PP 10 means RW with both key 0 and key 1. - PP 11 means RO with both key 0 and key 1. Since the implementation of kernel userspace access protection, PP bits have been set as follows: - PP00 for pages without _PAGE_USER - PP01 for pages with _PAGE_USER and _PAGE_RW - PP11 for pages with _PAGE_USER and without _PAGE_RW For kernelspace segments, kernel accesses are performed with key 0 and user accesses are performed with key 1. As PP00 is used for non _PAGE_USER pages, user can't access kernel pages not flagged _PAGE_USER while kernel can. For userspace segments, both kernel and user accesses are performed with key 0, therefore pages not flagged _PAGE_USER are still accessible to the user. This shouldn't be an issue, because userspace is expected to be accessible to the user. But unlike most other architectures, powerpc implements PROT_NONE protection by removing _PAGE_USER flag instead of flagging the page as not valid. This means that pages in userspace that are not flagged _PAGE_USER shall remain inaccessible. To get the expected behaviour, just mimic other architectures in the TLB miss handler by checking _PAGE_USER permission on userspace accesses as if it was the _PAGE_PRESENT bit. Note that this problem only is only for 603 cores. The 604+ have an hash table, and hash_page() function already implement the verification of _PAGE_USER permission on userspace pages. Fixes: f342adca3afc ("powerpc/32s: Prepare Kernel Userspace Access Protection") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Christoph Plattner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4a0c6e3bb8f0c162457bf54d9bc6fd8d7b55129f.1612160907.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 727fdab557c9b..565e84e20a721 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,11 +457,12 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -520,10 +521,11 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -597,10 +599,11 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ -- GitLab From 91b6c5dbe9e072dbdb181eed89c5c824e92ac0f5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 24 Feb 2021 06:34:22 +0000 Subject: [PATCH 069/839] powerpc/syscall: Force inlining of __prep_irq_for_enabled_exit() As reported by kernel test robot, a randconfig with high amount of debuging options can lead to build failure for undefined reference to replay_soft_interrupts() on ppc32. This is due to gcc not seeing that __prep_irq_for_enabled_exit() always returns true on ppc32 because it doesn't inline it for some reason. Force inlining of __prep_irq_for_enabled_exit() to fix the build. Fixes: 344bb20b159d ("powerpc/syscall: Make interrupt.c buildable on PPC32") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/53f3a1f719441761000c41154602bf097d4350b5.1614148356.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 398cd86b6ada1..2ef3c4051bb93 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -149,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5, * enabled when the interrupt handler returns (indicating a process-context / * synchronous interrupt) then irqs_enabled should be true. */ -static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) +static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); -- GitLab From 449052cfebf624b670faa040245d3feed770d22f Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 18 Feb 2021 19:00:35 +0800 Subject: [PATCH 070/839] can: flexcan: assert FRZ bit in flexcan_chip_freeze() Assert HALT bit to enter freeze mode, there is a premise that FRZ bit is asserted. This patch asserts FRZ bit in flexcan_chip_freeze, although the reset value is 1b'1. This is a prepare patch, later patch will invoke flexcan_chip_freeze() to enter freeze mode, which polling freeze mode acknowledge. Fixes: b1aa1c7a2165b ("can: flexcan: fix transition from and to freeze mode in chip_{,un}freeze") Link: https://lore.kernel.org/r/20210218110037.16591-2-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 971ada36e37fa..ee2d4967d66ac 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -701,7 +701,7 @@ static int flexcan_chip_freeze(struct flexcan_priv *priv) u32 reg; reg = priv->read(®s->mcr); - reg |= FLEXCAN_MCR_HALT; + reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT; priv->write(reg, ®s->mcr); while (timeout-- && !(priv->read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) -- GitLab From ec15e27cc8904605846a354bb1f808ea1432f853 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 18 Feb 2021 19:00:36 +0800 Subject: [PATCH 071/839] can: flexcan: enable RX FIFO after FRZ/HALT valid RX FIFO enable failed could happen when do system reboot stress test: [ 0.303958] flexcan 5a8d0000.can: 5a8d0000.can supply xceiver not found, using dummy regulator [ 0.304281] flexcan 5a8d0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core [ 0.314640] flexcan 5a8d0000.can: registering netdev failed [ 0.320728] flexcan 5a8e0000.can: 5a8e0000.can supply xceiver not found, using dummy regulator [ 0.320991] flexcan 5a8e0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core [ 0.331360] flexcan 5a8e0000.can: registering netdev failed [ 0.337444] flexcan 5a8f0000.can: 5a8f0000.can supply xceiver not found, using dummy regulator [ 0.337716] flexcan 5a8f0000.can (unnamed net_device) (uninitialized): Could not enable RX FIFO, unsupported core [ 0.348117] flexcan 5a8f0000.can: registering netdev failed RX FIFO should be enabled after the FRZ/HALT are valid. But the current code enable RX FIFO and FRZ/HALT at the same time. Fixes: e955cead03117 ("CAN: Add Flexcan CAN controller driver") Link: https://lore.kernel.org/r/20210218110037.16591-3-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ee2d4967d66ac..e66a51dbea0a2 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1865,10 +1865,14 @@ static int register_flexcandev(struct net_device *dev) if (err) goto out_chip_disable; - /* set freeze, halt and activate FIFO, restrict register access */ + /* set freeze, halt */ + err = flexcan_chip_freeze(priv); + if (err) + goto out_chip_disable; + + /* activate FIFO, restrict register access */ reg = priv->read(®s->mcr); - reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | - FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; + reg |= FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; priv->write(reg, ®s->mcr); /* Currently we only support newer versions of this core -- GitLab From c63820045e2000f05657467a08715c18c9f490d9 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 18 Feb 2021 19:00:37 +0800 Subject: [PATCH 072/839] can: flexcan: invoke flexcan_chip_freeze() to enter freeze mode Invoke flexcan_chip_freeze() to enter freeze mode, since need poll freeze mode acknowledge. Fixes: e955cead03117 ("CAN: Add Flexcan CAN controller driver") Link: https://lore.kernel.org/r/20210218110037.16591-4-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index e66a51dbea0a2..134c05757a3bb 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1480,10 +1480,13 @@ static int flexcan_chip_start(struct net_device *dev) flexcan_set_bittiming(dev); + /* set freeze, halt */ + err = flexcan_chip_freeze(priv); + if (err) + goto out_chip_disable; + /* MCR * - * enable freeze - * halt now * only supervisor access * enable warning int * enable individual RX masking @@ -1492,9 +1495,8 @@ static int flexcan_chip_start(struct net_device *dev) */ reg_mcr = priv->read(®s->mcr); reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); - reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | - FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | FLEXCAN_MCR_IDAM_C | - FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); + reg_mcr |= FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | + FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); /* MCR * -- GitLab From 2afe72ead5ab672c8012bda83cbe65f8145568e0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 22 Feb 2021 20:46:06 +0100 Subject: [PATCH 073/839] can: mcp251xfd: revert "can: mcp251xfd: add BQL support" In the following 4 patches | 99842c9685ab can: dev: can_rx_offload_get_echo_skb(): extend to return can frame length | 9420e1d495e2 can: dev: can_get_echo_skb(): extend to return can frame length | 1dcb6e57db83 can: dev: can_put_echo_skb(): extend to handle frame_len | f0ef72febc9a can: dev: extend struct can_skb_priv to hold CAN frame length the CAN echo SKB support was extended to hold the CAN frame length (which is the length of the CAN frame on the wire). It is meant as a helper for BQL support, to avoid the re-calculation of the frame length before sending it and on TX-completion. However if the CAN frame is send without the request to be looped back the SKB is discarded in can_put_echo_skb() and the subsequent can_get_echo_skb() and can_rx_offload_get_echo_skb() return 0 for the CAN frame length. This results in BQL stalling the TX queue after a few packages. Until the BQL helpers can_get_echo_skb() and can_rx_offload_get_echo_skb() are fixed, revert the BQL support for the mcp251xfd driver. This reverts commit 4162e18e949ba520d5116ac0323500355479a00e. Fixes: 4162e18e949b ("can: mcp251xfd: add BQL support") Cc: Manivannan Sadhasivam Cc: Thomas Kopp Link: https://lore.kernel.org/r/20210228083347.28580-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3c5b92911d469..799e9d5d34813 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -335,8 +335,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) u8 len; int i, j; - netdev_reset_queue(priv->ndev); - /* TEF */ tef_ring = priv->tef; tef_ring->head = 0; @@ -1249,8 +1247,7 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq) static int mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, - const struct mcp251xfd_hw_tef_obj *hw_tef_obj, - unsigned int *frame_len_ptr) + const struct mcp251xfd_hw_tef_obj *hw_tef_obj) { struct net_device_stats *stats = &priv->ndev->stats; u32 seq, seq_masked, tef_tail_masked; @@ -1272,8 +1269,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, mcp251xfd_get_tef_tail(priv), - hw_tef_obj->ts, - frame_len_ptr); + hw_tef_obj->ts, NULL); stats->tx_packets++; priv->tef->tail++; @@ -1331,7 +1327,6 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) { struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX]; - unsigned int total_frame_len = 0; u8 tef_tail, len, l; int err, i; @@ -1353,9 +1348,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) } for (i = 0; i < len; i++) { - unsigned int frame_len; - - err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len); + err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]); /* -EAGAIN means the Sequence Number in the TEF * doesn't match our tef_tail. This can happen if we * read the TEF objects too early. Leave loop let the @@ -1365,8 +1358,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) goto out_netif_wake_queue; if (err) return err; - - total_frame_len += frame_len; } out_netif_wake_queue: @@ -1397,7 +1388,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) return err; tx_ring->tail += len; - netdev_completed_queue(priv->ndev, len, total_frame_len); err = mcp251xfd_check_tef_tail(priv); if (err) @@ -2443,7 +2433,6 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct mcp251xfd_priv *priv = netdev_priv(ndev); struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct mcp251xfd_tx_obj *tx_obj; - unsigned int frame_len; u8 tx_head; int err; @@ -2462,9 +2451,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, if (mcp251xfd_get_tx_free(tx_ring) == 0) netif_stop_queue(ndev); - frame_len = can_skb_get_frame_len(skb); - can_put_echo_skb(skb, ndev, tx_head, frame_len); - netdev_sent_queue(priv->ndev, frame_len); + can_put_echo_skb(skb, ndev, tx_head, 0); err = mcp251xfd_tx_obj_write(priv, tx_obj); if (err) -- GitLab From e940e0895a82c6fbaa259f2615eb52b57ee91a7e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 26 Feb 2021 10:24:56 +0100 Subject: [PATCH 074/839] can: skb: can_skb_set_owner(): fix ref counting if socket was closed before setting skb ownership There are two ref count variables controlling the free()ing of a socket: - struct sock::sk_refcnt - which is changed by sock_hold()/sock_put() - struct sock::sk_wmem_alloc - which accounts the memory allocated by the skbs in the send path. In case there are still TX skbs on the fly and the socket() is closed, the struct sock::sk_refcnt reaches 0. In the TX-path the CAN stack clones an "echo" skb, calls sock_hold() on the original socket and references it. This produces the following back trace: | WARNING: CPU: 0 PID: 280 at lib/refcount.c:25 refcount_warn_saturate+0x114/0x134 | refcount_t: addition on 0; use-after-free. | Modules linked in: coda_vpu(E) v4l2_jpeg(E) videobuf2_vmalloc(E) imx_vdoa(E) | CPU: 0 PID: 280 Comm: test_can.sh Tainted: G E 5.11.0-04577-gf8ff6603c617 #203 | Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) | Backtrace: | [<80bafea4>] (dump_backtrace) from [<80bb0280>] (show_stack+0x20/0x24) r7:00000000 r6:600f0113 r5:00000000 r4:81441220 | [<80bb0260>] (show_stack) from [<80bb593c>] (dump_stack+0xa0/0xc8) | [<80bb589c>] (dump_stack) from [<8012b268>] (__warn+0xd4/0x114) r9:00000019 r8:80f4a8c2 r7:83e4150c r6:00000000 r5:00000009 r4:80528f90 | [<8012b194>] (__warn) from [<80bb09c4>] (warn_slowpath_fmt+0x88/0xc8) r9:83f26400 r8:80f4a8d1 r7:00000009 r6:80528f90 r5:00000019 r4:80f4a8c2 | [<80bb0940>] (warn_slowpath_fmt) from [<80528f90>] (refcount_warn_saturate+0x114/0x134) r8:00000000 r7:00000000 r6:82b44000 r5:834e5600 r4:83f4d540 | [<80528e7c>] (refcount_warn_saturate) from [<8079a4c8>] (__refcount_add.constprop.0+0x4c/0x50) | [<8079a47c>] (__refcount_add.constprop.0) from [<8079a57c>] (can_put_echo_skb+0xb0/0x13c) | [<8079a4cc>] (can_put_echo_skb) from [<8079ba98>] (flexcan_start_xmit+0x1c4/0x230) r9:00000010 r8:83f48610 r7:0fdc0000 r6:0c080000 r5:82b44000 r4:834e5600 | [<8079b8d4>] (flexcan_start_xmit) from [<80969078>] (netdev_start_xmit+0x44/0x70) r9:814c0ba0 r8:80c8790c r7:00000000 r6:834e5600 r5:82b44000 r4:82ab1f00 | [<80969034>] (netdev_start_xmit) from [<809725a4>] (dev_hard_start_xmit+0x19c/0x318) r9:814c0ba0 r8:00000000 r7:82ab1f00 r6:82b44000 r5:00000000 r4:834e5600 | [<80972408>] (dev_hard_start_xmit) from [<809c6584>] (sch_direct_xmit+0xcc/0x264) r10:834e5600 r9:00000000 r8:00000000 r7:82b44000 r6:82ab1f00 r5:834e5600 r4:83f27400 | [<809c64b8>] (sch_direct_xmit) from [<809c6c0c>] (__qdisc_run+0x4f0/0x534) To fix this problem, only set skb ownership to sockets which have still a ref count > 0. Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Cc: Oliver Hartkopp Cc: Andre Naujoks Link: https://lore.kernel.org/r/20210226092456.27126-1-o.rempel@pengutronix.de Suggested-by: Eric Dumazet Signed-off-by: Oleksij Rempel Reviewed-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 685f34cfba207..d438eb058069b 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -65,8 +65,12 @@ static inline void can_skb_reserve(struct sk_buff *skb) static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { - if (sk) { - sock_hold(sk); + /* If the socket has already been closed by user space, the + * refcount may already be 0 (and the socket will be freed + * after the last TX skb has been freed). So only increase + * socket refcount if the refcount is > 0. + */ + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } -- GitLab From 2712625200ed69c642b9abc3a403830c4643364c Mon Sep 17 00:00:00 2001 From: Torin Cooper-Bennun Date: Fri, 26 Feb 2021 16:34:41 +0000 Subject: [PATCH 075/839] can: tcan4x5x: tcan4x5x_init(): fix initialization - clear MRAM before entering Normal Mode This patch prevents a potentially destructive race condition. The device is fully operational on the bus after entering Normal Mode, so zeroing the MRAM after entering this mode may lead to loss of information, e.g. new received messages. This patch fixes the problem by first initializing the MRAM, then bringing the device into Normale Mode. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Link: https://lore.kernel.org/r/20210226163440.313628-1-torin@maxiluxsystems.com Suggested-by: Marc Kleine-Budde Signed-off-by: Torin Cooper-Bennun Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index b7caec769ddb9..4147cecfbbd6d 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -237,14 +237,14 @@ static int tcan4x5x_init(struct m_can_classdev *cdev) if (ret) return ret; + /* Zero out the MCAN buffers */ + m_can_init_ram(cdev); + ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG, TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL); if (ret) return ret; - /* Zero out the MCAN buffers */ - m_can_init_ram(cdev); - return ret; } -- GitLab From 73f476aa1975bae6a792b340f5b26ffcfba869a6 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 26 Feb 2021 17:30:20 +0200 Subject: [PATCH 076/839] net: phy: ti: take into account all possible interrupt sources The previous implementation of .handle_interrupt() did not take into account the fact that all the interrupt status registers should be acknowledged since multiple interrupt sources could be asserted. Fix this by reading all the status registers before exiting with IRQ_NONE or triggering the PHY state machine. Fixes: 1d1ae3c6ca3f ("net: phy: ti: implement generic .handle_interrupt() callback") Reported-by: Sven Schuchmann Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20210226153020.867852-1-ciorneiioana@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 9 +++++---- drivers/net/phy/dp83tc811.c | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index be1224b4447b4..f7a2ec150e542 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -290,6 +290,7 @@ static int dp83822_config_intr(struct phy_device *phydev) static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) { + bool trigger_machine = false; int irq_status; /* The MISR1 and MISR2 registers are holding the interrupt status in @@ -305,7 +306,7 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83822_MISR2); if (irq_status < 0) { @@ -313,11 +314,11 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; - return IRQ_NONE; + if (!trigger_machine) + return IRQ_NONE; -trigger_machine: phy_trigger_machine(phydev); return IRQ_HANDLED; diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index 688fadffb249d..7ea32fb77190c 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -264,6 +264,7 @@ static int dp83811_config_intr(struct phy_device *phydev) static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) { + bool trigger_machine = false; int irq_status; /* The INT_STAT registers 1, 2 and 3 are holding the interrupt status @@ -279,7 +280,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83811_INT_STAT2); if (irq_status < 0) { @@ -287,7 +288,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83811_INT_STAT3); if (irq_status < 0) { @@ -295,11 +296,11 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; - return IRQ_NONE; + if (!trigger_machine) + return IRQ_NONE; -trigger_machine: phy_trigger_machine(phydev); return IRQ_HANDLED; -- GitLab From 826d82170b539f16e1955ab940222543c012044e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 25 Feb 2021 16:39:01 +0100 Subject: [PATCH 077/839] xen-netback: use local var in xenvif_tx_check_gop() instead of re-calculating shinfo already holds the result of skb_shinfo(skb) at this point - no need to re-invoke the construct even twice. Signed-off-by: Jan Beulich Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5c73f8196626..aff5ac1f002a3 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -557,8 +557,8 @@ check_frags: } if (skb_has_frag_list(skb) && !first_shinfo) { - first_shinfo = skb_shinfo(skb); - shinfo = skb_shinfo(skb_shinfo(skb)->frag_list); + first_shinfo = shinfo; + shinfo = skb_shinfo(shinfo->frag_list); nr_frags = shinfo->nr_frags; goto check_frags; -- GitLab From 9eb8bc593a5eed167dac2029abef343854c5ba75 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Mon, 1 Mar 2021 01:08:23 +0800 Subject: [PATCH 078/839] net: dsa: tag_rtl4_a: fix egress tags Commit 86dd9868b878 has several issues, but was accepted too soon before anyone could take a look. - Double free. dsa_slave_xmit() will free the skb if the xmit function returns NULL, but the skb is already freed by eth_skb_pad(). Use __skb_put_padto() to avoid that. - Unnecessary allocation. It has been done by DSA core since commit a3b0b6479700. - A u16 pointer points to skb data. It should be __be16 for network byte order. - Typo in comments. "numer" -> "number". Fixes: 86dd9868b878 ("net: dsa: tag_rtl4_a: Support also egress tags") Signed-off-by: DENG Qingfang Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- net/dsa/tag_rtl4_a.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index c17d39b4a1a04..e9176475bac89 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -35,14 +35,12 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *p; u8 *tag; - u16 *p; u16 out; /* Pad out to at least 60 bytes */ - if (unlikely(eth_skb_pad(skb))) - return NULL; - if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0) + if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false))) return NULL; netdev_dbg(dev, "add realtek tag to package to port %d\n", @@ -53,13 +51,13 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, tag = skb->data + 2 * ETH_ALEN; /* Set Ethertype */ - p = (u16 *)tag; + p = (__be16 *)tag; *p = htons(RTL4_A_ETHERTYPE); out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8); - /* The lower bits is the port numer */ + /* The lower bits is the port number */ out |= (u8)dp->index; - p = (u16 *)(tag + 2); + p = (__be16 *)(tag + 2); *p = htons(out); return skb; -- GitLab From 4372339efc06bc2a796f4cc9d0a7a929dfda4967 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 27 Feb 2021 01:40:19 +0100 Subject: [PATCH 079/839] net: always use icmp{,v6}_ndo_send from ndo_start_xmit There were a few remaining tunnel drivers that didn't receive the prior conversion to icmp{,v6}_ndo_send. Knowing now that this could lead to memory corrution (see ee576c47db60 ("net: icmp: pass zeroed opts from icmp{,v6}_ndo_send before sending") for details), there's even more imperative to have these all converted. So this commit goes through the remaining cases that I could find and does a boring translation to the ndo variety. The Fixes: line below is the merge that originally added icmp{,v6}_ ndo_send and converted the first batch of icmp{,v6}_send users. The rationale then for the change applies equally to this patch. It's just that these drivers were left out of the initial conversion because these network devices are hiding in net/ rather than in drivers/net/. Cc: Florian Westphal Cc: Willem de Bruijn Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Jakub Kicinski Cc: Steffen Klassert Fixes: 803381f9f117 ("Merge branch 'icmp-account-for-NAT-when-sending-icmps-from-ndo-layer'") Signed-off-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 5 ++--- net/ipv4/ip_vti.c | 6 +++--- net/ipv6/ip6_gre.c | 16 ++++++++-------- net/ipv6/ip6_tunnel.c | 10 +++++----- net/ipv6/ip6_vti.c | 6 +++--- net/ipv6/sit.c | 2 +- 6 files changed, 22 insertions(+), 23 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 76a420c76f16e..f6cc26de5ed30 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (!skb_is_gso(skb) && (inner_iph->frag_off & htons(IP_DF)) && mtu < pkt_size) { - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; } } @@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && mtu < pkt_size) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -E2BIG; } } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index abc171e79d3e4..eb207089ece0b 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } else { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } dst_release(dst); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c3bc89b6b1a1a..1baf43aacb2e4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } *encap_limit = tel->encap_limit - 1; @@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); return -1; } @@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } @@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) { if (skb->protocol == htons(ETH_P_IP)) - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } goto tx_err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a7950baa05e51..3fa0eca5a06f8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, tel = (void *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; @@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, if (err == -EMSGSIZE) switch (protocol) { case IPPROTO_IPIP: - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); break; case IPPROTO_IPV6: - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); break; default: break; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0225fd6941925..f10e7a72ea624 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } err = -EMSGSIZE; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 93636867aee28..63ccd9f2dcccf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } -- GitLab From d9032dba5a2b2bbf0fdce67c8795300ec9923b43 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Sat, 27 Feb 2021 11:05:58 +0800 Subject: [PATCH 080/839] net: phy: fix save wrong speed and duplex problem if autoneg is on If phy uses generic driver and autoneg is on, enter command "ethtool -s eth0 speed 50" will not change phy speed actually, but command "ethtool eth0" shows speed is 50Mb/s because phydev->speed has been set to 50 and no update later. And duplex setting has same problem too. However, if autoneg is on, phy only changes speed and duplex according to phydev->advertising, but not phydev->speed and phydev->duplex. So in this case, phydev->speed and phydev->duplex don't need to be set in function phy_ethtool_ksettings_set() if autoneg is on. Fixes: 51e2a3846eab ("PHY: Avoid unnecessary aneg restarts") Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1be07e45d314e..fc2e7cb5b2e58 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -276,14 +276,16 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->autoneg = autoneg; - phydev->speed = speed; + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } linkmode_copy(phydev->advertising, advertising); linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->advertising, autoneg == AUTONEG_ENABLE); - phydev->duplex = duplex; phydev->master_slave_set = cmd->base.master_slave_cfg; phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; -- GitLab From 4deb550bc3b698a1f03d0332cde3df154d1b6c1e Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sat, 27 Feb 2021 16:15:06 -0500 Subject: [PATCH 081/839] atm: eni: dont release is never initialized label err_eni_release is reachable when eni_start() fail. In eni_start() it calls dev->phy->start() in the last step, if start() fail we don't need to call phy->stop(), if start() is never called, we neither need to call phy->stop(), otherwise null-ptr-deref will happen. In order to fix this issue, don't call phy->stop() in label err_eni_release [ 4.875714] ================================================================== [ 4.876091] BUG: KASAN: null-ptr-deref in suni_stop+0x47/0x100 [suni] [ 4.876433] Read of size 8 at addr 0000000000000030 by task modprobe/95 [ 4.876778] [ 4.876862] CPU: 0 PID: 95 Comm: modprobe Not tainted 5.11.0-rc7-00090-gdcc0b49040c7 #2 [ 4.877290] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd94 [ 4.877876] Call Trace: [ 4.878009] dump_stack+0x7d/0xa3 [ 4.878191] kasan_report.cold+0x10c/0x10e [ 4.878410] ? __slab_free+0x2f0/0x340 [ 4.878612] ? suni_stop+0x47/0x100 [suni] [ 4.878832] suni_stop+0x47/0x100 [suni] [ 4.879043] eni_do_release+0x3b/0x70 [eni] [ 4.879269] eni_init_one.cold+0x1152/0x1747 [eni] [ 4.879528] ? _raw_spin_lock_irqsave+0x7b/0xd0 [ 4.879768] ? eni_ioctl+0x270/0x270 [eni] [ 4.879990] ? __mutex_lock_slowpath+0x10/0x10 [ 4.880226] ? eni_ioctl+0x270/0x270 [eni] [ 4.880448] local_pci_probe+0x6f/0xb0 [ 4.880650] pci_device_probe+0x171/0x240 [ 4.880864] ? pci_device_remove+0xe0/0xe0 [ 4.881086] ? kernfs_create_link+0xb6/0x110 [ 4.881315] ? sysfs_do_create_link_sd.isra.0+0x76/0xe0 [ 4.881594] really_probe+0x161/0x420 [ 4.881791] driver_probe_device+0x6d/0xd0 [ 4.882010] device_driver_attach+0x82/0x90 [ 4.882233] ? device_driver_attach+0x90/0x90 [ 4.882465] __driver_attach+0x60/0x100 [ 4.882671] ? device_driver_attach+0x90/0x90 [ 4.882903] bus_for_each_dev+0xe1/0x140 [ 4.883114] ? subsys_dev_iter_exit+0x10/0x10 [ 4.883346] ? klist_node_init+0x61/0x80 [ 4.883557] bus_add_driver+0x254/0x2a0 [ 4.883764] driver_register+0xd3/0x150 [ 4.883971] ? 0xffffffffc0038000 [ 4.884149] do_one_initcall+0x84/0x250 [ 4.884355] ? trace_event_raw_event_initcall_finish+0x150/0x150 [ 4.884674] ? unpoison_range+0xf/0x30 [ 4.884875] ? ____kasan_kmalloc.constprop.0+0x84/0xa0 [ 4.885150] ? unpoison_range+0xf/0x30 [ 4.885352] ? unpoison_range+0xf/0x30 [ 4.885557] do_init_module+0xf8/0x350 [ 4.885760] load_module+0x3fe6/0x4340 [ 4.885960] ? vm_unmap_ram+0x1d0/0x1d0 [ 4.886166] ? ____kasan_kmalloc.constprop.0+0x84/0xa0 [ 4.886441] ? module_frob_arch_sections+0x20/0x20 [ 4.886697] ? __do_sys_finit_module+0x108/0x170 [ 4.886941] __do_sys_finit_module+0x108/0x170 [ 4.887178] ? __ia32_sys_init_module+0x40/0x40 [ 4.887419] ? file_open_root+0x200/0x200 [ 4.887634] ? do_sys_open+0x85/0xe0 [ 4.887826] ? filp_open+0x50/0x50 [ 4.888009] ? fpregs_assert_state_consistent+0x4d/0x60 [ 4.888287] ? exit_to_user_mode_prepare+0x2f/0x130 [ 4.888547] do_syscall_64+0x33/0x40 [ 4.888739] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 4.889010] RIP: 0033:0x7ff62fcf1cf7 [ 4.889202] Code: 48 89 57 30 48 8b 04 24 48 89 47 38 e9 1d a0 02 00 48 89 f8 48 89 f71 [ 4.890172] RSP: 002b:00007ffe6644ade8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.890570] RAX: ffffffffffffffda RBX: 0000000000f2ca70 RCX: 00007ff62fcf1cf7 [ 4.890944] RDX: 0000000000000000 RSI: 0000000000f2b9e0 RDI: 0000000000000003 [ 4.891318] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000001 [ 4.891691] R10: 00007ff62fd55300 R11: 0000000000000246 R12: 0000000000f2b9e0 [ 4.892064] R13: 0000000000000000 R14: 0000000000f2bdd0 R15: 0000000000000001 [ 4.892439] ================================================================== Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- drivers/atm/eni.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 316a9947541fe..b574cce98dc36 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2260,7 +2260,8 @@ out: return rc; err_eni_release: - eni_do_release(dev); + dev->phy = NULL; + iounmap(ENI_DEV(dev)->ioaddr); err_unregister: atm_dev_deregister(dev); err_free_consistent: -- GitLab From a2bd45834e83d6c5a04d397bde13d744a4812dfc Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sat, 27 Feb 2021 22:55:50 -0500 Subject: [PATCH 082/839] atm: lanai: dont run lanai_dev_close if not open lanai_dev_open() can fail. When it fail, lanai->base is unmapped and the pci device is disabled. The caller, lanai_init_one(), then tries to run atm_dev_deregister(). This will subsequently call lanai_dev_close() and use the already released MMIO area. To fix this issue, set the lanai->base to NULL if open fail, and test the flag in lanai_dev_close(). [ 8.324153] lanai: lanai_start() failed, err=19 [ 8.324819] lanai(itf 0): shutting down interface [ 8.325211] BUG: unable to handle page fault for address: ffffc90000180024 [ 8.325781] #PF: supervisor write access in kernel mode [ 8.326215] #PF: error_code(0x0002) - not-present page [ 8.326641] PGD 100000067 P4D 100000067 PUD 100139067 PMD 10013a067 PTE 0 [ 8.327206] Oops: 0002 [#1] SMP KASAN NOPTI [ 8.327557] CPU: 0 PID: 95 Comm: modprobe Not tainted 5.11.0-rc7-00090-gdcc0b49040c7 #12 [ 8.328229] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-4 [ 8.329145] RIP: 0010:lanai_dev_close+0x4f/0xe5 [lanai] [ 8.329587] Code: 00 48 c7 c7 00 d3 01 c0 e8 49 4e 0a c2 48 8d bd 08 02 00 00 e8 6e 52 14 c1 48 80 [ 8.330917] RSP: 0018:ffff8881029ef680 EFLAGS: 00010246 [ 8.331196] RAX: 000000000003fffe RBX: ffff888102fb4800 RCX: ffffffffc001a98a [ 8.331572] RDX: ffffc90000180000 RSI: 0000000000000246 RDI: ffff888102fb4000 [ 8.331948] RBP: ffff888102fb4000 R08: ffffffff8115da8a R09: ffffed102053deaa [ 8.332326] R10: 0000000000000003 R11: ffffed102053dea9 R12: ffff888102fb48a4 [ 8.332701] R13: ffffffffc00123c0 R14: ffff888102fb4b90 R15: ffff888102fb4b88 [ 8.333077] FS: 00007f08eb9056a0(0000) GS:ffff88815b400000(0000) knlGS:0000000000000000 [ 8.333502] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.333806] CR2: ffffc90000180024 CR3: 0000000102a28000 CR4: 00000000000006f0 [ 8.334182] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.334557] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.334932] Call Trace: [ 8.335066] atm_dev_deregister+0x161/0x1a0 [atm] [ 8.335324] lanai_init_one.cold+0x20c/0x96d [lanai] [ 8.335594] ? lanai_send+0x2a0/0x2a0 [lanai] [ 8.335831] local_pci_probe+0x6f/0xb0 [ 8.336039] pci_device_probe+0x171/0x240 [ 8.336255] ? pci_device_remove+0xe0/0xe0 [ 8.336475] ? kernfs_create_link+0xb6/0x110 [ 8.336704] ? sysfs_do_create_link_sd.isra.0+0x76/0xe0 [ 8.336983] really_probe+0x161/0x420 [ 8.337181] driver_probe_device+0x6d/0xd0 [ 8.337401] device_driver_attach+0x82/0x90 [ 8.337626] ? device_driver_attach+0x90/0x90 [ 8.337859] __driver_attach+0x60/0x100 [ 8.338065] ? device_driver_attach+0x90/0x90 [ 8.338298] bus_for_each_dev+0xe1/0x140 [ 8.338511] ? subsys_dev_iter_exit+0x10/0x10 [ 8.338745] ? klist_node_init+0x61/0x80 [ 8.338956] bus_add_driver+0x254/0x2a0 [ 8.339164] driver_register+0xd3/0x150 [ 8.339370] ? 0xffffffffc0028000 [ 8.339550] do_one_initcall+0x84/0x250 [ 8.339755] ? trace_event_raw_event_initcall_finish+0x150/0x150 [ 8.340076] ? free_vmap_area_noflush+0x1a5/0x5c0 [ 8.340329] ? unpoison_range+0xf/0x30 [ 8.340532] ? ____kasan_kmalloc.constprop.0+0x84/0xa0 [ 8.340806] ? unpoison_range+0xf/0x30 [ 8.341014] ? unpoison_range+0xf/0x30 [ 8.341217] do_init_module+0xf8/0x350 [ 8.341419] load_module+0x3fe6/0x4340 [ 8.341621] ? vm_unmap_ram+0x1d0/0x1d0 [ 8.341826] ? ____kasan_kmalloc.constprop.0+0x84/0xa0 [ 8.342101] ? module_frob_arch_sections+0x20/0x20 [ 8.342358] ? __do_sys_finit_module+0x108/0x170 [ 8.342604] __do_sys_finit_module+0x108/0x170 [ 8.342841] ? __ia32_sys_init_module+0x40/0x40 [ 8.343083] ? file_open_root+0x200/0x200 [ 8.343298] ? do_sys_open+0x85/0xe0 [ 8.343491] ? filp_open+0x50/0x50 [ 8.343675] ? exit_to_user_mode_prepare+0xfc/0x130 [ 8.343935] do_syscall_64+0x33/0x40 [ 8.344132] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 8.344401] RIP: 0033:0x7f08eb887cf7 [ 8.344594] Code: 48 89 57 30 48 8b 04 24 48 89 47 38 e9 1d a0 02 00 48 89 f8 48 89 f7 48 89 d6 41 [ 8.345565] RSP: 002b:00007ffcd5c98ad8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 8.345962] RAX: ffffffffffffffda RBX: 00000000008fea70 RCX: 00007f08eb887cf7 [ 8.346336] RDX: 0000000000000000 RSI: 00000000008fd9e0 RDI: 0000000000000003 [ 8.346711] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000001 [ 8.347085] R10: 00007f08eb8eb300 R11: 0000000000000246 R12: 00000000008fd9e0 [ 8.347460] R13: 0000000000000000 R14: 00000000008fddd0 R15: 0000000000000001 [ 8.347836] Modules linked in: lanai(+) atm [ 8.348065] CR2: ffffc90000180024 [ 8.348244] ---[ end trace 7fdc1c668f2003e5 ]--- [ 8.348490] RIP: 0010:lanai_dev_close+0x4f/0xe5 [lanai] [ 8.348772] Code: 00 48 c7 c7 00 d3 01 c0 e8 49 4e 0a c2 48 8d bd 08 02 00 00 e8 6e 52 14 c1 48 80 [ 8.349745] RSP: 0018:ffff8881029ef680 EFLAGS: 00010246 [ 8.350022] RAX: 000000000003fffe RBX: ffff888102fb4800 RCX: ffffffffc001a98a [ 8.350397] RDX: ffffc90000180000 RSI: 0000000000000246 RDI: ffff888102fb4000 [ 8.350772] RBP: ffff888102fb4000 R08: ffffffff8115da8a R09: ffffed102053deaa [ 8.351151] R10: 0000000000000003 R11: ffffed102053dea9 R12: ffff888102fb48a4 [ 8.351525] R13: ffffffffc00123c0 R14: ffff888102fb4b90 R15: ffff888102fb4b88 [ 8.351918] FS: 00007f08eb9056a0(0000) GS:ffff88815b400000(0000) knlGS:0000000000000000 [ 8.352343] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.352647] CR2: ffffc90000180024 CR3: 0000000102a28000 CR4: 00000000000006f0 [ 8.353022] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.353397] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.353958] modprobe (95) used greatest stack depth: 26216 bytes left Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- drivers/atm/lanai.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index d7277c26e4232..32d7aa141d966 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2233,6 +2233,7 @@ static int lanai_dev_open(struct atm_dev *atmdev) conf1_write(lanai); #endif iounmap(lanai->base); + lanai->base = NULL; error_pci: pci_disable_device(lanai->pci); error: @@ -2245,6 +2246,8 @@ static int lanai_dev_open(struct atm_dev *atmdev) static void lanai_dev_close(struct atm_dev *atmdev) { struct lanai_dev *lanai = (struct lanai_dev *) atmdev->dev_data; + if (lanai->base==NULL) + return; printk(KERN_INFO DEV_LABEL "(itf %d): shutting down interface\n", lanai->number); lanai_timed_poll_stop(lanai); @@ -2552,7 +2555,7 @@ static int lanai_init_one(struct pci_dev *pci, struct atm_dev *atmdev; int result; - lanai = kmalloc(sizeof(*lanai), GFP_KERNEL); + lanai = kzalloc(sizeof(*lanai), GFP_KERNEL); if (lanai == NULL) { printk(KERN_ERR DEV_LABEL ": couldn't allocate dev_data structure!\n"); -- GitLab From 8c91bc3d44dfef8284af384877fbe61117e8b7d1 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sun, 28 Feb 2021 23:25:43 +0300 Subject: [PATCH 083/839] sh_eth: fix TRSCER mask for SH771x According to the SH7710, SH7712, SH7713 Group User's Manual: Hardware, Rev. 3.00, the TRSCER register actually has only bit 7 valid (and named differently), with all the other bits reserved. Apparently, this was not the case with some early revisions of the manual as we have the other bits declared (and set) in the original driver. Follow the suit and add the explicit sh_eth_cpu_data::trscer_err_mask initializer for SH771x... Fixes: 86a74ff21a7a ("net: sh_eth: add support for Renesas SuperH Ethernet") Signed-off-by: Sergey Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 590b088bc4c7f..e79bb0a3ced52 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1089,6 +1089,9 @@ static struct sh_eth_cpu_data sh771x_data = { EESIPR_CEEFIP | EESIPR_CELFIP | EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP, + + .trscer_err_mask = DESC_I_RINT8, + .tsu = 1, .dual_port = 1, }; -- GitLab From 75be7fb7f978202c4c3a1a713af4485afb2ff5f6 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sun, 28 Feb 2021 23:26:34 +0300 Subject: [PATCH 084/839] sh_eth: fix TRSCER mask for R7S72100 According to the RZ/A1H Group, RZ/A1M Group User's Manual: Hardware, Rev. 4.00, the TRSCER register has bit 9 reserved, hence we can't use the driver's default TRSCER mask. Add the explicit initializer for sh_eth_cpu_data::trscer_err_mask for R7S72100. Fixes: db893473d313 ("sh_eth: Add support for r7s72100") Signed-off-by: Sergey Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e79bb0a3ced52..7f14d4aa5b3ef 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -560,6 +560,8 @@ static struct sh_eth_cpu_data r7s72100_data = { EESR_TDE, .fdr_value = 0x0000070f, + .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .no_psr = 1, .apr = 1, .mpr = 1, -- GitLab From 165bc5a4f30eee4735845aa7dbd6b738643f2603 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sun, 28 Feb 2021 23:27:32 +0300 Subject: [PATCH 085/839] sh_eth: fix TRSCER mask for R7S9210 According to the RZ/A2M Group User's Manual: Hardware, Rev. 2.00, the TRSCER register has bit 9 reserved, hence we can't use the driver's default TRSCER mask. Add the explicit initializer for sh_eth_cpu_data:: trscer_err_mask for R7S9210. Fixes: 6e0bb04d0e4f ("sh_eth: Add R7S9210 support") Signed-off-by: Sergey Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7f14d4aa5b3ef..f029c7c03804f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -782,6 +782,8 @@ static struct sh_eth_cpu_data r7s9210_data = { .fdr_value = 0x0000070f, + .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .apr = 1, .mpr = 1, .tpauser = 1, -- GitLab From 093b036aa94e01a0bea31a38d7f0ee28a2749023 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Mon, 1 Mar 2021 02:22:40 +0300 Subject: [PATCH 086/839] net/qrtr: fix __netdev_alloc_skb call syzbot found WARNING in __alloc_pages_nodemask()[1] when order >= MAX_ORDER. It was caused by a huge length value passed from userspace to qrtr_tun_write_iter(), which tries to allocate skb. Since the value comes from the untrusted source there is no need to raise a warning in __alloc_pages_nodemask(). [1] WARNING in __alloc_pages_nodemask+0x5f8/0x730 mm/page_alloc.c:5014 Call Trace: __alloc_pages include/linux/gfp.h:511 [inline] __alloc_pages_node include/linux/gfp.h:524 [inline] alloc_pages_node include/linux/gfp.h:538 [inline] kmalloc_large_node+0x60/0x110 mm/slub.c:3999 __kmalloc_node_track_caller+0x319/0x3f0 mm/slub.c:4496 __kmalloc_reserve net/core/skbuff.c:150 [inline] __alloc_skb+0x4e4/0x5a0 net/core/skbuff.c:210 __netdev_alloc_skb+0x70/0x400 net/core/skbuff.c:446 netdev_alloc_skb include/linux/skbuff.h:2832 [inline] qrtr_endpoint_post+0x84/0x11b0 net/qrtr/qrtr.c:442 qrtr_tun_write_iter+0x11f/0x1a0 net/qrtr/tun.c:98 call_write_iter include/linux/fs.h:1901 [inline] new_sync_write+0x426/0x650 fs/read_write.c:518 vfs_write+0x791/0xa30 fs/read_write.c:605 ksys_write+0x12d/0x250 fs/read_write.c:658 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+80dccaee7c6630fa9dcf@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Acked-by: Alexander Lobakin Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b34358282f379..82d2eb8c21d13 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (len == 0 || len & 3) return -EINVAL; - skb = netdev_alloc_skb(NULL, len); + skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN); if (!skb) return -ENOMEM; -- GitLab From 8bd2a05527349c8627d2b9795d3c7a6f76033676 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Mon, 1 Mar 2021 14:05:48 +0800 Subject: [PATCH 087/839] inetpeer: use div64_ul() and clamp_val() calculate inet_peer_threshold In inet_initpeers(), struct inet_peer on IA32 uses 128 bytes in nowdays. Get rid of the cascade and use div64_ul() and clamp_val() calculate that will not need to be adjusted in the future as suggested by Eric Dumazet. Suggested-by: Eric Dumazet Signed-off-by: Yejune Deng Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ff327a62c9ce9..da21dfce24d73 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. */ -int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more +int inet_peer_threshold __read_mostly; /* start to throw entries more * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ @@ -73,20 +73,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { - struct sysinfo si; + u64 nr_entries; - /* Use the straight interface to information about memory. */ - si_meminfo(&si); - /* The values below were suggested by Alexey Kuznetsov - * . I don't have any opinion about the values - * myself. --SAW - */ - if (si.totalram <= (32768*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ - if (si.totalram <= (16384*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* about 512KB */ - if (si.totalram <= (8192*1024)/PAGE_SIZE) - inet_peer_threshold >>= 2; /* about 128KB */ + /* 1% of physical memory */ + nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT, + 100 * L1_CACHE_ALIGN(sizeof(struct inet_peer))); + + inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128); peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), -- GitLab From c646d10dda2dcde82c6ce5a474522621ab2b8b19 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:11 +0200 Subject: [PATCH 088/839] net: enetc: don't overwrite the RSS indirection table when initializing After the blamed patch, all RX traffic gets hashed to CPU 0 because the hashing indirection table set up in: enetc_pf_probe -> enetc_alloc_si_resources -> enetc_configure_si -> enetc_setup_default_rss_table is overwritten later in: enetc_pf_probe -> enetc_init_port_rss_memory which zero-initializes the entire port RSS table in order to avoid ECC errors. The trouble really is that enetc_init_port_rss_memory really neads enetc_alloc_si_resources to be called, because it depends upon enetc_alloc_cbdr and enetc_setup_cbdr. But that whole enetc_configure_si thing could have been better thought out, it has nothing to do in a function called "alloc_si_resources", especially since its counterpart, "free_si_resources", does nothing to unwind the configuration of the SI. The point is, we need to pull out enetc_configure_si out of enetc_alloc_resources, and move it after enetc_init_port_rss_memory. This allows us to set up the default RSS indirection table after initializing the memory. Fixes: 07bf34a50e32 ("net: enetc: initialize the RFS and RSS memories") Cc: Jesse Brandeburg Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 11 +++-------- drivers/net/ethernet/freescale/enetc/enetc.h | 1 + drivers/net/ethernet/freescale/enetc/enetc_pf.c | 7 +++++++ drivers/net/ethernet/freescale/enetc/enetc_vf.c | 7 +++++++ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index c78d12229730b..fdb6b9e8da780 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1058,13 +1058,12 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) return 0; } -static int enetc_configure_si(struct enetc_ndev_priv *priv) +int enetc_configure_si(struct enetc_ndev_priv *priv) { struct enetc_si *si = priv->si; struct enetc_hw *hw = &si->hw; int err; - enetc_setup_cbdr(hw, &si->cbd_ring); /* set SI cache attributes */ enetc_wr(hw, ENETC_SICAR0, ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); @@ -1112,6 +1111,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) if (err) return err; + enetc_setup_cbdr(&si->hw, &si->cbd_ring); + priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules), GFP_KERNEL); if (!priv->cls_rules) { @@ -1119,14 +1120,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) goto err_alloc_cls; } - err = enetc_configure_si(priv); - if (err) - goto err_config_si; - return 0; -err_config_si: - kfree(priv->cls_rules); err_alloc_cls: enetc_clear_cbdr(&si->hw); enetc_free_cbdr(priv->dev, &si->cbd_ring); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 8532d23b54f5f..f8275cef3b5c4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -292,6 +292,7 @@ void enetc_get_si_caps(struct enetc_si *si); void enetc_init_si_rings_params(struct enetc_ndev_priv *priv); int enetc_alloc_si_resources(struct enetc_ndev_priv *priv); void enetc_free_si_resources(struct enetc_ndev_priv *priv); +int enetc_configure_si(struct enetc_ndev_priv *priv); int enetc_open(struct net_device *ndev); int enetc_close(struct net_device *ndev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 515c5b29d7aab..d02ecb2e46ae0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1108,6 +1108,12 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_init_port_rss; } + err = enetc_configure_si(priv); + if (err) { + dev_err(&pdev->dev, "Failed to configure SI\n"); + goto err_config_si; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -1136,6 +1142,7 @@ err_phylink_create: enetc_mdiobus_destroy(pf); err_mdiobus_create: enetc_free_msix(priv); +err_config_si: err_init_port_rss: err_init_port_rfs: err_alloc_msix: diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 39c1a09e69a95..9b755a84c2d62 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -171,6 +171,12 @@ static int enetc_vf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } + err = enetc_configure_si(priv); + if (err) { + dev_err(&pdev->dev, "Failed to configure SI\n"); + goto err_config_si; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -187,6 +193,7 @@ static int enetc_vf_probe(struct pci_dev *pdev, err_reg_netdev: enetc_free_msix(priv); +err_config_si: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: -- GitLab From 3222b5b613db558e9a494bbf53f3c984d90f71ea Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:12 +0200 Subject: [PATCH 089/839] net: enetc: initialize RFS/RSS memories for unused ports too Michael reports that since linux-next-20210211, the AER messages for ECC errors have started reappearing, and this time they can be reliably reproduced with the first ping on one of his LS1028A boards. $ ping 1[ 33.258069] pcieport 0000:00:1f.0: AER: Multiple Corrected error received: 0000:00:00.0 72.16.0.1 PING [ 33.267050] pcieport 0000:00:1f.0: AER: can't find device of ID0000 172.16.0.1 (172.16.0.1): 56 data bytes 64 bytes from 172.16.0.1: seq=0 ttl=64 time=17.124 ms 64 bytes from 172.16.0.1: seq=1 ttl=64 time=0.273 ms $ devmem 0x1f8010e10 32 0xC0000006 It isn't clear why this is necessary, but it seems that for the errors to go away, we must clear the entire RFS and RSS memory, not just for the ports in use. Sadly the code is structured in such a way that we can't have unified logic for the used and unused ports. For the minimal initialization of an unused port, we need just to enable and ioremap the PF memory space, and a control buffer descriptor ring. Unused ports must then free the CBDR because the driver will exit, but used ports can not pick up from where that code path left, since the CBDR API does not reinitialize a ring when setting it up, so its producer and consumer indices are out of sync between the software and hardware state. So a separate enetc_init_unused_port function was created, and it gets called right after the PF memory space is enabled. Fixes: 07bf34a50e32 ("net: enetc: initialize the RFS and RSS memories") Reported-by: Michael Walle Cc: Jesse Brandeburg Signed-off-by: Vladimir Oltean Tested-by: Michael Walle Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++--- drivers/net/ethernet/freescale/enetc/enetc.h | 4 +++ .../net/ethernet/freescale/enetc/enetc_pf.c | 33 ++++++++++++++++--- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index fdb6b9e8da780..eb45830a1667a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -984,7 +984,7 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv) enetc_free_tx_ring(priv->tx_ring[i]); } -static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) { int size = cbdr->bd_count * sizeof(struct enetc_cbd); @@ -1005,7 +1005,7 @@ static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) return 0; } -static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) { int size = cbdr->bd_count * sizeof(struct enetc_cbd); @@ -1013,7 +1013,7 @@ static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) cbdr->bd_base = NULL; } -static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) +void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) { /* set CBDR cache attributes */ enetc_wr(hw, ENETC_SICAR2, @@ -1033,7 +1033,7 @@ static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) cbdr->cir = hw->reg + ENETC_SICBDRCIR; } -static void enetc_clear_cbdr(struct enetc_hw *hw) +void enetc_clear_cbdr(struct enetc_hw *hw) { enetc_wr(hw, ENETC_SICBDRMR, 0); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index f8275cef3b5c4..8b380fc13314a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -310,6 +310,10 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, void enetc_set_ethtool_ops(struct net_device *ndev); /* control buffer descriptor ring (CBDR) */ +int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr); +void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr); +void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr); +void enetc_clear_cbdr(struct enetc_hw *hw); int enetc_set_mac_flt_entry(struct enetc_si *si, int index, char *mac_addr, int si_map); int enetc_clear_mac_flt_entry(struct enetc_si *si, int index); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index d02ecb2e46ae0..62ba4bf56f0d9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1041,6 +1041,26 @@ static int enetc_init_port_rss_memory(struct enetc_si *si) return err; } +static void enetc_init_unused_port(struct enetc_si *si) +{ + struct device *dev = &si->pdev->dev; + struct enetc_hw *hw = &si->hw; + int err; + + si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE; + err = enetc_alloc_cbdr(dev, &si->cbd_ring); + if (err) + return; + + enetc_setup_cbdr(hw, &si->cbd_ring); + + enetc_init_port_rfs_memory(si); + enetc_init_port_rss_memory(si); + + enetc_clear_cbdr(hw); + enetc_free_cbdr(dev, &si->cbd_ring); +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1051,11 +1071,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, struct enetc_pf *pf; int err; - if (node && !of_device_is_available(node)) { - dev_info(&pdev->dev, "device is disabled, skipping\n"); - return -ENODEV; - } - err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); if (err) { dev_err(&pdev->dev, "PCI probing failed\n"); @@ -1069,6 +1084,13 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_map_pf_space; } + if (node && !of_device_is_available(node)) { + enetc_init_unused_port(si); + dev_info(&pdev->dev, "device is disabled, skipping\n"); + err = -ENODEV; + goto err_device_disabled; + } + pf = enetc_si_priv(si); pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); @@ -1151,6 +1173,7 @@ err_alloc_si_res: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: +err_device_disabled: err_map_pf_space: enetc_pci_remove(pdev); -- GitLab From 6d36ecdbc4410e61a0e02adc5d3abeee22a8ffd3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:13 +0200 Subject: [PATCH 090/839] net: enetc: take the MDIO lock only once per NAPI poll cycle The workaround for the ENETC MDIO erratum caused a performance degradation of 82 Kpps (seen with IP forwarding of two 1Gbps streams of 64B packets). This is due to excessive locking and unlocking in the fast path, which can be avoided. By taking the MDIO read-side lock only once per NAPI poll cycle, we are able to regain 54 Kpps (65%) of the performance hit. The rest of the performance degradation comes from the TX data path, but unfortunately it doesn't look like we can optimize that away easily, even with netdev_xmit_more(), there just isn't any skb batching done, to help with taking the MDIO lock less often than once per packet. We need to change the register accessor type for enetc_get_tx_tstamp, because it now runs under the enetc_lock_mdio as per the new call path detailed below: enetc_msix -> napi_schedule -> enetc_poll -> enetc_lock_mdio -> enetc_clean_tx_ring -> enetc_get_tx_tstamp -> enetc_clean_rx_ring -> enetc_unlock_mdio Fixes: fd5736bf9f23 ("enetc: Workaround for MDIO register access issue") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 31 ++++++------------- .../net/ethernet/freescale/enetc/enetc_hw.h | 2 ++ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index eb45830a1667a..9bcceb74fb9c4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -281,6 +281,8 @@ static int enetc_poll(struct napi_struct *napi, int budget) int work_done; int i; + enetc_lock_mdio(); + for (i = 0; i < v->count_tx_rings; i++) if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) complete = false; @@ -291,8 +293,10 @@ static int enetc_poll(struct napi_struct *napi, int budget) if (work_done) v->rx_napi_work = true; - if (!complete) + if (!complete) { + enetc_unlock_mdio(); return budget; + } napi_complete_done(napi, work_done); @@ -301,8 +305,6 @@ static int enetc_poll(struct napi_struct *napi, int budget) v->rx_napi_work = false; - enetc_lock_mdio(); - /* enable interrupts */ enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); @@ -327,8 +329,8 @@ static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, { u32 lo, hi, tstamp_lo; - lo = enetc_rd(hw, ENETC_SICTR0); - hi = enetc_rd(hw, ENETC_SICTR1); + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = enetc_rd_hot(hw, ENETC_SICTR1); tstamp_lo = le32_to_cpu(txbd->wb.tstamp); if (lo <= tstamp_lo) hi -= 1; @@ -358,9 +360,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) i = tx_ring->next_to_clean; tx_swbd = &tx_ring->tx_swbd[i]; - enetc_lock_mdio(); bds_to_clean = enetc_bd_ready_count(tx_ring, i); - enetc_unlock_mdio(); do_tstamp = false; @@ -403,8 +403,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) tx_swbd = tx_ring->tx_swbd; } - enetc_lock_mdio(); - /* BD iteration loop end */ if (is_eof) { tx_frm_cnt++; @@ -415,8 +413,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (unlikely(!bds_to_clean)) bds_to_clean = enetc_bd_ready_count(tx_ring, i); - - enetc_unlock_mdio(); } tx_ring->next_to_clean = i; @@ -660,8 +656,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, u32 bd_status; u16 size; - enetc_lock_mdio(); - if (cleaned_cnt >= ENETC_RXBD_BUNDLE) { int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt); @@ -672,19 +666,15 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); - if (!bd_status) { - enetc_unlock_mdio(); + if (!bd_status) break; - } enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); dma_rmb(); /* for reading other rxbd fields */ size = le16_to_cpu(rxbd->r.buf_len); skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); - if (!skb) { - enetc_unlock_mdio(); + if (!skb) break; - } enetc_get_offloads(rx_ring, rxbd, skb); @@ -696,7 +686,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, if (unlikely(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { - enetc_unlock_mdio(); dev_kfree_skb(skb); while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { dma_rmb(); @@ -736,8 +725,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, enetc_process_skb(rx_ring, skb); - enetc_unlock_mdio(); - napi_gro_receive(napi, skb); rx_frm_cnt++; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index c71fe8d751d50..8b54562f5da60 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -453,6 +453,8 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg) #define enetc_wr_reg(reg, val) _enetc_wr_reg_wa((reg), (val)) #define enetc_rd(hw, off) enetc_rd_reg((hw)->reg + (off)) #define enetc_wr(hw, off, val) enetc_wr_reg((hw)->reg + (off), val) +#define enetc_rd_hot(hw, off) enetc_rd_reg_hot((hw)->reg + (off)) +#define enetc_wr_hot(hw, off, val) enetc_wr_reg_hot((hw)->reg + (off), val) #define enetc_rd64(hw, off) _enetc_rd_reg64_wa((hw)->reg + (off)) /* port register accessors - PF only */ #define enetc_port_rd(hw, off) enetc_rd_reg((hw)->port + (off)) -- GitLab From 827b6fd046516af605e190c872949f22208b5d41 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:14 +0200 Subject: [PATCH 091/839] net: enetc: fix incorrect TPID when receiving 802.1ad tagged packets When the enetc ports have rx-vlan-offload enabled, they report a TPID of ETH_P_8021Q regardless of what was actually in the packet. When rx-vlan-offload is disabled, packets have the proper TPID. Fix this inconsistency by finishing the TODO left in the code. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 34 ++++++++++++++----- .../net/ethernet/freescale/enetc/enetc_hw.h | 3 ++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9bcceb74fb9c4..8ddf0cdc37a5a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -523,9 +523,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev, static void enetc_get_offloads(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd, struct sk_buff *skb) { -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); -#endif + /* TODO: hashing */ if (rx_ring->ndev->features & NETIF_F_RXCSUM) { u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum); @@ -534,12 +533,31 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, skb->ip_summed = CHECKSUM_COMPLETE; } - /* copy VLAN to skb, if one is extracted, for now we assume it's a - * standard TPID, but HW also supports custom values - */ - if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - le16_to_cpu(rxbd->r.vlan_opt)); + if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) { + __be16 tpid = 0; + + switch (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TPID) { + case 0: + tpid = htons(ETH_P_8021Q); + break; + case 1: + tpid = htons(ETH_P_8021AD); + break; + case 2: + tpid = htons(enetc_port_rd(&priv->si->hw, + ENETC_PCVLANR1)); + break; + case 3: + tpid = htons(enetc_port_rd(&priv->si->hw, + ENETC_PCVLANR2)); + break; + default: + break; + } + + __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt)); + } + #ifdef CONFIG_FSL_ENETC_PTP_CLOCK if (priv->active_offloads & ENETC_F_RX_TSTAMP) enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 8b54562f5da60..a62604a1e54ed 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -172,6 +172,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PSIPMAR0(n) (0x0100 + (n) * 0x8) /* n = SI index */ #define ENETC_PSIPMAR1(n) (0x0104 + (n) * 0x8) #define ENETC_PVCLCTR 0x0208 +#define ENETC_PCVLANR1 0x0210 +#define ENETC_PCVLANR2 0x0214 #define ENETC_VLAN_TYPE_C BIT(0) #define ENETC_VLAN_TYPE_S BIT(1) #define ENETC_PVCLCTR_OVTPIDL(bmp) ((bmp) & 0xff) /* VLAN_TYPE */ @@ -570,6 +572,7 @@ union enetc_rx_bd { #define ENETC_RXBD_LSTATUS(flags) ((flags) << 16) #define ENETC_RXBD_FLAG_VLAN BIT(9) #define ENETC_RXBD_FLAG_TSTMP BIT(10) +#define ENETC_RXBD_FLAG_TPID GENMASK(1, 0) #define ENETC_MAC_ADDR_FILT_CNT 8 /* # of supported entries per port */ #define EMETC_MAC_ADDR_FILT_RES 3 /* # of reserved entries at the beginning */ -- GitLab From a74dbce9d4541888fe0d39afe69a3a95004669b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:15 +0200 Subject: [PATCH 092/839] net: enetc: don't disable VLAN filtering in IFF_PROMISC mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting from the blamed commit: In promiscuous mode, it is more intuitive that all traffic is received, including VLAN tagged traffic. It appears that it is necessary to set the flag in PSIPVMR for that to be the case, so VLAN promiscuous mode is also temporarily enabled. On exit from promiscuous mode, the setting made by ethtool is restored. Intuitive or not, there isn't any definition issued by a standards body which says that promiscuity has anything to do with VLAN filtering - it only has to do with accepting packets regardless of destination MAC address. In fact people are already trying to use this misunderstanding/bug of the enetc driver as a justification to transform promiscuity into something it never was about: accepting every packet (maybe that would be the "rx-all" netdev feature?): https://lore.kernel.org/netdev/20201110153958.ci5ekor3o2ekg3ky@ipetronik.com/ This is relevant because there are use cases in the kernel (such as tc-flower rules with the protocol 802.1Q and a vlan_id key) which do not (yet) use the vlan_vid_add API to be compatible with VLAN-filtering NICs such as enetc, so for those, disabling rx-vlan-filter is currently the only right solution to make these setups work: https://lore.kernel.org/netdev/CA+h21hoxwRdhq4y+w8Kwgm74d4cA0xLeiHTrmT-VpSaM7obhkg@mail.gmail.com/ The blamed patch has unintentionally introduced one more way for this to work, which is to enable IFF_PROMISC, however this is non-portable because port promiscuity is not meant to disable VLAN filtering. Therefore, it could invite people to write broken scripts for enetc, and then wonder why they are broken when migrating to other drivers that don't handle promiscuity in the same way. Fixes: 7070eea5e95a ("enetc: permit configuration of rx-vlan-filter with ethtool") Cc: Markus Blöchl Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 62ba4bf56f0d9..49681a0566ed9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -190,7 +190,6 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_pf *pf = enetc_si_priv(priv->si); - char vlan_promisc_simap = pf->vlan_promisc_simap; struct enetc_hw *hw = &priv->si->hw; bool uprom = false, mprom = false; struct enetc_mac_filter *filter; @@ -203,16 +202,12 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0); uprom = true; mprom = true; - /* Enable VLAN promiscuous mode for SI0 (PF) */ - vlan_promisc_simap |= BIT(0); } else if (ndev->flags & IFF_ALLMULTI) { /* enable multi cast promisc mode for SI0 (PF) */ psipmr = ENETC_PSIPMR_SET_MP(0); mprom = true; } - enetc_set_vlan_promisc(&pf->si->hw, vlan_promisc_simap); - /* first 2 filter entries belong to PF */ if (!uprom) { /* Update unicast filters */ -- GitLab From c76a97218dcbb2cb7cec1404ace43ef96c87d874 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:16 +0200 Subject: [PATCH 093/839] net: enetc: force the RGMII speed and duplex instead of operating in inband mode The ENETC port 0 MAC supports in-band status signaling coming from a PHY when operating in RGMII mode, and this feature is enabled by default. It has been reported that RGMII is broken in fixed-link, and that is not surprising considering the fact that no PHY is attached to the MAC in that case, but a switch. This brings us to the topic of the patch: the enetc driver should have not enabled the optional in-band status signaling for RGMII unconditionally, but should have forced the speed and duplex to what was resolved by phylink. Note that phylink does not accept the RGMII modes as valid for in-band signaling, and these operate a bit differently than 1000base-x and SGMII (notably there is no clause 37 state machine so no ACK required from the MAC, instead the PHY sends extra code words on RXD[3:0] whenever it is not transmitting something else, so it should be safe to leave a PHY with this option unconditionally enabled even if we ignore it). The spec talks about this here: https://e2e.ti.com/cfs-file/__key/communityserver-discussions-components-files/138/RGMIIv1_5F00_3.pdf Fixes: 71b77a7a27a3 ("enetc: Migrate to PHYLINK and PCS_LYNX") Cc: Florian Fainelli Cc: Andrew Lunn Cc: Russell King Signed-off-by: Vladimir Oltean Acked-by: Russell King Signed-off-by: David S. Miller --- .../net/ethernet/freescale/enetc/enetc_hw.h | 13 +++-- .../net/ethernet/freescale/enetc/enetc_pf.c | 53 ++++++++++++++++--- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index a62604a1e54ed..de0d20b0f489c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -238,10 +238,17 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM_IMDIO_BASE 0x8030 #define ENETC_PM0_IF_MODE 0x8300 -#define ENETC_PMO_IFM_RG BIT(2) +#define ENETC_PM0_IFM_RG BIT(2) #define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) -#define ENETC_PM0_IFM_RGAUTO (BIT(15) | ENETC_PMO_IFM_RG | BIT(1)) -#define ENETC_PM0_IFM_XGMII BIT(12) +#define ENETC_PM0_IFM_EN_AUTO BIT(15) +#define ENETC_PM0_IFM_SSP_MASK GENMASK(14, 13) +#define ENETC_PM0_IFM_SSP_1000 (2 << 13) +#define ENETC_PM0_IFM_SSP_100 (0 << 13) +#define ENETC_PM0_IFM_SSP_10 (1 << 13) +#define ENETC_PM0_IFM_FULL_DPX BIT(12) +#define ENETC_PM0_IFM_IFMODE_MASK GENMASK(1, 0) +#define ENETC_PM0_IFM_IFMODE_XGMII 0 +#define ENETC_PM0_IFM_IFMODE_GMII 2 #define ENETC_PSIDCAPR 0x1b08 #define ENETC_PSIDCAPR_MSK GENMASK(15, 0) #define ENETC_PSFCAPR 0x1b18 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 49681a0566ed9..ca02f033bea21 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -315,7 +315,7 @@ static void enetc_set_loopback(struct net_device *ndev, bool en) u32 reg; reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE); - if (reg & ENETC_PMO_IFM_RG) { + if (reg & ENETC_PM0_IFM_RG) { /* RGMII mode */ reg = (reg & ~ENETC_PM0_IFM_RLP) | (en ? ENETC_PM0_IFM_RLP : 0); @@ -494,13 +494,20 @@ static void enetc_configure_port_mac(struct enetc_hw *hw) static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) { - /* set auto-speed for RGMII */ - if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG || - phy_interface_mode_is_rgmii(phy_mode)) - enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO); + u32 val; + + if (phy_interface_mode_is_rgmii(phy_mode)) { + val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); + val &= ~ENETC_PM0_IFM_EN_AUTO; + val &= ENETC_PM0_IFM_IFMODE_MASK; + val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG; + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); + } - if (phy_mode == PHY_INTERFACE_MODE_USXGMII) - enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII); + if (phy_mode == PHY_INTERFACE_MODE_USXGMII) { + val = ENETC_PM0_IFM_FULL_DPX | ENETC_PM0_IFM_IFMODE_XGMII; + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); + } } static void enetc_mac_enable(struct enetc_hw *hw, bool en) @@ -932,6 +939,34 @@ static void enetc_pl_mac_config(struct phylink_config *config, phylink_set_pcs(priv->phylink, &pf->pcs->pcs); } +static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex) +{ + u32 old_val, val; + + old_val = val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); + + if (speed == SPEED_1000) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_1000; + } else if (speed == SPEED_100) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_100; + } else if (speed == SPEED_10) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_10; + } + + if (duplex == DUPLEX_FULL) + val |= ENETC_PM0_IFM_FULL_DPX; + else + val &= ~ENETC_PM0_IFM_FULL_DPX; + + if (val == old_val) + return; + + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); +} + static void enetc_pl_mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, @@ -944,6 +979,10 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, if (priv->active_offloads & ENETC_F_QBV) enetc_sched_speed_set(priv, speed); + if (!phylink_autoneg_inband(mode) && + phy_interface_mode_is_rgmii(interface)) + enetc_force_rgmii_mac(&pf->si->hw, speed, duplex); + enetc_mac_enable(&pf->si->hw, true); } -- GitLab From 96a5223b918c8b79270fc0fec235a7ebad459098 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:17 +0200 Subject: [PATCH 094/839] net: enetc: remove bogus write to SIRXIDR from enetc_setup_rxbdr The Station Interface Receive Interrupt Detect Register (SIRXIDR) contains a 16-bit wide mask of 'interrupt detected' events for each ring associated with a port. Bit i is write-1-to-clean for RX ring i. I have no explanation whatsoever how this line of code came to be inserted in the blamed commit. I checked the downstream versions of that patch and none of them have it. The somewhat comical aspect of it is that we're writing a binary number to the SIRXIDR register, which is derived from enetc_bd_unused(rx_ring). Since the RX rings have 512 buffer descriptors, we end up writing 511 to this register, which is 0x1ff, so we are effectively clearing the 'interrupt detected' event for rings 0-8. This register is not what is used for interrupt handling though - it only provides a summary for the entire SI. The hardware provides one separate Interrupt Detect Register per RX ring, which auto-clears upon read. So there doesn't seem to be any adverse effect caused by this bogus write. There is, however, one reason why this should be handled as a bugfix: next_to_clean _should_ be committed to hardware, just not to that register, and this was obscuring the fact that it wasn't. This is fixed in the next patch, and removing the bogus line now allows the fix patch to be backported beyond that point. Fixes: fd5736bf9f23 ("enetc: Workaround for MDIO register access issue") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 8ddf0cdc37a5a..abb29ee814637 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1212,7 +1212,6 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) rx_ring->idr = hw->reg + ENETC_SIRXIDR; enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); - enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use); /* enable ring */ enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); -- GitLab From 3a5d12c9be6f30080600c8bacaf310194e37d029 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 1 Mar 2021 13:18:18 +0200 Subject: [PATCH 095/839] net: enetc: keep RX ring consumer index in sync with hardware The RX rings have a producer index owned by hardware, where newly received frame buffers are placed, and a consumer index owned by software, where newly allocated buffers are placed, in expectation of hardware being able to place frame data in them. Hardware increments the producer index when a frame is received, however it is not allowed to increment the producer index to match the consumer index (RBCIR) since the ring can hold at most RBLENR[LENGTH]-1 received BDs. Whenever the producer index matches the value of the consumer index, the ring has no unprocessed received frames and all BDs in the ring have been initialized/prepared by software, i.e. hardware owns all BDs in the ring. The code uses the next_to_clean variable to keep track of the producer index, and the next_to_use variable to keep track of the consumer index. The RX rings are seeded from enetc_refill_rx_ring, which is called from two places: 1. initially the ring is seeded until full with enetc_bd_unused(rx_ring), i.e. with 511 buffers. This will make next_to_clean=0 and next_to_use=511: .ndo_open -> enetc_open -> enetc_setup_bdrs -> enetc_setup_rxbdr -> enetc_refill_rx_ring 2. then during the data path processing, it is refilled with 16 buffers at a time: enetc_msix -> napi_schedule -> enetc_poll -> enetc_clean_rx_ring -> enetc_refill_rx_ring There is just one problem: the initial seeding done during .ndo_open updates just the producer index (ENETC_RBPIR) with 0, and the software next_to_clean and next_to_use variables. Notably, it will not update the consumer index to make the hardware aware of the newly added buffers. Wait, what? So how does it work? Well, the reset values of the producer index and of the consumer index of a ring are both zero. As per the description in the second paragraph, it means that the ring is full of buffers waiting for hardware to put frames in them, which by coincidence is almost true, because we have in fact seeded 511 buffers into the ring. But will the hardware attempt to access the 512th entry of the ring, which has an invalid BD in it? Well, no, because in order to do that, it would have to first populate the first 511 entries, and the NAPI enetc_poll will kick in by then. Eventually, after 16 processed slots have become available in the RX ring, enetc_clean_rx_ring will call enetc_refill_rx_ring and then will [ finally ] update the consumer index with the new software next_to_use variable. From now on, the next_to_clean and next_to_use variables are in sync with the producer and consumer ring indices. So the day is saved, right? Well, not quite. Freeing the memory allocated for the rings is done in: enetc_close -> enetc_clear_bdrs -> enetc_clear_rxbdr -> this just disables the ring -> enetc_free_rxtx_rings -> enetc_free_rx_ring -> sets next_to_clean and next_to_use to 0 but again, nothing is committed to the hardware producer and consumer indices (yay!). The assumption is that the ring is disabled, so the indices don't matter anyway, and it's the responsibility of the "open" code path to set those up. .. Except that the "open" code path does not set those up properly. While initially, things almost work, during subsequent enetc_close -> enetc_open sequences, we have problems. To be precise, the enetc_open that is subsequent to enetc_close will again refill the ring with 511 entries, but it will leave the consumer index untouched. Untouched means, of course, equal to the value it had before disabling the ring and draining the old buffers in enetc_close. But as mentioned, enetc_setup_rxbdr will at least update the producer index though, through this line of code: enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); so at this stage we'll have: next_to_clean=0 (in hardware 0) next_to_use=511 (in hardware we'll have the refill index prior to enetc_close) Again, the next_to_clean and producer index are in sync and set to correct values, so the driver manages to limp on. Eventually, 16 ring entries will be consumed by enetc_poll, and the savior enetc_clean_rx_ring will come and call enetc_refill_rx_ring, and then update the hardware consumer ring based upon the new next_to_use. So.. it works? Well, by coincidence, it almost does, but there's a circumstance where enetc_clean_rx_ring won't be there to save us. If the previous value of the consumer index was 15, there's a problem, because the NAPI poll sequence will only issue a refill when 16 or more buffers have been consumed. It's easiest to illustrate this with an example: ip link set eno0 up ip addr add 192.168.100.1/24 dev eno0 ping 192.168.100.1 -c 20 # ping this port from another board ip link set eno0 down ip link set eno0 up ping 192.168.100.1 -c 20 # ping it again from the same other board One by one: 1. ip link set eno0 up -> calls enetc_setup_rxbdr: -> calls enetc_refill_rx_ring(511 buffers) -> next_to_clean=0 (in hw 0) -> next_to_use=511 (in hw 0) 2. ping 192.168.100.1 -c 20 # ping this port from another board enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 0 (in hw 1) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 1 (in hw 2) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 2 (in hw 3) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 3 (in hw 4) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 4 (in hw 5) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 5 (in hw 6) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=7 next_to_clean 6 (in hw 7) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=8 next_to_clean 7 (in hw 8) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=9 next_to_clean 8 (in hw 9) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=10 next_to_clean 9 (in hw 10) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=11 next_to_clean 10 (in hw 11) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=12 next_to_clean 11 (in hw 12) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=13 next_to_clean 12 (in hw 13) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=14 next_to_clean 13 (in hw 14) next_to_use 511 (in hw 0) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=15 next_to_clean 14 (in hw 15) next_to_use 511 (in hw 0) enetc_clean_rx_ring: enetc_refill_rx_ring(16) increments next_to_use by 16 (mod 512) and writes it to hw enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=0 next_to_clean 15 (in hw 16) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 16 (in hw 17) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 17 (in hw 18) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 18 (in hw 19) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 19 (in hw 20) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 20 (in hw 21) next_to_use 15 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 21 (in hw 22) next_to_use 15 (in hw 15) 20 packets transmitted, 20 packets received, 0% packet loss 3. ip link set eno0 down enetc_free_rx_ring: next_to_clean 0 (in hw 22), next_to_use 0 (in hw 15) 4. ip link set eno0 up -> calls enetc_setup_rxbdr: -> calls enetc_refill_rx_ring(511 buffers) -> next_to_clean=0 (in hw 0) -> next_to_use=511 (in hw 15) 5. ping 192.168.100.1 -c 20 # ping it again from the same other board enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=1 next_to_clean 0 (in hw 1) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=2 next_to_clean 1 (in hw 2) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=3 next_to_clean 2 (in hw 3) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=4 next_to_clean 3 (in hw 4) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=5 next_to_clean 4 (in hw 5) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=6 next_to_clean 5 (in hw 6) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=7 next_to_clean 6 (in hw 7) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=8 next_to_clean 7 (in hw 8) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=9 next_to_clean 8 (in hw 9) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=10 next_to_clean 9 (in hw 10) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=11 next_to_clean 10 (in hw 11) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=12 next_to_clean 11 (in hw 12) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=13 next_to_clean 12 (in hw 13) next_to_use 511 (in hw 15) enetc_clean_rx_ring: rx_frm_cnt=1 cleaned_cnt=14 next_to_clean 13 (in hw 14) next_to_use 511 (in hw 15) 20 packets transmitted, 12 packets received, 40% packet loss And there it dies. No enetc_refill_rx_ring (because cleaned_cnt must be equal to 15 for that to happen), no nothing. The hardware enters the condition where the producer (14) + 1 is equal to the consumer (15) index, which makes it believe it has no more free buffers to put packets in, so it starts discarding them: ip netns exec ns0 ethtool -S eno0 | grep -v ': 0' NIC statistics: Rx ring 0 discarded frames: 8 Summarized, if the interface receives between 16 and 32 (mod 512) frames and then there is a link flap, then the port will eventually die with no way to recover. If it receives less than 16 (mod 512) frames, then the initial NAPI poll [ before the link flap ] will not update the consumer index in hardware (it will remain zero) which will be ok when the buffers are later reinitialized. If more than 32 (mod 512) frames are received, the initial NAPI poll has the chance to refill the ring twice, updating the consumer index to at least 32. So after the link flap, the consumer index is still wrong, but the post-flap NAPI poll gets a chance to refill the ring once (because it passes through cleaned_cnt=15) and makes the consumer index be again back in sync with next_to_use. The solution to this problem is actually simple, we just need to write next_to_use into the hardware consumer index at enetc_open time, which always brings it back in sync after an initial buffer seeding process. The simpler thing would be to put the write to the consumer index into enetc_refill_rx_ring directly, but there are issues with the MDIO locking: in the NAPI poll code we have the enetc_lock_mdio() taken from top-level and we use the unlocked enetc_wr_reg_hot, whereas in enetc_open, the enetc_lock_mdio() is not taken at the top level, but instead by each individual enetc_wr_reg, so we are forced to put an additional enetc_wr_reg in enetc_setup_rxbdr. Better organization of the code is left as a refactoring exercise. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index abb29ee814637..30d7d4e83900b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1212,6 +1212,8 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) rx_ring->idr = hw->reg + ENETC_SIRXIDR; enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); + /* update ENETC's consumer index */ + enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use); /* enable ring */ enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); -- GitLab From 2353db75c3db1dd26ff9c8feccfd3543a9cb73be Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 1 Mar 2021 21:28:23 +0900 Subject: [PATCH 096/839] docs: networking: bonding.rst Fix a typo in bonding.rst This patch fixes a spelling typo in bonding.rst. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- Documentation/networking/bonding.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 5f690f0ad0e4f..62f2aab8eaec0 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -1988,7 +1988,7 @@ netif_carrier. If use_carrier is 0, then the MII monitor will first query the device's (via ioctl) MII registers and check the link state. If that request fails (not just that it returns carrier down), then the MII -monitor will make an ethtool ETHOOL_GLINK request to attempt to obtain +monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain the same information. If both methods fail (i.e., the driver either does not support or had some error in processing both the MII register and ethtool requests), then the MII monitor will assume the link is -- GitLab From b228c9b058760500fda5edb3134527f629fc2dc3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 1 Mar 2021 15:09:44 +0000 Subject: [PATCH 097/839] net: expand textsearch ts_state to fit skb_seq_state The referenced commit expands the skb_seq_state used by skb_find_text with a 4B frag_off field, growing it to 48B. This exceeds container ts_state->cb, causing a stack corruption: [ 73.238353] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: skb_find_text+0xc5/0xd0 [ 73.247384] CPU: 1 PID: 376 Comm: nping Not tainted 5.11.0+ #4 [ 73.252613] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 73.260078] Call Trace: [ 73.264677] dump_stack+0x57/0x6a [ 73.267866] panic+0xf6/0x2b7 [ 73.270578] ? skb_find_text+0xc5/0xd0 [ 73.273964] __stack_chk_fail+0x10/0x10 [ 73.277491] skb_find_text+0xc5/0xd0 [ 73.280727] string_mt+0x1f/0x30 [ 73.283639] ipt_do_table+0x214/0x410 The struct is passed between skb_find_text and its callbacks skb_prepare_seq_read, skb_seq_read and skb_abort_seq read through the textsearch interface using TS_SKB_CB. I assumed that this mapped to skb->cb like other .._SKB_CB wrappers. skb->cb is 48B. But it maps to ts_state->cb, which is only 40B. skb->cb was increased from 40B to 48B after ts_state was introduced, in commit 3e3850e989c5 ("[NETFILTER]: Fix xfrm lookup in ip_route_me_harder/ip6_route_me_harder"). Increase ts_state.cb[] to 48 to fit the struct. Also add a BUILD_BUG_ON to avoid a repeat. The alternative is to directly add a dependency from textsearch onto linux/skbuff.h, but I think the intent is textsearch to have no such dependencies on its callers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=211911 Fixes: 97550f6fa592 ("net: compound page support in skb_seq_read") Reported-by: Kris Karas Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/textsearch.h | 2 +- net/core/skbuff.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 13770cfe33ad8..6673e4d4ac2e1 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -23,7 +23,7 @@ struct ts_config; struct ts_state { unsigned int offset; - char cb[40]; + char cb[48]; }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 545a472273a50..c421c8f809256 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, struct ts_state state; unsigned int ret; + BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; -- GitLab From 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Tue, 2 Mar 2021 00:01:59 +0800 Subject: [PATCH 098/839] net: dsa: tag_mtk: fix 802.1ad VLAN egress A different TPID bit is used for 802.1ad VLAN frames. Reported-by: Ilario Gelmetti Fixes: f0af34317f4b ("net: dsa: mediatek: combine MediaTek tag with VLAN tag") Signed-off-by: DENG Qingfang Signed-off-by: David S. Miller --- net/dsa/tag_mtk.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 38dcdded74c05..59748487664fe 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,6 +13,7 @@ #define MTK_HDR_LEN 4 #define MTK_HDR_XMIT_UNTAGGED 0 #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 +#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) #define MTK_HDR_XMIT_SA_DIS BIT(6) @@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 xmit_tpid; u8 *mtk_tag; - bool is_vlan_skb = true; unsigned char *dest = eth_hdr(skb)->h_dest; bool is_multicast_skb = is_multicast_ether_addr(dest) && !is_broadcast_ether_addr(dest); @@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * the both special and VLAN tag at the same time and then look up VLAN * table with VID. */ - if (!skb_vlan_tagged(skb)) { + switch (skb->protocol) { + case htons(ETH_P_8021Q): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100; + break; + case htons(ETH_P_8021AD): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8; + break; + default: + xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); - is_vlan_skb = false; } mtk_tag = skb->data + 2 * ETH_ALEN; @@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. */ - mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : - MTK_HDR_XMIT_UNTAGGED; + mtk_tag[0] = xmit_tpid; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; /* Disable SA learning for multicast frames */ @@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; /* Tag control information is kept for 802.1Q */ - if (!is_vlan_skb) { + if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) { mtk_tag[2] = 0; mtk_tag[3] = 0; } -- GitLab From 3946688edbc5b629110c339b3babf10aa9e7adad Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 1 Mar 2021 19:25:30 +0100 Subject: [PATCH 099/839] hv_netvsc: Fix validation in netvsc_linkstatus_callback() Contrary to the RNDIS protocol specification, certain (pre-Fe) implementations of Hyper-V's vSwitch did not account for the status buffer field in the length of an RNDIS packet; the bug was fixed in newer implementations. Validate the status buffer fields using the length of the 'vmtransfer_page' packet (all implementations), that is known/validated to be less than or equal to the receive section size and not smaller than the length of the RNDIS message. Reported-by: Dexuan Cui Suggested-by: Haiyang Zhang Signed-off-by: Andrea Parri (Microsoft) Fixes: 505e3f00c3f36 ("hv_netvsc: Add (more) validation for untrusted Hyper-V values") Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc_drv.c | 13 +++++++++---- drivers/net/hyperv/rndis_filter.c | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index e1a497d3c9ba4..59ac04a610adb 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -229,7 +229,7 @@ int netvsc_send(struct net_device *net, bool xdp_tx); void netvsc_linkstatus_callback(struct net_device *net, struct rndis_message *resp, - void *data); + void *data, u32 data_buflen); int netvsc_recv_callback(struct net_device *net, struct netvsc_device *nvdev, struct netvsc_channel *nvchan); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8176fa0c8b168..15f262b70489e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -744,7 +744,7 @@ static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb, */ void netvsc_linkstatus_callback(struct net_device *net, struct rndis_message *resp, - void *data) + void *data, u32 data_buflen) { struct rndis_indicate_status *indicate = &resp->msg.indicate_status; struct net_device_context *ndev_ctx = netdev_priv(net); @@ -765,11 +765,16 @@ void netvsc_linkstatus_callback(struct net_device *net, if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; - /* Validate status_buf_offset */ + /* Validate status_buf_offset and status_buflen. + * + * Certain (pre-Fe) implementations of Hyper-V's vSwitch didn't account + * for the status buffer field in resp->msg_len; perform the validation + * using data_buflen (>= resp->msg_len). + */ if (indicate->status_buflen < sizeof(speed) || indicate->status_buf_offset < sizeof(*indicate) || - resp->msg_len - RNDIS_HEADER_SIZE < indicate->status_buf_offset || - resp->msg_len - RNDIS_HEADER_SIZE - indicate->status_buf_offset + data_buflen - RNDIS_HEADER_SIZE < indicate->status_buf_offset || + data_buflen - RNDIS_HEADER_SIZE - indicate->status_buf_offset < indicate->status_buflen) { netdev_err(net, "invalid rndis_indicate_status packet\n"); return; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 123cc9d25f5ed..c0e89e107d575 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -620,7 +620,7 @@ int rndis_filter_receive(struct net_device *ndev, case RNDIS_MSG_INDICATE: /* notification msgs */ - netvsc_linkstatus_callback(ndev, rndis_msg, data); + netvsc_linkstatus_callback(ndev, rndis_msg, data, buflen); break; default: netdev_err(ndev, -- GitLab From 8811f4a9836e31c14ecdf79d9f3cb7c5d463265d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Mar 2021 10:29:17 -0800 Subject: [PATCH 100/839] tcp: add sanity tests to TCP_QUEUE_SEQ Qingyu Li reported a syzkaller bug where the repro changes RCV SEQ _after_ restoring data in the receive queue. mprotect(0x4aa000, 12288, PROT_READ) = 0 mmap(0x1ffff000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x1ffff000 mmap(0x20000000, 16777216, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000 mmap(0x21000000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x21000000 socket(AF_INET6, SOCK_STREAM, IPPROTO_IP) = 3 setsockopt(3, SOL_TCP, TCP_REPAIR, [1], 4) = 0 connect(3, {sa_family=AF_INET6, sin6_port=htons(0), sin6_flowinfo=htonl(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_scope_id=0}, 28) = 0 setsockopt(3, SOL_TCP, TCP_REPAIR_QUEUE, [1], 4) = 0 sendmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="0x0000000000000003\0\0", iov_len=20}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20 setsockopt(3, SOL_TCP, TCP_REPAIR, [0], 4) = 0 setsockopt(3, SOL_TCP, TCP_QUEUE_SEQ, [128], 4) = 0 recvfrom(3, NULL, 20, 0, NULL, NULL) = -1 ECONNRESET (Connection reset by peer) syslog shows: [ 111.205099] TCP recvmsg seq # bug 2: copied 80, seq 0, rcvnxt 80, fl 0 [ 111.207894] WARNING: CPU: 1 PID: 356 at net/ipv4/tcp.c:2343 tcp_recvmsg_locked+0x90e/0x29a0 This should not be allowed. TCP_QUEUE_SEQ should only be used when queues are empty. This patch fixes this case, and the tx path as well. Fixes: ee9952831cfd ("tcp: Initial repair mode") Signed-off-by: Eric Dumazet Cc: Pavel Emelyanov Link: https://bugzilla.kernel.org/show_bug.cgi?id=212005 Reported-by: Qingyu Li Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dfb6f286c1de9..de7cc8445ac03 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3469,16 +3469,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_QUEUE_SEQ: - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { err = -EPERM; - else if (tp->repair_queue == TCP_SEND_QUEUE) - WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) { - WRITE_ONCE(tp->rcv_nxt, val); - WRITE_ONCE(tp->copied_seq, val); - } - else + } else if (tp->repair_queue == TCP_SEND_QUEUE) { + if (!tcp_rtx_queue_empty(sk)) + err = -EPERM; + else + WRITE_ONCE(tp->write_seq, val); + } else if (tp->repair_queue == TCP_RECV_QUEUE) { + if (tp->rcv_nxt != tp->copied_seq) { + err = -EPERM; + } else { + WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } + } else { err = -EINVAL; + } break; case TCP_REPAIR_OPTIONS: -- GitLab From 42a382a466a967dc053c73b969cd2ac2fec502cf Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 27 Feb 2021 06:17:26 +0100 Subject: [PATCH 101/839] selftests/bpf: Use the last page in test_snprintf_btf on s390 test_snprintf_btf fails on s390, because NULL points to a readable struct lowcore there. Fix by using the last page instead. Error message example: printing fffffffffffff000 should generate error, got (361) Fixes: 076a95f5aff2 ("selftests/bpf: Add bpf_snprintf_btf helper tests") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Heiko Carstens Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210227051726.121256-1-iii@linux.ibm.com --- .../testing/selftests/bpf/progs/netif_receive_skb.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index 6b670039ea679..1d8918dfbd3ff 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -16,6 +16,13 @@ bool skip = false; #define STRSIZE 2048 #define EXPECTED_STRSIZE 256 +#if defined(bpf_target_s390) +/* NULL points to a readable struct lowcore on s390, so take the last page */ +#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL) +#else +#define BADPTR 0 +#endif + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb) } /* Check invalid ptr value */ - p.ptr = 0; + p.ptr = BADPTR; __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); if (__ret >= 0) { - bpf_printk("printing NULL should generate error, got (%d)", - __ret); + bpf_printk("printing %llx should generate error, got (%d)", + (unsigned long long)BADPTR, __ret); ret = -ERANGE; } -- GitLab From 6185266c5a853bb0f2a459e3ff594546f277609b Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Sun, 28 Feb 2021 12:30:17 +0200 Subject: [PATCH 102/839] selftests/bpf: Mask bpf_csum_diff() return value to 16 bits in test_verifier The verifier test labelled "valid read map access into a read-only array 2" calls the bpf_csum_diff() helper and checks its return value. However, architecture implementations of csum_partial() (which is what the helper uses) differ in whether they fold the return value to 16 bit or not. For example, x86 version has ... if (unlikely(odd)) { result = from32to16(result); result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); } ... while generic lib/checksum.c does: result = from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); This makes the helper return different values on different architectures, breaking the test on non-x86. To fix this, add an additional instruction to always mask the return value to 16 bits, and update the expected return value accordingly. Fixes: fb2abb73e575 ("bpf, selftest: test {rd, wr}only flags and direct value access") Signed-off-by: Yauheni Kaliuta Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210228103017.320240-1-yauheni.kaliuta@redhat.com --- tools/testing/selftests/bpf/verifier/array_access.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index bed53b561e044..1b138cd2b187d 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -250,12 +250,13 @@ BPF_MOV64_IMM(BPF_REG_5, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_array_ro = { 3 }, .result = ACCEPT, - .retval = -29, + .retval = 65507, }, { "invalid write map access into a read-only array 1", -- GitLab From 386a966f5ce71a0364b158c5d0a6971f4e418ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 25 Feb 2021 23:18:34 +0100 Subject: [PATCH 103/839] vio: make remove callback return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of struct bus_type::remove() because there is only little that can be done. To simplify the quest to make this function return void, let struct vio_driver::remove() return void, too. All users already unconditionally return 0, this commit makes it obvious that returning an error code is a bad idea. Note there are two nominally different implementations for a vio bus: one in arch/sparc/kernel/vio.c and the other in arch/powerpc/platforms/pseries/vio.c. This patch only adapts the powerpc one. Before this patch for a device that was bound to a driver without a remove callback vio_cmo_bus_remove(viodev) wasn't called. As the device core still considers the device unbound after vio_bus_remove() returns calling this unconditionally is the consistent behaviour which is implemented here. Signed-off-by: Uwe Kleine-König Reviewed-by: Tyrel Datwyler Acked-by: Lijun Pan Acked-by: Greg Kroah-Hartman [mpe: Drop unneeded hvcs_remove() forward declaration, squash in change from sfr to drop ibmvnic_remove() forward declaration] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225221834.160083-1-uwe@kleine-koenig.org --- arch/powerpc/include/asm/vio.h | 2 +- arch/powerpc/platforms/pseries/vio.c | 7 +++---- drivers/char/hw_random/pseries-rng.c | 3 +-- drivers/char/tpm/tpm_ibmvtpm.c | 4 +--- drivers/crypto/nx/nx-842-pseries.c | 4 +--- drivers/crypto/nx/nx.c | 4 +--- drivers/misc/ibmvmc.c | 4 +--- drivers/net/ethernet/ibm/ibmveth.c | 4 +--- drivers/net/ethernet/ibm/ibmvnic.c | 5 +---- drivers/scsi/ibmvscsi/ibmvfc.c | 3 +-- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 +--- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 4 +--- drivers/tty/hvc/hvcs.c | 4 +--- 13 files changed, 15 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 0cf52746531be..721c0d6715ac8 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -113,7 +113,7 @@ struct vio_driver { const char *name; const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); - int (*remove)(struct vio_dev *dev); + void (*remove)(struct vio_dev *dev); /* A driver must have a get_desired_dma() function to * be loaded in a CMO environment if it uses DMA. */ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index b2797cfe4e2b0..9cb4fc839fd5d 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1261,7 +1261,6 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); struct device *devptr; - int ret = 1; /* * Hold a reference to the device after the remove function is called @@ -1270,13 +1269,13 @@ static int vio_bus_remove(struct device *dev) devptr = get_device(dev); if (viodrv->remove) - ret = viodrv->remove(viodev); + viodrv->remove(viodev); - if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_bus_remove(viodev); put_device(devptr); - return ret; + return 0; } /** diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index 8038a8a9fb581..f4949b689bd5a 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c @@ -54,10 +54,9 @@ static int pseries_rng_probe(struct vio_dev *dev, return hwrng_register(&pseries_rng); } -static int pseries_rng_remove(struct vio_dev *dev) +static void pseries_rng_remove(struct vio_dev *dev) { hwrng_unregister(&pseries_rng); - return 0; } static const struct vio_device_id pseries_rng_driver_ids[] = { diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 994385bf37c0c..903604769de99 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -343,7 +343,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm) * * Return: Always 0. */ -static int tpm_ibmvtpm_remove(struct vio_dev *vdev) +static void tpm_ibmvtpm_remove(struct vio_dev *vdev) { struct tpm_chip *chip = dev_get_drvdata(&vdev->dev); struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev); @@ -372,8 +372,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) kfree(ibmvtpm); /* For tpm_ibmvtpm_get_desired_dma */ dev_set_drvdata(&vdev->dev, NULL); - - return 0; } /** diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index 2de5e3672e423..cc8dd3072b8b7 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -1042,7 +1042,7 @@ error: return ret; } -static int nx842_remove(struct vio_dev *viodev) +static void nx842_remove(struct vio_dev *viodev) { struct nx842_devdata *old_devdata; unsigned long flags; @@ -1063,8 +1063,6 @@ static int nx842_remove(struct vio_dev *viodev) if (old_devdata) kfree(old_devdata->counters); kfree(old_devdata); - - return 0; } static const struct vio_device_id nx842_vio_driver_ids[] = { diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0d2dc5be7f192..1d0e8a1ba1605 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -783,7 +783,7 @@ static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id) return nx_register_algs(); } -static int nx_remove(struct vio_dev *viodev) +static void nx_remove(struct vio_dev *viodev) { dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n", viodev->unit_address); @@ -811,8 +811,6 @@ static int nx_remove(struct vio_dev *viodev) nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB); } - - return 0; } diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c index 2d778d0f011e6..c0fe3295c3303 100644 --- a/drivers/misc/ibmvmc.c +++ b/drivers/misc/ibmvmc.c @@ -2288,15 +2288,13 @@ crq_failed: return -EPERM; } -static int ibmvmc_remove(struct vio_dev *vdev) +static void ibmvmc_remove(struct vio_dev *vdev) { struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev); dev_info(adapter->dev, "Entering remove for UA 0x%x\n", vdev->unit_address); ibmvmc_release_crq_queue(adapter); - - return 0; } static struct vio_device_id ibmvmc_device_table[] = { diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index c3ec9ceed833e..7fea9ae60f130 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1758,7 +1758,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return 0; } -static int ibmveth_remove(struct vio_dev *dev) +static void ibmveth_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmveth_adapter *adapter = netdev_priv(netdev); @@ -1771,8 +1771,6 @@ static int ibmveth_remove(struct vio_dev *dev) free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static struct attribute veth_active_attr; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 118a4bd3f8779..fe3201ba20341 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -78,7 +78,6 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(IBMVNIC_DRIVER_VERSION); static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; -static int ibmvnic_remove(struct vio_dev *); static void release_sub_crqs(struct ibmvnic_adapter *, bool); static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); @@ -5396,7 +5395,7 @@ ibmvnic_init_fail: return rc; } -static int ibmvnic_remove(struct vio_dev *dev) +static void ibmvnic_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmvnic_adapter *adapter = netdev_priv(netdev); @@ -5437,8 +5436,6 @@ static int ibmvnic_remove(struct vio_dev *dev) device_remove_file(&dev->dev, &dev_attr_failover); free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static ssize_t failover_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 755313b766b91..e663085a89446 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -6038,7 +6038,7 @@ out: * Return value: * 0 **/ -static int ibmvfc_remove(struct vio_dev *vdev) +static void ibmvfc_remove(struct vio_dev *vdev) { struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev); LIST_HEAD(purge); @@ -6070,7 +6070,6 @@ static int ibmvfc_remove(struct vio_dev *vdev) spin_unlock(&ibmvfc_driver_lock); scsi_host_put(vhost->host); LEAVE; - return 0; } /** diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 29fcc44be2d57..77fafb1bc173a 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2335,7 +2335,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) return -1; } -static int ibmvscsi_remove(struct vio_dev *vdev) +static void ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); @@ -2356,8 +2356,6 @@ static int ibmvscsi_remove(struct vio_dev *vdev) spin_unlock(&ibmvscsi_driver_lock); scsi_host_put(hostdata->host); - - return 0; } /** diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index cc3908c2d2f94..9abd9e253af60 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3595,7 +3595,7 @@ free_adapter: return rc; } -static int ibmvscsis_remove(struct vio_dev *vdev) +static void ibmvscsis_remove(struct vio_dev *vdev) { struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev); @@ -3622,8 +3622,6 @@ static int ibmvscsis_remove(struct vio_dev *vdev) list_del(&vscsi->list); spin_unlock_bh(&ibmvscsis_dev_lock); kfree(vscsi); - - return 0; } static ssize_t system_id_show(struct device *dev, diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index c908489196442..9afa1dcef2c2f 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -317,7 +317,6 @@ static void hvcs_hangup(struct tty_struct * tty); static int hvcs_probe(struct vio_dev *dev, const struct vio_device_id *id); -static int hvcs_remove(struct vio_dev *dev); static int __init hvcs_module_init(void); static void __exit hvcs_module_exit(void); static int hvcs_initialize(void); @@ -819,7 +818,7 @@ static int hvcs_probe( return 0; } -static int hvcs_remove(struct vio_dev *dev) +static void hvcs_remove(struct vio_dev *dev) { struct hvcs_struct *hvcsd = dev_get_drvdata(&dev->dev); unsigned long flags; @@ -849,7 +848,6 @@ static int hvcs_remove(struct vio_dev *dev) printk(KERN_INFO "HVCS: vty-server@%X removed from the" " vio bus.\n", dev->unit_address); - return 0; }; static struct vio_driver hvcs_vio_driver = { -- GitLab From acdad8fb4a1574323db88f98a38b630691574e16 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 27 Feb 2021 16:30:48 +0000 Subject: [PATCH 104/839] powerpc: Force inlining of mmu_has_feature to fix build failure The test robot has managed to generate a random config leading to following build failure: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: arch/powerpc/mm/pgtable.o: in function `ptep_set_access_flags': pgtable.c:(.text.ptep_set_access_flags+0xf0): undefined reference to `hash__flush_tlb_page' powerpc64-linux-ld: arch/powerpc/mm/book3s32/mmu.o: in function `MMU_init_hw_patch': mmu.c:(.init.text+0x452): undefined reference to `patch__hash_page_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x45e): undefined reference to `patch__hash_page_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x46a): undefined reference to `patch__hash_page_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x476): undefined reference to `patch__hash_page_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x482): undefined reference to `patch__hash_page_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x48e): undefined reference to `patch__hash_page_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x49e): undefined reference to `patch__hash_page_B' powerpc64-linux-ld: mmu.c:(.init.text+0x4aa): undefined reference to `patch__hash_page_B' powerpc64-linux-ld: mmu.c:(.init.text+0x4b6): undefined reference to `patch__hash_page_C' powerpc64-linux-ld: mmu.c:(.init.text+0x4c2): undefined reference to `patch__hash_page_C' powerpc64-linux-ld: mmu.c:(.init.text+0x4ce): undefined reference to `patch__flush_hash_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x4da): undefined reference to `patch__flush_hash_A0' powerpc64-linux-ld: mmu.c:(.init.text+0x4e6): undefined reference to `patch__flush_hash_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x4f2): undefined reference to `patch__flush_hash_A1' powerpc64-linux-ld: mmu.c:(.init.text+0x4fe): undefined reference to `patch__flush_hash_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x50a): undefined reference to `patch__flush_hash_A2' powerpc64-linux-ld: mmu.c:(.init.text+0x522): undefined reference to `patch__flush_hash_B' powerpc64-linux-ld: mmu.c:(.init.text+0x532): undefined reference to `patch__flush_hash_B' powerpc64-linux-ld: arch/powerpc/mm/book3s32/mmu.o: in function `update_mmu_cache': mmu.c:(.text.update_mmu_cache+0xa0): undefined reference to `add_hash_page' powerpc64-linux-ld: mm/memory.o: in function `zap_pte_range': memory.c:(.text.zap_pte_range+0x160): undefined reference to `flush_hash_pages' powerpc64-linux-ld: mm/memory.o: in function `handle_pte_fault': memory.c:(.text.handle_pte_fault+0x180): undefined reference to `hash__flush_tlb_page' This is due to mmu_has_feature() not being inlined. See extract of build of mmu.c with -Winline: In file included from ./include/linux/mm_types.h:19, from ./include/linux/mmzone.h:21, from ./include/linux/gfp.h:6, from ./include/linux/mm.h:10, from arch/powerpc/mm/book3s32/mmu.c:21: ./arch/powerpc/include/asm/mmu.h: In function 'find_free_bat': ./arch/powerpc/include/asm/mmu.h:231:20: warning: inlining failed in call to 'early_mmu_has_feature': call is unlikely and code size would grow [-Winline] 231 | static inline bool early_mmu_has_feature(unsigned long feature) | ^~~~~~~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/mmu.h:291:9: note: called from here 291 | return early_mmu_has_feature(feature); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code relies on constant folding of MMU_FTRS_POSSIBLE at buildtime and elimination of non possible parts of code at compile time. For this to work, mmu_has_feature() and early_mmu_has_feature() must be inlined. Fixes: 259149cf7c3c ("powerpc/32s: Only build hash code when CONFIG_PPC_BOOK3S_604 is selected") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf61345912c078c96f171afd0fcc48ef27cbdc3f.1614443418.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 80b27f5d96486..607168b1aef46 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -228,7 +228,7 @@ enum { #define MMU_FTRS_ALWAYS 0 #endif -static inline bool early_mmu_has_feature(unsigned long feature) +static __always_inline bool early_mmu_has_feature(unsigned long feature) { if (MMU_FTRS_ALWAYS & feature) return true; @@ -286,7 +286,7 @@ static inline void mmu_feature_keys_init(void) } -static inline bool mmu_has_feature(unsigned long feature) +static __always_inline bool mmu_has_feature(unsigned long feature) { return early_mmu_has_feature(feature); } -- GitLab From 5ae5fbd2107959b68ac69a8b75412208663aea88 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 25 Feb 2021 05:10:39 -0500 Subject: [PATCH 105/839] powerpc/perf: Fix handling of privilege level checks in perf interrupt context Running "perf mem record" in powerpc platforms with selinux enabled resulted in soft lockup's. Below call-trace was seen in the logs: CPU: 58 PID: 3751 Comm: sssd_nss Not tainted 5.11.0-rc7+ #2 NIP: c000000000dff3d4 LR: c000000000dff3d0 CTR: 0000000000000000 REGS: c000007fffab7d60 TRAP: 0100 Not tainted (5.11.0-rc7+) ... NIP _raw_spin_lock_irqsave+0x94/0x120 LR _raw_spin_lock_irqsave+0x90/0x120 Call Trace: 0xc00000000fd47260 (unreliable) skb_queue_tail+0x3c/0x90 audit_log_end+0x6c/0x180 common_lsm_audit+0xb0/0xe0 slow_avc_audit+0xa4/0x110 avc_has_perm+0x1c4/0x260 selinux_perf_event_open+0x74/0xd0 security_perf_event_open+0x68/0xc0 record_and_restart+0x6e8/0x7f0 perf_event_interrupt+0x22c/0x560 performance_monitor_exception0x4c/0x60 performance_monitor_common_virt+0x1c8/0x1d0 interrupt: f00 at _raw_spin_lock_irqsave+0x38/0x120 NIP: c000000000dff378 LR: c000000000b5fbbc CTR: c0000000007d47f0 REGS: c00000000fd47860 TRAP: 0f00 Not tainted (5.11.0-rc7+) ... NIP _raw_spin_lock_irqsave+0x38/0x120 LR skb_queue_tail+0x3c/0x90 interrupt: f00 0x38 (unreliable) 0xc00000000aae6200 audit_log_end+0x6c/0x180 audit_log_exit+0x344/0xf80 __audit_syscall_exit+0x2c0/0x320 do_syscall_trace_leave+0x148/0x200 syscall_exit_prepare+0x324/0x390 system_call_common+0xfc/0x27c The above trace shows that while the CPU was handling a performance monitor exception, there was a call to security_perf_event_open() function. In powerpc core-book3s, this function is called from perf_allow_kernel() check during recording of data address in the sample via perf_get_data_addr(). Commit da97e18458fb ("perf_event: Add support for LSM and SELinux checks") introduced security enhancements to perf. As part of this commit, the new security hook for perf_event_open() was added in all places where perf paranoid check was previously used. In powerpc core-book3s code, originally had paranoid checks in perf_get_data_addr() and power_pmu_bhrb_read(). So perf_paranoid_kernel() checks were replaced with perf_allow_kernel() in these PMU helper functions as well. The intention of paranoid checks in core-book3s was to verify privilege access before capturing some of the sample data. Along with paranoid checks, perf_allow_kernel() also does a security_perf_event_open(). Since these functions are accessed while recording a sample, we end up calling selinux_perf_event_open() in PMI context. Some of the security functions use spinlock like sidtab_sid2str_put(). If a perf interrupt hits under a spin lock and if we end up in calling selinux hook functions in PMI handler, this could cause a dead lock. Since the purpose of this security hook is to control access to perf_event_open(), it is not right to call this in interrupt context. The paranoid checks in powerpc core-book3s were done at interrupt time which is also not correct. Reference commits: Commit cd1231d7035f ("powerpc/perf: Prevent kernel address leak via perf_get_data_addr()") Commit bb19af816025 ("powerpc/perf: Prevent kernel address leak to userspace via BHRB buffer") We only allow creation of events that have already passed the privilege checks in perf_event_open(). So these paranoid checks are not needed at event time. As a fix, patch uses 'event->attr.exclude_kernel' check to prevent exposing kernel address for userspace only sampling. Fixes: cd1231d7035f ("powerpc/perf: Prevent kernel address leak via perf_get_data_addr()") Cc: stable@vger.kernel.org # v4.17+ Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1614247839-1428-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6817331e22ffc..766f064f00fbf 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs * if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); - if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0) + if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel) *addrp = 0; } @@ -507,7 +507,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses, hence include a check before filtering code */ if (!(ppmu->flags & PPMU_ARCH_31) && - is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + is_kernel_addr(addr) && event->attr.exclude_kernel) continue; /* Branches are read most recent first (ie. mfbhrb 0 is -- GitLab From 5c88a17e15795226b56d83f579cbb9b7a4864f79 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Thu, 25 Feb 2021 14:19:46 +1100 Subject: [PATCH 106/839] powerpc/sstep: Fix VSX instruction emulation Commit af99da74333b ("powerpc/sstep: Support VSX vector paired storage access instructions") added loading and storing 32 word long data into adjacent VSRs. However the calculation used to determine if two VSRs needed to be loaded/stored inadvertently prevented the load/storing taking place for instructions with a data length less than 16 words. This causes the emulation to not function correctly, which can be seen by the alignment_handler selftest: $ ./alignment_handler [snip] test: test_alignment_handler_vsx_207 tags: git_version:powerpc-5.12-1-0-g82d2c16b350f VSX: 2.07B Doing lxsspx: PASSED Doing lxsiwax: FAILED: Wrong Data Doing lxsiwzx: PASSED Doing stxsspx: PASSED Doing stxsiwx: PASSED failure: test_alignment_handler_vsx_207 test: test_alignment_handler_vsx_300 tags: git_version:powerpc-5.12-1-0-g82d2c16b350f VSX: 3.00B Doing lxsd: PASSED Doing lxsibzx: PASSED Doing lxsihzx: PASSED Doing lxssp: FAILED: Wrong Data Doing lxv: PASSED Doing lxvb16x: PASSED Doing lxvh8x: PASSED Doing lxvx: PASSED Doing lxvwsx: FAILED: Wrong Data Doing lxvl: PASSED Doing lxvll: PASSED Doing stxsd: PASSED Doing stxsibx: PASSED Doing stxsihx: PASSED Doing stxssp: PASSED Doing stxv: PASSED Doing stxvb16x: PASSED Doing stxvh8x: PASSED Doing stxvx: PASSED Doing stxvl: PASSED Doing stxvll: PASSED failure: test_alignment_handler_vsx_300 [snip] Fix this by making sure all VSX instruction emulation correctly load/store from the VSRs. Fixes: af99da74333b ("powerpc/sstep: Support VSX vector paired storage access instructions") Signed-off-by: Jordan Niethe Reviewed-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225031946.1458206-1-jniethe5@gmail.com --- arch/powerpc/lib/sstep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bb5c20d4ca91c..c6aebc149d141 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -904,7 +904,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { @@ -951,7 +951,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, if (!address_ok(regs, ea, size)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ -- GitLab From 9cc0001a18b4e5f46ec481201c88ae16f0a69bb0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 27 Feb 2021 22:31:27 +0100 Subject: [PATCH 107/839] netfilter: nftables: disallow updates on table ownership Disallow updating the ownership bit on an existing table: Do not allow to grab ownership on an existing table. Do not allow to drop ownership on an existing table. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1eb5cdb30330..b07703e191080 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -916,6 +916,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + if ((nft_table_has_owner(ctx->table) && + !(flags & NFT_TABLE_F_OWNER)) || + (!nft_table_has_owner(ctx->table) && + flags & NFT_TABLE_F_OWNER)) + return -EOPNOTSUPP; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) -- GitLab From 778e45d7720d663811352943dd515b41f6849637 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Mar 2021 21:07:07 +0100 Subject: [PATCH 108/839] parisc: Enable -mlong-calls gcc option with CONFIG_COMPILE_TEST The kernel test robot reported multiple linkage problems like this: hppa64-linux-ld: init/main.o(.init.text+0x56c): cannot reach printk init/main.o: in function `unknown_bootoption': (.init.text+0x56c): relocation truncated to fit: R_PARISC_PCREL22F against symbol `printk' defined in .text.unlikely section in kernel/printk/printk.o There are two ways to solve it: a) Enable the -mlong-call compiler option (CONFIG_MLONGCALLS), b) Add long branch stub support in 64-bit linker. While b) is the long-term solution, this patch works around the issue by automatically enabling the CONFIG_MLONGCALLS option when CONFIG_COMPILE_TEST is set, which indicates that a non-production kernel (e.g. 0-day kernel) is built. Signed-off-by: Helge Deller Reported-by: kernel test robot Fixes: 00e35f2b0e8a ("parisc: Enable -mlong-calls gcc option by default when !CONFIG_MODULES") Cc: stable@vger.kernel.org # v5.6+ --- arch/parisc/Kconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4e53ac46e857e..afc3b8d035726 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -203,9 +203,12 @@ config PREFETCH def_bool y depends on PA8X00 || PA7200 +config PARISC_HUGE_KERNEL + def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST + config MLONGCALLS - def_bool y if !MODULES || UBSAN || FTRACE - bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE + def_bool y if PARISC_HUGE_KERNEL + bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL depends on PA8X00 help If you configure the kernel to include many drivers built-in instead -- GitLab From fa706dce2f2d7012654e2eab40da2b526c1424b3 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Tue, 2 Mar 2021 16:57:21 +0800 Subject: [PATCH 109/839] stmmac: intel: Fix mdio bus registration issue for TGL-H/ADL-S On Intel platforms which consist of two Ethernet Controllers such as TGL-H and ADL-S, a unique MDIO bus id is required for MDIO bus to be successful registered: [ 13.076133] sysfs: cannot create duplicate filename '/class/mdio_bus/stmmac-1' [ 13.083404] CPU: 8 PID: 1898 Comm: systemd-udevd Tainted: G U 5.11.0-net-next #106 [ 13.092410] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-S ADP-S DRR4 CRB, BIOS ADLIFSI1.R00.1494.B00.2012031421 12/03/2020 [ 13.105709] Call Trace: [ 13.108176] dump_stack+0x64/0x7c [ 13.111553] sysfs_warn_dup+0x56/0x70 [ 13.115273] sysfs_do_create_link_sd.isra.2+0xbd/0xd0 [ 13.120371] device_add+0x4df/0x840 [ 13.123917] ? complete_all+0x2a/0x40 [ 13.127636] __mdiobus_register+0x98/0x310 [libphy] [ 13.132572] stmmac_mdio_register+0x1c5/0x3f0 [stmmac] [ 13.137771] ? stmmac_napi_add+0xa5/0xf0 [stmmac] [ 13.142493] stmmac_dvr_probe+0x806/0xee0 [stmmac] [ 13.147341] intel_eth_pci_probe+0x1cb/0x250 [dwmac_intel] [ 13.152884] pci_device_probe+0xd2/0x150 [ 13.156897] really_probe+0xf7/0x4d0 [ 13.160527] driver_probe_device+0x5d/0x140 [ 13.164761] device_driver_attach+0x4f/0x60 [ 13.168996] __driver_attach+0xa2/0x140 [ 13.172891] ? device_driver_attach+0x60/0x60 [ 13.177300] bus_for_each_dev+0x76/0xc0 [ 13.181188] bus_add_driver+0x189/0x230 [ 13.185083] ? 0xffffffffc0795000 [ 13.188446] driver_register+0x5b/0xf0 [ 13.192249] ? 0xffffffffc0795000 [ 13.195577] do_one_initcall+0x4d/0x210 [ 13.199467] ? kmem_cache_alloc_trace+0x2ff/0x490 [ 13.204228] do_init_module+0x5b/0x21c [ 13.208031] load_module+0x2a0c/0x2de0 [ 13.211838] ? __do_sys_finit_module+0xb1/0x110 [ 13.216420] __do_sys_finit_module+0xb1/0x110 [ 13.220825] do_syscall_64+0x33/0x40 [ 13.224451] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 13.229515] RIP: 0033:0x7fc2b1919ccd [ 13.233113] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 93 31 0c 00 f7 d8 64 89 01 48 [ 13.251912] RSP: 002b:00007ffcea2e5b98 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.259527] RAX: ffffffffffffffda RBX: 0000560558920f10 RCX: 00007fc2b1919ccd [ 13.266706] RDX: 0000000000000000 RSI: 00007fc2b1a881e3 RDI: 0000000000000012 [ 13.273887] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000 [ 13.281036] R10: 0000000000000012 R11: 0000000000000246 R12: 00007fc2b1a881e3 [ 13.288183] R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffcea2e5d58 [ 13.295389] libphy: mii_bus stmmac-1 failed to register Fixes: 88af9bd4efbd ("stmmac: intel: Add ADL-S 1Gbps PCI IDs") Fixes: 8450e23f142f ("stmmac: intel: Add PCI IDs for TGL-H platform") Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 54 ++++++++++++++----- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 751dfdeec41c0..f2896872a86c4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -446,8 +446,8 @@ static int tgl_common_data(struct pci_dev *pdev, return intel_mgbe_common_data(pdev, plat); } -static int tgl_sgmii_data(struct pci_dev *pdev, - struct plat_stmmacenet_data *plat) +static int tgl_sgmii_phy0_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) { plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; @@ -456,12 +456,26 @@ static int tgl_sgmii_data(struct pci_dev *pdev, return tgl_common_data(pdev, plat); } -static struct stmmac_pci_info tgl_sgmii1g_info = { - .setup = tgl_sgmii_data, +static struct stmmac_pci_info tgl_sgmii1g_phy0_info = { + .setup = tgl_sgmii_phy0_data, }; -static int adls_sgmii_data(struct pci_dev *pdev, - struct plat_stmmacenet_data *plat) +static int tgl_sgmii_phy1_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + plat->bus_id = 2; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + return tgl_common_data(pdev, plat); +} + +static struct stmmac_pci_info tgl_sgmii1g_phy1_info = { + .setup = tgl_sgmii_phy1_data, +}; + +static int adls_sgmii_phy0_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) { plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; @@ -471,10 +485,24 @@ static int adls_sgmii_data(struct pci_dev *pdev, return tgl_common_data(pdev, plat); } -static struct stmmac_pci_info adls_sgmii1g_info = { - .setup = adls_sgmii_data, +static struct stmmac_pci_info adls_sgmii1g_phy0_info = { + .setup = adls_sgmii_phy0_data, }; +static int adls_sgmii_phy1_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + plat->bus_id = 2; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + + /* SerDes power up and power down are done in BIOS for ADL */ + + return tgl_common_data(pdev, plat); +} + +static struct stmmac_pci_info adls_sgmii1g_phy1_info = { + .setup = adls_sgmii_phy1_data, +}; static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = { { .func = 6, @@ -756,11 +784,11 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_phy1_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_phy1_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); -- GitLab From 95b39f07a17faef3a9b225248ba449b976e529c8 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Tue, 2 Mar 2021 11:33:23 +0800 Subject: [PATCH 110/839] net: ethernet: mtk-star-emac: fix wrong unmap in RX handling mtk_star_dma_unmap_rx() should unmap the dma_addr of old skb rather than that of new skb. Assign new_dma_addr to desc_data.dma_addr after all handling of old skb ends to avoid unexpected receive side error. Fixes: f96e9641e92b ("net: ethernet: mtk-star-emac: fix error path in RX handling") Signed-off-by: Biao Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index a8641a407c06a..96d2891f1675a 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1225,8 +1225,6 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv) goto push_new_skb; } - desc_data.dma_addr = new_dma_addr; - /* We can't fail anymore at this point: it's safe to unmap the skb. */ mtk_star_dma_unmap_rx(priv, &desc_data); @@ -1236,6 +1234,9 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv) desc_data.skb->dev = ndev; netif_receive_skb(desc_data.skb); + /* update dma_addr for new skb */ + desc_data.dma_addr = new_dma_addr; + push_new_skb: desc_data.len = skb_tailroom(new_skb); desc_data.skb = new_skb; -- GitLab From a22549f12767fce49c74c53a853595f82b727935 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Feb 2021 14:00:39 +0000 Subject: [PATCH 111/839] iwlwifi: mvm: add terminate entry for dmi_system_id tables Make sure dmi_system_id tables are NULL terminated. This crashed when LTO was enabled: BUG: KASAN: global-out-of-bounds in dmi_check_system+0x5a/0x70 Read of size 1 at addr ffffffffc16af750 by task NetworkManager/1913 CPU: 4 PID: 1913 Comm: NetworkManager Not tainted 5.12.0-rc1+ #10057 Hardware name: LENOVO 20THCTO1WW/20THCTO1WW, BIOS N2VET27W (1.12 ) 12/21/2020 Call Trace: dump_stack+0x90/0xbe print_address_description.constprop.0+0x1d/0x140 ? dmi_check_system+0x5a/0x70 ? dmi_check_system+0x5a/0x70 kasan_report.cold+0x7b/0xd4 ? dmi_check_system+0x5a/0x70 __asan_load1+0x4d/0x50 dmi_check_system+0x5a/0x70 iwl_mvm_up+0x1360/0x1690 [iwlmvm] ? iwl_mvm_send_recovery_cmd+0x270/0x270 [iwlmvm] ? setup_object.isra.0+0x27/0xd0 ? kasan_poison+0x20/0x50 ? ___slab_alloc.constprop.0+0x483/0x5b0 ? mempool_kmalloc+0x17/0x20 ? ftrace_graph_ret_addr+0x2a/0xb0 ? kasan_poison+0x3c/0x50 ? cfg80211_iftype_allowed+0x2e/0x90 [cfg80211] ? __kasan_check_write+0x14/0x20 ? mutex_lock+0x86/0xe0 ? __mutex_lock_slowpath+0x20/0x20 __iwl_mvm_mac_start+0x49/0x290 [iwlmvm] iwl_mvm_mac_start+0x37/0x50 [iwlmvm] drv_start+0x73/0x1b0 [mac80211] ieee80211_do_open+0x53e/0xf10 [mac80211] ? ieee80211_check_concurrent_iface+0x266/0x2e0 [mac80211] ieee80211_open+0xb9/0x100 [mac80211] __dev_open+0x1b8/0x280 Fixes: a2ac0f48a07c ("iwlwifi: mvm: implement approved list for the PPAG feature") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Nathan Chancellor Tested-by: Victor Michel Acked-by: Luca Coelho [kvalo@codeaurora.org: improve commit log] Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210223140039.1708534-1-weiyongjun1@huawei.com --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 15e2773ce7e70..5ee64f7f3c85f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1083,6 +1083,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek COMPUTER INC."), }, }, + {} }; static int iwl_mvm_ppag_init(struct iwl_mvm *mvm) -- GitLab From 436b265671d653787eed9bc716f44882d2a458cb Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 19:16:37 -0600 Subject: [PATCH 112/839] iwlwifi: fix ARCH=i386 compilation warnings An unsigned long variable should rely on '%lu' format strings, not '%zd' Fixes: a1a6a4cf49ece ("iwlwifi: pnvm: implement reading PNVM from UEFI") Signed-off-by: Pierre-Louis Bossart Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210302011640.1276636-1-pierre-louis.bossart@linux.intel.com --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index fd070ca5e517f..40f2109a097f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -271,12 +271,12 @@ static int iwl_pnvm_get_from_efi(struct iwl_trans *trans, err = efivar_entry_get(pnvm_efivar, NULL, &package_size, package); if (err) { IWL_DEBUG_FW(trans, - "PNVM UEFI variable not found %d (len %zd)\n", + "PNVM UEFI variable not found %d (len %lu)\n", err, package_size); goto out; } - IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %zd\n", package_size); + IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %lu\n", package_size); *data = kmemdup(package->data, *len, GFP_KERNEL); if (!*data) -- GitLab From 295d4cd82b0181dd36b145fd535c13d623d7a335 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 2 Mar 2021 11:34:51 +0100 Subject: [PATCH 113/839] iwlwifi: don't call netif_napi_add() with rxq->lock held (was Re: Lockdep warning in iwl_pcie_rx_handle()) We can't call netif_napi_add() with rxq-lock held, as there is a potential for deadlock as spotted by lockdep (see below). rxq->lock is not protecting anything over the netif_napi_add() codepath anyway, so let's drop it just before calling into NAPI. ======================================================== WARNING: possible irq lock inversion dependency detected 5.12.0-rc1-00002-gbada49429032 #5 Not tainted -------------------------------------------------------- irq/136-iwlwifi/565 just changed the state of lock: ffff89f28433b0b0 (&rxq->lock){+.-.}-{2:2}, at: iwl_pcie_rx_handle+0x7f/0x960 [iwlwifi] but this lock took another, SOFTIRQ-unsafe lock in the past: (napi_hash_lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(napi_hash_lock); local_irq_disable(); lock(&rxq->lock); lock(napi_hash_lock); lock(&rxq->lock); *** DEADLOCK *** 1 lock held by irq/136-iwlwifi/565: #0: ffff89f2b1440170 (sync_cmd_lockdep_map){+.+.}-{0:0}, at: iwl_pcie_irq_handler+0x5/0xb30 the shortest dependencies between 2nd lock and 1st lock: -> (napi_hash_lock){+.+.}-{2:2} { HARDIRQ-ON-W at: lock_acquire+0x277/0x3d0 _raw_spin_lock+0x2c/0x40 netif_napi_add+0x14b/0x270 e1000_probe+0x2fe/0xee0 [e1000e] local_pci_probe+0x42/0x90 pci_device_probe+0x10b/0x1c0 really_probe+0xef/0x4b0 driver_probe_device+0xde/0x150 device_driver_attach+0x4f/0x60 __driver_attach+0x9c/0x140 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x18d/0x220 driver_register+0x5b/0xf0 do_one_initcall+0x5b/0x300 do_init_module+0x5b/0x21c load_module+0x1dae/0x22c0 __do_sys_finit_module+0xad/0x110 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae SOFTIRQ-ON-W at: lock_acquire+0x277/0x3d0 _raw_spin_lock+0x2c/0x40 netif_napi_add+0x14b/0x270 e1000_probe+0x2fe/0xee0 [e1000e] local_pci_probe+0x42/0x90 pci_device_probe+0x10b/0x1c0 really_probe+0xef/0x4b0 driver_probe_device+0xde/0x150 device_driver_attach+0x4f/0x60 __driver_attach+0x9c/0x140 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x18d/0x220 driver_register+0x5b/0xf0 do_one_initcall+0x5b/0x300 do_init_module+0x5b/0x21c load_module+0x1dae/0x22c0 __do_sys_finit_module+0xad/0x110 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae INITIAL USE at: lock_acquire+0x277/0x3d0 _raw_spin_lock+0x2c/0x40 netif_napi_add+0x14b/0x270 e1000_probe+0x2fe/0xee0 [e1000e] local_pci_probe+0x42/0x90 pci_device_probe+0x10b/0x1c0 really_probe+0xef/0x4b0 driver_probe_device+0xde/0x150 device_driver_attach+0x4f/0x60 __driver_attach+0x9c/0x140 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x18d/0x220 driver_register+0x5b/0xf0 do_one_initcall+0x5b/0x300 do_init_module+0x5b/0x21c load_module+0x1dae/0x22c0 __do_sys_finit_module+0xad/0x110 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae } ... key at: [] napi_hash_lock+0x18/0x40 ... acquired at: _raw_spin_lock+0x2c/0x40 netif_napi_add+0x14b/0x270 _iwl_pcie_rx_init+0x1f4/0x710 [iwlwifi] iwl_pcie_rx_init+0x1b/0x3b0 [iwlwifi] iwl_trans_pcie_start_fw+0x2ac/0x6a0 [iwlwifi] iwl_mvm_load_ucode_wait_alive+0x116/0x460 [iwlmvm] iwl_run_init_mvm_ucode+0xa4/0x3a0 [iwlmvm] iwl_op_mode_mvm_start+0x9ed/0xbf0 [iwlmvm] _iwl_op_mode_start.isra.4+0x42/0x80 [iwlwifi] iwl_opmode_register+0x71/0xe0 [iwlwifi] iwl_mvm_init+0x34/0x1000 [iwlmvm] do_one_initcall+0x5b/0x300 do_init_module+0x5b/0x21c load_module+0x1dae/0x22c0 __do_sys_finit_module+0xad/0x110 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae [ ... lockdep output trimmed .... ] Fixes: 25edc8f259c7106 ("iwlwifi: pcie: properly implement NAPI") Signed-off-by: Jiri Kosina Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2103021134060.12405@cbobk.fhfr.pm --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 42426e25cac60..2bec971331194 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1129,6 +1129,8 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans) iwl_pcie_rx_init_rxb_lists(rxq); + spin_unlock_bh(&rxq->lock); + if (!rxq->napi.poll) { int (*poll)(struct napi_struct *, int) = iwl_pcie_napi_poll; @@ -1149,7 +1151,6 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans) napi_enable(&rxq->napi); } - spin_unlock_bh(&rxq->lock); } /* move the pool to the default queue and allocator ownerships */ -- GitLab From 2378b2c9ecf437b918dff246b81b5b624ec14f80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Mar 2021 14:21:54 +0300 Subject: [PATCH 114/839] octeontx2-af: cn10k: fix an array overflow in is_lmac_valid() The value of "lmac_id" can be controlled by the user and if it is larger then the number of bits in long then it reads outside the bitmap. The highest valid value is less than MAX_LMAC_PER_CGX (4). Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 9caa375d01b15..68deae529bc94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -56,7 +56,9 @@ static bool is_dev_rpm(void *cgxd) bool is_lmac_valid(struct cgx *cgx, int lmac_id) { - return cgx && test_bit(lmac_id, &cgx->lmac_bmap); + if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX) + return false; + return test_bit(lmac_id, &cgx->lmac_bmap); } struct mac_ops *get_mac_ops(void *cgxd) -- GitLab From 6881b07fdd24850def1f03761c66042b983ff86e Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 2 Mar 2021 20:47:47 +0100 Subject: [PATCH 115/839] ibmvnic: Fix possibly uninitialized old_num_tx_queues variable warning. GCC 7.5 reports: ../drivers/net/ethernet/ibm/ibmvnic.c: In function 'ibmvnic_reset_init': ../drivers/net/ethernet/ibm/ibmvnic.c:5373:51: warning: 'old_num_tx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized] ../drivers/net/ethernet/ibm/ibmvnic.c:5373:6: warning: 'old_num_rx_queues' may be used uninitialized in this function [-Wmaybe-uninitialized] The variable is initialized only if(reset) and used only if(reset && something) so this is a false positive. However, there is no reason to not initialize the variables unconditionally avoiding the warning. Fixes: 635e442f4a48 ("ibmvnic: merge ibmvnic_reset_init and ibmvnic_init") Signed-off-by: Michal Suchanek Reviewed-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 118a4bd3f8779..3bad762083c54 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5219,16 +5219,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; unsigned long timeout = msecs_to_jiffies(20000); - u64 old_num_rx_queues, old_num_tx_queues; + u64 old_num_rx_queues = adapter->req_rx_queues; + u64 old_num_tx_queues = adapter->req_tx_queues; int rc; adapter->from_passive_init = false; - if (reset) { - old_num_rx_queues = adapter->req_rx_queues; - old_num_tx_queues = adapter->req_tx_queues; + if (reset) reinit_completion(&adapter->init_done); - } adapter->init_done_rc = 0; rc = ibmvnic_send_crq_init(adapter); -- GitLab From 879c348c35bb5fb758dd881d8a97409c1862dae8 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 3 Mar 2021 20:38:40 +0530 Subject: [PATCH 116/839] net: stmmac: fix incorrect DMA channel intr enable setting of EQoS v4.10 We introduce dwmac410_dma_init_channel() here for both EQoS v4.10 and above which use different DMA_CH(n)_Interrupt_Enable bit definitions for NIE and AIE. Fixes: 48863ce5940f ("stmmac: add DMA support for GMAC 4.xx") Signed-off-by: Ong Boon Leong Signed-off-by: Ramesh Babu B Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac4_dma.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index bb29bfcd62c34..62aa0e95beb70 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -124,6 +124,23 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, ioaddr + DMA_CHAN_INTR_ENA(chan)); } +static void dwmac410_dma_init_channel(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, u32 chan) +{ + u32 value; + + /* common channel control register config */ + value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); + if (dma_cfg->pblx8) + value = value | DMA_BUS_MODE_PBL; + + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); + + /* Mask interrupts by writing to CSR7 */ + writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, + ioaddr + DMA_CHAN_INTR_ENA(chan)); +} + static void dwmac4_dma_init(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, int atds) { @@ -523,7 +540,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = { const struct stmmac_dma_ops dwmac410_dma_ops = { .reset = dwmac4_dma_reset, .init = dwmac4_dma_init, - .init_chan = dwmac4_dma_init_channel, + .init_chan = dwmac410_dma_init_channel, .init_rx_chan = dwmac4_dma_init_rx_chan, .init_tx_chan = dwmac4_dma_init_tx_chan, .axi = dwmac4_dma_axi, -- GitLab From dbbe7c962c3a8163bf724dbc3c9fdfc9b16d3117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Mar 2021 18:46:43 -0800 Subject: [PATCH 117/839] docs: networking: drop special stable handling Leave it to Greg. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 72 ++----------------- Documentation/process/stable-kernel-rules.rst | 6 -- Documentation/process/submitting-patches.rst | 5 -- 3 files changed, 6 insertions(+), 77 deletions(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index a64c01b52b4c5..91b2cf7128012 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -142,73 +142,13 @@ Please send incremental versions on top of what has been merged in order to fix the patches the way they would look like if your latest patch series was to be merged. -How can I tell what patches are queued up for backporting to the various stable releases? ------------------------------------------------------------------------------------------ -Normally Greg Kroah-Hartman collects stable commits himself, but for -networking, Dave collects up patches he deems critical for the -networking subsystem, and then hands them off to Greg. - -There is a patchworks queue that you can see here: - - https://patchwork.kernel.org/bundle/netdev/stable/?state=* - -It contains the patches which Dave has selected, but not yet handed off -to Greg. If Greg already has the patch, then it will be here: - - https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git - -A quick way to find whether the patch is in this stable-queue is to -simply clone the repo, and then git grep the mainline commit ID, e.g. -:: - - stable-queue$ git grep -l 284041ef21fdf2e - releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - stable/stable-queue$ - -I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say? ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -No, not for networking. Check the stable queues as per above first -to see if it is already queued. If not, then send a mail to netdev, -listing the upstream commit ID and why you think it should be a stable -candidate. - -Before you jump to go do the above, do note that the normal stable rules -in :ref:`Documentation/process/stable-kernel-rules.rst ` -still apply. So you need to explicitly indicate why it is a critical -fix and exactly what users are impacted. In addition, you need to -convince yourself that you *really* think it has been overlooked, -vs. having been considered and rejected. - -Generally speaking, the longer it has had a chance to "soak" in -mainline, the better the odds that it is an OK candidate for stable. So -scrambling to request a commit be added the day after it appears should -be avoided. - -I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say? ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -No. See above answer. In short, if you think it really belongs in -stable, then ensure you write a decent commit log that describes who -gets impacted by the bug fix and how it manifests itself, and when the -bug was introduced. If you do that properly, then the commit will get -handled appropriately and most likely get put in the patchworks stable -queue if it really warrants it. - -If you think there is some valid information relating to it being in -stable that does *not* belong in the commit log, then use the three dash -marker line as described in -:ref:`Documentation/process/submitting-patches.rst ` -to temporarily embed that information into the patch that you send. - -Are all networking bug fixes backported to all stable releases? +Are there special rules regarding stable submissions on netdev? --------------------------------------------------------------- -Due to capacity, Dave could only take care of the backports for the -last two stable releases. For earlier stable releases, each stable -branch maintainer is supposed to take care of them. If you find any -patch is missing from an earlier stable branch, please notify -stable@vger.kernel.org with either a commit ID or a formal patch -backported, and CC Dave and other relevant networking developers. +While it used to be the case that netdev submissions were not supposed +to carry explicit ``CC: stable@vger.kernel.org`` tags that is no longer +the case today. Please follow the standard stable rules in +:ref:`Documentation/process/stable-kernel-rules.rst `, +and make sure you include appropriate Fixes tags! Is the comment style convention different for the networking content? --------------------------------------------------------------------- diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 3973556250e17..003c865e9c212 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -35,12 +35,6 @@ Rules on what kind of patches are accepted, and which ones are not, into the Procedure for submitting patches to the -stable tree ---------------------------------------------------- - - If the patch covers files in net/ or drivers/net please follow netdev stable - submission guidelines as described in - :ref:`Documentation/networking/netdev-FAQ.rst ` - after first checking the stable networking queue at - https://patchwork.kernel.org/bundle/netdev/stable/?state=* - to ensure the requested patch is not already queued up. - Security patches should not be handled (solely) by the -stable review process but should follow the procedures in :ref:`Documentation/admin-guide/security-bugs.rst `. diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 8c991c8636280..91de63b201c12 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -250,11 +250,6 @@ should also read :ref:`Documentation/process/stable-kernel-rules.rst ` in addition to this file. -Note, however, that some subsystem maintainers want to come to their own -conclusions on which patches should go to the stable trees. The networking -maintainer, in particular, would rather not see individual developers -adding lines like the above to their patches. - If changes affect userland-kernel interfaces, please send the MAN-PAGES maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at least a notification of the change, so that some information makes its way -- GitLab From fc7c5c208eb7bc2df3a9f4234f14eca250001cb6 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Wed, 3 Mar 2021 18:14:39 +0000 Subject: [PATCH 118/839] ALSA: usb: Add Plantronics C320-M USB ctrl msg delay quirk The microphone in the Plantronics C320-M headset will randomly fail to initialize properly, at least when using Microsoft Teams. Introducing a 20ms delay on the control messages appears to resolve the issue. Link: https://gitlab.freedesktop.org/pulseaudio/pulseaudio/-/issues/1065 Tested-by: Andreas Kempe Signed-off-by: John Ernberg Cc: Link: https://lore.kernel.org/r/20210303181405.39835-1-john.ernberg@actia.se Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 737b2729c0d37..0864692d8a7b6 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1670,6 +1670,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); + /* + * Plantronics C320-M needs a delay to avoid random + * microhpone failures. + */ + if (chip->usb_id == USB_ID(0x047f, 0xc025) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + msleep(20); + /* Zoom R16/24, many Logitech(at least H650e/H570e/BCC950), * Jabra 550a, Kingston HyperX needs a tiny delay here, * otherwise requests like get/set frequency return -- GitLab From b12422362ce947098ac420ac3c975fc006af4c02 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 3 Mar 2021 11:55:49 -0800 Subject: [PATCH 119/839] net: macb: Add default usrio config to default gem config There is no usrio config defined for default gem config leading to a kernel panic devices that don't define a data. This issue can be reprdouced with microchip polar fire soc where compatible string is defined as "cdns,macb". Fixes: edac63861db7 ("add userio bits as platform configuration") Signed-off-by: Atish Patra Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 472bf8f220bc6..15362d016a87e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3954,6 +3954,13 @@ static int macb_init(struct platform_device *pdev) return 0; } +static const struct macb_usrio_config macb_default_usrio = { + .mii = MACB_BIT(MII), + .rmii = MACB_BIT(RMII), + .rgmii = GEM_BIT(RGMII), + .refclk = MACB_BIT(CLKEN), +}; + #if defined(CONFIG_OF) /* 1518 rounded up */ #define AT91ETHER_MAX_RBUFF_SZ 0x600 @@ -4439,13 +4446,6 @@ static int fu540_c000_init(struct platform_device *pdev) return macb_init(pdev); } -static const struct macb_usrio_config macb_default_usrio = { - .mii = MACB_BIT(MII), - .rmii = MACB_BIT(RMII), - .rgmii = GEM_BIT(RGMII), - .refclk = MACB_BIT(CLKEN), -}; - static const struct macb_usrio_config sama7g5_usrio = { .mii = 0, .rmii = 1, @@ -4594,6 +4594,7 @@ static const struct macb_config default_gem_config = { .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, + .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; -- GitLab From 3e59e8856758eb5a2dfe1f831ef53b168fd58105 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 3 Mar 2021 16:50:49 +0100 Subject: [PATCH 120/839] net: l2tp: reduce log level of messages in receive path, add counter instead Commit 5ee759cda51b ("l2tp: use standard API for warning log messages") changed a number of warnings about invalid packets in the receive path so that they are always shown, instead of only when a special L2TP debug flag is set. Even with rate limiting these warnings can easily cause significant log spam - potentially triggered by a malicious party sending invalid packets on purpose. In addition these warnings were noticed by projects like Tunneldigger [1], which uses L2TP for its data path, but implements its own control protocol (which is sufficiently different from L2TP data packets that it would always be passed up to userspace even with future extensions of L2TP). Some of the warnings were already redundant, as l2tp_stats has a counter for these packets. This commit adds one additional counter for invalid packets that are passed up to userspace. Packets with unknown session are not counted as invalid, as there is nothing wrong with the format of these packets. With the additional counter, all of these messages are either redundant or benign, so we reduce them to pr_debug_ratelimited(). [1] https://github.com/wlanslovenija/tunneldigger/issues/160 Fixes: 5ee759cda51b ("l2tp: use standard API for warning log messages") Signed-off-by: Matthias Schiffer Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 1 + net/l2tp/l2tp_core.c | 41 +++++++++++++++++++++------------------ net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_netlink.c | 6 ++++++ 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 30c80d5ba4bfc..bab8c97086111 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -145,6 +145,7 @@ enum { L2TP_ATTR_RX_ERRORS, /* u64 */ L2TP_ATTR_STATS_PAD, L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ + L2TP_ATTR_RX_INVALID, /* u64 */ __L2TP_ATTR_STATS_MAX, }; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7be5103ff2a84..203890e378cb0 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { - pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", - tunnel->name, tunnel->tunnel_id, - session->session_id); + pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", + tunnel->name, tunnel->tunnel_id, + session->session_id); atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } @@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * If user has configured mandatory sequence numbers, discard. */ if (session->recv_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, session->send_seq = 0; l2tp_session_set_header_len(session, tunnel->version); } else if (session->send_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Short packet? */ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { - pr_warn_ratelimited("%s: recv short packet (len=%d)\n", - tunnel->name, skb->len); - goto error; + pr_debug_ratelimited("%s: recv short packet (len=%d)\n", + tunnel->name, skb->len); + goto invalid; } /* Point to L2TP header */ @@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Check protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; if (version != tunnel->version) { - pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto error; + pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto invalid; } /* Get length of L2TP packet */ @@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* If type is control packet, it is handled by userspace. */ if (hdrflags & L2TP_HDRFLAG_T) - goto error; + goto pass; /* Skip flags */ ptr += 2; @@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) l2tp_session_dec_refcount(session); /* Not found? Pass to userspace to deal with */ - pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; + pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto pass; } if (tunnel->version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) - goto error; + goto invalid; l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); return 0; -error: +invalid: + atomic_long_inc(&tunnel->stats.rx_invalid); + +pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index cb21d906343e8..98ea98eb9567b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -39,6 +39,7 @@ struct l2tp_stats { atomic_long_t rx_oos_packets; atomic_long_t rx_errors; atomic_long_t rx_cookie_discards; + atomic_long_t rx_invalid; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 83956c9ee1fcc..96eb91be9238b 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&tunnel->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&tunnel->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&session->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&session->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); -- GitLab From 4b5dc1a94d4f92b5845e98bd9ae344b26d933aad Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 3 Mar 2021 16:39:47 +0800 Subject: [PATCH 121/839] Revert "r8152: adjust the settings about MAC clock speed down for RTL8153" This reverts commit 134f98bcf1b898fb9d6f2b91bc85dd2e5478b4b8. The r8153_mac_clk_spd() is used for RTL8153A only, because the register table of RTL8153B is different from RTL8153A. However, this function would be called when RTL8153B calls r8153_first_init() and r8153_enter_oob(). That causes RTL8153B becomes unstable when suspending and resuming. The worst case may let the device stop working. Besides, revert this commit to disable MAC clock speed down for RTL8153A. It would avoid the known issue when enabling U1. The data of the first control transfer may be wrong when exiting U1. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b246817f34057..90f1c02000425 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3021,29 +3021,6 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) device_set_wakeup_enable(&tp->udev->dev, false); } -static void r8153_mac_clk_spd(struct r8152 *tp, bool enable) -{ - /* MAC clock speed down */ - if (enable) { - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, - ALDPS_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, - EEE_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN | - U1U2_SPDWN_EN | L1_SPDWN_EN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN | - TP100_SPDWN_EN | TP500_SPDWN_EN | EEE_SPDWN_EN | - TP1000_SPDWN_EN); - } else { - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - } -} - static void r8153_u1u2en(struct r8152 *tp, bool enable) { u8 u1u2[8]; @@ -3338,11 +3315,9 @@ static void rtl8153_runtime_enable(struct r8152 *tp, bool enable) if (enable) { r8153_u1u2en(tp, false); r8153_u2p3en(tp, false); - r8153_mac_clk_spd(tp, true); rtl_runtime_suspend_enable(tp, true); } else { rtl_runtime_suspend_enable(tp, false); - r8153_mac_clk_spd(tp, false); switch (tp->version) { case RTL_VER_03: @@ -4718,7 +4693,6 @@ static void r8153_first_init(struct r8152 *tp) { u32 ocp_data; - r8153_mac_clk_spd(tp, false); rxdy_gated_en(tp, true); r8153_teredo_off(tp); @@ -4769,8 +4743,6 @@ static void r8153_enter_oob(struct r8152 *tp) { u32 ocp_data; - r8153_mac_clk_spd(tp, true); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); @@ -5496,10 +5468,15 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001); + /* MAC clock speed down */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); + r8153_power_cut_en(tp, false); rtl_runtime_suspend_enable(tp, false); r8153_u1u2en(tp, true); - r8153_mac_clk_spd(tp, false); usb_enable_lpm(tp->udev); ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); -- GitLab From d65614a01d24704b016635abf5cc028a54e45a62 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 2 Mar 2021 17:19:32 +0800 Subject: [PATCH 122/839] net: 9p: advance iov on empty read I met below warning when cating a small size(about 80bytes) txt file on 9pfs(msize=2097152 is passed to 9p mount option), the reason is we miss iov_iter_advance() if the read count is 0 for zerocopy case, so we didn't truncate the pipe, then iov_iter_pipe() thinks the pipe is full. Fix it by removing the exception for 0 to ensure to call iov_iter_advance() even on empty read for zerocopy case. [ 8.279568] WARNING: CPU: 0 PID: 39 at lib/iov_iter.c:1203 iov_iter_pipe+0x31/0x40 [ 8.280028] Modules linked in: [ 8.280561] CPU: 0 PID: 39 Comm: cat Not tainted 5.11.0+ #6 [ 8.281260] RIP: 0010:iov_iter_pipe+0x31/0x40 [ 8.281974] Code: 2b 42 54 39 42 5c 76 22 c7 07 20 00 00 00 48 89 57 18 8b 42 50 48 c7 47 08 b [ 8.283169] RSP: 0018:ffff888000cbbd80 EFLAGS: 00000246 [ 8.283512] RAX: 0000000000000010 RBX: ffff888000117d00 RCX: 0000000000000000 [ 8.283876] RDX: ffff88800031d600 RSI: 0000000000000000 RDI: ffff888000cbbd90 [ 8.284244] RBP: ffff888000cbbe38 R08: 0000000000000000 R09: ffff8880008d2058 [ 8.284605] R10: 0000000000000002 R11: ffff888000375510 R12: 0000000000000050 [ 8.284964] R13: ffff888000cbbe80 R14: 0000000000000050 R15: ffff88800031d600 [ 8.285439] FS: 00007f24fd8af600(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 [ 8.285844] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.286150] CR2: 00007f24fd7d7b90 CR3: 0000000000c97000 CR4: 00000000000406b0 [ 8.286710] Call Trace: [ 8.288279] generic_file_splice_read+0x31/0x1a0 [ 8.289273] ? do_splice_to+0x2f/0x90 [ 8.289511] splice_direct_to_actor+0xcc/0x220 [ 8.289788] ? pipe_to_sendpage+0xa0/0xa0 [ 8.290052] do_splice_direct+0x8b/0xd0 [ 8.290314] do_sendfile+0x1ad/0x470 [ 8.290576] do_syscall_64+0x2d/0x40 [ 8.290818] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 8.291409] RIP: 0033:0x7f24fd7dca0a [ 8.292511] Code: c3 0f 1f 80 00 00 00 00 4c 89 d2 4c 89 c6 e9 bd fd ff ff 0f 1f 44 00 00 31 8 [ 8.293360] RSP: 002b:00007ffc20932818 EFLAGS: 00000206 ORIG_RAX: 0000000000000028 [ 8.293800] RAX: ffffffffffffffda RBX: 0000000001000000 RCX: 00007f24fd7dca0a [ 8.294153] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 0000000000000001 [ 8.294504] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 [ 8.294867] R10: 0000000001000000 R11: 0000000000000206 R12: 0000000000000003 [ 8.295217] R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000 [ 8.295782] ---[ end trace 63317af81b3ca24b ]--- Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- net/9p/client.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 4f62f299da0cf..0a9019da18f39 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1623,10 +1623,6 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (!count) { - p9_tag_remove(clnt, req); - return 0; - } if (non_zc) { int n = copy_to_iter(dataptr, count, to); -- GitLab From a9ecb0cbf03746b17a7c13bd8e3464e6789f73e8 Mon Sep 17 00:00:00 2001 From: zhang kai Date: Tue, 2 Mar 2021 18:16:07 +0800 Subject: [PATCH 123/839] rtnetlink: using dev_base_seq from target net Signed-off-by: zhang kai Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0edc0b2baaa43..1bdcb33fb5619 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2147,7 +2147,7 @@ out: out_err: cb->args[1] = idx; cb->args[0] = h; - cb->seq = net->dev_base_seq; + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) put_net(tgt_net); -- GitLab From 2888b080d05c819205bbfe52c624a639f44c266a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 3 Mar 2021 23:58:27 +0100 Subject: [PATCH 124/839] netfilter: nftables: fix possible double hook unregistration with table owner Skip hook unregistration of owner tables from the netns exit path, nft_rcv_nl_event() unregisters the table hooks before tearing down the table content. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b07703e191080..796ce86ef7eb5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9028,8 +9028,12 @@ static void __nft_release_hooks(struct net *net) { struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) + list_for_each_entry(table, &net->nft.tables, list) { + if (nft_table_has_owner(table)) + continue; + __nft_release_hook(net, table); + } } static void __nft_release_table(struct net *net, struct nft_table *table) -- GitLab From bd1777b3a88f98e223392221b330668458aac7f1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Mar 2021 04:00:09 +0100 Subject: [PATCH 125/839] netfilter: nftables: bogus check for netlink portID with table owner The existing branch checks for 0 != table->nlpid which always evaluates true for tables that have an owner. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 796ce86ef7eb5..224c8e537cb33 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9083,13 +9083,12 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nf_tables_table_destroy(&ctx); } -static void __nft_release_tables(struct net *net, u32 nlpid) +static void __nft_release_tables(struct net *net) { struct nft_table *table, *nt; list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - if (nft_table_has_owner(table) && - nlpid != table->nlpid) + if (nft_table_has_owner(table)) continue; __nft_release_table(net, table); @@ -9155,7 +9154,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_lock(&net->nft.commit_mutex); if (!list_empty(&net->nft.commit_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); - __nft_release_tables(net, 0); + __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); -- GitLab From 9799110825dba087c2bdce886977cf84dada2005 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 4 Mar 2021 12:34:16 +0800 Subject: [PATCH 126/839] ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend() Rear audio on Lenovo ThinkStation P620 stops working after commit 1965c4364bdd ("ALSA: usb-audio: Disable autosuspend for Lenovo ThinkStation P620"): [ 6.013526] usbcore: registered new interface driver snd-usb-audio [ 6.023064] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.023083] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 [ 6.023090] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.023098] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 [ 6.023103] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.023110] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 [ 6.045846] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.045866] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 [ 6.045877] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.045886] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 [ 6.045894] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x0, type = 1 [ 6.045908] usb 3-6: cannot get ctl value: req = 0x81, wValue = 0x202, wIndex = 0x0, type = 4 I overlooked the issue because when I was working on the said commit, only the front audio is tested. Apology for that. Changing supports_autosuspend in driver is too late for disabling autosuspend, because it was already used by USB probe routine, so it can break the balance on the following code that depends on supports_autosuspend. Fix it by using usb_disable_autosuspend() helper, and balance the suspend count in disconnect callback. Fixes: 1965c4364bdd ("ALSA: usb-audio: Disable autosuspend for Lenovo ThinkStation P620") Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20210304043419.287191-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/card.c | 5 +++++ sound/usb/quirks.c | 2 +- sound/usb/usbaudio.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 85ed8507e41a2..08c794883299e 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -830,6 +830,8 @@ static int usb_audio_probe(struct usb_interface *intf, snd_media_device_create(chip, intf); } + chip->quirk_type = quirk->type; + usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; @@ -912,6 +914,9 @@ static void usb_audio_disconnect(struct usb_interface *intf) } else { mutex_unlock(®ister_mutex); } + + if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + usb_enable_autosuspend(interface_to_usbdev(intf)); } /* lock the shutdown (disconnect) task and autoresume */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 0864692d8a7b6..66efedaa3aeb3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -547,7 +547,7 @@ static int setup_disable_autosuspend(struct snd_usb_audio *chip, struct usb_driver *driver, const struct snd_usb_audio_quirk *quirk) { - driver->supports_autosuspend = 0; + usb_disable_autosuspend(interface_to_usbdev(iface)); return 1; /* Continue with creating streams and mixer */ } diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 215c1771dd570..60b9dd7df6bb7 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -27,6 +27,7 @@ struct snd_usb_audio { struct snd_card *card; struct usb_interface *intf[MAX_CARD_INTERFACES]; u32 usb_id; + uint16_t quirk_type; struct mutex mutex; unsigned int system_suspend; atomic_t active; -- GitLab From 460c9f1c944b4bf04f2934478fd3f865b730b771 Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Wed, 3 Mar 2021 18:24:10 -0800 Subject: [PATCH 127/839] arch/parisc/kernel: remove duplicate include in ptrace 'linux/compat.h' included in 'arch/parisc/kernel/ptrace.c' is duplicated. It is also included in the 24th line. Signed-off-by: Zhang Yunkai Signed-off-by: Helge Deller --- arch/parisc/kernel/ptrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 2127974982df9..65de6c4c9354d 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -567,8 +567,6 @@ static const struct user_regset_view user_parisc_native_view = { }; #ifdef CONFIG_64BIT -#include - static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) -- GitLab From a14a6219996ee6f6e858d83b11affc7907633687 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Tue, 2 Mar 2021 09:10:03 -0500 Subject: [PATCH 128/839] ALSA: hda: ignore invalid NHLT table On some Lenovo systems if the microphone is disabled in the BIOS only the NHLT table header is created, with no data. This means the endpoints field is not correctly set to zero - leading to an unintialised variable and hence invalid descriptors are parsed leading to page faults. The Lenovo firmware team is addressing this, but adding a check preventing invalid tables being parsed is worthwhile. Tested on a Lenovo T14. Tested-by: Philipp Leskovitz Reported-by: Philipp Leskovitz Signed-off-by: Mark Pearson Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210302141003.7342-1-markpearson@lenovo.com Signed-off-by: Takashi Iwai --- sound/hda/intel-nhlt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index d053beccfaec3..e2237239d922a 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -39,6 +39,11 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) if (!nhlt) return 0; + if (nhlt->header.length <= sizeof(struct acpi_table_header)) { + dev_warn(dev, "Invalid DMIC description table\n"); + return 0; + } + for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++, epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) { -- GitLab From fec60c3bc5d1713db2727cdffc638d48f9c07dc3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Mar 2021 09:30:21 +0100 Subject: [PATCH 129/839] ALSA: usb-audio: Fix "cannot get freq eq" errors on Dell AE515 sound bar Dell AE515 sound bar (413c:a506) spews the error messages when the driver tries to read the current sample frequency, hence it needs to be on the list in snd_usb_get_sample_rate_quirk(). BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211551 Cc: Link: https://lore.kernel.org/r/20210304083021.2152-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 66efedaa3aeb3..6ef73af009138 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1520,6 +1520,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */ case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ + case USB_ID(0x413c, 0xa506): /* Dell AE515 sound bar */ return true; } -- GitLab From 06abcb18b3a021ba1a3f2020cbefb3ed04e59e72 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Mar 2021 09:50:09 +0100 Subject: [PATCH 130/839] ALSA: usb-audio: Apply the control quirk to Plantronics headsets Other Plantronics headset models seem requiring the same workaround as C320-M to add the 20ms delay for the control messages, too. Apply the workaround generically for devices with the vendor ID 0x047f. Note that the problem didn't surface before 5.11 just with luck. Since 5.11 got a big code rewrite about the stream handling, the parameter setup procedure has changed, and this seemed triggering the problem more often. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1182552 Cc: Link: https://lore.kernel.org/r/20210304085009.4770-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6ef73af009138..d3001fb18141f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1672,10 +1672,10 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, msleep(20); /* - * Plantronics C320-M needs a delay to avoid random - * microhpone failures. + * Plantronics headsets (C320, C320-M, etc) need a delay to avoid + * random microhpone failures. */ - if (chip->usb_id == USB_ID(0x047f, 0xc025) && + if (USB_ID_VENDOR(chip->usb_id) == 0x047f && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); -- GitLab From 775c5033a0d164622d9d10dd0f0a5531639ed3ed Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 11:09:12 +0200 Subject: [PATCH 131/839] fuse: fix live lock in fuse_iget() Commit 5d069dbe8aaf ("fuse: fix bad inode") replaced make_bad_inode() in fuse_iget() with a private implementation fuse_make_bad(). The private implementation fails to remove the bad inode from inode cache, so the retry loop with iget5_locked() finds the same bad inode and marks it bad forever. kmsg snip: [ ] rcu: INFO: rcu_sched self-detected stall on CPU ... [ ] ? bit_wait_io+0x50/0x50 [ ] ? fuse_init_file_inode+0x70/0x70 [ ] ? find_inode.isra.32+0x60/0xb0 [ ] ? fuse_init_file_inode+0x70/0x70 [ ] ilookup5_nowait+0x65/0x90 [ ] ? fuse_init_file_inode+0x70/0x70 [ ] ilookup5.part.36+0x2e/0x80 [ ] ? fuse_init_file_inode+0x70/0x70 [ ] ? fuse_inode_eq+0x20/0x20 [ ] iget5_locked+0x21/0x80 [ ] ? fuse_inode_eq+0x20/0x20 [ ] fuse_iget+0x96/0x1b0 Fixes: 5d069dbe8aaf ("fuse: fix bad inode") Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 68cca8d4db6ed..63d97a15ffde9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) static inline void fuse_make_bad(struct inode *inode) { + remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } -- GitLab From c95c34f01bbda4421c25fdc9b04a4a4aab10d36c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 3 Mar 2021 19:56:34 +0100 Subject: [PATCH 132/839] xsk: Remove dangling function declaration from header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_umem_query() is dead for a long time, drop the declaration from include/linux/netdevice.h Fixes: c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210303185636.18070-2-maciej.fijalkowski@intel.com --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f06fbee8638e8..5b67ea89d5f26 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3959,8 +3959,6 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); -int xdp_umem_query(struct net_device *dev, u16 queue_id); - int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); -- GitLab From 6bc6699881012b5bd5d49fa861a69a37fc01b49c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 3 Mar 2021 19:56:35 +0100 Subject: [PATCH 133/839] samples, bpf: Add missing munmap in xdpsock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We mmap the umem region, but we never munmap it. Add the missing call at the end of the cleanup. Fixes: 3945b37a975d ("samples/bpf: use hugepages in xdpsock app") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210303185636.18070-3-maciej.fijalkowski@intel.com --- samples/bpf/xdpsock_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index db0cb73513a58..1e2a1105d0e67 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1699,5 +1699,7 @@ int main(int argc, char **argv) xdpsock_cleanup(); + munmap(bufs, NUM_FRAMES * opt_xsk_frame_size); + return 0; } -- GitLab From 2b2aedabc44e9660f90ccf7ba1ca2706d75f411f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 3 Mar 2021 19:56:36 +0100 Subject: [PATCH 134/839] libbpf: Clear map_info before each bpf_obj_get_info_by_fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xsk_lookup_bpf_maps, based on prog_fd, looks whether current prog has a reference to XSKMAP. BPF prog can include insns that work on various BPF maps and this is covered by iterating through map_ids. The bpf_map_info that is passed to bpf_obj_get_info_by_fd for filling needs to be cleared at each iteration, so that it doesn't contain any outdated fields and that is currently missing in the function of interest. To fix that, zero-init map_info via memset before each bpf_obj_get_info_by_fd call. Also, since the area of this code is touched, in general strcmp is considered harmful, so let's convert it to strncmp and provide the size of the array name for current map_info. While at it, do s/continue/break/ once we have found the xsks_map to terminate the search. Fixes: 5750902a6e9b ("libbpf: proper XSKMAP cleanup") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210303185636.18070-4-maciej.fijalkowski@intel.com --- tools/lib/bpf/xsk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index ffbb588724d8f..526fc35c0b233 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -610,15 +610,16 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) if (fd < 0) continue; + memset(&map_info, 0, map_len); err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); if (err) { close(fd); continue; } - if (!strcmp(map_info.name, "xsks_map")) { + if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { ctx->xsks_map_fd = fd; - continue; + break; } close(fd); -- GitLab From 83a2881903f3d5bc08ded4fb04f6e3bedb1fba65 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 1 Mar 2021 16:40:19 +0100 Subject: [PATCH 135/839] bpf: Account for BPF_FETCH in insn_has_def32() insn_has_def32() returns false for 32-bit BPF_FETCH insns. This makes adjust_insn_aux_data() incorrectly set zext_dst, as can be seen in [1]. This happens because insn_no_def() does not know about the BPF_FETCH variants of BPF_STX. Fix in two steps. First, replace insn_no_def() with insn_def_regno(), which returns the register an insn defines. Normally insn_no_def() calls are followed by insn->dst_reg uses; replace those with the insn_def_regno() return value. Second, adjust the BPF_STX special case in is_reg64() to deal with queries made from opt_subreg_zext_lo32_rnd_hi32(), where the state information is no longer available. Add a comment, since the purpose of this special case is not clear at first glance. [1] https://lore.kernel.org/bpf/20210223150845.1857620-1-jackmanb@google.com/ Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Brendan Jackman Link: https://lore.kernel.org/bpf/20210301154019.129110-1-iii@linux.ibm.com --- kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d34ba492d461..bb3eaab934f39 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1703,7 +1703,11 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, } if (class == BPF_STX) { - if (reg->type != SCALAR_VALUE) + /* BPF_STX (including atomic variants) has multiple source + * operands, one of which is a ptr. Check whether the caller is + * asking about it. + */ + if (t == SRC_OP && reg->type != SCALAR_VALUE) return true; return BPF_SIZE(code) == BPF_DW; } @@ -1735,22 +1739,38 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, return true; } -/* Return TRUE if INSN doesn't have explicit value define. */ -static bool insn_no_def(struct bpf_insn *insn) +/* Return the regno defined by the insn, or -1. */ +static int insn_def_regno(const struct bpf_insn *insn) { - u8 class = BPF_CLASS(insn->code); - - return (class == BPF_JMP || class == BPF_JMP32 || - class == BPF_STX || class == BPF_ST); + switch (BPF_CLASS(insn->code)) { + case BPF_JMP: + case BPF_JMP32: + case BPF_ST: + return -1; + case BPF_STX: + if (BPF_MODE(insn->code) == BPF_ATOMIC && + (insn->imm & BPF_FETCH)) { + if (insn->imm == BPF_CMPXCHG) + return BPF_REG_0; + else + return insn->src_reg; + } else { + return -1; + } + default: + return insn->dst_reg; + } } /* Return TRUE if INSN has defined any 32-bit value explicitly. */ static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) { - if (insn_no_def(insn)) + int dst_reg = insn_def_regno(insn); + + if (dst_reg == -1) return false; - return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); + return !is_reg64(env, insn, dst_reg, NULL, DST_OP); } static void mark_insn_zext(struct bpf_verifier_env *env, @@ -11006,9 +11026,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, for (i = 0; i < len; i++) { int adj_idx = i + delta; struct bpf_insn insn; - u8 load_reg; + int load_reg; insn = insns[adj_idx]; + load_reg = insn_def_regno(&insn); if (!aux[adj_idx].zext_dst) { u8 code, class; u32 imm_rnd; @@ -11018,14 +11039,14 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, code = insn.code; class = BPF_CLASS(code); - if (insn_no_def(&insn)) + if (load_reg == -1) continue; /* NOTE: arg "reg" (the fourth one) is only used for - * BPF_STX which has been ruled out in above - * check, it is safe to pass NULL here. + * BPF_STX + SRC_OP, so it is safe to pass NULL + * here. */ - if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { + if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) { if (class == BPF_LD && BPF_MODE(code) == BPF_IMM) i++; @@ -11040,7 +11061,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, imm_rnd = get_random_int(); rnd_hi32_patch[0] = insn; rnd_hi32_patch[1].imm = imm_rnd; - rnd_hi32_patch[3].dst_reg = insn.dst_reg; + rnd_hi32_patch[3].dst_reg = load_reg; patch = rnd_hi32_patch; patch_len = 4; goto apply_patch_buffer; @@ -11049,22 +11070,9 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, if (!bpf_jit_needs_zext()) continue; - /* zext_dst means that we want to zero-extend whatever register - * the insn defines, which is dst_reg most of the time, with - * the notable exception of BPF_STX + BPF_ATOMIC + BPF_FETCH. - */ - if (BPF_CLASS(insn.code) == BPF_STX && - BPF_MODE(insn.code) == BPF_ATOMIC) { - /* BPF_STX + BPF_ATOMIC insns without BPF_FETCH do not - * define any registers, therefore zext_dst cannot be - * set. - */ - if (WARN_ON(!(insn.imm & BPF_FETCH))) - return -EINVAL; - load_reg = insn.imm == BPF_CMPXCHG ? BPF_REG_0 - : insn.src_reg; - } else { - load_reg = insn.dst_reg; + if (WARN_ON(load_reg == -1)) { + verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n"); + return -EFAULT; } zext_patch[0] = insn; -- GitLab From d785e1fec60179f534fbe8d006c890e5ad186e51 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 14 Oct 2020 16:17:48 +0200 Subject: [PATCH 136/839] ixgbe: fail to create xfrm offload of IPsec tunnel mode SA Based on talks and indirect references ixgbe IPsec offlod do not support IPsec tunnel mode offload. It can only support IPsec transport mode offload. Now explicitly fail when creating non transport mode SA with offload to avoid false performance expectations. Fixes: 63a67fe229ea ("ixgbe: add ipsec offload add and remove SA") Signed-off-by: Antony Antony Acked-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 5 +++++ drivers/net/ethernet/intel/ixgbevf/ipsec.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index eca73526ac86b..54d47265a7ac1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -575,6 +575,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->props.mode != XFRM_MODE_TRANSPORT) { + netdev_err(dev, "Unsupported mode for ipsec offload\n"); + return -EINVAL; + } + if (ixgbe_ipsec_check_mgmt_ip(xs)) { netdev_err(dev, "IPsec IP addr clash with mgmt filters\n"); return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 5170dd9d8705b..caaea2c920a6e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -272,6 +272,11 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->props.mode != XFRM_MODE_TRANSPORT) { + netdev_err(dev, "Unsupported mode for ipsec offload\n"); + return -EINVAL; + } + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { struct rx_sa rsa; -- GitLab From 7a766381634da19fc837619b0a34590498d9d29a Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 3 Jan 2021 16:08:42 +0800 Subject: [PATCH 137/839] ixgbe: Fix memleak in ixgbe_configure_clsu32 When ixgbe_fdir_write_perfect_filter_82599() fails, input allocated by kzalloc() has not been freed, which leads to memleak. Signed-off-by: Dinghao Liu Reviewed-by: Paul Menzel Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index fae84202d870c..9f3f12e2ccf23 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9565,8 +9565,10 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask); err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, input->sw_idx, queue); - if (!err) - ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + if (err) + goto err_out_w_lock; + + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); spin_unlock(&adapter->fdir_perfect_lock); if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) -- GitLab From d93ef301644ee82925bce1d57fdfe70475dc0bae Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Wed, 3 Mar 2021 21:55:49 -0800 Subject: [PATCH 138/839] net: sctp: trivial: fix typo in comment Fix typo of 'overflow' for comment in sctp_tsnmap_check(). Reported-by: Gustavo A. R. Silva Signed-off-by: Drew Fustini Signed-off-by: David S. Miller --- net/sctp/tsnmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index a9c6af5795d81..5ba456727f631 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -75,7 +75,7 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) return 1; /* Verify that we can hold this TSN and that it will not - * overlfow our map + * overflow our map */ if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) return -1; -- GitLab From 76c03bf8e2624076b88d93542d78e22d5345c88e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Mar 2021 10:57:53 +0200 Subject: [PATCH 139/839] nexthop: Do not flush blackhole nexthops when loopback goes down As far as user space is concerned, blackhole nexthops do not have a nexthop device and therefore should not be affected by the administrative or carrier state of any netdev. However, when the loopback netdev goes down all the blackhole nexthops are flushed. This happens because internally the kernel associates blackhole nexthops with the loopback netdev. This behavior is both confusing to those not familiar with kernel internals and also diverges from the legacy API where blackhole IPv4 routes are not flushed when the loopback netdev goes down: # ip route add blackhole 198.51.100.0/24 # ip link set dev lo down # ip route show 198.51.100.0/24 blackhole 198.51.100.0/24 Blackhole IPv6 routes are flushed, but at least user space knows that they are associated with the loopback netdev: # ip -6 route show 2001:db8:1::/64 blackhole 2001:db8:1::/64 dev lo metric 1024 pref medium Fix this by only flushing blackhole nexthops when the loopback netdev is unregistered. Fixes: ab84be7e54fc ("net: Initial nexthop code") Signed-off-by: Ido Schimmel Reported-by: Donald Sharp Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/nexthop.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f1c6cbdb9e436..743777bce1794 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1399,7 +1399,7 @@ out: /* rtnl */ /* remove all nexthops tied to a device being deleted */ -static void nexthop_flush_dev(struct net_device *dev) +static void nexthop_flush_dev(struct net_device *dev, unsigned long event) { unsigned int hash = nh_dev_hashfn(dev->ifindex); struct net *net = dev_net(dev); @@ -1411,6 +1411,10 @@ static void nexthop_flush_dev(struct net_device *dev) if (nhi->fib_nhc.nhc_dev != dev) continue; + if (nhi->reject_nh && + (event == NETDEV_DOWN || event == NETDEV_CHANGE)) + continue; + remove_nexthop(net, nhi->nh_parent, NULL); } } @@ -2189,11 +2193,11 @@ static int nh_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGE: if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGEMTU: info_ext = ptr; -- GitLab From 3a1099d3147f391fa11320a759bbcb1bb857fca1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Mar 2021 10:57:54 +0200 Subject: [PATCH 140/839] selftests: fib_nexthops: Test blackhole nexthops when loopback goes down Test that blackhole nexthops are not flushed when the loopback device goes down. Output without previous patch: # ./fib_nexthops.sh -t basic Basic functional tests ---------------------- TEST: List with nothing defined [ OK ] TEST: Nexthop get on non-existent id [ OK ] TEST: Nexthop with no device or gateway [ OK ] TEST: Nexthop with down device [ OK ] TEST: Nexthop with device that is linkdown [ OK ] TEST: Nexthop with device only [ OK ] TEST: Nexthop with duplicate id [ OK ] TEST: Blackhole nexthop [ OK ] TEST: Blackhole nexthop with other attributes [ OK ] TEST: Blackhole nexthop with loopback device down [FAIL] TEST: Create group [ OK ] TEST: Create group with blackhole nexthop [FAIL] TEST: Create multipath group where 1 path is a blackhole [ OK ] TEST: Multipath group can not have a member replaced by blackhole [ OK ] TEST: Create group with non-existent nexthop [ OK ] TEST: Create group with same nexthop multiple times [ OK ] TEST: Replace nexthop with nexthop group [ OK ] TEST: Replace nexthop group with nexthop [ OK ] TEST: Nexthop group and device [ OK ] TEST: Test proto flush [ OK ] TEST: Nexthop group and blackhole [ OK ] Tests passed: 19 Tests failed: 2 Output with previous patch: # ./fib_nexthops.sh -t basic Basic functional tests ---------------------- TEST: List with nothing defined [ OK ] TEST: Nexthop get on non-existent id [ OK ] TEST: Nexthop with no device or gateway [ OK ] TEST: Nexthop with down device [ OK ] TEST: Nexthop with device that is linkdown [ OK ] TEST: Nexthop with device only [ OK ] TEST: Nexthop with duplicate id [ OK ] TEST: Blackhole nexthop [ OK ] TEST: Blackhole nexthop with other attributes [ OK ] TEST: Blackhole nexthop with loopback device down [ OK ] TEST: Create group [ OK ] TEST: Create group with blackhole nexthop [ OK ] TEST: Create multipath group where 1 path is a blackhole [ OK ] TEST: Multipath group can not have a member replaced by blackhole [ OK ] TEST: Create group with non-existent nexthop [ OK ] TEST: Create group with same nexthop multiple times [ OK ] TEST: Replace nexthop with nexthop group [ OK ] TEST: Replace nexthop group with nexthop [ OK ] TEST: Nexthop group and device [ OK ] TEST: Test proto flush [ OK ] TEST: Nexthop group and blackhole [ OK ] Tests passed: 21 Tests failed: 0 Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 4c7d33618437c..d98fb85e201c8 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1524,6 +1524,14 @@ basic() run_cmd "$IP nexthop replace id 2 blackhole dev veth1" log_test $? 2 "Blackhole nexthop with other attributes" + # blackhole nexthop should not be affected by the state of the loopback + # device + run_cmd "$IP link set dev lo down" + check_nexthop "id 2" "id 2 blackhole" + log_test $? 0 "Blackhole nexthop with loopback device down" + + run_cmd "$IP link set dev lo up" + # # groups # -- GitLab From f1becbed411c6fa29d7ce3def3a1dcd4f63f2d74 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Mar 2021 12:29:43 +0200 Subject: [PATCH 141/839] net: mscc: ocelot: properly reject destination IP keys in VCAP IS1 An attempt is made to warn the user about the fact that VCAP IS1 cannot offload keys matching on destination IP (at least given the current half key format), but sadly that warning fails miserably in practice, due to the fact that it operates on an uninitialized "match" variable. We must first decode the keys from the flow rule. Fixes: 75944fda1dfe ("net: mscc: ocelot: offload ingress skbedit and vlan actions to VCAP IS1") Reported-by: Colin Ian King Signed-off-by: Vladimir Oltean Reviewed-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_flower.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index c3ac026f6aea2..a41b458b1b3ef 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -540,13 +540,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } + flow_rule_match_ipv4_addrs(rule, &match); + if (filter->block_id == VCAP_IS1 && *(u32 *)&match.mask->dst) { NL_SET_ERR_MSG_MOD(extack, "Key type S1_NORMAL cannot match on destination IP"); return -EOPNOTSUPP; } - flow_rule_match_ipv4_addrs(rule, &match); tmp = &filter->key.ipv4.sip.value.addr[0]; memcpy(tmp, &match.key->src, 4); -- GitLab From 053d8ad10d585adf9891fcd049637536e2fe9ea7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Mar 2021 12:56:53 +0200 Subject: [PATCH 142/839] net: dsa: sja1105: fix SGMII PCS being forced to SPEED_UNKNOWN instead of SPEED_10 When using MLO_AN_PHY or MLO_AN_FIXED, the MII_BMCR of the SGMII PCS is read before resetting the switch so it can be reprogrammed afterwards. This works for the speeds of 1Gbps and 100Mbps, but not for 10Mbps, because SPEED_10 is actually 0, so AND-ing anything with 0 is false, therefore that last branch is dead code. Do what others do (genphy_read_status_fixed, phy_mii_ioctl) and just remove the check for SPEED_10, let it fall into the default case. Fixes: ffe10e679cec ("net: dsa: sja1105: Add support for the SGMII port") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 7692338730df5..c1982615c6315 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1922,7 +1922,7 @@ out_unlock_ptp: speed = SPEED_1000; else if (bmcr & BMCR_SPEED100) speed = SPEED_100; - else if (bmcr & BMCR_SPEED10) + else speed = SPEED_10; sja1105_sgmii_pcs_force_speed(priv, speed); -- GitLab From 6a5166e07c029182ee0e15c1a97b08c3179b2aaf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Mar 2021 12:56:54 +0200 Subject: [PATCH 143/839] net: dsa: sja1105: fix ucast/bcast flooding always remaining enabled In the blamed patch I managed to introduce a bug while moving code around: the same logic is applied to the ucast_egress_floods and bcast_egress_floods variables both on the "if" and the "else" branches. This is clearly an unintended change compared to how the code used to be prior to that bugfix, so restore it. Fixes: 7f7ccdea8c73 ("net: dsa: sja1105: fix leakage of flooded frames outside bridging domain") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index c1982615c6315..51ea104c63bbb 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3369,14 +3369,14 @@ static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to, if (flags.val & BR_FLOOD) priv->ucast_egress_floods |= BIT(to); else - priv->ucast_egress_floods |= BIT(to); + priv->ucast_egress_floods &= ~BIT(to); } if (flags.mask & BR_BCAST_FLOOD) { if (flags.val & BR_BCAST_FLOOD) priv->bcast_egress_floods |= BIT(to); else - priv->bcast_egress_floods |= BIT(to); + priv->bcast_egress_floods &= ~BIT(to); } return sja1105_manage_flood_domains(priv); -- GitLab From 443cc4b45c1be0b25313c9a507b033074334b490 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 2 Mar 2021 17:05:39 -0600 Subject: [PATCH 144/839] scsi: ibmvfc: Simplify handling of sub-CRQ initialization If ibmvfc_init_sub_crqs() fails ibmvfc_probe() simply parrots registration failure reported elsewhere, and futher vhost->scsi_scrq.scrq == NULL is indication enough to the driver that it has no sub-CRQs available. The mq_enabled check can also be moved into ibmvfc_init_sub_crqs() such that each caller doesn't have to gate the call with a mq_enabled check. Finally, in the case of sub-CRQ setup failure setting do_enquiry can be turned off to putting the driver into single queue fallback mode. The aforementioned changes also simplify the next patch in the series that fixes a hard reset issue, by tying a sub-CRQ setup failure and do_enquiry logic into ibmvfc_init_sub_crqs(). Link: https://lore.kernel.org/r/20210302230543.9905-2-tyreld@linux.ibm.com Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 755313b766b91..e518dbee6a47d 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5707,17 +5707,21 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) LEAVE; } -static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) +static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) { int i, j; ENTER; + if (!vhost->mq_enabled) + return; vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues, sizeof(*vhost->scsi_scrqs.scrqs), GFP_KERNEL); - if (!vhost->scsi_scrqs.scrqs) - return -1; + if (!vhost->scsi_scrqs.scrqs) { + vhost->do_enquiry = 0; + return; + } for (i = 0; i < nr_scsi_hw_queues; i++) { if (ibmvfc_register_scsi_channel(vhost, i)) { @@ -5726,13 +5730,12 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; vhost->scsi_scrqs.active_queues = 0; - LEAVE; - return -1; + vhost->do_enquiry = 0; + break; } } LEAVE; - return 0; } static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost) @@ -5999,11 +6002,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto remove_shost; } - if (vhost->mq_enabled) { - rc = ibmvfc_init_sub_crqs(vhost); - if (rc) - dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", rc); - } + ibmvfc_init_sub_crqs(vhost); if (shost_to_fc_host(shost)->rqst_q) blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1); -- GitLab From 6c59cff38e66584ae3ac6c2f0cbd8d039c710ba7 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 4 Mar 2021 14:15:13 +0100 Subject: [PATCH 145/839] net: usb: qmi_wwan: allow qmimux add/del with master up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no reason for preventing the creation and removal of qmimux network interfaces when the underlying interface is up. This makes qmi_wwan mux implementation more similar to the rmnet one, simplifying userspace management of the same logical interfaces. Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Reported-by: Aleksander Morgado Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 17a050521b866..6700f1970b240 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -429,13 +429,6 @@ static ssize_t add_mux_store(struct device *d, struct device_attribute *attr, c goto err; } - /* we don't want to modify a running netdev */ - if (netif_running(dev->net)) { - netdev_err(dev->net, "Cannot change a running device\n"); - ret = -EBUSY; - goto err; - } - ret = qmimux_register_device(dev->net, mux_id); if (!ret) { info->flags |= QMI_WWAN_FLAG_MUX; @@ -465,13 +458,6 @@ static ssize_t del_mux_store(struct device *d, struct device_attribute *attr, c if (!rtnl_trylock()) return restart_syscall(); - /* we don't want to modify a running netdev */ - if (netif_running(dev->net)) { - netdev_err(dev->net, "Cannot change a running device\n"); - ret = -EBUSY; - goto err; - } - del_dev = qmimux_find_dev(dev, mux_id); if (!del_dev) { netdev_err(dev->net, "mux_id not present\n"); -- GitLab From 5cf529649be20dc27500c1141d58ba57131e2530 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 2 Mar 2021 17:05:40 -0600 Subject: [PATCH 146/839] scsi: ibmvfc: Fix invalid sub-CRQ handles after hard reset A hard reset results in a complete transport disconnect such that the CRQ connection with the partner VIOS is broken. This has the side effect of also invalidating the associated sub-CRQs. The current code assumes that the sub-CRQs are perserved resulting in a protocol violation after trying to reconnect them with the VIOS. This introduces an infinite loop such that the VIOS forces a disconnect after each subsequent attempt to re-register with invalid handles. Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ disconnect, and driving a reinitialization of the sub-CRQs once a new CRQ is registered with the hypervisor. Link: https://lore.kernel.org/r/20210302230543.9905-3-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index e518dbee6a47d..ba4c811ce75aa 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *); static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *); static void ibmvfc_tgt_move_login(struct ibmvfc_target *); +static void ibmvfc_release_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_init_sub_crqs(struct ibmvfc_host *); + static const char *unknown_error = "unknown error"; static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba, @@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) unsigned long flags; struct vio_dev *vdev = to_vio_dev(vhost->dev); struct ibmvfc_queue *crq = &vhost->crq; - struct ibmvfc_queue *scrq; - int i; + + ibmvfc_release_sub_crqs(vhost); /* Close the CRQ */ do { @@ -947,16 +950,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) memset(crq->msgs.crq, 0, PAGE_SIZE); crq->cur = 0; - if (vhost->scsi_scrqs.scrqs) { - for (i = 0; i < nr_scsi_hw_queues; i++) { - scrq = &vhost->scsi_scrqs.scrqs[i]; - spin_lock(scrq->q_lock); - memset(scrq->msgs.scrq, 0, PAGE_SIZE); - scrq->cur = 0; - spin_unlock(scrq->q_lock); - } - } - /* And re-open it again */ rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, crq->msg_token, PAGE_SIZE); @@ -966,9 +959,12 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) dev_warn(vhost->dev, "Partner adapter not ready\n"); else if (rc != 0) dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc); + spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); + ibmvfc_init_sub_crqs(vhost); + return rc; } @@ -5694,6 +5690,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); + scrq->irq = 0; do { rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, -- GitLab From 2162dc23f80770e355ef00b43573b67b80466001 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 2 Mar 2021 17:05:41 -0600 Subject: [PATCH 147/839] scsi: ibmvfc: Treat H_CLOSED as success during sub-CRQ registration A non-zero return code for H_REG_SUB_CRQ is currently treated as a failure resulting in failing sub-CRQ setup. The case of H_CLOSED should not be treated as a failure. This return code translates to a successful sub-CRQ registration by the hypervisor, and is meant to communicate back that there is currently no partner VIOS CRQ connection established as of yet. This is a common occurrence during a disconnect where the client adapter can possibly come back up prior to the partner adapter. For non-zero return code from H_REG_SUB_CRQ treat a H_CLOSED as success so that sub-CRQs are successfully setup. Link: https://lore.kernel.org/r/20210302230543.9905-4-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index ba4c811ce75aa..abc8030e90e09 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5638,7 +5638,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE, &scrq->cookie, &scrq->hw_irq); - if (rc) { + /* H_CLOSED indicates successful register, but no CRQ partner */ + if (rc && rc != H_CLOSED) { dev_warn(dev, "Error registering sub-crq: %d\n", rc); if (rc == H_PARAMETER) dev_warn_once(dev, "Firmware may not support MQ\n"); -- GitLab From 0217a272fe134578d88a4e9c7f5e968551bf952a Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 2 Mar 2021 17:05:42 -0600 Subject: [PATCH 148/839] scsi: ibmvfc: Store return code of H_FREE_SUB_CRQ during cleanup The H_FREE_SUB_CRQ hypercall can return a retry delay return code that indicates the call needs to be retried after a specific amount of time delay. The error path to free a sub-CRQ in case of a failure during channel registration fails to capture the return code of H_FREE_SUB_CRQ which will result in the delay loop being skipped in the case of a retry delay return code. Store the return code result of the H_FREE_SUB_CRQ call such that the return code check in the delay loop evaluates a meaningful value. Also, use the rtas_busy_delay() to check the rc value and delay for the appropriate amount of time. Link: https://lore.kernel.org/r/20210302230543.9905-5-tyreld@linux.ibm.com Fixes: 39e461fddff0 ("scsi: ibmvfc: Map/request irq and register Sub-CRQ interrupt handler") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index abc8030e90e09..5897ee995aeee 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -5672,8 +5673,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, irq_failed: do { - plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); - } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); + } while (rtas_busy_delay(rc)); reg_failed: ibmvfc_free_queue(vhost, scrq); LEAVE; -- GitLab From dbdbb81bde2440d9a1653acf1333c6c812f9c4b0 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 2 Mar 2021 17:05:43 -0600 Subject: [PATCH 149/839] scsi: ibmvfc: Reinitialize sub-CRQs and perform channel enquiry after LPM A live partition migration (LPM) results in a CRQ disconnect similar to a hard reset. In this LPM case the hypervisor mostly preserves the CRQ transport such that it simply needs to be reenabled. However, the capabilities may have changed such as fewer channels, or no channels at all. Further, its possible that there may be sub-CRQ support, but no channel support. The CRQ reenable path currently doesn't take any of this into consideration. For simplicity release and reinitialize sub-CRQs during reenable, and set do_enquiry and using_channels with the appropriate values to trigger channel renegotiation. Link: https://lore.kernel.org/r/20210302230543.9905-6-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 5897ee995aeee..338e29d3e025f 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -903,6 +903,9 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) { int rc = 0; struct vio_dev *vdev = to_vio_dev(vhost->dev); + unsigned long flags; + + ibmvfc_release_sub_crqs(vhost); /* Re-enable the CRQ */ do { @@ -914,6 +917,15 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) if (rc) dev_err(vhost->dev, "Error enabling adapter (rc=%d)\n", rc); + spin_lock_irqsave(vhost->host->host_lock, flags); + spin_lock(vhost->crq.q_lock); + vhost->do_enquiry = 1; + vhost->using_channels = 0; + spin_unlock(vhost->crq.q_lock); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + + ibmvfc_init_sub_crqs(vhost); + return rc; } -- GitLab From aa53f580e67b49ec5f4d9bd1de81eb9eb0dc079f Mon Sep 17 00:00:00 2001 From: Can Guo Date: Tue, 23 Feb 2021 21:36:47 -0800 Subject: [PATCH 150/839] scsi: ufs: Minor adjustments to error handling In error handling prepare stage, after SCSI requests are blocked, do a down/up_write(clk_scaling_lock) to clean up the queuecommand() path. Meanwhile, stop eeh_work in case it disturbs error recovery. Moreover, reset ufshcd_state at the entrance of ufshcd_probe_hba(), since it may be called multiple times during error recovery. Link: https://lore.kernel.org/r/1614145010-36079-2-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 77161750c9fb3..607fdfa5a5082 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4987,6 +4987,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) * UFS device needs urgent BKOPs. */ if (!hba->pm_op_in_progress && + !ufshcd_eh_in_progress(hba) && ufshcd_is_exception_event(lrbp->ucd_rsp_ptr) && schedule_work(&hba->eeh_work)) { /* @@ -5784,13 +5785,20 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_suspend_clkscaling(hba); ufshcd_clk_scaling_allow(hba, false); } + ufshcd_scsi_block_requests(hba); + /* Drain ufshcd_queuecommand() */ + down_write(&hba->clk_scaling_lock); + up_write(&hba->clk_scaling_lock); + cancel_work_sync(&hba->eeh_work); } static void ufshcd_err_handling_unprepare(struct ufs_hba *hba) { + ufshcd_scsi_unblock_requests(hba); ufshcd_release(hba); if (ufshcd_is_clkscaling_supported(hba)) ufshcd_clk_scaling_suspend(hba, false); + ufshcd_clear_ua_wluns(hba); pm_runtime_put(hba->dev); } @@ -5882,8 +5890,8 @@ static void ufshcd_err_handler(struct work_struct *work) spin_unlock_irqrestore(hba->host->host_lock, flags); ufshcd_err_handling_prepare(hba); spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_scsi_block_requests(hba); - hba->ufshcd_state = UFSHCD_STATE_RESET; + if (hba->ufshcd_state != UFSHCD_STATE_ERROR) + hba->ufshcd_state = UFSHCD_STATE_RESET; /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba); @@ -6042,12 +6050,8 @@ skip_err_handling: } ufshcd_clear_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_scsi_unblock_requests(hba); ufshcd_err_handling_unprepare(hba); up(&hba->host_sem); - - if (!err && needs_reset) - ufshcd_clear_ua_wluns(hba); } /** @@ -7858,6 +7862,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) unsigned long flags; ktime_t start = ktime_get(); + hba->ufshcd_state = UFSHCD_STATE_RESET; + ret = ufshcd_link_startup(hba); if (ret) goto out; -- GitLab From 4a791574a0ccf36eb3a0a46fbd71d2768df3eef9 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Tue, 23 Feb 2021 21:36:48 -0800 Subject: [PATCH 151/839] scsi: ufs: ufs-qcom: Disable interrupt in reset path Disable interrupt in reset path to flush pending IRQ handler in order to avoid possible NoC issues. Link: https://lore.kernel.org/r/1614145010-36079-3-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Nitin Rawat Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-qcom.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index f97d7b0ae3b6a..a9dc8d7c9f784 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -253,12 +253,17 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba) { int ret = 0; struct ufs_qcom_host *host = ufshcd_get_variant(hba); + bool reenable_intr = false; if (!host->core_reset) { dev_warn(hba->dev, "%s: reset control not set\n", __func__); goto out; } + reenable_intr = hba->is_irq_enabled; + disable_irq(hba->irq); + hba->is_irq_enabled = false; + ret = reset_control_assert(host->core_reset); if (ret) { dev_err(hba->dev, "%s: core_reset assert failed, err = %d\n", @@ -280,6 +285,11 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba) usleep_range(1000, 1100); + if (reenable_intr) { + enable_irq(hba->irq); + hba->is_irq_enabled = true; + } + out: return ret; } -- GitLab From 4d195dc451570db79251a58022309728cc7b29de Mon Sep 17 00:00:00 2001 From: Can Guo Date: Tue, 23 Feb 2021 21:36:49 -0800 Subject: [PATCH 152/839] scsi: ufs: Remove redundant checks of !hba in suspend/resume callbacks Runtime and system suspend/resume can only come after hba probe invokes platform_set_drvdata(pdev, hba), meaning hba cannot be NULL in these PM callbacks, so remove the checks of !hba. Link: https://lore.kernel.org/r/1614145010-36079-4-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 607fdfa5a5082..a81a3825aed61 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -95,8 +95,6 @@ 16, 4, buf, __len, false); \ } while (0) -static bool early_suspend; - int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, const char *prefix) { @@ -8978,11 +8976,6 @@ int ufshcd_system_suspend(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); - if (!hba) { - early_suspend = true; - return 0; - } - down(&hba->host_sem); if (!hba->is_powered) @@ -9034,14 +9027,6 @@ int ufshcd_system_resume(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); - if (!hba) - return -EINVAL; - - if (unlikely(early_suspend)) { - early_suspend = false; - down(&hba->host_sem); - } - if (!hba->is_powered || pm_runtime_suspended(hba->dev)) /* * Let the runtime resume take care of resuming @@ -9074,9 +9059,6 @@ int ufshcd_runtime_suspend(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); - if (!hba) - return -EINVAL; - if (!hba->is_powered) goto out; else @@ -9115,9 +9097,6 @@ int ufshcd_runtime_resume(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); - if (!hba) - return -EINVAL; - if (!hba->is_powered) goto out; else -- GitLab From 1481b7fe438fff3bd918bcb344e75bdf7ce47bb8 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 2 Mar 2021 14:08:18 +0800 Subject: [PATCH 153/839] scsi: ufs: Convert sysfs sprintf/snprintf family to sysfs_emit Fix the following coccicheck warning: ./drivers/scsi/ufs/ufshcd.c:1538:8-16: WARNING: use scnprintf or sprintf. Link: https://lore.kernel.org/r/1614665298-115183-1-git-send-email-jiapeng.chong@linux.alibaba.com Reported-by: Abaci Robot Reviewed-by: Avri Altman Signed-off-by: Jiapeng Chong Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a81a3825aed61..c86760788c72c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1533,7 +1533,7 @@ static ssize_t ufshcd_clkscale_enable_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_scaling.is_enabled); + return sysfs_emit(buf, "%d\n", hba->clk_scaling.is_enabled); } static ssize_t ufshcd_clkscale_enable_store(struct device *dev, -- GitLab From 1112963427d6d186f8729cf36fefb70d5ca5a84a Mon Sep 17 00:00:00 2001 From: Vishal Bhakta Date: Fri, 26 Feb 2021 15:43:48 -0800 Subject: [PATCH 154/839] scsi: vmw_pvscsi: MAINTAINERS: Update maintainer The entries in the source files are removed as well. Link: https://lore.kernel.org/r/20210226234347.21535-1-vbhakta@vmware.com Signed-off-by: Vishal Bhakta Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- drivers/scsi/vmw_pvscsi.c | 2 -- drivers/scsi/vmw_pvscsi.h | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d92f85ca831d3..dede3b947834a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19165,7 +19165,7 @@ S: Maintained F: drivers/infiniband/hw/vmw_pvrdma/ VMware PVSCSI driver -M: Jim Gill +M: Vishal Bhakta M: VMware PV-Drivers L: linux-scsi@vger.kernel.org S: Maintained diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 081f54ab7d86c..8a79605d96521 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -17,8 +17,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * - * Maintained by: Jim Gill - * */ #include diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h index 75966d3f326e0..51a82f7803d3c 100644 --- a/drivers/scsi/vmw_pvscsi.h +++ b/drivers/scsi/vmw_pvscsi.h @@ -17,8 +17,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * - * Maintained by: Jim Gill - * */ #ifndef _VMW_PVSCSI_H_ -- GitLab From bfc2560563586372212b0a8aeca7428975fa91fe Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Thu, 4 Mar 2021 14:43:17 +0000 Subject: [PATCH 155/839] net: sched: avoid duplicates in classes dump This is a follow up of commit ea3274695353 ("net: sched: avoid duplicates in qdisc dump") which has fixed the issue only for the qdisc dump. The duplicate printing also occurs when dumping the classes via tc class show dev eth0 Fixes: 59cc1f61f09c ("net: sched: convert qdisc linked list to hashtable") Signed-off-by: Maximilian Heyne Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e2e4353db8a70..f87d07736a140 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2168,7 +2168,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, - int *t_p, int s_t) + int *t_p, int s_t, bool recur) { struct Qdisc *q; int b; @@ -2179,7 +2179,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - if (!qdisc_dev(root)) + if (!qdisc_dev(root) || !recur) return 0; if (tcm->tcm_parent) { @@ -2214,13 +2214,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t) < 0) + &t, s_t, false) < 0) goto done; done: -- GitLab From e0be4931f3fee2e04dec4013ea4f27ec2db8556f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Mar 2021 13:32:08 -0800 Subject: [PATCH 156/839] mptcp: reset last_snd on subflow close Send logic caches last active subflow in the msk, so it needs to be cleared when the cached subflow is closed. Fixes: d5f49190def61c ("mptcp: allow picking different xmit subflows") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/155 Reported-by: Christoph Paasch Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c5d5e68940ea2..7362a536cbc01 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2126,6 +2126,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + struct mptcp_sock *msk = mptcp_sk(sk); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2154,6 +2156,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, release_sock(ssk); sock_put(ssk); + + if (ssk == msk->last_snd) + msk->last_snd = NULL; } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, -- GitLab From f07157792c633b528de5fc1dbe2e4ea54f8e09d4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Mar 2021 13:32:09 -0800 Subject: [PATCH 157/839] mptcp: put subflow sock on connect error mptcp_add_pending_subflow() performs a sock_hold() on the subflow, then adds the subflow to the join list. Without a sock_put the subflow sk won't be freed in case connect() fails. unreferenced object 0xffff88810c03b100 (size 3000): [..] sk_prot_alloc.isra.0+0x2f/0x110 sk_alloc+0x5d/0xc20 inet6_create+0x2b7/0xd30 __sock_create+0x17f/0x410 mptcp_subflow_create_socket+0xff/0x9c0 __mptcp_subflow_connect+0x1da/0xaf0 mptcp_pm_nl_work+0x6e0/0x1120 mptcp_worker+0x508/0x9a0 Fixes: 5b950ff4331ddda ("mptcp: link MPC subflow into msk only after accept") Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e1fbcab257e6d..41695e26c374f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1297,6 +1297,7 @@ failed_unlink: spin_lock_bh(&msk->join_list_lock); list_del(&subflow->node); spin_unlock_bh(&msk->join_list_lock); + sock_put(mptcp_subflow_tcp_sock(subflow)); failed: subflow->disposable = 1; -- GitLab From eaeef1ce55ec9161e0c44ff27017777b1644b421 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 4 Mar 2021 13:32:10 -0800 Subject: [PATCH 158/839] mptcp: fix memory accounting on allocation error In case of memory pressure the MPTCP xmit path keeps at most a single skb in the tx cache, eventually freeing additional ones. The associated counter for forward memory is not update accordingly, and that causes the following splat: WARNING: CPU: 0 PID: 12 at net/core/stream.c:208 sk_stream_kill_queues+0x3ca/0x530 net/core/stream.c:208 Modules linked in: CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.11.0-rc2 #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:sk_stream_kill_queues+0x3ca/0x530 net/core/stream.c:208 Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e 63 01 00 00 8b ab 00 01 00 00 e9 60 ff ff ff e8 2f 24 d3 fe 0f 0b eb 97 e8 26 24 d3 fe <0f> 0b eb a0 e8 1d 24 d3 fe 0f 0b e9 a5 fe ff ff 4c 89 e7 e8 0e d0 RSP: 0018:ffffc900000c7bc8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88810030ac40 RSI: ffffffff8262ca4a RDI: 0000000000000003 RBP: 0000000000000d00 R08: 0000000000000000 R09: ffffffff85095aa7 R10: ffffffff8262c9ea R11: 0000000000000001 R12: ffff888108908100 R13: ffffffff85095aa0 R14: ffffc900000c7c48 R15: 1ffff92000018f85 FS: 0000000000000000(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa7444baef8 CR3: 0000000035ee9005 CR4: 0000000000170ef0 Call Trace: __mptcp_destroy_sock+0x4a7/0x6c0 net/mptcp/protocol.c:2547 mptcp_worker+0x7dd/0x1610 net/mptcp/protocol.c:2272 process_one_work+0x896/0x1170 kernel/workqueue.c:2275 worker_thread+0x605/0x1350 kernel/workqueue.c:2421 kthread+0x344/0x410 kernel/kthread.c:292 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296 At close time, as reported by syzkaller/Christoph. This change address the issue properly updating the fwd allocated memory counter in the error path. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/136 Fixes: 724cfd2ee8aa ("mptcp: allocate TX skbs in msk context") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7362a536cbc01..aa59101ffe546 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1189,6 +1189,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size, */ while (skbs->qlen > 1) { skb = __skb_dequeue_tail(skbs); + *total_ts -= skb->truesize; __kfree_skb(skb); } return skbs->qlen > 0; -- GitLab From 17aee05dc8822e354f5ad2d68ee39e3ba4b6acf2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Mar 2021 13:32:11 -0800 Subject: [PATCH 159/839] mptcp: dispose initial struct socket when its subflow is closed Christoph Paasch reported following crash: dst_release underflow WARNING: CPU: 0 PID: 1319 at net/core/dst.c:175 dst_release+0xc1/0xd0 net/core/dst.c:175 CPU: 0 PID: 1319 Comm: syz-executor217 Not tainted 5.11.0-rc6af8e85128b4d0d24083c5cac646e891227052e0c #70 Call Trace: rt_cache_route+0x12e/0x140 net/ipv4/route.c:1503 rt_set_nexthop.constprop.0+0x1fc/0x590 net/ipv4/route.c:1612 __mkroute_output net/ipv4/route.c:2484 [inline] ... The worker leaves msk->subflow alone even when it happened to close the subflow ssk associated with it. Fixes: 866f26f2a9c33b ("mptcp: always graft subflow socket to parent") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/157 Reported-by: Christoph Paasch Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index aa59101ffe546..a58da04bed717 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2116,6 +2116,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) return backup; } +static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) +{ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2160,6 +2168,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk == msk->last_snd) msk->last_snd = NULL; + + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2529,12 +2540,6 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - /* dispose the ancillatory tcp socket, if any */ - if (msk->subflow) { - iput(SOCK_INODE(msk->subflow)); - msk->subflow = NULL; - } - /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -2559,6 +2564,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } -- GitLab From c8fe62f0768cc9378103fc89fb96804645f527c9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Mar 2021 13:32:12 -0800 Subject: [PATCH 160/839] mptcp: reset 'first' and ack_hint on subflow close Just like with last_snd, we have to NULL 'first' on subflow close. ack_hint isn't strictly required (its never dereferenced), but better to clear this explicitly as well instead of making it an exception. msk->first is dereferenced unconditionally at accept time, but at that point the ssk is not on the conn_list yet -- this means worker can't see it when iterating the conn_list. Reported-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a58da04bed717..3dcb564b03ad3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2169,6 +2169,12 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk == msk->last_snd) msk->last_snd = NULL; + if (ssk == msk->ack_hint) + msk->ack_hint = NULL; + + if (ssk == msk->first) + msk->first = NULL; + if (msk->subflow && ssk == msk->subflow->sk) mptcp_dispose_initial_subflow(msk); } @@ -3297,6 +3303,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, * we can notify it, if needed. + * + * Even if remote has reset the initial subflow by now + * the refcnt is still at least one. */ subflow = mptcp_subflow_ctx(msk->first); list_add(&subflow->node, &msk->conn_list); -- GitLab From 2948d0a1e5aedc789fed27a4473040b6db741426 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 4 Mar 2021 13:32:13 -0800 Subject: [PATCH 161/839] mptcp: factor out __mptcp_retrans helper() Will simplify the following patch, no functional change intended. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 93 ++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3dcb564b03ad3..67aaf7154dca3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2261,59 +2261,23 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) mptcp_close_wake_up(sk); } -static void mptcp_worker(struct work_struct *work) +static void __mptcp_retrans(struct sock *sk) { - struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); - struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; size_t copied = 0; - int state, ret; - - lock_sock(sk); - state = sk->sk_state; - if (unlikely(state == TCP_CLOSE)) - goto unlock; - - mptcp_check_data_fin_ack(sk); - __mptcp_flush_join_list(msk); - - mptcp_check_fastclose(msk); - - if (msk->pm.status) - mptcp_pm_nl_work(msk); - - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - - __mptcp_check_send_data_fin(sk); - mptcp_check_data_fin(sk); - - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD) && - (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { - inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_destroy_sock(sk); - goto unlock; - } - - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - - if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) - goto unlock; + struct sock *ssk; + int ret; __mptcp_clean_una(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) - goto unlock; + return; ssk = mptcp_subflow_get_retrans(msk); if (!ssk) - goto reset_unlock; + goto reset_timer; lock_sock(ssk); @@ -2339,9 +2303,52 @@ static void mptcp_worker(struct work_struct *work) mptcp_set_timeout(sk, ssk); release_sock(ssk); -reset_unlock: +reset_timer: if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); +} + +static void mptcp_worker(struct work_struct *work) +{ + struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); + struct sock *sk = &msk->sk.icsk_inet.sk; + int state; + + lock_sock(sk); + state = sk->sk_state; + if (unlikely(state == TCP_CLOSE)) + goto unlock; + + mptcp_check_data_fin_ack(sk); + __mptcp_flush_join_list(msk); + + mptcp_check_fastclose(msk); + + if (msk->pm.status) + mptcp_pm_nl_work(msk); + + if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) + mptcp_check_for_eof(msk); + + __mptcp_check_send_data_fin(sk); + mptcp_check_data_fin(sk); + + /* There is no point in keeping around an orphaned sk timedout or + * closed, but we need the msk around to reply to incoming DATA_FIN, + * even if it is orphaned and in FIN_WAIT2 state + */ + if (sock_flag(sk, SOCK_DEAD) && + (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { + inet_sk_state_store(sk, TCP_CLOSE); + __mptcp_destroy_sock(sk); + goto unlock; + } + + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(msk); + + if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) + __mptcp_retrans(sk); unlock: release_sock(sk); -- GitLab From c2e6048fa1cf2228063aec299f93ac6eb256b457 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 4 Mar 2021 13:32:14 -0800 Subject: [PATCH 162/839] mptcp: fix race in release_cb If we receive a MPTCP_PUSH_PENDING even from a subflow when mptcp_release_cb() is serving the previous one, the latter will be delayed up to the next release_sock(msk). Address the issue implementing a test/serve loop for such event. Additionally rename the push helper to __mptcp_push_pending() to be more consistent with the existing code. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 67aaf7154dca3..d2a2169e6d9e7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1445,7 +1445,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk, release_sock(ssk); } -static void mptcp_push_pending(struct sock *sk, unsigned int flags) +static void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1697,14 +1697,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) wait_for_memory: mptcp_set_nospace(sk); - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; } if (copied) - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); out: release_sock(sk); @@ -2959,13 +2959,14 @@ static void mptcp_release_cb(struct sock *sk) { unsigned long flags, nflags; - /* push_pending may touch wmem_reserved, do it before the later - * cleanup - */ - if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) - __mptcp_clean_una(sk); - if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) { - /* mptcp_push_pending() acquires the subflow socket lock + for (;;) { + flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + flags |= MPTCP_PUSH_PENDING; + if (!flags) + break; + + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope * 2) must avoid ABBA deadlock with msk socket spinlock: the RX @@ -2974,13 +2975,21 @@ static void mptcp_release_cb(struct sock *sk) */ spin_unlock_bh(&sk->sk_lock.slock); - mptcp_push_pending(sk, 0); + if (flags & MPTCP_PUSH_PENDING) + __mptcp_push_pending(sk, 0); + + cond_resched(); spin_lock_bh(&sk->sk_lock.slock); } + + if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) + __mptcp_clean_una(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); - /* clear any wmem reservation and errors */ + /* push_pending may touch wmem_reserved, ensure we do the cleanup + * later + */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); -- GitLab From 417789df4a03bc820b082bcc503f0d4c5e4704b9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 4 Mar 2021 13:32:15 -0800 Subject: [PATCH 163/839] mptcp: fix missing wakeup __mptcp_clean_una() can free write memory and should wake-up user-space processes when needed. When such function is invoked by the MPTCP receive path, the wakeup is not needed, as the TCP stack will later trigger subflow_write_space which will do the wakeup as needed. Other __mptcp_clean_una() call sites need an additional wakeup check Let's bundle the relevant code in a new helper and use it. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/165 Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions") Tested-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d2a2169e6d9e7..76958570ae7f2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1061,6 +1061,12 @@ out: } } +static void __mptcp_clean_una_wakeup(struct sock *sk) +{ + __mptcp_clean_una(sk); + mptcp_write_space(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -2270,7 +2276,7 @@ static void __mptcp_retrans(struct sock *sk) struct sock *ssk; int ret; - __mptcp_clean_una(sk); + __mptcp_clean_una_wakeup(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) return; @@ -2983,7 +2989,7 @@ static void mptcp_release_cb(struct sock *sk) } if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) - __mptcp_clean_una(sk); + __mptcp_clean_una_wakeup(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); -- GitLab From 9238e900d6ec2e9b9ca3d8a9731acfd587fc577a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 4 Mar 2021 13:32:16 -0800 Subject: [PATCH 164/839] mptcp: free resources when the port number is mismatched When the port number is mismatched with the announced ones, use 'goto dispose_child' to free the resources instead of using 'goto out'. This patch also moves the port number checking code in subflow_syn_recv_sock before mptcp_finish_join, otherwise subflow_drop_ctx will fail in dispose_child. Fixes: 5bc56388c74f ("mptcp: add port number check for MP_JOIN") Reported-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 41695e26c374f..3d47d670e6654 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -687,11 +687,6 @@ create_child: /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; - if (!mptcp_finish_join(child)) - goto dispose_child; - - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); - tcp_rsk(req)->drop_req = true; if (subflow_use_different_sport(owner, sk)) { pr_debug("ack inet_sport=%d %d", @@ -699,10 +694,16 @@ create_child: ntohs(inet_sk((struct sock *)owner)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(owner, sk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX); - goto out; + goto dispose_child; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX); } + + if (!mptcp_finish_join(child)) + goto dispose_child; + + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); + tcp_rsk(req)->drop_req = true; } } -- GitLab From 863a42b289c22df63db62b10fc2c2ffc237e2125 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Thu, 4 Mar 2021 10:30:09 -0800 Subject: [PATCH 165/839] netdevsim: init u64 stats for 32bit hardware Init the u64 stats in order to avoid the lockdep prints on the 32bit hardware like INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 4695 Comm: syz-executor.0 Not tainted 5.11.0-rc5-syzkaller #0 Hardware name: ARM-Versatile Express Backtrace: [<826fc5b8>] (dump_backtrace) from [<826fc82c>] (show_stack+0x18/0x1c arch/arm/kernel/traps.c:252) [<826fc814>] (show_stack) from [<8270d1f8>] (__dump_stack lib/dump_stack.c:79 [inline]) [<826fc814>] (show_stack) from [<8270d1f8>] (dump_stack+0xa8/0xc8 lib/dump_stack.c:120) [<8270d150>] (dump_stack) from [<802bf9c0>] (assign_lock_key kernel/locking/lockdep.c:935 [inline]) [<8270d150>] (dump_stack) from [<802bf9c0>] (register_lock_class+0xabc/0xb68 kernel/locking/lockdep.c:1247) [<802bef04>] (register_lock_class) from [<802baa2c>] (__lock_acquire+0x84/0x32d4 kernel/locking/lockdep.c:4711) [<802ba9a8>] (__lock_acquire) from [<802be840>] (lock_acquire.part.0+0xf0/0x554 kernel/locking/lockdep.c:5442) [<802be750>] (lock_acquire.part.0) from [<802bed10>] (lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5415) [<802beca4>] (lock_acquire) from [<81560548>] (seqcount_lockdep_reader_access include/linux/seqlock.h:103 [inline]) [<802beca4>] (lock_acquire) from [<81560548>] (__u64_stats_fetch_begin include/linux/u64_stats_sync.h:164 [inline]) [<802beca4>] (lock_acquire) from [<81560548>] (u64_stats_fetch_begin include/linux/u64_stats_sync.h:175 [inline]) [<802beca4>] (lock_acquire) from [<81560548>] (nsim_get_stats64+0xdc/0xf0 drivers/net/netdevsim/netdev.c:70) [<8156046c>] (nsim_get_stats64) from [<81e2efa0>] (dev_get_stats+0x44/0xd0 net/core/dev.c:10405) [<81e2ef5c>] (dev_get_stats) from [<81e53204>] (rtnl_fill_stats+0x38/0x120 net/core/rtnetlink.c:1211) [<81e531cc>] (rtnl_fill_stats) from [<81e59d58>] (rtnl_fill_ifinfo+0x6d4/0x148c net/core/rtnetlink.c:1783) [<81e59684>] (rtnl_fill_ifinfo) from [<81e5ceb4>] (rtmsg_ifinfo_build_skb+0x9c/0x108 net/core/rtnetlink.c:3798) [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo_event net/core/rtnetlink.c:3830 [inline]) [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo_event net/core/rtnetlink.c:3821 [inline]) [<81e5ce18>] (rtmsg_ifinfo_build_skb) from [<81e5d0ac>] (rtmsg_ifinfo+0x44/0x70 net/core/rtnetlink.c:3839) [<81e5d068>] (rtmsg_ifinfo) from [<81e45c2c>] (register_netdevice+0x664/0x68c net/core/dev.c:10103) [<81e455c8>] (register_netdevice) from [<815608bc>] (nsim_create+0xf8/0x124 drivers/net/netdevsim/netdev.c:317) [<815607c4>] (nsim_create) from [<81561184>] (__nsim_dev_port_add+0x108/0x188 drivers/net/netdevsim/dev.c:941) [<8156107c>] (__nsim_dev_port_add) from [<815620d8>] (nsim_dev_port_add_all drivers/net/netdevsim/dev.c:990 [inline]) [<8156107c>] (__nsim_dev_port_add) from [<815620d8>] (nsim_dev_probe+0x5cc/0x750 drivers/net/netdevsim/dev.c:1119) [<81561b0c>] (nsim_dev_probe) from [<815661dc>] (nsim_bus_probe+0x10/0x14 drivers/net/netdevsim/bus.c:287) [<815661cc>] (nsim_bus_probe) from [<811724c0>] (really_probe+0x100/0x50c drivers/base/dd.c:554) [<811723c0>] (really_probe) from [<811729c4>] (driver_probe_device+0xf8/0x1c8 drivers/base/dd.c:740) [<811728cc>] (driver_probe_device) from [<81172fe4>] (__device_attach_driver+0x8c/0xf0 drivers/base/dd.c:846) [<81172f58>] (__device_attach_driver) from [<8116fee0>] (bus_for_each_drv+0x88/0xd8 drivers/base/bus.c:431) [<8116fe58>] (bus_for_each_drv) from [<81172c6c>] (__device_attach+0xdc/0x1d0 drivers/base/dd.c:914) [<81172b90>] (__device_attach) from [<8117305c>] (device_initial_probe+0x14/0x18 drivers/base/dd.c:961) [<81173048>] (device_initial_probe) from [<81171358>] (bus_probe_device+0x90/0x98 drivers/base/bus.c:491) [<811712c8>] (bus_probe_device) from [<8116e77c>] (device_add+0x320/0x824 drivers/base/core.c:3109) [<8116e45c>] (device_add) from [<8116ec9c>] (device_register+0x1c/0x20 drivers/base/core.c:3182) [<8116ec80>] (device_register) from [<81566710>] (nsim_bus_dev_new drivers/net/netdevsim/bus.c:336 [inline]) [<8116ec80>] (device_register) from [<81566710>] (new_device_store+0x178/0x208 drivers/net/netdevsim/bus.c:215) [<81566598>] (new_device_store) from [<8116fcb4>] (bus_attr_store+0x2c/0x38 drivers/base/bus.c:122) [<8116fc88>] (bus_attr_store) from [<805b4b8c>] (sysfs_kf_write+0x48/0x54 fs/sysfs/file.c:139) [<805b4b44>] (sysfs_kf_write) from [<805b3c90>] (kernfs_fop_write_iter+0x128/0x1ec fs/kernfs/file.c:296) [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (call_write_iter include/linux/fs.h:1901 [inline]) [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (new_sync_write fs/read_write.c:518 [inline]) [<805b3b68>] (kernfs_fop_write_iter) from [<804d22fc>] (vfs_write+0x3dc/0x57c fs/read_write.c:605) [<804d1f20>] (vfs_write) from [<804d2604>] (ksys_write+0x68/0xec fs/read_write.c:658) [<804d259c>] (ksys_write) from [<804d2698>] (__do_sys_write fs/read_write.c:670 [inline]) [<804d259c>] (ksys_write) from [<804d2698>] (sys_write+0x10/0x14 fs/read_write.c:667) [<804d2688>] (sys_write) from [<80200060>] (ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64) Fixes: 83c9e13aa39a ("netdevsim: add software driver for testing offloads") Reported-by: syzbot+e74a6857f2d0efe3ad81@syzkaller.appspotmail.com Tested-by: Dmitry Vyukov Signed-off-by: Hillf Danton Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index aec92440eef16..659d3dceb687f 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -294,6 +294,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) dev_net_set(dev, nsim_dev_net(nsim_dev)); ns = netdev_priv(dev); ns->netdev = dev; + u64_stats_init(&ns->syncp); ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; -- GitLab From 67eb211487f0c993d9f402d1c196ef159fd6a3b5 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Thu, 4 Mar 2021 17:18:28 +0100 Subject: [PATCH 166/839] ibmvnic: always store valid MAC address The last change to ibmvnic_set_mac(), 8fc3672a8ad3, meant to prevent users from setting an invalid MAC address on an ibmvnic interface that has not been brought up yet. The change also prevented the requested MAC address from being stored by the adapter object for an ibmvnic interface when the state of the ibmvnic interface is VNIC_PROBED - that is after probing has finished but before the ibmvnic interface is brought up. The MAC address stored by the adapter object is used and sent to the hypervisor for checking when an ibmvnic interface is brought up. The ibmvnic driver ignoring the requested MAC address when in VNIC_PROBED state caused LACP bonds (bonds in 802.3ad mode) with more than one slave to malfunction. The bonding code must be able to change the MAC address of its slaves before they are brought up during enslaving. The inability of kernels with 8fc3672a8ad3 to set the MAC addresses of bonding slaves is observable in the output of "ip address show". The MAC addresses of the slaves are the same as the MAC address of the bond on a working system whereas the slaves retain their original MAC addresses on a system with a malfunctioning LACP bond. Fixes: 8fc3672a8ad3 ("ibmvnic: fix ibmvnic_set_mac") Signed-off-by: Jiri Wiesner Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 3bad762083c54..b6102ccf9b906 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1906,10 +1906,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (adapter->state != VNIC_PROBED) { - ether_addr_copy(adapter->mac_addr, addr->sa_data); + ether_addr_copy(adapter->mac_addr, addr->sa_data); + if (adapter->state != VNIC_PROBED) rc = __ibmvnic_set_mac(netdev, addr->sa_data); - } return rc; } -- GitLab From ad5d07f4a9cd671233ae20983848874731102c08 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 4 Mar 2021 16:29:51 -0500 Subject: [PATCH 167/839] cipso,calipso: resolve a number of problems with the DOI refcounts The current CIPSO and CALIPSO refcounting scheme for the DOI definitions is a bit flawed in that we: 1. Don't correctly match gets/puts in netlbl_cipsov4_list(). 2. Decrement the refcount on each attempt to remove the DOI from the DOI list, only removing it from the list once the refcount drops to zero. This patch fixes these problems by adding the missing "puts" to netlbl_cipsov4_list() and introduces a more conventional, i.e. not-buggy, refcounting mechanism to the DOI definitions. Upon the addition of a DOI to the DOI list, it is initialized with a refcount of one, removing a DOI from the list removes it from the list and drops the refcount by one; "gets" and "puts" behave as expected with respect to refcounts, increasing and decreasing the DOI's refcount by one. Fixes: b1edeb102397 ("netlabel: Replace protocol/NetLabel linking with refrerence counts") Fixes: d7cce01504a0 ("netlabel: Add support for removing a CALIPSO DOI.") Reported-by: syzbot+9ec037722d2603a9f52e@syzkaller.appspotmail.com Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 11 +---------- net/ipv6/calipso.c | 14 +++++--------- net/netlabel/netlabel_cipso_v4.c | 3 +++ 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 471d33a0d095f..be09c7669a799 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&cipso_v4_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + cipso_v4_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&cipso_v4_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&cipso_v4_doi_list_lock); cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 51184a70ac7e5..1578ed9e97d89 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -83,6 +83,9 @@ struct calipso_map_cache_entry { static struct calipso_map_cache_bkt *calipso_cache; +static void calipso_cache_invalidate(void); +static void calipso_doi_putdef(struct calipso_doi *doi_def); + /* Label Mapping Cache Functions */ @@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&calipso_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&calipso_doi_list_lock); - call_rcu(&doi_def->rcu, calipso_doi_free_rcu); + calipso_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&calipso_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&calipso_doi_list_lock); + calipso_cache_invalidate(); call_rcu(&doi_def->rcu, calipso_doi_free_rcu); } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 726dda95934c6..4f50a64315cf0 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -575,6 +575,7 @@ list_start: break; } + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); genlmsg_end(ans_skb, data); @@ -583,12 +584,14 @@ list_start: list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); kfree_skb(ans_skb); nlsze_mult *= 2; goto list_start; } list_failure_lock: + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); list_failure: kfree_skb(ans_skb); -- GitLab From 39491867ace594b4912c35f576864d204beed2b3 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 4 Mar 2021 18:56:46 -0800 Subject: [PATCH 168/839] bpf: Explicitly zero-extend R0 after 32-bit cmpxchg As pointed out by Ilya and explained in the new comment, there's a discrepancy between x86 and BPF CMPXCHG semantics: BPF always loads the value from memory into r0, while x86 only does so when r0 and the value in memory are different. The same issue affects s390. At first this might sound like pure semantics, but it makes a real difference when the comparison is 32-bit, since the load will zero-extend r0/rax. The fix is to explicitly zero-extend rax after doing such a CMPXCHG. Since this problem affects multiple archs, this is done in the verifier by patching in a BPF_ZEXT_REG instruction after every 32-bit cmpxchg. Any archs that don't need such manual zero-extension can do a look-ahead with insn_is_zext to skip the unnecessary mov. Note this still goes on top of Ilya's patch: https://lore.kernel.org/bpf/20210301154019.129110-1-iii@linux.ibm.com/T/#u Differences v5->v6[1]: - Moved is_cmpxchg_insn and ensured it can be safely re-used. Also renamed it and removed 'inline' to match the style of the is_*_function helpers. - Fixed up comments in verifier test (thanks for the careful review, Martin!) Differences v4->v5[1]: - Moved the logic entirely into opt_subreg_zext_lo32_rnd_hi32, thanks to Martin for suggesting this. Differences v3->v4[1]: - Moved the optimization against pointless zext into the correct place: opt_subreg_zext_lo32_rnd_hi32 is called _after_ fixup_bpf_calls. Differences v2->v3[1]: - Moved patching into fixup_bpf_calls (patch incoming to rename this function) - Added extra commentary on bpf_jit_needs_zext - Added check to avoid adding a pointless zext(r0) if there's already one there. Difference v1->v2[1]: Now solved centrally in the verifier instead of specifically for the x86 JIT. Thanks to Ilya and Daniel for the suggestions! [1] v5: https://lore.kernel.org/bpf/CA+i-1C3ytZz6FjcPmUg5s4L51pMQDxWcZNvM86w4RHZ_o2khwg@mail.gmail.com/T/#t v4: https://lore.kernel.org/bpf/CA+i-1C3ytZz6FjcPmUg5s4L51pMQDxWcZNvM86w4RHZ_o2khwg@mail.gmail.com/T/#t v3: https://lore.kernel.org/bpf/08669818-c99d-0d30-e1db-53160c063611@iogearbox.net/T/#t v2: https://lore.kernel.org/bpf/08669818-c99d-0d30-e1db-53160c063611@iogearbox.net/T/#t v1: https://lore.kernel.org/bpf/d7ebaefb-bfd6-a441-3ff2-2fdfe699b1d2@iogearbox.net/T/#t Reported-by: Ilya Leoshkevich Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg") Signed-off-by: Brendan Jackman Acked-by: Martin KaFai Lau Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 4 +++ kernel/bpf/verifier.c | 19 +++++++++++++- .../selftests/bpf/verifier/atomic_cmpxchg.c | 25 +++++++++++++++++++ .../selftests/bpf/verifier/atomic_or.c | 25 +++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa1e64196d8d2..3a283bf97f2f6 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2344,6 +2344,10 @@ bool __weak bpf_helper_changes_pkt_data(void *func) /* Return TRUE if the JIT backend wants verifier to enable sub-register usage * analysis code and wants explicit zero extension inserted by verifier. * Otherwise, return FALSE. + * + * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if + * you don't override this. JITs that don't want these extra insns can detect + * them using insn_is_zext. */ bool __weak bpf_jit_needs_zext(void) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb3eaab934f39..c56e3fcb5f1a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,13 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_request_sock; } +static bool is_cmpxchg_insn(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_CMPXCHG; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -11067,7 +11074,17 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, goto apply_patch_buffer; } - if (!bpf_jit_needs_zext()) + /* Add in an zero-extend instruction if a) the JIT has requested + * it or b) it's a CMPXCHG. + * + * The latter is because: BPF_CMPXCHG always loads a value into + * R0, therefore always zero-extends. However some archs' + * equivalent instruction only does this load when the + * comparison is successful. This detail of CMPXCHG is + * orthogonal to the general zero-extension behaviour of the + * CPU, so it's treated independently of bpf_jit_needs_zext. + */ + if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn)) continue; if (WARN_ON(load_reg == -1)) { diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 2efd8bcf57a1e..6e52dfc644153 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -94,3 +94,28 @@ .result = REJECT, .errstr = "invalid read from stack", }, +{ + "BPF_W cmpxchg should zero top 32 bits", + .insns = { + /* r0 = U64_MAX; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* u64 val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */ + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* r1 = 0x00000000FFFFFFFFull; */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + /* if (r0 != r1) exit(1); */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c index 70f982e1f9f05..9d0716ac50808 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_or.c +++ b/tools/testing/selftests/bpf/verifier/atomic_or.c @@ -75,3 +75,28 @@ }, .result = ACCEPT, }, +{ + "BPF_W atomic_fetch_or should zero top 32 bits", + .insns = { + /* r1 = U64_MAX; */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + /* u64 val = r1; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */ + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* r2 = 0x00000000FFFFFFFF; */ + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1), + /* if (r2 != r1) exit(1); */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, -- GitLab From beda430177f56656e7980dcce93456ffaa35676b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Mar 2021 18:18:08 -0800 Subject: [PATCH 169/839] KVM: x86: Ensure deadline timer has truly expired before posting its IRQ When posting a deadline timer interrupt, open code the checks guarding __kvm_wait_lapic_expire() in order to skip the lapic_timer_int_injected() check in kvm_wait_lapic_expire(). The injection check will always fail since the interrupt has not yet be injected. Moving the call after injection would also be wrong as that wouldn't actually delay delivery of the IRQ if it is indeed sent via posted interrupt. Fixes: 010fd37fddf6 ("KVM: LAPIC: Reduce world switch latency caused by timer_advance_ns") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210305021808.3769732-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 45d40bfacb7cb..cb8ebfaccfb6e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } if (kvm_use_posted_timer_interrupt(apic->vcpu)) { - kvm_wait_lapic_expire(vcpu); + /* + * Ensure the guest's timer has truly expired before posting an + * interrupt. Open code the relevant checks to avoid querying + * lapic_timer_int_injected(), which will be false since the + * interrupt isn't yet injected. Waiting until after injecting + * is not an option since that won't help a posted interrupt. + */ + if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + __kvm_wait_lapic_expire(vcpu); kvm_apic_inject_pending_timer_irqs(apic); return; } -- GitLab From 99840a75454b66d69d2a450ab04e6438d75eba48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 4 Mar 2021 18:16:37 -0800 Subject: [PATCH 170/839] KVM: SVM: Connect 'npt' module param to KVM's internal 'npt_enabled' Directly connect the 'npt' param to the 'npt_enabled' variable so that runtime adjustments to npt_enabled are reflected in sysfs. Move the !PAE restriction to a runtime check to ensure NPT is forced off if the host is using 2-level paging, and add a comment explicitly stating why NPT requires a 64-bit kernel or a kernel with PAE enabled. Opportunistically switch the param to octal permissions. Signed-off-by: Sean Christopherson Message-Id: <20210305021637.3768573-1-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index baee91c1e9364..58a45bb139f88 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs { { .index = MSR_INVALID, .always = false }, }; -/* enable NPT for AMD64 and X86 with PAE */ -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -bool npt_enabled = true; -#else -bool npt_enabled; -#endif - /* * These 2 parameters are used to config the controls for Pause-Loop Exiting: * pause_filter_count: On processors that support Pause filtering(indicated @@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444); static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; module_param(pause_filter_count_max, ushort, 0444); -/* allow nested paging (virtualized MMU) for all guests */ -static int npt = true; -module_param(npt, int, S_IRUGO); +/* + * Use nested page tables by default. Note, NPT may get forced off by + * svm_hardware_setup() if it's unsupported by hardware or the host kernel. + */ +bool npt_enabled = true; +module_param_named(npt, npt_enabled, bool, 0444); /* allow nested virtualization in KVM/SVM */ static int nested = true; @@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void) goto err; } - if (!boot_cpu_has(X86_FEATURE_NPT)) + /* + * KVM's MMU doesn't support using 2-level paging for itself, and thus + * NPT isn't supported if the host is using 2-level paging since host + * CR4 is unchanged on VMRUN. + */ + if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE)) npt_enabled = false; - if (npt_enabled && !npt) + if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); -- GitLab From 3f9b9efd82a84f27e95d0414f852caf1fa839e83 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 9 Feb 2021 17:47:54 -0500 Subject: [PATCH 171/839] virtiofs: Fail dax mount if device does not support it Right now "mount -t virtiofs -o dax myfs /mnt/virtiofs" succeeds even if filesystem deivce does not have a cache window and hence DAX can't be supported. This gives a false sense to user that they are using DAX with virtiofs but fact of the matter is that they are not. Fix this by returning error if dax can't be supported and user has asked for it. Signed-off-by: Vivek Goyal Reviewed-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 8868ac31a3c0a..4ee6f734ba838 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) + if (ctx->dax) { + if (!fs->dax_dev) { + err = -EINVAL; + pr_err("virtio-fs: dax can't be enabled as filesystem" + " device does not support it.\n"); + goto err_free_fuse_devs; + } ctx->dax_dev = fs->dax_dev; + } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; -- GitLab From 56887cffe946bb0a90c74429fa94d6110a73119d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2021 10:48:09 +0100 Subject: [PATCH 172/839] block: Try to handle busy underlying device on discard Commit 384d87ef2c95 ("block: Do not discard buffers under a mounted filesystem") made paths issuing discard or zeroout requests to the underlying device try to grab block device in exclusive mode. If that failed we returned EBUSY to userspace. This however caused unexpected fallout in userspace where e.g. FUSE filesystems issue discard requests from userspace daemons although the device is open exclusively by the kernel. Also shrinking of logical volume by LVM issues discard requests to a device which may be claimed exclusively because there's another LV on the same PV. So to avoid these userspace regressions, fall back to invalidate_inode_pages2_range() instead of returning EBUSY to userspace and return EBUSY only of that call fails as well (meaning that there's indeed someone using the particular device range we are trying to discard). Link: https://bugzilla.kernel.org/show_bug.cgi?id=211167 Fixes: 384d87ef2c95 ("block: Do not discard buffers under a mounted filesystem") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 4aa1f88d5bf8b..03166b3dea4db 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -118,13 +118,22 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, if (!(mode & FMODE_EXCL)) { int err = bd_prepare_to_claim(bdev, truncate_bdev_range); if (err) - return err; + goto invalidate; } truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, truncate_bdev_range); return 0; + +invalidate: + /* + * Someone else has handle exclusively open. Try invalidating instead. + * The 'end' argument is inclusive so the rounding is safe. + */ + return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, + lstart >> PAGE_SHIFT, + lend >> PAGE_SHIFT); } static void set_init_blocksize(struct block_device *bdev) -- GitLab From 7d365bd0bff3c0310c39ebaffc9a8458e036d666 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 5 Mar 2021 13:54:38 +0100 Subject: [PATCH 173/839] s390/dasd: fix hanging DASD driver unbind In case of an unbind of the DASD device driver the function dasd_generic_remove() is called which shuts down the device. Among others this functions removes the int_handler from the cdev. During shutdown the device cancels all outstanding IO requests and waits for completion of the clear request. Unfortunately the clear interrupt will never be received when there is no interrupt handler connected. Fix by moving the int_handler removal after the call to the state machine where no request or interrupt is outstanding. Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Tested-by: Bjoern Walk Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 28c04a4efa66f..22805115ebc2f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3487,8 +3487,6 @@ void dasd_generic_remove(struct ccw_device *cdev) struct dasd_device *device; struct dasd_block *block; - cdev->handler = NULL; - device = dasd_device_from_cdev(cdev); if (IS_ERR(device)) { dasd_remove_sysfs_files(cdev); @@ -3507,6 +3505,7 @@ void dasd_generic_remove(struct ccw_device *cdev) * no quite down yet. */ dasd_set_target_state(device, DASD_STATE_NEW); + cdev->handler = NULL; /* dasd_delete_device destroys the device reference. */ block = device->block; dasd_delete_device(device); -- GitLab From 66f669a272898feb1c69b770e1504aa2ec7723d1 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 5 Mar 2021 13:54:39 +0100 Subject: [PATCH 174/839] s390/dasd: fix hanging IO request during DASD driver unbind Prevent that an IO request is build during device shutdown initiated by a driver unbind. This request will never be able to be processed or canceled and will hang forever. This will lead also to a hanging unbind. Fix by checking not only if the device is in READY state but also check that there is no device offline initiated before building a new IO request. Fixes: e443343e509a ("s390/dasd: blk-mq conversion") Cc: # v4.14+ Signed-off-by: Stefan Haberland Tested-by: Bjoern Walk Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 22805115ebc2f..ba9ce4e0d30a3 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3052,7 +3052,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, basedev = block->base; spin_lock_irq(&dq->lock); - if (basedev->state < DASD_STATE_READY) { + if (basedev->state < DASD_STATE_READY || + test_bit(DASD_FLAG_OFFLINE, &basedev->flags)) { DBF_DEV_EVENT(DBF_ERR, basedev, "device not ready for request %p", req); rc = BLK_STS_IOERR; -- GitLab From 4f44657d74873735e93a50eb25014721a66aac19 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Fri, 5 Mar 2021 16:13:27 +0800 Subject: [PATCH 175/839] blk-cgroup: Fix the recursive blkg rwstat The current blkio.throttle.io_service_bytes_recursive doesn't work correctly. As an example, for the following blkcg hierarchy: (Made 1GB READ in test1, 512MB READ in test2) test / \ test1 test2 $ head -n 1 test/test1/blkio.throttle.io_service_bytes_recursive 8:0 Read 1073684480 $ head -n 1 test/test2/blkio.throttle.io_service_bytes_recursive 8:0 Read 537448448 $ head -n 1 test/blkio.throttle.io_service_bytes_recursive 8:0 Read 537448448 Clearly, above data of "test" reflects "test2" not "test1"+"test2". Do the correct summary in blkg_rwstat_recursive_sum(). Signed-off-by: Xunlei Pang Signed-off-by: Jens Axboe --- block/blk-cgroup-rwstat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c index 85d5790ac49b0..3304e841df7ce 100644 --- a/block/blk-cgroup-rwstat.c +++ b/block/blk-cgroup-rwstat.c @@ -109,6 +109,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, lockdep_assert_held(&blkg->q->queue_lock); + memset(sum, 0, sizeof(*sum)); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct blkg_rwstat *rwstat; @@ -122,7 +123,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); + sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i); } rcu_read_unlock(); } -- GitLab From 00ff801bb8ce6711e919af4530b6ffa14a22390a Mon Sep 17 00:00:00 2001 From: "Kevin(Yudong) Yang" Date: Wed, 3 Mar 2021 09:43:54 -0500 Subject: [PATCH 176/839] net/mlx4_en: update moderation when config reset This patch fixes a bug that the moderation config will not be applied when calling mlx4_en_reset_config. For example, when turning on rx timestamping, mlx4_en_reset_config() will be called, causing the NIC to forget previous moderation config. This fix is in phase with a previous fix: commit 79c54b6bbf06 ("net/mlx4_en: Fix TX moderation info loss after set_ringparam is called") Tested: Before this patch, on a host with NIC using mlx4, run netserver and stream TCP to the host at full utilization. $ sar -I SUM 1 INTR intr/s 14:03:56 sum 48758.00 After rx hwtstamp is enabled: $ sar -I SUM 1 14:10:38 sum 317771.00 We see the moderation is not working properly and issued 7x more interrupts. After the patch, and turned on rx hwtstamp, the rate of interrupts is as expected: $ sar -I SUM 1 14:52:11 sum 49332.00 Fixes: 79c54b6bbf06 ("net/mlx4_en: Fix TX moderation info loss after set_ringparam is called") Signed-off-by: Kevin(Yudong) Yang Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell CC: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 23849f2b9c252..1434df66fcf2e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -47,7 +47,7 @@ #define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) #define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) -static int mlx4_en_moderation_update(struct mlx4_en_priv *priv) +int mlx4_en_moderation_update(struct mlx4_en_priv *priv) { int i, t; int err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 51b9700fce836..5d0c9c62382dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3554,6 +3554,8 @@ int mlx4_en_reset_config(struct net_device *dev, en_err(priv, "Failed starting port\n"); } + if (!err) + err = mlx4_en_moderation_update(priv); out: mutex_unlock(&mdev->state_lock); kfree(tmp); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index e8ed23190de01..f3d1a20201ef3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -775,6 +775,7 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); #define DEV_FEATURE_CHANGED(dev, new_features, feature) \ ((dev->features & feature) ^ (new_features & feature)) +int mlx4_en_moderation_update(struct mlx4_en_priv *priv); int mlx4_en_reset_config(struct net_device *dev, struct hwtstamp_config ts_config, netdev_features_t new_features); -- GitLab From 38c26ff3048af50eee3fcd591921357ee5bfd9ee Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 4 Mar 2021 18:06:48 -0800 Subject: [PATCH 177/839] net: tehuti: fix error return code in bdx_probe() When bdx_read_mac() fails, no error return code of bdx_probe() is assigned. To fix this bug, err is assigned with -EFAULT as error return code. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/ethernet/tehuti/tehuti.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index b8f4f419173f9..d054c6e83b1c9 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2044,6 +2044,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /*bdx_hw_reset(priv); */ if (bdx_read_mac(priv)) { pr_err("load MAC address failed\n"); + err = -EFAULT; goto err_out_iomap; } SET_NETDEV_DEV(ndev, &pdev->dev); -- GitLab From 6650d31f21b8a0043613ae0a4a2e42e49dc20b2d Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 4 Mar 2021 19:10:10 -0800 Subject: [PATCH 178/839] net: intel: iavf: fix error return code of iavf_init_get_resources() When iavf_process_config() fails, no error return code of iavf_init_get_resources() is assigned. To fix this bug, err is assigned with the return value of iavf_process_config(), and then err is checked. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 0a867d64d4675..dc5b3c06d1e01 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1776,7 +1776,8 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) goto err_alloc; } - if (iavf_process_config(adapter)) + err = iavf_process_config(adapter); + if (err) goto err_alloc; adapter->current_op = VIRTCHNL_OP_UNKNOWN; -- GitLab From 9a7b3950c7e15968e23d83be215e95ccc7c92a53 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Fri, 5 Mar 2021 13:49:30 +0800 Subject: [PATCH 179/839] net: stmmac: Fix VLAN filter delete timeout issue in Intel mGBE SGMII For Intel mGbE controller, MAC VLAN filter delete operation will time-out if serdes power-down sequence happened first during driver remove() with below message. [82294.764958] intel-eth-pci 0000:00:1e.4 eth2: stmmac_dvr_remove: removing driver [82294.778677] intel-eth-pci 0000:00:1e.4 eth2: Timeout accessing MAC_VLAN_Tag_Filter [82294.779997] intel-eth-pci 0000:00:1e.4 eth2: failed to kill vid 0081/0 [82294.947053] intel-eth-pci 0000:00:1d.2 eth1: stmmac_dvr_remove: removing driver [82295.002091] intel-eth-pci 0000:00:1d.1 eth0: stmmac_dvr_remove: removing driver Therefore, we delay the serdes power-down to be after unregister_netdev() which triggers the VLAN filter delete. Fixes: b9663b7ca6ff ("net: stmmac: Enable SERDES power up/down sequence") Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0eba44e9c1f82..208cae344ffa2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5249,13 +5249,16 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); stmmac_stop_all_dma(priv); + stmmac_mac_set(priv, priv->ioaddr, false); + netif_carrier_off(ndev); + unregister_netdev(ndev); + /* Serdes power down needs to happen after VLAN filter + * is deleted that is triggered by unregister_netdev(). + */ if (priv->plat->serdes_powerdown) priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); - stmmac_mac_set(priv, priv->ioaddr, false); - netif_carrier_off(ndev); - unregister_netdev(ndev); #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif -- GitLab From 8eb37ab7cc045ec6305a6a1a9c32374695a1a977 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Fri, 5 Mar 2021 14:03:42 +0800 Subject: [PATCH 180/839] stmmac: intel: Fixes clock registration error seen for multiple interfaces Issue seen when enumerating multiple Intel mGbE interfaces in EHL. [ 6.898141] intel-eth-pci 0000:00:1d.2: enabling device (0000 -> 0002) [ 6.900971] intel-eth-pci 0000:00:1d.2: Fail to register stmmac-clk [ 6.906434] intel-eth-pci 0000:00:1d.2: User ID: 0x51, Synopsys ID: 0x52 We fix it by making the clock name to be unique following the format of stmmac-pci_name(pci_dev) so that we can differentiate the clock for these Intel mGbE interfaces in EHL platform as follow: /sys/kernel/debug/clk/stmmac-0000:00:1d.1 /sys/kernel/debug/clk/stmmac-0000:00:1d.2 /sys/kernel/debug/clk/stmmac-0000:00:1e.4 Fixes: 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to contain all Intel platform") Signed-off-by: Wong Vee Khee Signed-off-by: Voon Weifeng Co-developed-by: Ong Boon Leong Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index f2896872a86c4..0b64f7710d174 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -233,6 +233,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) static int intel_mgbe_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + char clk_name[20]; int ret; int i; @@ -301,8 +302,10 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->eee_usecs_rate = plat->clk_ptp_rate; /* Set system clock */ + sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev)); + plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev, - "stmmac-clk", NULL, 0, + clk_name, NULL, 0, plat->clk_ptp_rate); if (IS_ERR(plat->stmmac_clk)) { -- GitLab From e233febda6ebab750e30662a7cc9b9efad127685 Mon Sep 17 00:00:00 2001 From: Sergey Nazarov Date: Fri, 5 Mar 2021 11:05:54 +0300 Subject: [PATCH 181/839] CIPSO: Fix unaligned memory access in cipso_v4_gentag_hdr We need to use put_unaligned when writing 32-bit DOI value in cipso_v4_gentag_hdr to avoid unaligned memory access. v2: unneeded type cast removed as Ondrej Mosnacek suggested. Signed-off-by: Sergey Nazarov Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index be09c7669a799..bfaf327e9d121 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1153,7 +1153,7 @@ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, { buf[0] = IPOPT_CIPSO; buf[1] = CIPSO_V4_HDR_LEN + len; - *(__be32 *)&buf[2] = htonl(doi_def->doi); + put_unaligned_be32(doi_def->doi, &buf[2]); } /** -- GitLab From 69cdb7947adb816fc9325b4ec02a6dddd5070b82 Mon Sep 17 00:00:00 2001 From: Junlin Yang Date: Fri, 5 Mar 2021 16:48:39 +0800 Subject: [PATCH 182/839] ibmvnic: remove excessive irqsave ibmvnic_remove locks multiple spinlocks while disabling interrupts: spin_lock_irqsave(&adapter->state_lock, flags); spin_lock_irqsave(&adapter->rwi_lock, flags); As reported by coccinelle, the second _irqsave() overwrites the value saved in 'flags' by the first _irqsave(), therefore when the second _irqrestore() comes,the value in 'flags' is not valid,the value saved by the first _irqsave() has been lost. This likely leads to IRQs remaining disabled. So remove the second _irqsave(): spin_lock_irqsave(&adapter->state_lock, flags); spin_lock(&adapter->rwi_lock); Generated by: ./scripts/coccinelle/locks/flags.cocci ./drivers/net/ethernet/ibm/ibmvnic.c:5413:1-18: ERROR: nested lock+irqsave that reuses flags from line 5404. Fixes: 4a41c421f367 ("ibmvnic: serialize access to work queue on remove") Signed-off-by: Junlin Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b6102ccf9b906..161fa95e8768c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5408,9 +5408,9 @@ static int ibmvnic_remove(struct vio_dev *dev) * after setting state, so __ibmvnic_reset() which is called * from the flush_work() below, can make progress. */ - spin_lock_irqsave(&adapter->rwi_lock, flags); + spin_lock(&adapter->rwi_lock); adapter->state = VNIC_REMOVING; - spin_unlock_irqrestore(&adapter->rwi_lock, flags); + spin_unlock(&adapter->rwi_lock); spin_unlock_irqrestore(&adapter->state_lock, flags); -- GitLab From 0a7e0c3b5702a6a76cf7e5b8cc10a73e51dc221e Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 5 Mar 2021 09:33:06 +0000 Subject: [PATCH 183/839] selftest/net/ipsec.c: Remove unneeded semicolon fix semicolon.cocci warning: tools/testing/selftests/net/ipsec.c:1788:2-3: Unneeded semicolon Signed-off-by: Xu Wang Signed-off-by: David S. Miller --- tools/testing/selftests/net/ipsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 17ced7d6ce259..f23438d512c58 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -1785,7 +1785,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, break; default: printk("got unknown msg type %d", msg->type); - }; + } } static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) -- GitLab From abbf9a0ef8848dca58c5b97750c1c59bbee45637 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 5 Mar 2021 17:34:41 +0800 Subject: [PATCH 184/839] r8169: fix r8168fp_adjust_ocp_cmd function The (0xBAF70000 & 0x00FFF000) << 6 should be (0xf70 << 18). Fixes: 561535b0f239 ("r8169: fix OCP access on RTL8117") Signed-off-by: Hayes Wang Acked-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f704da3f214c0..7aad0ba53372c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -767,7 +767,7 @@ static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int typ if (type == ERIAR_OOB && (tp->mac_version == RTL_GIGA_MAC_VER_52 || tp->mac_version == RTL_GIGA_MAC_VER_53)) - *cmd |= 0x7f0 << 18; + *cmd |= 0xf70 << 18; } DECLARE_RTL_COND(rtl_eriar_cond) -- GitLab From 85554bcd123e307282631defe6bf6fac5031cf60 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Mar 2021 19:26:22 +0300 Subject: [PATCH 185/839] net/hamradio/6pack: remove redundant check in sp_encaps() "len > sp->mtu" checked twice in a row in sp_encaps(). Remove the second check. Signed-off-by: Denis Efremov Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 71d6629e65c97..9f5b5614a1503 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -171,11 +171,6 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len) goto out_drop; } - if (len > sp->mtu) { /* sp->mtu = AX25_MTU = max. PACLEN = 256 */ - msg = "oversized transmit packet!"; - goto out_drop; - } - if (p[0] > 5) { msg = "invalid KISS command"; goto out_drop; -- GitLab From 155b23e6e53475ca3b8c2a946299b4d4dd6a5a1e Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Mar 2021 20:02:12 +0300 Subject: [PATCH 186/839] sun/niu: fix wrong RXMAC_BC_FRM_CNT_COUNT count RXMAC_BC_FRM_CNT_COUNT added to mp->rx_bcasts twice in a row in niu_xmac_interrupt(). Remove the second addition. Signed-off-by: Denis Efremov Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 68695d4afacd5..707ccdd03b19e 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3931,8 +3931,6 @@ static void niu_xmac_interrupt(struct niu *np) mp->rx_mcasts += RXMAC_MC_FRM_CNT_COUNT; if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP) mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT; - if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP) - mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT; if (val & XRXMAC_STATUS_RXHIST1_CNT_EXP) mp->rx_hist_cnt1 += RXMAC_HIST_CNT1_COUNT; if (val & XRXMAC_STATUS_RXHIST2_CNT_EXP) -- GitLab From d8861bab48b6c1fc3cdbcab8ff9d1eaea43afe7f Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Thu, 4 Mar 2021 20:52:52 +0100 Subject: [PATCH 187/839] gianfar: fix jumbo packets+napi+rx overrun crash When using jumbo packets and overrunning rx queue with napi enabled, the following sequence is observed in gfar_add_rx_frag: | lstatus | | skb | t | lstatus, size, flags | first | len, data_len, *ptr | ---+--------------------------------------+-------+-----------------------+ 13 | 18002348, 9032, INTERRUPT LAST | 0 | 9600, 8000, f554c12e | 12 | 10000640, 1600, INTERRUPT | 0 | 8000, 6400, f554c12e | 11 | 10000640, 1600, INTERRUPT | 0 | 6400, 4800, f554c12e | 10 | 10000640, 1600, INTERRUPT | 0 | 4800, 3200, f554c12e | 09 | 10000640, 1600, INTERRUPT | 0 | 3200, 1600, f554c12e | 08 | 14000640, 1600, INTERRUPT FIRST | 0 | 1600, 0, f554c12e | 07 | 14000640, 1600, INTERRUPT FIRST | 1 | 0, 0, f554c12e | 06 | 1c000080, 128, INTERRUPT LAST FIRST | 1 | 0, 0, abf3bd6e | 05 | 18002348, 9032, INTERRUPT LAST | 0 | 8000, 6400, c5a57780 | 04 | 10000640, 1600, INTERRUPT | 0 | 6400, 4800, c5a57780 | 03 | 10000640, 1600, INTERRUPT | 0 | 4800, 3200, c5a57780 | 02 | 10000640, 1600, INTERRUPT | 0 | 3200, 1600, c5a57780 | 01 | 10000640, 1600, INTERRUPT | 0 | 1600, 0, c5a57780 | 00 | 14000640, 1600, INTERRUPT FIRST | 1 | 0, 0, c5a57780 | So at t=7 a new packets is started but not finished, probably due to rx overrun - but rx overrun is not indicated in the flags. Instead a new packets starts at t=8. This results in skb->len to exceed size for the LAST fragment at t=13 and thus a negative fragment size added to the skb. This then crashes: kernel BUG at include/linux/skbuff.h:2277! Oops: Exception in kernel mode, sig: 5 [#1] ... NIP [c04689f4] skb_pull+0x2c/0x48 LR [c03f62ac] gfar_clean_rx_ring+0x2e4/0x844 Call Trace: [ec4bfd38] [c06a84c4] _raw_spin_unlock_irqrestore+0x60/0x7c (unreliable) [ec4bfda8] [c03f6a44] gfar_poll_rx_sq+0x48/0xe4 [ec4bfdc8] [c048d504] __napi_poll+0x54/0x26c [ec4bfdf8] [c048d908] net_rx_action+0x138/0x2c0 [ec4bfe68] [c06a8f34] __do_softirq+0x3a4/0x4fc [ec4bfed8] [c0040150] run_ksoftirqd+0x58/0x70 [ec4bfee8] [c0066ecc] smpboot_thread_fn+0x184/0x1cc [ec4bff08] [c0062718] kthread+0x140/0x144 [ec4bff38] [c0012350] ret_from_kernel_thread+0x14/0x1c This patch fixes this by checking for computed LAST fragment size, so a negative sized fragment is never added. In order to prevent the newer rx frame from getting corrupted, the FIRST flag is checked to discard the incomplete older frame. Signed-off-by: Michael Braun Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 541de32ea6622..1cf8ef717453d 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2390,6 +2390,10 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, if (lstatus & BD_LFLAG(RXBD_LAST)) size -= skb->len; + WARN(size < 0, "gianfar: rx fragment size underflow"); + if (size < 0) + return false; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, rxb->page_offset + RXBUF_ALIGNMENT, size, GFAR_RXB_TRUESIZE); @@ -2552,6 +2556,17 @@ static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, if (lstatus & BD_LFLAG(RXBD_EMPTY)) break; + /* lost RXBD_LAST descriptor due to overrun */ + if (skb && + (lstatus & BD_LFLAG(RXBD_FIRST))) { + /* discard faulty buffer */ + dev_kfree_skb(skb); + skb = NULL; + rx_queue->stats.rx_dropped++; + + /* can continue normally */ + } + /* order rx buffer descriptor reads */ rmb(); -- GitLab From 3e21a10fdea3c2e4e4d1b72cb9d720256461af40 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Fri, 5 Mar 2021 16:24:45 -0600 Subject: [PATCH 188/839] lan743x: trim all 4 bytes of the FCS; not just 2 Trim all 4 bytes of the received FCS; not just 2 of them. Leaving 2 bytes of the FCS on the frame breaks DSA tailing tag drivers. Fixes: a8db76d40e4d ("lan743x: boost performance on cpu archs w/o dma cache snooping") Signed-off-by: George McCollister Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index dbdfabff3b003..1c3e204d727cf 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2040,7 +2040,7 @@ lan743x_rx_trim_skb(struct sk_buff *skb, int frame_length) dev_kfree_skb_irq(skb); return NULL; } - frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 2); + frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4); if (skb->len > frame_length) { skb->tail -= skb->len - frame_length; skb->len = frame_length; -- GitLab From a4dcfbc4ee2218abd567d81d795082d8d4afcdf6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 5 Mar 2021 14:17:29 -0800 Subject: [PATCH 189/839] ethernet: alx: fix order of calls on resume netif_device_attach() will unpause the queues so we can't call it before __alx_open(). This went undetected until commit b0999223f224 ("alx: add ability to allocate and free alx_napi structures") but now if stack tries to xmit immediately on resume before __alx_open() we'll crash on the NAPI being null: BUG: kernel NULL pointer dereference, address: 0000000000000198 CPU: 0 PID: 12 Comm: ksoftirqd/0 Tainted: G OE 5.10.0-3-amd64 #1 Debian 5.10.13-1 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77-D3H, BIOS F15 11/14/2013 RIP: 0010:alx_start_xmit+0x34/0x650 [alx] Code: 41 56 41 55 41 54 55 53 48 83 ec 20 0f b7 57 7c 8b 8e b0 0b 00 00 39 ca 72 06 89 d0 31 d2 f7 f1 89 d2 48 8b 84 df RSP: 0018:ffffb09240083d28 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffffa04d80ae7800 RCX: 0000000000000004 RDX: 0000000000000000 RSI: ffffa04d80afa000 RDI: ffffa04e92e92a00 RBP: 0000000000000042 R08: 0000000000000100 R09: ffffa04ea3146700 R10: 0000000000000014 R11: 0000000000000000 R12: ffffa04e92e92100 R13: 0000000000000001 R14: ffffa04e92e92a00 R15: ffffa04e92e92a00 FS: 0000000000000000(0000) GS:ffffa0508f600000(0000) knlGS:0000000000000000 i915 0000:00:02.0: vblank wait timed out on crtc 0 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000198 CR3: 000000004460a001 CR4: 00000000001706f0 Call Trace: dev_hard_start_xmit+0xc7/0x1e0 sch_direct_xmit+0x10f/0x310 Cc: # 4.9+ Fixes: bc2bebe8de8e ("alx: remove WoL support") Reported-by: Zbynek Michl Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=983595 Signed-off-by: Jakub Kicinski Tested-by: Zbynek Michl Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 9b7f1af5f5747..9e02f88645931 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1894,13 +1894,16 @@ static int alx_resume(struct device *dev) if (!netif_running(alx->dev)) return 0; - netif_device_attach(alx->dev); rtnl_lock(); err = __alx_open(alx, true); rtnl_unlock(); + if (err) + return err; - return err; + netif_device_attach(alx->dev); + + return 0; } static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); -- GitLab From 4691453406c3a799fdebac83a689919c2c877f04 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 5 Mar 2021 23:08:16 +0500 Subject: [PATCH 190/839] kvm: x86: use NULL instead of using plain integer as pointer Sparse warnings removed: warning: Using plain integer as NULL pointer Signed-off-by: Muhammad Usama Anjum Message-Id: <20210305180816.GA488770@LEGION> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 868213ca4f984..46b0e52671bb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10600,7 +10600,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return (void __user *)hva; } else { if (!slot || !slot->npages) - return 0; + return NULL; old_npages = slot->npages; hva = slot->userspace_addr; -- GitLab From b96b0c5de685df82019e16826a282d53d86d112c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 5 Mar 2021 18:52:47 +0000 Subject: [PATCH 191/839] KVM: arm64: nvhe: Save the SPE context early The nVHE KVM hyp drains and disables the SPE buffer, before entering the guest, as the EL1&0 translation regime is going to be loaded with that of the guest. But this operation is performed way too late, because : - The owning translation regime of the SPE buffer is transferred to EL2. (MDCR_EL2_E2PB == 0) - The guest Stage1 is loaded. Thus the flush could use the host EL1 virtual address, but use the EL2 translations instead of host EL1, for writing out any cached data. Fix this by moving the SPE buffer handling early enough. The restore path is doing the right thing. Fixes: 014c4c77aad7 ("KVM: arm64: Improve debug register save/restore flow") Cc: stable@vger.kernel.org Cc: Christoffer Dall Cc: Marc Zyngier Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Alexandru Elisei Reviewed-by: Alexandru Elisei Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210302120345.3102874-1-suzuki.poulose@arm.com Message-Id: <20210305185254.3730990-2-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_hyp.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++++++++++-- arch/arm64/kvm/hyp/nvhe/switch.c | 11 ++++++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index c0450828378b5..385bd7dd3d390 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); +#endif + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 91a711aa8382e..f401724f12ef7 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1) write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); } -void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ __debug_switch_to_guest_common(vcpu); } -void __debug_switch_to_host(struct kvm_vcpu *vcpu) +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ __debug_switch_to_host_common(vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f3d0e9eca56cd..59aa1045fdafc 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); __sysreg_save_state_nvhe(host_ctxt); + /* + * We must flush and disable the SPE buffer for nVHE, as + * the translation regime(EL1&0) is going to be loaded with + * that of the guest. And we must do this before we change the + * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and + * before we load guest Stage1. + */ + __debug_save_host_buffers_nvhe(vcpu); __adjust_pc(vcpu); @@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); + __debug_switch_to_host(vcpu); /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. */ - __debug_switch_to_host(vcpu); + __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) __pmu_switch_to_host(host_ctxt); -- GitLab From 31948332d5fa392ad933f4a6a10026850649ed76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Mar 2021 18:52:48 +0000 Subject: [PATCH 192/839] KVM: arm64: Avoid corrupting vCPU context register in guest exit Commit 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context") tracks the currently running vCPU, clearing the pointer to NULL on exit from a guest. Unfortunately, the use of 'set_loaded_vcpu' clobbers x1 to point at the kvm_hyp_ctxt instead of the vCPU context, causing the subsequent RAS code to go off into the weeds when it saves the DISR assuming that the CPU context is embedded in a struct vCPU. Leave x1 alone and use x3 as a temporary register instead when clearing the vCPU on the guest exit path. Cc: Marc Zyngier Cc: Andrew Scull Cc: Fixes: 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context") Suggested-by: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210226181211.14542-1-will@kernel.org Message-Id: <20210305185254.3730990-3-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index b0afad7a99c6e..0c66a1d408fd7 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -146,7 +146,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // Now restore the hyp regs restore_callee_saved_regs x2 - set_loaded_vcpu xzr, x1, x2 + set_loaded_vcpu xzr, x2, x3 alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error -- GitLab From c4b000c3928d4f20acef79dccf3a65ae3795e0b0 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Fri, 5 Mar 2021 18:52:49 +0000 Subject: [PATCH 193/839] KVM: arm64: Fix nVHE hyp panic host context restore When panicking from the nVHE hyp and restoring the host context, x29 is expected to hold a pointer to the host context. This wasn't being done so fix it to make sure there's a valid pointer the host context being used. Rather than passing a boolean indicating whether or not the host context should be restored, instead pass the pointer to the host context. NULL is passed to indicate that no context should be restored. Fixes: a2e102e20fd6 ("KVM: arm64: nVHE: Handle hyp panics") Cc: stable@vger.kernel.org Signed-off-by: Andrew Scull [maz: partial rewrite to fit 5.12-rc1] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210219122406.1337626-1-ascull@google.com Message-Id: <20210305185254.3730990-4-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_hyp.h | 3 ++- arch/arm64/kvm/hyp/nvhe/host.S | 15 ++++++++------- arch/arm64/kvm/hyp/nvhe/switch.c | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 385bd7dd3d390..32ae676236b6b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -102,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ -void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); +void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 6585a7cbbc566..5d94584840ccf 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter) SYM_FUNC_END(__host_enter) /* - * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); + * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + * u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) /* Prepare and exit to the host's panic funciton. */ @@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic) hyp_kimg_va lr, x6 msr elr_el2, lr - /* Set the panic format string. Use the, now free, LR as scratch. */ - ldr lr, =__hyp_panic_string - hyp_kimg_va lr, x6 + mov x29, x0 + + /* Load the format string into x0 and arguments into x1-7 */ + ldr x0, =__hyp_panic_string + hyp_kimg_va x0, x6 /* Load the format arguments into x1-7. */ mov x6, x3 @@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic) mrs x5, hpfar_el2 /* Enter the host, conditionally restoring the host context. */ - cmp x0, xzr - mov x0, lr - b.eq __host_enter_without_restoring + cbz x29, __host_enter_without_restoring b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 59aa1045fdafc..68ab6b4d51414 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -266,7 +266,6 @@ void __noreturn hyp_panic(void) u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg_par(); - bool restore_host = true; struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; @@ -280,7 +279,7 @@ void __noreturn hyp_panic(void) __sysreg_restore_state_nvhe(host_ctxt); } - __hyp_do_panic(restore_host, spsr, elr, par); + __hyp_do_panic(host_ctxt, spsr, elr, par); unreachable(); } -- GitLab From 6b5b368fccd7109b052e45af8ba1464c8d140a49 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:50 +0000 Subject: [PATCH 194/839] KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key We currently find out about the presence of a HW PMU (or the handling of that PMU by perf, which amounts to the same thing) in a fairly roundabout way, by checking the number of counters available to perf. That's good enough for now, but we will soon need to find about about that on paths where perf is out of reach (in the world switch). Instead, let's turn kvm_arm_support_pmu_v3() into a static key. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210209114844.3278746-2-maz@kernel.org Message-Id: <20210305185254.3730990-5-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/perf.c | 10 ++++++++++ arch/arm64/kvm/pmu-emul.c | 10 ---------- include/kvm/arm_pmu.h | 9 +++++++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index d45b8b9a44151..739164324afed 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -11,6 +11,8 @@ #include +DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + static int kvm_is_in_guest(void) { return kvm_get_running_vcpu() != NULL; @@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { + /* + * Check if HW_PERF_EVENTS are supported by checking the number of + * hardware performance counters. This could ensure the presence of + * a physical PMU and CONFIG_PERF_EVENT is selected. + */ + if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0) + static_branch_enable(&kvm_arm_pmu_available); + return perf_register_guest_info_callbacks(&kvm_guest_cbs); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index e9ec08b0b0703..e32c6e139a09d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return val & mask; } -bool kvm_arm_support_pmu_v3(void) -{ - /* - * Check if HW_PERF_EVENTS are supported by checking the number of - * hardware performance counters. This could ensure the presence of - * a physical PMU and CONFIG_PERF_EVENT is selected. - */ - return (perf_num_counters() > 0); -} - int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { if (!kvm_vcpu_has_pmu(vcpu)) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 8dcb3e1477bc9..6fd3cda608e4a 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,6 +13,13 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) +DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + +static __always_inline bool kvm_arm_support_pmu_v3(void) +{ + return static_branch_likely(&kvm_arm_pmu_available); +} + #ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { @@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx); -bool kvm_arm_support_pmu_v3(void); int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, @@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) {} -static inline bool kvm_arm_support_pmu_v3(void) { return false; } static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { -- GitLab From f27647b588c13647a60074b5a8dd39a86d919a1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:51 +0000 Subject: [PATCH 195/839] KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available When running under a nesting hypervisor, it isn't guaranteed that the virtual HW will include a PMU. In which case, let's not try to access the PMU registers in the world switch, as that'd be deadly. Reported-by: Andre Przywara Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210209114844.3278746-3-maz@kernel.org Message-Id: <20210305185254.3730990-6-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kernel/image-vars.h | 3 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 23f1a557bd9f0..5aa9ed1e9ec61 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table); /* Array containing bases of nVHE per-CPU memory regions. */ KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); +/* PMU available static key */ +KVM_NVHE_ALIAS(kvm_arm_pmu_available); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 54f4860cd87c0..6c1f51f25eb34 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * counter, which could make a PMXEVCNTR_EL0 access UNDEF at * EL1 instead of being trapped to EL2. */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) { + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + } write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } static inline void __deactivate_traps_common(void) { write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) + write_sysreg(0, pmuserenr_el0); } static inline void ___activate_traps(struct kvm_vcpu *vcpu) -- GitLab From b9d699e2694d032aa8ecc15141f698ccb050dc95 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:52 +0000 Subject: [PATCH 196/839] KVM: arm64: Rename __vgic_v3_get_ich_vtr_el2() to __vgic_v3_get_gic_config() As we are about to report a bit more information to the rest of the kernel, rename __vgic_v3_get_ich_vtr_el2() to the more explicit __vgic_v3_get_gic_config(). No functional change. Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Message-Id: <20210305185254.3730990-7-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++--- arch/arm64/kvm/hyp/vgic-v3-sr.c | 7 ++++++- arch/arm64/kvm/vgic/vgic-v3.c | 4 +++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 22d933e9b59e5..9c0e396dd03ff 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,7 +50,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 @@ -192,7 +192,7 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u64 __vgic_v3_get_gic_config(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f012f8665ecc1..8f129968204ef 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) write_sysreg_el2(tmp, SYS_SCTLR); } -static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) { - cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); + cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) @@ -118,7 +118,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_local_vmid), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_enable_ssbs), - HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_get_gic_config), HANDLE_FUNC(__vgic_v3_read_vmcr), HANDLE_FUNC(__vgic_v3_write_vmcr), HANDLE_FUNC(__vgic_v3_init_lrs), diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 80406f463c28f..005daa0c9dd78 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -405,7 +405,12 @@ void __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __vgic_v3_get_ich_vtr_el2(void) +/* + * Return the GIC CPU configuration: + * - [31:0] ICH_VTR_EL2 + * - [63:32] RES0 + */ +u64 __vgic_v3_get_gic_config(void) { return read_gicreg(ICH_VTR_EL2); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 52915b3423514..c3e6c3fd333b5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -574,9 +574,11 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); */ int vgic_v3_probe(const struct gic_kvm_info *info) { - u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); + u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); int ret; + ich_vtr_el2 = (u32)ich_vtr_el2; + /* * The ListRegs field is 5 bits, but there is an architectural * maximum of 16 list registers. Just ignore bit 4... -- GitLab From 9739f6ef053f104a997165701c6e15582c4307ee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2021 18:52:53 +0000 Subject: [PATCH 197/839] KVM: arm64: Workaround firmware wrongly advertising GICv2-on-v3 compatibility It looks like we have broken firmware out there that wrongly advertises a GICv2 compatibility interface, despite the CPUs not being able to deal with it. To work around this, check that the CPU initialising KVM is actually able to switch to MMIO instead of system registers, and use that as a precondition to enable GICv2 compatibility in KVM. Note that the detection happens on a single CPU. If the firmware is lying *and* that the CPUs are asymetric, all hope is lost anyway. Reported-by: Shameerali Kolothum Thodi Tested-by: Shameer Kolothum Signed-off-by: Marc Zyngier Message-Id: <20210305185254.3730990-8-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 35 +++++++++++++++++++++++++++++++-- arch/arm64/kvm/vgic/vgic-v3.c | 8 ++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 005daa0c9dd78..ee3682b9873c4 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -408,11 +408,42 @@ void __vgic_v3_init_lrs(void) /* * Return the GIC CPU configuration: * - [31:0] ICH_VTR_EL2 - * - [63:32] RES0 + * - [62:32] RES0 + * - [63] MMIO (GICv2) capable */ u64 __vgic_v3_get_gic_config(void) { - return read_gicreg(ICH_VTR_EL2); + u64 val, sre = read_gicreg(ICC_SRE_EL1); + unsigned long flags = 0; + + /* + * To check whether we have a MMIO-based (GICv2 compatible) + * CPU interface, we need to disable the system register + * view. To do that safely, we have to prevent any interrupt + * from firing (which would be deadly). + * + * Note that this only makes sense on VHE, as interrupts are + * already masked for nVHE as part of the exception entry to + * EL2. + */ + if (has_vhe()) + flags = local_daif_save(); + + write_gicreg(0, ICC_SRE_EL1); + isb(); + + val = read_gicreg(ICC_SRE_EL1); + + write_gicreg(sre, ICC_SRE_EL1); + isb(); + + if (has_vhe()) + local_daif_restore(flags); + + val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63); + val |= read_gicreg(ICH_VTR_EL2); + + return val; } u64 __vgic_v3_read_vmcr(void) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index c3e6c3fd333b5..6f530925a2314 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -575,8 +575,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); int vgic_v3_probe(const struct gic_kvm_info *info) { u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + bool has_v2; int ret; + has_v2 = ich_vtr_el2 >> 63; ich_vtr_el2 = (u32)ich_vtr_el2; /* @@ -596,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info) gicv4_enable ? "en" : "dis"); } + kvm_vgic_global_state.vcpu_base = 0; + if (!info->vcpu.start) { kvm_info("GICv3: no GICV resource entry\n"); - kvm_vgic_global_state.vcpu_base = 0; + } else if (!has_v2) { + pr_warn(FW_BUG "CPU interface incapable of MMIO access\n"); } else if (!PAGE_ALIGNED(info->vcpu.start)) { pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)info->vcpu.start); - kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; -- GitLab From 357ad203d45c0f9d76a8feadbd5a1c5d460c638b Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 5 Mar 2021 18:52:54 +0000 Subject: [PATCH 198/839] KVM: arm64: Fix range alignment when walking page tables When walking the page tables at a given level, and if the start address for the range isn't aligned for that level, we propagate the misalignment on each iteration at that level. This results in the walker ignoring a number of entries (depending on the original misalignment) on each subsequent iteration. Properly aligning the address before the next iteration addresses this issue. Cc: stable@vger.kernel.org Reported-by: Howard Zhang Acked-by: Will Deacon Signed-off-by: Jia He Fixes: b1e57de62cfb ("KVM: arm64: Add stand-alone page-table walker infrastructure") [maz: rewrite commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210303024225.2591-1-justin.he@arm.com Message-Id: <20210305185254.3730990-9-maz@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp/pgtable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4d177ce1d536f..926fc07074f57 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; if (!table) { + data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level)); data->addr += kvm_granule_size(level); goto out; } -- GitLab From 56b26497bb4b7ff970612dc25a8a008c34463f7b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 6 Mar 2021 10:50:18 +0100 Subject: [PATCH 199/839] ALSA: hda/conexant: Add quirk for mute LED control on HP ZBook G5 The mute and mic-mute LEDs on HP ZBook Studio G5 are controlled via GPIO bits 0x10 and 0x20, respectively, and we need the extra setup for those. As the similar code is already present for other HP models but with different GPIO pins, this patch factors out the common helper code and applies those GPIO values for each model. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211893 Cc: Link: https://lore.kernel.org/r/20210306095018.11746-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 62 ++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f2aa226d1373d..c20dad46a7c90 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -149,6 +149,21 @@ static int cx_auto_vmaster_mute_led(struct led_classdev *led_cdev, return 0; } +static void cxt_init_gpio_led(struct hda_codec *codec) +{ + struct conexant_spec *spec = codec->spec; + unsigned int mask = spec->gpio_mute_led_mask | spec->gpio_mic_led_mask; + + if (mask) { + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK, + mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION, + mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_led); + } +} + static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; @@ -156,6 +171,7 @@ static int cx_auto_init(struct hda_codec *codec) if (!spec->dynamic_eapd) cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, true); + cxt_init_gpio_led(codec); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); return 0; @@ -215,6 +231,7 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, }; @@ -654,31 +671,36 @@ static int cxt_gpio_micmute_update(struct led_classdev *led_cdev, return 0; } - -static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, - const struct hda_fixup *fix, int action) +static void cxt_setup_mute_led(struct hda_codec *codec, + unsigned int mute, unsigned int mic_mute) { struct conexant_spec *spec = codec->spec; - static const struct hda_verb gpio_init[] = { - { 0x01, AC_VERB_SET_GPIO_MASK, 0x03 }, - { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03 }, - {} - }; - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gpio_led = 0; + spec->mute_led_polarity = 0; + if (mute) { snd_hda_gen_add_mute_led_cdev(codec, cxt_gpio_mute_update); - spec->gpio_led = 0; - spec->mute_led_polarity = 0; - spec->gpio_mute_led_mask = 0x01; - spec->gpio_mic_led_mask = 0x02; + spec->gpio_mute_led_mask = mute; + } + if (mic_mute) { snd_hda_gen_add_micmute_led_cdev(codec, cxt_gpio_micmute_update); + spec->gpio_mic_led_mask = mic_mute; } - snd_hda_add_verbs(codec, gpio_init); - if (spec->gpio_led) - snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, - spec->gpio_led); } +static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + cxt_setup_mute_led(codec, 0x01, 0x02); +} + +static void cxt_fixup_hp_zbook_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + cxt_setup_mute_led(codec, 0x10, 0x20); +} /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { @@ -839,6 +861,10 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HP_ZBOOK_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_zbook_mute_led, + }, [CXT_FIXUP_HEADSET_MIC] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_headset_mic, @@ -917,6 +943,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8402, "HP ProBook 645 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8427, "HP ZBook Studio G5", CXT_FIXUP_HP_ZBOOK_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8456, "HP Z2 G4 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), @@ -956,6 +983,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" }, + { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, {} }; -- GitLab From 8a6edb5257e2a84720fe78cb179eca58ba76126f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 13 Feb 2021 13:10:35 +0100 Subject: [PATCH 200/839] sched: Fix migration_cpu_stop() requeueing When affine_move_task(p) is called on a running task @p, which is not otherwise already changing affinity, we'll first set p->migration_pending and then do: stop_one_cpu(cpu_of_rq(rq), migration_cpu_stop, &arg); This then gets us to migration_cpu_stop() running on the CPU that was previously running our victim task @p. If we find that our task is no longer on that runqueue (this can happen because of a concurrent migration due to load-balance etc.), then we'll end up at the: } else if (dest_cpu < 1 || pending) { branch. Which we'll take because we set pending earlier. Here we first check if the task @p has already satisfied the affinity constraints, if so we bail early [A]. Otherwise we'll reissue migration_cpu_stop() onto the CPU that is now hosting our task @p: stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, &pending->arg, &pending->stop_work); Except, we've never initialized pending->arg, which will be all 0s. This then results in running migration_cpu_stop() on the next CPU with arg->p == NULL, which gives the by now obvious result of fireworks. The cure is to change affine_move_task() to always use pending->arg, furthermore we can use the exact same pattern as the SCA_MIGRATE_ENABLE case, since we'll block on the pending->done completion anyway, no point in adding yet another completion in stop_one_cpu(). This then gives a clear distinction between the two migration_cpu_stop() use cases: - sched_exec() / migrate_task_to() : arg->pending == NULL - affine_move_task() : arg->pending != NULL; And we can have it ignore p->migration_pending when !arg->pending. Any stop work from sched_exec() / migrate_task_to() is in addition to stop works from affine_move_task(), which will be sufficient to issue the completion. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.357743989@infradead.org --- kernel/sched/core.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca2bb629595f6..79ddba55b123b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1922,6 +1922,24 @@ static int migration_cpu_stop(void *data) rq_lock(rq, &rf); pending = p->migration_pending; + if (pending && !arg->pending) { + /* + * This happens from sched_exec() and migrate_task_to(), + * neither of them care about pending and just want a task to + * maybe move about. + * + * Even if there is a pending, we can ignore it, since + * affine_move_task() will have it's own stop_work's in flight + * which will manage the completion. + * + * Notably, pending doesn't need to match arg->pending. This can + * happen when tripple concurrent affine_move_task() first sets + * pending, then clears pending and eventually sets another + * pending. + */ + pending = NULL; + } + /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -2194,10 +2212,6 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - struct migration_arg arg = { - .task = p, - .dest_cpu = dest_cpu, - }; bool complete = false; /* Can the task run on the task's current CPU? If so, we're done */ @@ -2235,6 +2249,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag /* Install the request */ refcount_set(&my_pending.refs, 1); init_completion(&my_pending.done); + my_pending.arg = (struct migration_arg) { + .task = p, + .dest_cpu = -1, /* any */ + .pending = &my_pending, + }; + p->migration_pending = &my_pending; } else { pending = p->migration_pending; @@ -2265,12 +2285,6 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); - pending->arg = (struct migration_arg) { - .task = p, - .dest_cpu = -1, - .pending = pending, - }; - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, &pending->arg, &pending->stop_work); @@ -2283,8 +2297,11 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag * is_migration_disabled(p) checks to the stopper, which will * run on the same CPU as said p. */ + refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ task_rq_unlock(rq, p, rf); - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); + + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, + &pending->arg, &pending->stop_work); } else { -- GitLab From c20cf065d4a619d394d23290093b1002e27dff86 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2021 11:50:39 +0100 Subject: [PATCH 201/839] sched: Simplify migration_cpu_stop() When affine_move_task() issues a migration_cpu_stop(), the purpose of that function is to complete that @pending, not any random other p->migration_pending that might have gotten installed since. This realization much simplifies migration_cpu_stop() and allows further necessary steps to fix all this as it provides the guarantee that @pending's stopper will complete @pending (and not some random other @pending). Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.430014682@infradead.org --- kernel/sched/core.c | 56 +++++++-------------------------------------- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 79ddba55b123b..088e8f492271b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, */ static int migration_cpu_stop(void *data) { - struct set_affinity_pending *pending; struct migration_arg *arg = data; + struct set_affinity_pending *pending = arg->pending; struct task_struct *p = arg->task; int dest_cpu = arg->dest_cpu; struct rq *rq = this_rq(); @@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data) raw_spin_lock(&p->pi_lock); rq_lock(rq, &rf); - pending = p->migration_pending; - if (pending && !arg->pending) { - /* - * This happens from sched_exec() and migrate_task_to(), - * neither of them care about pending and just want a task to - * maybe move about. - * - * Even if there is a pending, we can ignore it, since - * affine_move_task() will have it's own stop_work's in flight - * which will manage the completion. - * - * Notably, pending doesn't need to match arg->pending. This can - * happen when tripple concurrent affine_move_task() first sets - * pending, then clears pending and eventually sets another - * pending. - */ - pending = NULL; - } - /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data) goto out; if (pending) { - p->migration_pending = NULL; + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; } - /* migrate_enable() -- we must not race against SCA */ - if (dest_cpu < 0) { - /* - * When this was migrate_enable() but we no longer - * have a @pending, a concurrent SCA 'fixed' things - * and we should be valid again. Nothing to do. - */ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)); - goto out; - } - + if (dest_cpu < 0) dest_cpu = cpumask_any_distribute(&p->cpus_mask); - } if (task_on_rq_queued(p)) rq = __migrate_task(rq, &rf, p, dest_cpu); else p->wake_cpu = dest_cpu; - } else if (dest_cpu < 0 || pending) { + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s * preempt_enable() and scheduling the stopper task. At that @@ -1989,22 +1959,13 @@ static int migration_cpu_stop(void *data) * ->pi_lock, so the allowed mask is stable - if it got * somewhere allowed, we're done. */ - if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { - p->migration_pending = NULL; + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; goto out; } - /* - * When this was migrate_enable() but we no longer have an - * @pending, a concurrent SCA 'fixed' things and we should be - * valid again. Nothing to do. - */ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)); - goto out; - } - /* * When migrate_enable() hits a rq mis-match we can't reliably * determine is_migration_disabled() and so have to chase after @@ -2022,7 +1983,6 @@ out: complete_all(&pending->done); /* For pending->{arg,stop_work} */ - pending = arg->pending; if (pending && refcount_dec_and_test(&pending->refs)) wake_up_var(&pending->refs); -- GitLab From 58b1a45086b5f80f2b2842aa7ed0da51a64a302b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2021 11:15:23 +0100 Subject: [PATCH 202/839] sched: Collate affine_move_task() stoppers The SCA_MIGRATE_ENABLE and task_running() cases are almost identical, collapse them to avoid further duplication. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.500108964@infradead.org --- kernel/sched/core.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 088e8f492271b..84b657f056258 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2239,30 +2239,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag return -EINVAL; } - if (flags & SCA_MIGRATE_ENABLE) { - - refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ - p->migration_flags &= ~MDF_PUSH; - task_rq_unlock(rq, p, rf); - - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - &pending->arg, &pending->stop_work); - - return 0; - } - if (task_running(rq, p) || p->state == TASK_WAKING) { /* - * Lessen races (and headaches) by delegating - * is_migration_disabled(p) checks to the stopper, which will - * run on the same CPU as said p. + * MIGRATE_ENABLE gets here because 'p == current', but for + * anything else we cannot do is_migration_disabled(), punt + * and have the stopper function handle it all race-free. */ + refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ + if (flags & SCA_MIGRATE_ENABLE) + p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, &pending->arg, &pending->stop_work); + if (flags & SCA_MIGRATE_ENABLE) + return 0; } else { if (!is_migration_disabled(p)) { -- GitLab From 3f1bc119cd7fc987c8ed25ffb717f99403bb308c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2021 11:21:35 +0100 Subject: [PATCH 203/839] sched: Optimize migration_cpu_stop() When the purpose of migration_cpu_stop() is to migrate the task to 'any' valid CPU, don't migrate the task when it's already running on a valid CPU. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.569238629@infradead.org --- kernel/sched/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 84b657f056258..ac05afbd982ee 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1936,14 +1936,25 @@ static int migration_cpu_stop(void *data) complete = true; } - if (dest_cpu < 0) + if (dest_cpu < 0) { + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) + goto out; + dest_cpu = cpumask_any_distribute(&p->cpus_mask); + } if (task_on_rq_queued(p)) rq = __migrate_task(rq, &rf, p, dest_cpu); else p->wake_cpu = dest_cpu; + /* + * XXX __migrate_task() can fail, at which point we might end + * up running on a dodgy CPU, AFAICT this can only happen + * during CPU hotplug, at which point we'll get pushed out + * anyway, so it's probably not a big deal. + */ + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s -- GitLab From 9e81889c7648d48dd5fe13f41cbc99f3c362484a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2021 11:31:09 +0100 Subject: [PATCH 204/839] sched: Fix affine_move_task() self-concurrency Consider: sched_setaffinity(p, X); sched_setaffinity(p, Y); Then the first will install p->migration_pending = &my_pending; and issue stop_one_cpu_nowait(pending); and the second one will read p->migration_pending and _also_ issue: stop_one_cpu_nowait(pending), the _SAME_ @pending. This causes stopper list corruption. Add set_affinity_pending::stop_pending, to indicate if a stopper is in progress. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.649146419@infradead.org --- kernel/sched/core.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ac05afbd982ee..4e4d100c1f7a0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1864,6 +1864,7 @@ struct migration_arg { struct set_affinity_pending { refcount_t refs; + unsigned int stop_pending; struct completion done; struct cpu_stop_work stop_work; struct migration_arg arg; @@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data) * determine is_migration_disabled() and so have to chase after * it. */ + WARN_ON_ONCE(!pending->stop_pending); task_rq_unlock(rq, p, &rf); stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, &pending->arg, &pending->stop_work); return 0; } out: + if (pending) + pending->stop_pending = false; task_rq_unlock(rq, p, &rf); if (complete) @@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - bool complete = false; + bool stop_pending, complete = false; /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { @@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag * anything else we cannot do is_migration_disabled(), punt * and have the stopper function handle it all race-free. */ + stop_pending = pending->stop_pending; + if (!stop_pending) + pending->stop_pending = true; refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - &pending->arg, &pending->stop_work); + if (!stop_pending) { + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, + &pending->arg, &pending->stop_work); + } if (flags & SCA_MIGRATE_ENABLE) return 0; -- GitLab From 50caf9c14b1498c90cf808dbba2ca29bd32ccba4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Feb 2021 11:42:08 +0100 Subject: [PATCH 205/839] sched: Simplify set_affinity_pending refcounts Now that we have set_affinity_pending::stop_pending to indicate if a stopper is in progress, and we have the guarantee that if that stopper exists, it will (eventually) complete our @pending we can simplify the refcount scheme by no longer counting the stopper thread. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: stable@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.724130207@infradead.org --- kernel/sched/core.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4e4d100c1f7a0..98191218d891d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1862,6 +1862,10 @@ struct migration_arg { struct set_affinity_pending *pending; }; +/* + * @refs: number of wait_for_completion() + * @stop_pending: is @stop_work in use + */ struct set_affinity_pending { refcount_t refs; unsigned int stop_pending; @@ -1997,10 +2001,6 @@ out: if (complete) complete_all(&pending->done); - /* For pending->{arg,stop_work} */ - if (pending && refcount_dec_and_test(&pending->refs)) - wake_up_var(&pending->refs); - return 0; } @@ -2199,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag push_task = get_task_struct(p); } + /* + * If there are pending waiters, but no pending stop_work, + * then complete now. + */ pending = p->migration_pending; - if (pending) { - refcount_inc(&pending->refs); + if (pending && !pending->stop_pending) { p->migration_pending = NULL; complete = true; } + task_rq_unlock(rq, p, rf); if (push_task) { @@ -2213,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag } if (complete) - goto do_complete; + complete_all(&pending->done); return 0; } @@ -2264,9 +2268,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (!stop_pending) pending->stop_pending = true; - refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; + task_rq_unlock(rq, p, rf); if (!stop_pending) { @@ -2282,12 +2286,13 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (task_on_rq_queued(p)) rq = move_queued_task(rq, rf, p, dest_cpu); - p->migration_pending = NULL; - complete = true; + if (!pending->stop_pending) { + p->migration_pending = NULL; + complete = true; + } } task_rq_unlock(rq, p, rf); -do_complete: if (complete) complete_all(&pending->done); } @@ -2295,7 +2300,7 @@ do_complete: wait_for_completion(&pending->done); if (refcount_dec_and_test(&pending->refs)) - wake_up_var(&pending->refs); + wake_up_var(&pending->refs); /* No UaF, just an address */ /* * Block the original owner of &pending until all subsequent callers @@ -2303,6 +2308,9 @@ do_complete: */ wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); + /* ARGH */ + WARN_ON_ONCE(my_pending.stop_pending); + return 0; } -- GitLab From ce29ddc47b91f97e7f69a0fb7cbb5845f52a9825 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 17 Feb 2021 11:56:51 -0500 Subject: [PATCH 206/839] sched/membarrier: fix missing local execution of ipi_sync_rq_state() The function sync_runqueues_membarrier_state() should copy the membarrier state from the @mm received as parameter to each runqueue currently running tasks using that mm. However, the use of smp_call_function_many() skips the current runqueue, which is unintended. Replace by a call to on_each_cpu_mask(). Fixes: 227a4aadc75b ("sched/membarrier: Fix p->mm->membarrier_state racy load") Reported-by: Nadav Amit Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org # 5.4.x+ Link: https://lore.kernel.org/r/74F1E842-4A84-47BF-B6C2-5407DFDD4A4A@gmail.com --- kernel/sched/membarrier.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index acdae625c6364..b5add64d9698c 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -471,9 +471,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) } rcu_read_unlock(); - preempt_disable(); - smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1); - preempt_enable(); + on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true); free_cpumask_var(tmpmask); cpus_read_unlock(); -- GitLab From 50bf8080a94d171e843fc013abec19d8ab9f50ae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Feb 2021 23:03:51 +0100 Subject: [PATCH 207/839] static_call: Fix the module key fixup Provided the target address of a R_X86_64_PC32 relocation is aligned, the low two bits should be invariant between the relative and absolute value. Turns out the address is not aligned and things go sideways, ensure we transfer the bits in the absolute form when fixing up the key address. Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/20210225220351.GE4746@worktop.programming.kicks-ass.net --- kernel/static_call.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 6906c6ec4c97d..ae825295cf685 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long addr = (unsigned long)static_call_key(site); + unsigned long s_key = (long)site->key + (long)&site->key; + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key; /* @@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod) return -EINVAL; } - site->key = (key - (long)&site->key) | - (site->key & STATIC_CALL_SITE_FLAGS); + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; } return __static_call_init(mod, start, stop); -- GitLab From a5398bffc01fe044848c5024e5e867e407f239b8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 11:38:40 -0800 Subject: [PATCH 208/839] perf/core: Flush PMU internal buffers for per-CPU events Sometimes the PMU internal buffers have to be flushed for per-CPU events during a context switch, e.g., large PEBS. Otherwise, the perf tool may report samples in locations that do not belong to the process where the samples are processed in, because PEBS does not tag samples with PID/TID. The current code only flush the buffers for a per-task event. It doesn't check a per-CPU event. Add a new event state flag, PERF_ATTACH_SCHED_CB, to indicate that the PMU internal buffers have to be flushed for this event during a context switch. Add sched_cb_entry and perf_sched_cb_usages back to track the PMU/cpuctx which is required to be flushed. Only need to invoke the sched_task() for per-CPU events in this patch. The per-task events have been handled in perf_event_context_sched_in/out already. Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context switches") Reported-by: Gabriel Marin Originally-by: Namhyung Kim Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201130193842.10569-1-kan.liang@linux.intel.com --- include/linux/perf_event.h | 2 ++ kernel/events/core.c | 42 ++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fab42cfbd3502..3f7f89ea5e512 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -606,6 +606,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 +#define PERF_ATTACH_SCHED_CB 0x20 struct perf_cgroup; struct perf_buffer; @@ -872,6 +873,7 @@ struct perf_cpu_context { struct list_head cgrp_cpuctx_entry; #endif + struct list_head sched_cb_entry; int sched_cb_usage; int online; diff --git a/kernel/events/core.c b/kernel/events/core.c index 0aeca5f3c0acc..03db40f6cba90 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); +static DEFINE_PER_CPU(int, perf_sched_cb_usages); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; @@ -3461,11 +3462,16 @@ unlock: } } +static DEFINE_PER_CPU(struct list_head, sched_cb_list); + void perf_sched_cb_dec(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - --cpuctx->sched_cb_usage; + this_cpu_dec(perf_sched_cb_usages); + + if (!--cpuctx->sched_cb_usage) + list_del(&cpuctx->sched_cb_entry); } @@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - cpuctx->sched_cb_usage++; + if (!cpuctx->sched_cb_usage++) + list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); + + this_cpu_inc(perf_sched_cb_usages); } /* @@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } +static void perf_pmu_sched_task(struct task_struct *prev, + struct task_struct *next, + bool sched_in) +{ + struct perf_cpu_context *cpuctx; + + if (prev == next) + return; + + list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) { + /* will be handled in perf_event_context_sched_in/out */ + if (cpuctx->task_ctx) + continue; + + __perf_pmu_sched_task(cpuctx, sched_in); + } +} + static void perf_event_switch(struct task_struct *task, struct task_struct *next_prev, bool sched_in); @@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(task, next, false); + if (atomic_read(&nr_switch_events)) perf_event_switch(task, next, false); @@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(&nr_switch_events)) perf_event_switch(task, prev, true); + + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); @@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); @@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void) #ifdef CONFIG_CGROUP_PERF INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); #endif + INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); } } -- GitLab From afbef30149587ad46f4780b1e0cc5e219745ce90 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 11:38:41 -0800 Subject: [PATCH 209/839] perf/x86/intel: Set PERF_ATTACH_SCHED_CB for large PEBS and LBR To supply a PID/TID for large PEBS, it requires flushing the PEBS buffer in a context switch. For normal LBRs, a context switch can flip the address space and LBR entries are not tagged with an identifier, we need to wipe the LBR, even for per-cpu events. For LBR callstack, save/restore the stack is required during a context switch. Set PERF_ATTACH_SCHED_CB for the event with large PEBS & LBR. Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context switches") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201130193842.10569-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5bac48d5c18e8..7bbb5bb98d8cd 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) + ~intel_pmu_large_pebs_flags(event))) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; + event->attach_state |= PERF_ATTACH_SCHED_CB; + } } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event) ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; + event->attach_state |= PERF_ATTACH_SCHED_CB; /* * BTS is set up earlier in this path, so don't account twice -- GitLab From e504e74cc3a2c092b05577ce3e8e013fae7d94e6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Feb 2021 08:24:02 -0600 Subject: [PATCH 210/839] x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2 KASAN reserves "redzone" areas between stack frames in order to detect stack overruns. A read or write to such an area triggers a KASAN "stack-out-of-bounds" BUG. Normally, the ORC unwinder stays in-bounds and doesn't access the redzone. But sometimes it can't find ORC metadata for a given instruction. This can happen for code which is missing ORC metadata, or for generated code. In such cases, the unwinder attempts to fall back to frame pointers, as a best-effort type thing. This fallback often works, but when it doesn't, the unwinder can get confused and go off into the weeds into the KASAN redzone, triggering the aforementioned KASAN BUG. But in this case, the unwinder's confusion is actually harmless and working as designed. It already has checks in place to prevent off-stack accesses, but those checks get short-circuited by the KASAN BUG. And a BUG is a lot more disruptive than a harmless unwinder warning. Disable the KASAN checks by using READ_ONCE_NOCHECK() for all stack accesses. This finishes the job started by commit 881125bfe65b ("x86/unwind: Disable KASAN checking in the ORC unwinder"), which only partially fixed the issue. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reported-by: Ivan Babrou Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Steven Rostedt (VMware) Tested-by: Ivan Babrou Cc: stable@kernel.org Link: https://lkml.kernel.org/r/9583327904ebbbeda399eca9c56d6c7085ac20fe.1612534649.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 2a1d47f47eee2..1bcc14c870abd 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off, return false; if (state->full_regs) { - *val = ((unsigned long *)state->regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); return true; } if (state->prev_regs) { - *val = ((unsigned long *)state->prev_regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); return true; } -- GitLab From b59cc97674c947861783ca92b9a6e7d043adba96 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 5 Feb 2021 08:24:03 -0600 Subject: [PATCH 211/839] x86/unwind/orc: Silence warnings caused by missing ORC data The ORC unwinder attempts to fall back to frame pointers when ORC data is missing for a given instruction. It sets state->error, but then tries to keep going as a best-effort type of thing. That may result in further warnings if the unwinder gets lost. Until we have some way to register generated code with the unwinder, missing ORC will be expected, and occasionally going off the rails will also be expected. So don't warn about it. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Tested-by: Ivan Babrou Link: https://lkml.kernel.org/r/06d02c4bbb220bd31668db579278b0352538efbb.1612534649.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1bcc14c870abd..a1202536fc57c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -13,7 +13,7 @@ #define orc_warn_current(args...) \ ({ \ - if (state->task == current) \ + if (state->task == current && !state->error) \ orc_warn(args); \ }) -- GitLab From 5d5675df792ff67e74a500c4c94db0f99e6a10ef Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Mar 2021 11:05:54 -0800 Subject: [PATCH 212/839] x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls On a 32-bit fast syscall that fails to read its arguments from user memory, the kernel currently does syscall exit work but not syscall entry work. This confuses audit and ptrace. For example: $ ./tools/testing/selftests/x86/syscall_arg_fault_32 ... strace: pid 264258: entering, ptrace_syscall_info.op == 2 ... This is a minimal fix intended for ease of backporting. A more complete cleanup is coming. Fixes: 0b085e68f407 ("x86/entry: Consolidate 32/64 bit syscall entry") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/8c82296ddf803b91f8d1e5eac89e5803ba54ab0e.1614884673.git.luto@kernel.org --- arch/x86/entry/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a2433ae8a65e7..4efd39aacb9f2 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) regs->ax = -EFAULT; instrumentation_end(); - syscall_exit_to_user_mode(regs); + local_irq_disable(); + irqentry_exit_to_user_mode(regs); return false; } -- GitLab From a65a802aadba072ca7514fc0c301fd7fdc6fc6cb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 5 Mar 2021 09:41:22 +0100 Subject: [PATCH 213/839] m68k: Fix virt_addr_valid() W=1 compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DEBUG_SG=y, and CONFIG_MMU=y: include/linux/scatterlist.h: In function ‘sg_set_buf’: arch/m68k/include/asm/page_mm.h:174:49: warning: ordered comparison of pointer with null pointer [-Wextra] 174 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ or CONFIG_MMU=n: include/linux/scatterlist.h: In function ‘sg_set_buf’: arch/m68k/include/asm/page_no.h:33:50: warning: ordered comparison of pointer with null pointer [-Wextra] 33 | #define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ | ^~ Fix this by doing the comparison in the "unsigned long" instead of the "void *" domain. Note that for now this is only seen when compiling btrfs, due to commit e9aa7c285d20a69c ("btrfs: enable W=1 checks for btrfs"), but as people are doing more W=1 compile testing, it will start to show up elsewhere, too. Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer Link: https://lore.kernel.org/r/20210305084122.4118826-1-geert@linux-m68k.org --- arch/m68k/include/asm/page_mm.h | 2 +- arch/m68k/include/asm/page_no.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 7f5912af2a52e..9e8f0cc30a2cc 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -171,7 +171,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void) #include #endif -#define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) +#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index 6bbe52025de3c..8d0f862ee9d79 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -30,8 +30,8 @@ extern unsigned long memory_end; #define page_to_pfn(page) virt_to_pfn(page_to_virt(page)) #define pfn_valid(pfn) ((pfn) < max_mapnr) -#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ - ((void *)(kaddr) < (void *)memory_end)) +#define virt_addr_valid(kaddr) (((unsigned long)(kaddr) >= PAGE_OFFSET) && \ + ((unsigned long)(kaddr) < memory_end)) #endif /* __ASSEMBLY__ */ -- GitLab From e4c3e133294c0a292d21073899b05ebf530169bd Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 2 Mar 2021 15:43:55 +0100 Subject: [PATCH 214/839] counter: stm32-timer-cnt: fix ceiling write max value The ceiling value isn't checked before writing it into registers. The user could write a value higher than the counter resolution (e.g. 16 or 32 bits indicated by max_arr). This makes most significant bits to be truncated. Fix it by checking the max_arr to report a range error [1] to the user. [1] https://lkml.org/lkml/2021/2/12/358 Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Signed-off-by: Fabrice Gasnier Acked-by: William Breathitt Gray Cc: Link: https://lore.kernel.org/r/1614696235-24088-1-git-send-email-fabrice.gasnier@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index cd50dc12bd026..2295be3f309a7 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -32,6 +32,7 @@ struct stm32_timer_cnt { struct regmap *regmap; struct clk *clk; u32 ceiling; + u32 max_arr; bool enabled; struct stm32_timer_regs bak; }; @@ -191,6 +192,9 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter, if (ret) return ret; + if (ceiling > priv->max_arr) + return -ERANGE; + /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, ceiling); @@ -371,6 +375,7 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev) priv->regmap = ddata->regmap; priv->clk = ddata->clk; priv->ceiling = ddata->max_arr; + priv->max_arr = ddata->max_arr; priv->counter.name = dev_name(dev); priv->counter.parent = dev; -- GitLab From b14d72ac731753708a7c1a6b3657b9312b6f0042 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 3 Mar 2021 18:49:49 +0100 Subject: [PATCH 215/839] counter: stm32-timer-cnt: fix ceiling miss-alignment with reload register Ceiling value may be miss-aligned with what's actually configured into the ARR register. This is seen after probe as currently the ARR value is zero, whereas ceiling value is set to the maximum. So: - reading ceiling reports zero - in case the counter gets enabled without any prior configuration, it won't count. - in case the function gets set by the user 1st, (priv->ceiling) is used. Fix it by getting rid of the cached "priv->ceiling" variable. Rather use the ARR register value directly by using regmap read or write when needed. There should be no drawback on performance as priv->ceiling isn't used in performance critical path. There's also no point in writing ARR while setting function (sms), so it can be safely removed. Fixes: ad29937e206f ("counter: Add STM32 Timer quadrature encoder") Suggested-by: William Breathitt Gray Signed-off-by: Fabrice Gasnier Acked-by: William Breathitt Gray Cc: Link: https://lore.kernel.org/r/1614793789-10346-1-git-send-email-fabrice.gasnier@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/counter/stm32-timer-cnt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index 2295be3f309a7..75bc401fdd189 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -31,7 +31,6 @@ struct stm32_timer_cnt { struct counter_device counter; struct regmap *regmap; struct clk *clk; - u32 ceiling; u32 max_arr; bool enabled; struct stm32_timer_regs bak; @@ -75,8 +74,10 @@ static int stm32_count_write(struct counter_device *counter, const unsigned long val) { struct stm32_timer_cnt *const priv = counter->priv; + u32 ceiling; - if (val > priv->ceiling) + regmap_read(priv->regmap, TIM_ARR, &ceiling); + if (val > ceiling) return -EINVAL; return regmap_write(priv->regmap, TIM_CNT, val); @@ -138,10 +139,6 @@ static int stm32_count_function_set(struct counter_device *counter, regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0); - /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); - regmap_write(priv->regmap, TIM_ARR, priv->ceiling); - regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms); /* Make sure that registers are updated */ @@ -199,7 +196,6 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter, regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, ceiling); - priv->ceiling = ceiling; return len; } @@ -374,7 +370,6 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev) priv->regmap = ddata->regmap; priv->clk = ddata->clk; - priv->ceiling = ddata->max_arr; priv->max_arr = ddata->max_arr; priv->counter.name = dev_name(dev); -- GitLab From efc61345274d6c7a46a0570efbc916fcbe3e927b Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Thu, 18 Feb 2021 10:11:32 -0500 Subject: [PATCH 216/839] ext4: shrink race window in ext4_should_retry_alloc() When generic/371 is run on kvm-xfstests using 5.10 and 5.11 kernels, it fails at significant rates on the two test scenarios that disable delayed allocation (ext3conv and data_journal) and force actual block allocation for the fallocate and pwrite functions in the test. The failure rate on 5.10 for both ext3conv and data_journal on one test system typically runs about 85%. On 5.11, the failure rate on ext3conv sometimes drops to as low as 1% while the rate on data_journal increases to nearly 100%. The observed failures are largely due to ext4_should_retry_alloc() cutting off block allocation retries when s_mb_free_pending (used to indicate that a transaction in progress will free blocks) is 0. However, free space is usually available when this occurs during runs of generic/371. It appears that a thread attempting to allocate blocks is just missing transaction commits in other threads that increase the free cluster count and reset s_mb_free_pending while the allocating thread isn't running. Explicitly testing for free space availability avoids this race. The current code uses a post-increment operator in the conditional expression that determines whether the retry limit has been exceeded. This means that the conditional expression uses the value of the retry counter before it's increased, resulting in an extra retry cycle. The current code actually retries twice before hitting its retry limit rather than once. Increasing the retry limit to 3 from the current actual maximum retry count of 2 in combination with the change described above reduces the observed failure rate to less that 0.1% on both ext3conv and data_journal with what should be limited impact on users sensitive to the overhead caused by retries. A per filesystem percpu counter exported via sysfs is added to allow users or developers to track the number of times the retry limit is exceeded without resorting to debugging methods. This should provide some insight into worst case retry behavior. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20210218151132.19678-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 38 ++++++++++++++++++++++++++------------ fs/ext4/ext4.h | 1 + fs/ext4/super.c | 5 +++++ fs/ext4/sysfs.c | 7 +++++++ 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f45f9feebe593..74a5172c2d838 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -626,27 +626,41 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, /** * ext4_should_retry_alloc() - check if a block allocation should be retried - * @sb: super block - * @retries: number of attemps has been made + * @sb: superblock + * @retries: number of retry attempts made so far * - * ext4_should_retry_alloc() is called when ENOSPC is returned, and if - * it is profitable to retry the operation, this function will wait - * for the current or committing transaction to complete, and then - * return TRUE. We will only retry once. + * ext4_should_retry_alloc() is called when ENOSPC is returned while + * attempting to allocate blocks. If there's an indication that a pending + * journal transaction might free some space and allow another attempt to + * succeed, this function will wait for the current or committing transaction + * to complete and then return TRUE. */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || - (*retries)++ > 1 || - !EXT4_SB(sb)->s_journal) + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!sbi->s_journal) return 0; - smp_mb(); - if (EXT4_SB(sb)->s_mb_free_pending == 0) + if (++(*retries) > 3) { + percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit); return 0; + } + /* + * if there's no indication that blocks are about to be freed it's + * possible we just missed a transaction commit that did so + */ + smp_mb(); + if (sbi->s_mb_free_pending == 0) + return ext4_has_free_clusters(sbi, 1, 0); + + /* + * it's possible we've just missed a transaction commit here, + * so ignore the returned status + */ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + (void) jbd2_journal_force_commit_nested(sbi->s_journal); return 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 644fd69185d36..ea3b41579f389 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1484,6 +1484,7 @@ struct ext4_sb_info { struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; struct percpu_counter s_dirtyclusters_counter; + struct percpu_counter s_sra_exceeded_retry_limit; struct blockgroup_lock *s_blockgroup_lock; struct proc_dir_entry *s_proc; struct kobject s_kobj; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ad34a37278cd5..a0a2568596622 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1210,6 +1210,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) @@ -5011,6 +5012,9 @@ no_journal: if (!err) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); + if (!err) + err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0, + GFP_KERNEL); if (!err) err = percpu_init_rwsem(&sbi->s_writepages_rwsem); @@ -5124,6 +5128,7 @@ failed_mount6: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 075aa3a19ff5f..a3d08276d441e 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -24,6 +24,7 @@ typedef enum { attr_session_write_kbytes, attr_lifetime_write_kbytes, attr_reserved_clusters, + attr_sra_exceeded_retry_limit, attr_inode_readahead, attr_trigger_test_error, attr_first_error_time, @@ -202,6 +203,7 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); EXT4_ATTR_FUNC(session_write_kbytes, 0444); EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); EXT4_ATTR_FUNC(reserved_clusters, 0644); +EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); @@ -251,6 +253,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(reserved_clusters), + ATTR_LIST(sra_exceeded_retry_limit), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), @@ -374,6 +377,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj, return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); + case attr_sra_exceeded_retry_limit: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) + percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: case attr_pointer_ui: if (!ptr) -- GitLab From 163f0ec1df33cf468509ff38cbcbb5eb0d7fac60 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2021 18:16:26 +0100 Subject: [PATCH 217/839] ext4: add reclaim checks to xattr code Syzbot is reporting that ext4 can enter fs reclaim from kvmalloc() while the transaction is started like: fs_reclaim_acquire+0x117/0x150 mm/page_alloc.c:4340 might_alloc include/linux/sched/mm.h:193 [inline] slab_pre_alloc_hook mm/slab.h:493 [inline] slab_alloc_node mm/slub.c:2817 [inline] __kmalloc_node+0x5f/0x430 mm/slub.c:4015 kmalloc_node include/linux/slab.h:575 [inline] kvmalloc_node+0x61/0xf0 mm/util.c:587 kvmalloc include/linux/mm.h:781 [inline] ext4_xattr_inode_cache_find fs/ext4/xattr.c:1465 [inline] ext4_xattr_inode_lookup_create fs/ext4/xattr.c:1508 [inline] ext4_xattr_set_entry+0x1ce6/0x3780 fs/ext4/xattr.c:1649 ext4_xattr_ibody_set+0x78/0x2b0 fs/ext4/xattr.c:2224 ext4_xattr_set_handle+0x8f4/0x13e0 fs/ext4/xattr.c:2380 ext4_xattr_set+0x13a/0x340 fs/ext4/xattr.c:2493 This should be impossible since transaction start sets PF_MEMALLOC_NOFS. Add some assertions to the code to catch if something isn't working as expected early. Link: https://lore.kernel.org/linux-ext4/000000000000563a0205bafb7970@google.com/ Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210222171626.21884-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 372208500f4e7..083c95126781d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1462,6 +1462,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, if (!ce) return NULL; + WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) && + !(current->flags & PF_MEMALLOC_NOFS)); + ea_data = kvmalloc(value_len, GFP_KERNEL); if (!ea_data) { mb_cache_entry_put(ea_inode_cache, ce); @@ -2327,6 +2330,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, error = -ENOSPC; goto cleanup; } + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } error = ext4_reserve_inode_write(handle, inode, &is.iloc); -- GitLab From f91436d55a279f045987e8b8c1385585dca54be9 Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Wed, 24 Feb 2021 15:58:00 +0600 Subject: [PATCH 218/839] fs/ext4: fix integer overflow in s_log_groups_per_flex syzbot found UBSAN: shift-out-of-bounds in ext4_mb_init [1], when 1 << sbi->s_es->s_log_groups_per_flex is bigger than UINT_MAX, where sbi->s_mb_prefetch is unsigned integer type. 32 is the maximum allowed power of s_log_groups_per_flex. Following if check will also trigger UBSAN shift-out-of-bound: if (1 << sbi->s_es->s_log_groups_per_flex >= UINT_MAX) { So I'm checking it against the raw number, perhaps there is another way to calculate UINT_MAX max power. Also use min_t as to make sure it's uint type. [1] UBSAN: shift-out-of-bounds in fs/ext4/mballoc.c:2713:24 shift exponent 60 is too large for 32-bit type 'int' Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x137/0x1be lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:148 [inline] __ubsan_handle_shift_out_of_bounds+0x432/0x4d0 lib/ubsan.c:395 ext4_mb_init_backend fs/ext4/mballoc.c:2713 [inline] ext4_mb_init+0x19bc/0x19f0 fs/ext4/mballoc.c:2898 ext4_fill_super+0xc2ec/0xfbe0 fs/ext4/super.c:4983 Reported-by: syzbot+a8b4b0c60155e87e9484@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210224095800.3350002-1-snovitoll@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99bf091fee10e..a02fadf4fc84e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2709,8 +2709,15 @@ static int ext4_mb_init_backend(struct super_block *sb) } if (ext4_has_feature_flex_bg(sb)) { - /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + /* a single flex group is supposed to be read by a single IO. + * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is + * unsigned integer, so the maximum shift is 32. + */ + if (sbi->s_es->s_log_groups_per_flex >= 32) { + ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group"); + goto err_freesgi; + } + sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex, BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { -- GitLab From c915fb80eaa6194fa9bd0a4487705cd5b0dda2f1 Mon Sep 17 00:00:00 2001 From: Zhaolong Zhang Date: Tue, 2 Mar 2021 17:42:31 +0800 Subject: [PATCH 219/839] ext4: fix bh ref count on error paths __ext4_journalled_writepage should drop bhs' ref count on error paths Signed-off-by: Zhaolong Zhang Link: https://lore.kernel.org/r/1614678151-70481-1-git-send-email-zhangzl2013@126.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 650c5acd2f2d9..a79a9ea58c568 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1938,13 +1938,13 @@ static int __ext4_journalled_writepage(struct page *page, if (!ret) ret = err; - if (!ext4_has_inline_data(inode)) - ext4_walk_page_buffers(NULL, page_bufs, 0, len, - NULL, bput_one); ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: unlock_page(page); out_no_pagelock: + if (!inline_data && page_bufs) + ext4_walk_page_buffers(NULL, page_bufs, 0, len, + NULL, bput_one); brelse(inode_bh); return ret; } -- GitLab From 37e89e574dc238a4ebe439543c5ab4fbb2f0311b Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Wed, 3 Mar 2021 14:36:12 +0800 Subject: [PATCH 220/839] iio: hid-sensor-humidity: Fix alignment issue of timestamp channel This patch ensures that, there is sufficient space and correct alignment for the timestamp. Fixes: d7ed89d5aadf ("iio: hid: Add humidity sensor support") Signed-off-by: Ye Xiang Cc: Link: https://lore.kernel.org/r/20210303063615.12130-2-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hid-sensor-humidity.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c index 52f605114ef77..d62705448ae25 100644 --- a/drivers/iio/humidity/hid-sensor-humidity.c +++ b/drivers/iio/humidity/hid-sensor-humidity.c @@ -15,7 +15,10 @@ struct hid_humidity_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info humidity_attr; - s32 humidity_data; + struct { + s32 humidity_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -125,9 +128,8 @@ static int humidity_proc_event(struct hid_sensor_hub_device *hsdev, struct hid_humidity_state *humid_st = iio_priv(indio_dev); if (atomic_read(&humid_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &humid_st->humidity_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &humid_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -142,7 +144,7 @@ static int humidity_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY: - humid_st->humidity_data = *(s32 *)raw_data; + humid_st->scan.humidity_data = *(s32 *)raw_data; return 0; default: -- GitLab From 141e7633aa4d2838d1f6ad5c74cccc53547c16ac Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Wed, 3 Mar 2021 14:36:14 +0800 Subject: [PATCH 221/839] iio: hid-sensor-temperature: Fix issues of timestamp channel This patch fixes 2 issues of timestamp channel: 1. This patch ensures that there is sufficient space and correct alignment for the timestamp. 2. Correct the timestamp channel scan index. Fixes: 59d0f2da3569 ("iio: hid: Add temperature sensor support") Signed-off-by: Ye Xiang Cc: Link: https://lore.kernel.org/r/20210303063615.12130-4-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/hid-sensor-temperature.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c index 81688f1b932f1..da9a247097fa2 100644 --- a/drivers/iio/temperature/hid-sensor-temperature.c +++ b/drivers/iio/temperature/hid-sensor-temperature.c @@ -15,7 +15,10 @@ struct temperature_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info temperature_attr; - s32 temperature_data; + struct { + s32 temperature_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -32,7 +35,7 @@ static const struct iio_chan_spec temperature_channels[] = { BIT(IIO_CHAN_INFO_SAMP_FREQ) | BIT(IIO_CHAN_INFO_HYSTERESIS), }, - IIO_CHAN_SOFT_TIMESTAMP(3), + IIO_CHAN_SOFT_TIMESTAMP(1), }; /* Adjust channel real bits based on report descriptor */ @@ -123,9 +126,8 @@ static int temperature_proc_event(struct hid_sensor_hub_device *hsdev, struct temperature_state *temp_st = iio_priv(indio_dev); if (atomic_read(&temp_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &temp_st->temperature_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &temp_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -140,7 +142,7 @@ static int temperature_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE: - temp_st->temperature_data = *(s32 *)raw_data; + temp_st->scan.temperature_data = *(s32 *)raw_data; return 0; default: return -EINVAL; -- GitLab From 6dbbbe4cfd398704b72b21c1d4a5d3807e909d60 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 1 Mar 2021 16:04:21 +0800 Subject: [PATCH 222/839] iio: gyro: mpu3050: Fix error handling in mpu3050_trigger_handler There is one regmap_bulk_read() call in mpu3050_trigger_handler that we have caught its return value bug lack further handling. Check and terminate the execution flow just like the other three regmap_bulk_read() calls in this function. Fixes: 3904b28efb2c7 ("iio: gyro: Add driver for the MPU-3050 gyroscope") Signed-off-by: Dinghao Liu Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210301080421.13436-1-dinghao.liu@zju.edu.cn Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index dfa31a23500f0..ac90be03332af 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -551,6 +551,8 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p) MPU3050_FIFO_R, &fifo_values[offset], toread); + if (ret) + goto out_trigger_unlock; dev_dbg(mpu3050->dev, "%04x %04x %04x %04x %04x\n", -- GitLab From a249cc8bc2e2fed680047d326eb9a50756724198 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 4 Mar 2021 17:42:21 +0000 Subject: [PATCH 223/839] cifs: fix credit accounting for extra channel With multichannel, operations like the queries from "ls -lR" can cause all credits to be used and errors to be returned since max_credits was not being set correctly on the secondary channels and thus the client was requesting 0 credits incorrectly in some cases (which can lead to not having enough credits to perform any operation on that channel). Signed-off-by: Aurelien Aptel CC: # v5.8+ Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 10 +++++----- fs/cifs/sess.c | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 112692300fb60..68642e3d42705 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1429,6 +1429,11 @@ smbd_connected: tcp_ses->min_offload = ctx->min_offload; tcp_ses->tcpStatus = CifsNeedNegotiate; + if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) + tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE; + else + tcp_ses->max_credits = ctx->max_credits; + tcp_ses->nr_targets = 1; tcp_ses->ignore_signature = ctx->ignore_signature; /* thread spawned, put it on the list */ @@ -2832,11 +2837,6 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif *nserver = server; - if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) - server->max_credits = SMB2_MAX_CREDITS_AVAILABLE; - else - server->max_credits = ctx->max_credits; - /* get a reference to a SMB session */ ses = cifs_get_smb_ses(server, ctx); if (IS_ERR(ses)) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 183a3a868d7b9..63d517b9f2ffe 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -230,6 +230,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, ctx.noautotune = ses->server->noautotune; ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay; ctx.echo_interval = ses->server->echo_interval / HZ; + ctx.max_credits = ses->server->max_credits; /* * This will be used for encoding/decoding user/domain/pw -- GitLab From 88fd98a2306755b965e4f4567f84e73db3b6738c Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 4 Mar 2021 17:51:48 +0000 Subject: [PATCH 224/839] cifs: ask for more credit on async read/write code paths When doing a large read or write workload we only very gradually increase the number of credits which can cause problems with parallelizing large i/o (I/O ramps up more slowly than it should for large read/write workloads) especially with multichannel when the number of credits on the secondary channels starts out low (e.g. less than about 130) or when recovering after server throttled back the number of credit. Signed-off-by: Aurelien Aptel Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4bbb6126b14d6..2199a9bfae8f7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -4041,8 +4041,7 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = - cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); + shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (rc) @@ -4348,8 +4347,7 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = - cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); + shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); rc = adjust_credits(server, &wdata->credits, wdata->bytes); if (rc) -- GitLab From 886d0137f104a440d9dfa1d16efc1db06c9a2c02 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Mar 2021 12:59:30 -0700 Subject: [PATCH 225/839] io-wq: fix race in freeing 'wq' and worker access Ran into a use-after-free on the main io-wq struct, wq. It has a worker ref and completion event, but the manager itself isn't holding a reference. This can lead to a race where the manager thinks there are no workers and exits, but a worker is being added. That leads to the following trace: BUG: KASAN: use-after-free in io_wqe_worker+0x4c0/0x5e0 Read of size 8 at addr ffff888108baa8a0 by task iou-wrk-3080422/3080425 CPU: 5 PID: 3080425 Comm: iou-wrk-3080422 Not tainted 5.12.0-rc1+ #110 Hardware name: Micro-Star International Co., Ltd. MS-7C60/TRX40 PRO 10G (MS-7C60), BIOS 1.60 05/13/2020 Call Trace: dump_stack+0x90/0xbe print_address_description.constprop.0+0x67/0x28d ? io_wqe_worker+0x4c0/0x5e0 kasan_report.cold+0x7b/0xd4 ? io_wqe_worker+0x4c0/0x5e0 __asan_load8+0x6d/0xa0 io_wqe_worker+0x4c0/0x5e0 ? io_worker_handle_work+0xc00/0xc00 ? recalc_sigpending+0xe5/0x120 ? io_worker_handle_work+0xc00/0xc00 ? io_worker_handle_work+0xc00/0xc00 ret_from_fork+0x1f/0x30 Allocated by task 3080422: kasan_save_stack+0x23/0x60 __kasan_kmalloc+0x80/0xa0 kmem_cache_alloc_node_trace+0xa0/0x480 io_wq_create+0x3b5/0x600 io_uring_alloc_task_context+0x13c/0x380 io_uring_add_task_file+0x109/0x140 __x64_sys_io_uring_enter+0x45f/0x660 do_syscall_64+0x32/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 3080422: kasan_save_stack+0x23/0x60 kasan_set_track+0x20/0x40 kasan_set_free_info+0x24/0x40 __kasan_slab_free+0xe8/0x120 kfree+0xa8/0x400 io_wq_put+0x14a/0x220 io_wq_put_and_exit+0x9a/0xc0 io_uring_clean_tctx+0x101/0x140 __io_uring_files_cancel+0x36e/0x3c0 do_exit+0x169/0x1340 __x64_sys_exit+0x34/0x40 do_syscall_64+0x32/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Have the manager itself hold a reference, and now both drop points drop and complete if we hit zero, and the manager can unconditionally do a wait_for_completion() instead of having a race between reading the ref count and waiting if it was non-zero. Fixes: fb3a1f6c745c ("io-wq: have manager wait for all workers to exit") Signed-off-by: Jens Axboe --- fs/io-wq.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 28868eb4cd099..1bfdb86336e49 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -722,9 +722,9 @@ static int io_wq_manager(void *data) io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); rcu_read_unlock(); - /* we might not ever have created any workers */ - if (atomic_read(&wq->worker_refs)) - wait_for_completion(&wq->worker_done); + if (atomic_dec_and_test(&wq->worker_refs)) + complete(&wq->worker_done); + wait_for_completion(&wq->worker_done); spin_lock_irq(&wq->hash->wait.lock); for_each_node(node) @@ -774,7 +774,8 @@ static int io_wq_fork_manager(struct io_wq *wq) if (wq->manager) return 0; - reinit_completion(&wq->worker_done); + init_completion(&wq->worker_done); + atomic_set(&wq->worker_refs, 1); tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE); if (!IS_ERR(tsk)) { wq->manager = get_task_struct(tsk); @@ -782,6 +783,9 @@ static int io_wq_fork_manager(struct io_wq *wq) return 0; } + if (atomic_dec_and_test(&wq->worker_refs)) + complete(&wq->worker_done); + return PTR_ERR(tsk); } @@ -1018,13 +1022,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) init_completion(&wq->exited); refcount_set(&wq->refs, 1); - init_completion(&wq->worker_done); - atomic_set(&wq->worker_refs, 0); - ret = io_wq_fork_manager(wq); if (!ret) return wq; - err: io_wq_put_hash(data->hash); cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); -- GitLab From 003e8dccdb22712dae388e682182d5f08b32386f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 6 Mar 2021 09:22:27 -0700 Subject: [PATCH 226/839] io-wq: always track creds for async issue If we go async with a request, grab the creds that the task currently has assigned and make sure that the async side switches to them. This is handled in the same way that we do for registered personalities. Signed-off-by: Jens Axboe --- fs/io-wq.h | 2 +- fs/io_uring.c | 33 +++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index 5fbf7997149ee..1ac2f3248088e 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; + const struct cred *creds; unsigned flags; - unsigned short personality; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 92c25b5f13497..d51c6ba9268bd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1183,6 +1183,9 @@ static void io_prep_async_work(struct io_kiocb *req) const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; + if (!req->work.creds) + req->work.creds = get_current_cred(); + if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; @@ -1648,6 +1651,10 @@ static void io_dismantle_req(struct io_kiocb *req) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); if (req->fixed_rsrc_refs) percpu_ref_put(req->fixed_rsrc_refs); + if (req->work.creds) { + put_cred(req->work.creds); + req->work.creds = NULL; + } if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; @@ -5916,18 +5923,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) const struct cred *creds = NULL; int ret; - if (req->work.personality) { - const struct cred *new_creds; - - if (!(issue_flags & IO_URING_F_NONBLOCK)) - mutex_lock(&ctx->uring_lock); - new_creds = idr_find(&ctx->personality_idr, req->work.personality); - if (!(issue_flags & IO_URING_F_NONBLOCK)) - mutex_unlock(&ctx->uring_lock); - if (!new_creds) - return -EINVAL; - creds = override_creds(new_creds); - } + if (req->work.creds && req->work.creds != current_cred()) + creds = override_creds(req->work.creds); switch (req->opcode) { case IORING_OP_NOP: @@ -6291,7 +6288,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, { struct io_submit_state *state; unsigned int sqe_flags; - int ret = 0; + int personality, ret = 0; req->opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -6324,8 +6321,16 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, return -EOPNOTSUPP; req->work.list.next = NULL; + personality = READ_ONCE(sqe->personality); + if (personality) { + req->work.creds = idr_find(&ctx->personality_idr, personality); + if (!req->work.creds) + return -EINVAL; + get_cred(req->work.creds); + } else { + req->work.creds = NULL; + } req->work.flags = 0; - req->work.personality = READ_ONCE(sqe->personality); state = &ctx->submit_state; /* -- GitLab From 3ae0415d0bb401abad1db7468105e3d3756e153f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:16:45 -0300 Subject: [PATCH 227/839] tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: 0e0dc448005583a6 ("drm/doc: demote old doc-comments in drm.h") Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h No changes in tooling as these are just C comment documentation changes. Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 97 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 808b48a93330b..0827037c54847 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1,11 +1,10 @@ -/** - * \file drm.h +/* * Header for the Direct Rendering Manager * - * \author Rickard E. (Rik) Faith + * Author: Rickard E. (Rik) Faith * - * \par Acknowledgments: - * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. */ /* @@ -85,7 +84,7 @@ typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; -/** +/* * Cliprect. * * \warning: If you change this structure, make sure you change @@ -101,7 +100,7 @@ struct drm_clip_rect { unsigned short y2; }; -/** +/* * Drawable information. */ struct drm_drawable_info { @@ -109,7 +108,7 @@ struct drm_drawable_info { struct drm_clip_rect *rects; }; -/** +/* * Texture region, */ struct drm_tex_region { @@ -120,7 +119,7 @@ struct drm_tex_region { unsigned int age; }; -/** +/* * Hardware lock. * * The lock structure is a simple cache-line aligned integer. To avoid @@ -132,7 +131,7 @@ struct drm_hw_lock { char padding[60]; /**< Pad to cache line */ }; -/** +/* * DRM_IOCTL_VERSION ioctl argument type. * * \sa drmGetVersion(). @@ -149,7 +148,7 @@ struct drm_version { char __user *desc; /**< User-space buffer to hold desc */ }; -/** +/* * DRM_IOCTL_GET_UNIQUE ioctl argument type. * * \sa drmGetBusid() and drmSetBusId(). @@ -168,7 +167,7 @@ struct drm_block { int unused; }; -/** +/* * DRM_IOCTL_CONTROL ioctl argument type. * * \sa drmCtlInstHandler() and drmCtlUninstHandler(). @@ -183,7 +182,7 @@ struct drm_control { int irq; }; -/** +/* * Type of memory to map. */ enum drm_map_type { @@ -195,7 +194,7 @@ enum drm_map_type { _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ }; -/** +/* * Memory mapping flags. */ enum drm_map_flags { @@ -214,7 +213,7 @@ struct drm_ctx_priv_map { void *handle; /**< Handle of map */ }; -/** +/* * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls * argument type. * @@ -231,7 +230,7 @@ struct drm_map { /* Private data */ }; -/** +/* * DRM_IOCTL_GET_CLIENT ioctl argument type. */ struct drm_client { @@ -263,7 +262,7 @@ enum drm_stat_type { /* Add to the *END* of the list */ }; -/** +/* * DRM_IOCTL_GET_STATS ioctl argument type. */ struct drm_stats { @@ -274,7 +273,7 @@ struct drm_stats { } data[15]; }; -/** +/* * Hardware locking flags. */ enum drm_lock_flags { @@ -289,7 +288,7 @@ enum drm_lock_flags { _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ }; -/** +/* * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. * * \sa drmGetLock() and drmUnlock(). @@ -299,7 +298,7 @@ struct drm_lock { enum drm_lock_flags flags; }; -/** +/* * DMA flags * * \warning @@ -328,7 +327,7 @@ enum drm_dma_flags { _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ }; -/** +/* * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. * * \sa drmAddBufs(). @@ -351,7 +350,7 @@ struct drm_buf_desc { */ }; -/** +/* * DRM_IOCTL_INFO_BUFS ioctl argument type. */ struct drm_buf_info { @@ -359,7 +358,7 @@ struct drm_buf_info { struct drm_buf_desc __user *list; }; -/** +/* * DRM_IOCTL_FREE_BUFS ioctl argument type. */ struct drm_buf_free { @@ -367,7 +366,7 @@ struct drm_buf_free { int __user *list; }; -/** +/* * Buffer information * * \sa drm_buf_map. @@ -379,7 +378,7 @@ struct drm_buf_pub { void __user *address; /**< Address of buffer */ }; -/** +/* * DRM_IOCTL_MAP_BUFS ioctl argument type. */ struct drm_buf_map { @@ -392,7 +391,7 @@ struct drm_buf_map { struct drm_buf_pub __user *list; /**< Buffer information */ }; -/** +/* * DRM_IOCTL_DMA ioctl argument type. * * Indices here refer to the offset into the buffer list in drm_buf_get. @@ -417,7 +416,7 @@ enum drm_ctx_flags { _DRM_CONTEXT_2DONLY = 0x02 }; -/** +/* * DRM_IOCTL_ADD_CTX ioctl argument type. * * \sa drmCreateContext() and drmDestroyContext(). @@ -427,7 +426,7 @@ struct drm_ctx { enum drm_ctx_flags flags; }; -/** +/* * DRM_IOCTL_RES_CTX ioctl argument type. */ struct drm_ctx_res { @@ -435,14 +434,14 @@ struct drm_ctx_res { struct drm_ctx __user *contexts; }; -/** +/* * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. */ struct drm_draw { drm_drawable_t handle; }; -/** +/* * DRM_IOCTL_UPDATE_DRAW ioctl argument type. */ typedef enum { @@ -456,14 +455,14 @@ struct drm_update_draw { unsigned long long data; }; -/** +/* * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. */ struct drm_auth { drm_magic_t magic; }; -/** +/* * DRM_IOCTL_IRQ_BUSID ioctl argument type. * * \sa drmGetInterruptFromBusID(). @@ -505,7 +504,7 @@ struct drm_wait_vblank_reply { long tval_usec; }; -/** +/* * DRM_IOCTL_WAIT_VBLANK ioctl argument type. * * \sa drmWaitVBlank(). @@ -518,7 +517,7 @@ union drm_wait_vblank { #define _DRM_PRE_MODESET 1 #define _DRM_POST_MODESET 2 -/** +/* * DRM_IOCTL_MODESET_CTL ioctl argument type * * \sa drmModesetCtl(). @@ -528,7 +527,7 @@ struct drm_modeset_ctl { __u32 cmd; }; -/** +/* * DRM_IOCTL_AGP_ENABLE ioctl argument type. * * \sa drmAgpEnable(). @@ -537,7 +536,7 @@ struct drm_agp_mode { unsigned long mode; /**< AGP mode */ }; -/** +/* * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. * * \sa drmAgpAlloc() and drmAgpFree(). @@ -549,7 +548,7 @@ struct drm_agp_buffer { unsigned long physical; /**< Physical used by i810 */ }; -/** +/* * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. * * \sa drmAgpBind() and drmAgpUnbind(). @@ -559,7 +558,7 @@ struct drm_agp_binding { unsigned long offset; /**< In bytes -- will round to page boundary */ }; -/** +/* * DRM_IOCTL_AGP_INFO ioctl argument type. * * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), @@ -580,7 +579,7 @@ struct drm_agp_info { unsigned short id_device; }; -/** +/* * DRM_IOCTL_SG_ALLOC ioctl argument type. */ struct drm_scatter_gather { @@ -588,7 +587,7 @@ struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ }; -/** +/* * DRM_IOCTL_SET_VERSION ioctl argument type. */ struct drm_set_version { @@ -598,14 +597,14 @@ struct drm_set_version { int drm_dd_minor; }; -/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ struct drm_gem_close { /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ struct drm_gem_flink { /** Handle for the object being named */ __u32 handle; @@ -614,7 +613,7 @@ struct drm_gem_flink { __u32 name; }; -/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ struct drm_gem_open { /** Name of object being opened */ __u32 name; @@ -652,7 +651,7 @@ struct drm_gem_open { #define DRM_CAP_SYNCOBJ 0x13 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** DRM_IOCTL_GET_CAP ioctl argument type */ +/* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { __u64 capability; __u64 value; @@ -678,7 +677,9 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_ATOMIC * - * If set to 1, the DRM core will expose atomic properties to userspace + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -698,7 +699,7 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; __u64 value; @@ -950,7 +951,7 @@ extern "C" { #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) -/** +/* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. * Generic IOCTLS restart at 0xA0. @@ -961,7 +962,7 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/** +/* * Header for events written back to userspace on the drm fd. The * type defines the type of event, the length specifies the total * length of the event (including the header), and user_data is -- GitLab From c2446944b3f588d6a0186f2022a2999c90e0cb63 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:21:00 -0300 Subject: [PATCH 228/839] tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") 348fb0cb0a79bce0 ("drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking") That don't result in any change in tooling: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/i915_drm.h tools/include/uapi/drm/i915_drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after $ Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index fa1f3d62f9a6c..1987e2ea79a3b 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- GitLab From 1e61463cfcd0b3e7a19ba36b8a98c64ebaac5c6e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:44:37 -0300 Subject: [PATCH 229/839] tools headers UAPI: Sync openat2.h with the kernel sources To pick the changes in: 99668f618062816c ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") That don't result in any change in tooling, only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/openat2.h' differs from latest version at 'include/uapi/linux/openat2.h' diff -u tools/include/uapi/linux/openat2.h include/uapi/linux/openat2.h Cc: Al Viro Cc: Jens Axboe Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/openat2.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb7113600..a5feb76049487 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ -- GitLab From add76c0113ba6343a221f1ba1fa5edc8963db07c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:48:05 -0300 Subject: [PATCH 230/839] perf arch powerpc: Sync powerpc syscall.tbl with the kernel sources To get the changes in: fbcee2ebe8edbb6a ("powerpc/32: Always save non volatile GPRs at syscall entry") That shouldn't cause any change in tooling, just silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Cc: Christophe Leroy Cc: Michael Ellerman Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/powerpc/entry/syscalls/syscall.tbl | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index f744eb5cba887..96b2157f03719 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd -- GitLab From 303550a44741de7e853d1c0f1d252a8719a88cb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:51:17 -0300 Subject: [PATCH 231/839] tools headers UAPI s390: Sync ptrace.h kernel headers To pick up the changes from: 56e62a7370283601 ("s390: convert to generic entry") That only adds two new defines, so shouldn't cause problems when building the BPF selftests. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/ptrace.h' differs from latest version at 'arch/s390/include/uapi/asm/ptrace.h' diff -u tools/arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/ptrace.h Cc: Hendrik Brueckner Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c81..ad64d673b5e6b 100644 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include -- GitLab From 21b7e35bdf0a0e44525ec4e8a7862eb4a8df8ebe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:56:50 -0300 Subject: [PATCH 232/839] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: d9a47edabc4f9481 ("KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR") 8d4e7e80838f45d3 ("KVM: x86: declare Xen HVM shared info capability and add test case") 40da8ccd724f7ca2 ("KVM: x86/xen: Add event channel interrupt vector upcall") These new IOCTLs are now supported on 'perf trace': $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2021-02-23 09:55:46.229058308 -0300 +++ after 2021-02-23 09:55:57.509308058 -0300 @@ -91,6 +91,10 @@ [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", [0xc7] = "RESET_DIRTY_RINGS", + [0xc8] = "XEN_HVM_GET_ATTR", + [0xc9] = "XEN_HVM_SET_ATTR", + [0xca] = "XEN_VCPU_GET_ATTR", + [0xcb] = "XEN_VCPU_SET_ATTR", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: David Woodhouse Cc: Paul Mackerras Cc: Ravi Bangoria Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index abb89bbe56354..8b281f722e5bd 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -216,6 +216,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -252,6 +266,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -428,6 +444,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. */ char padding[256]; }; @@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING @@ -1132,6 +1151,10 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1566,6 +1589,45 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1594,6 +1656,8 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, KVM_SEV_NR_MAX, }; @@ -1646,6 +1710,12 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1767,4 +1837,7 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ -- GitLab From ded2e511a8af9f14482b11225f73db63231fc7a4 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 24 Feb 2021 18:24:10 +0000 Subject: [PATCH 233/839] perf tools: Cast (struct timeval).tv_sec when printing The musl-libc [1] defines (struct timeval).tv_sec as a 'long long' for arm and other architectures. The default build having a '-Wformat' flag, not casting the field when printing prevents from building perf. This patch casts the (struct timeval).tv_sec fields to the expected format. [1] git://git.musl-libc.org/musl Signed-off-by: Pierre Gondois Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Douglas.raillard@arm.com Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224182410.5366-1-Pierre.Gondois@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ++-- tools/perf/bench/sched-pipe.c | 4 ++-- tools/perf/bench/syscall.c | 4 ++-- tools/perf/util/header.c | 4 ++-- tools/perf/util/stat-display.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index cecce93ccc636..488f6e6ba1a55 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv) num_groups, num_groups * 2 * num_fds, thread_mode ? "threads" : "processes"); printf(" %14s: %lu.%03lu [sec]\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; case BENCH_FORMAT_SIMPLE: - printf("%lu.%03lu\n", diff.tv_sec, + printf("%lu.%03lu\n", (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; default: diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3c88d1f201f1c..a960e7a93aecf 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); printf(" %14lf usecs/op\n", @@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 5fe621cff8e93..9b751016f4b65 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec/1000)); printf(" %14lf usecs/op\n", @@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / 1000)); break; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4fe9e2a54346e..20effdff76ceb 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp) fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", - tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, - clockid_ns.tv_sec, clockid_ns.tv_nsec, + tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec, + (long) clockid_ns.tv_sec, clockid_ns.tv_nsec, clockid_name(clockid)); } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cce7a76d6473c..7f09cdaf5b600 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear) puts(CONSOLE_CLEAR); - sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { switch (config->aggr_mode) { -- GitLab From 762323eb39a257c3b9875172d5ee134bd448692c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2021 16:08:31 +0100 Subject: [PATCH 234/839] perf build: Move feature cleanup under tools/build Arnaldo reported issue for following build command: $ rm -rf /tmp/krava; mkdir /tmp/krava; make O=/tmp/krava clean CLEAN config /bin/sh: line 0: cd: /tmp/krava/feature/: No such file or directory ../../scripts/Makefile.include:17: *** output directory "/tmp/krava/feature/" does not exist. Stop. make[1]: *** [Makefile.perf:1010: config-clean] Error 2 make: *** [Makefile:90: clean] Error 2 The problem is that now that we include scripts/Makefile.include in feature's Makefile (which is fine and needed), we need to ensure the OUTPUT directory exists, before executing (out of tree) clean command. Removing the feature's cleanup from perf Makefile and fixing feature's cleanup under build Makefile, so it now checks that there's existing OUTPUT directory before calling the clean. Fixes: 211a741cd3e1 ("tools: Factor Clang, LLC and LLVM utils definitions") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Sedat Dilek # LLVM/Clang v13-git Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224150831.409639-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile | 8 +++++++- tools/perf/Makefile.perf | 10 +--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index bae48e6fa9952..5ed41b96fcded 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj all: $(OUTPUT)fixdep +# Make sure there's anything to clean, +# feature contains check for existing OUTPUT +TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./) + clean: $(call QUIET_CLEAN, fixdep) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f $(OUTPUT)fixdep $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C feature/ clean >/dev/null +ifneq ($(wildcard $(TMP_O)),) + $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null +endif $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5345ac70cd832..536f6f90af92a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS): ### Cleaning rules -# -# This is here, not in Makefile.config, because Makefile.config does -# not get included for the clean target: -# -config-clean: - $(call QUIET_CLEAN, config) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null - python-clean: $(python-clean) @@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected -- GitLab From 2b1919ec8338fad3e950f264c0c81f8b17eb6c7e Mon Sep 17 00:00:00 2001 From: Andreas Wendleder Date: Mon, 1 Mar 2021 19:56:42 +0100 Subject: [PATCH 235/839] perf tools: Clean 'generated' directory used for creating the syscall table on x86 Remove generated directory tools/perf/arch/x86/include/generated. Signed-off-by: Andreas Wendleder Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301185642.163396-1-gonsolo@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 8cc6642fce7a6..5a9f9a7bf07d1 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1 # Syscall table generation # -out := $(OUTPUT)arch/x86/include/generated/asm -header := $(out)/syscalls_64.c -sys := $(srctree)/tools/perf/arch/x86/entry/syscalls -systbl := $(sys)/syscalltbl.sh +generated := $(OUTPUT)arch/x86/include/generated +out := $(generated)/asm +header := $(out)/syscalls_64.c +sys := $(srctree)/tools/perf/arch/x86/entry/syscalls +systbl := $(sys)/syscalltbl.sh # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') @@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl) $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: - $(call QUIET_CLEAN, x86) $(RM) $(header) + $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated) archheaders: $(header) -- GitLab From ffc52b7ae5e6ff2b57c05fa8954fd4cae4efaab4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 2 Mar 2021 02:35:33 +0000 Subject: [PATCH 236/839] perf diff: Don't crash on freeing errno-session on the error path __cmd_diff() sets result of perf_session__new() to d->session. In case of failure, it's errno and perf-diff may crash with: failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) From the coredump: 0 0x00005569a62b5955 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2681 1 0x00005569a626b37d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:295 2 perf_session__delete (session=0xffffffffffffffff) at util/session.c:291 3 0x00005569a618008a in __cmd_diff () at builtin-diff.c:1239 4 cmd_diff (argc=, argv=) at builtin-diff.c:2011 [..] Funny enough, it won't always crash. For me it crashes only if failed file is second in cmd-line: the reason is that cmd_diff() check files for branch-stacks [in check_file_brstack()] and if the first file doesn't have brstacks, it doesn't proceed to try open other files from cmd-line. Check d->session before calling perf_session__delete(). Another solution would be assigning to temporary variable, checking it, but I find it easier to follow with IS_ERR() check in the same function. After some time it's still obvious why the check is needed, and with temp variable it's possible to make the same mistake. Committer testing: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff failed to open perf.data.old: No such file or directory Failed to open perf.data.old $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff # Event 'cycles:u' # # Baseline Delta Abs Shared Object Symbol # ........ ......... ................ .......................... # 0.92% +87.66% [unknown] [k] 0xffffffff8825de16 11.39% +0.04% ld-2.32.so [.] __GI___tunables_init 87.70% ld-2.32.so [.] _dl_check_map_versions $ sudo chown root:root perf.data [sudo] password for acme: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) $ After the patch: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data $ Signed-off-by: Dmitry Safonov Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: dmitry safonov Link: http://lore.kernel.org/lkml/20210302023533.1572231-1-dima@arista.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8f6c784ce629c..878e04b1fab74 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1236,7 +1236,8 @@ static int __cmd_diff(void) out_delete: data__for_each_file(i, d) { - perf_session__delete(d->session); + if (!IS_ERR(d->session)) + perf_session__delete(d->session); data__free(d); } -- GitLab From 394e4306b093d037bddcee7e1f0e8e6c53a558fc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 25 Feb 2021 11:50:02 -0500 Subject: [PATCH 237/839] perf bench numa: Fix the condition checks for max number of NUMA nodes In systems having higher node numbers available like node 255, perf numa bench will fail with SIGABORT. <<>> perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes < 0)' failed. Aborted (core dumped) <<>> Snippet from 'numactl -H' below on a powerpc system where the highest node number available is 255: available: 6 nodes (0,8,252-255) node 0 cpus: node 0 size: 519587 MB node 0 free: 516659 MB node 8 cpus: node 8 size: 523607 MB node 8 free: 486757 MB node 252 cpus: node 252 size: 0 MB node 252 free: 0 MB node 253 cpus: node 253 size: 0 MB node 253 free: 0 MB node 254 cpus: node 254 size: 0 MB node 254 free: 0 MB node 255 cpus: node 255 size: 0 MB node 255 free: 0 MB node distances: node 0 8 252 253 254 255 Note: expands to actual cpu list in the original output. These nodes 252-255 are to represent the memory on GPUs and are valid nodes. The perf numa bench init code has a condition check to see if the number of NUMA nodes (nr_nodes) exceeds MAX_NR_NODES. The value of MAX_NR_NODES defined in perf code is 64. And the 'nr_nodes' is the value from numa_max_node() which represents the highest node number available in the system. In some systems where we could have NUMA node 255, this condition check fails and results in SIGABORT. The numa benchmark uses static value of MAX_NR_NODES in the code to represent size of two NUMA node arrays and node bitmask used for setting memory policy. Patch adds a fix to dynamically allocate size for the two arrays and bitmask value based on the node numbers available in the system. With the fix, perf numa benchmark will work with node configuration on any system and thus removes the static MAX_NR_NODES value. Signed-off-by: Athira Jajeev Reviewed-by: Srikar Dronamraju Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/1614271802-1503-1-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 42 ++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 11726ec6285f3..20b87e29c96f4 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -344,18 +344,22 @@ static void mempol_restore(void) static void bind_to_memnode(int node) { - unsigned long nodemask; + struct bitmask *node_mask; int ret; if (node == NUMA_NO_NODE) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); - nodemask = 1L << node; + node_mask = numa_allocate_nodemask(); + BUG_ON(!node_mask); - ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); - dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); + numa_bitmask_clearall(node_mask); + numa_bitmask_setbit(node_mask, node); + ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1); + dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret); + + numa_bitmask_free(node_mask); BUG_ON(ret); } @@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) prctl(0, bytes_worked); } -#define MAX_NR_NODES 64 - /* * Count the number of nodes a process's threads * are spread out on. @@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) */ static int count_process_nodes(int process_nr) { - char node_present[MAX_NR_NODES] = { 0, }; + char *node_present; int nodes; int n, t; + node_present = (char *)malloc(g->p.nr_nodes * sizeof(char)); + BUG_ON(!node_present); + for (nodes = 0; nodes < g->p.nr_nodes; nodes++) + node_present[nodes] = 0; + for (t = 0; t < g->p.nr_threads; t++) { struct thread_data *td; int task_nr; @@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); - if (node < 0) /* curr_cpu was likely still -1 */ + if (node < 0) /* curr_cpu was likely still -1 */ { + free(node_present); return 0; + } node_present[node] = 1; } nodes = 0; - for (n = 0; n < MAX_NR_NODES; n++) + for (n = 0; n < g->p.nr_nodes; n++) nodes += node_present[n]; + free(node_present); return nodes; } @@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) { unsigned int loops_done_min, loops_done_max; int process_groups; - int nodes[MAX_NR_NODES]; + int *nodes; int distance; int nr_min; int nr_max; @@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) if (!g->p.show_convergence && !g->p.measure_convergence) return; + nodes = (int *)malloc(g->p.nr_nodes * sizeof(int)); + BUG_ON(!nodes); for (node = 0; node < g->p.nr_nodes; node++) nodes[node] = 0; @@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence) BUG_ON(sum > g->p.nr_tasks); - if (0 && (sum < g->p.nr_tasks)) + if (0 && (sum < g->p.nr_tasks)) { + free(nodes); return; + } /* * Count the number of distinct process groups present @@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) } tprintf("\n"); } + + free(nodes); } static void show_summary(double runtime_ns_max, int l, double *convergence) @@ -1413,7 +1429,7 @@ static int init(void) g->p.nr_nodes = numa_max_node() + 1; /* char array in count_process_nodes(): */ - BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); + BUG_ON(g->p.nr_nodes < 0); if (g->p.show_quiet && !g->p.show_details) g->p.show_details = -1; -- GitLab From 137a5258939aca56558f3a23eb229b9c4b293917 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 14:14:31 -0800 Subject: [PATCH 238/839] perf traceevent: Ensure read cmdlines are null terminated. Issue detected by address sanitizer. Fixes: cd4ceb63438e9e28 ("perf util: Save pid-cmdline mapping into tracing header") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226221431.1985458-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f507dff713c9f..8a01af783310a 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent) pr_debug("error reading saved cmdlines\n"); goto out; } + buf[ret] = '\0'; parse_saved_cmdline(pevent, buf, size); ret = 0; -- GitLab From b55ff1d1456c86209ba28fd06b1b5fb0e05d92c3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 10:31:44 -0800 Subject: [PATCH 239/839] perf tools: Fix documentation of verbose options Option doesn't take a value, make sure the man pages agree. For example: $ perf evlist --verbose=1 Error: option `verbose' takes no value Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226183145.1878782-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 2 +- tools/perf/Documentation/perf-ftrace.txt | 4 ++-- tools/perf/Documentation/perf-kallsyms.txt | 2 +- tools/perf/Documentation/perf-trace.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index c0a66400a960d..9af8b8dfb7b60 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -29,7 +29,7 @@ OPTIONS Show just the sample frequency used for each event. -v:: ---verbose=:: +--verbose:: Show all fields. -g:: diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 1e91121bac0f3..6e82b7cc0bf0f 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -28,8 +28,8 @@ OPTIONS specified: function_graph or function. -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. -F:: --funcs:: diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt index f3c620951f6eb..c97527df8ecd2 100644 --- a/tools/perf/Documentation/perf-kallsyms.txt +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -20,5 +20,5 @@ modules). OPTIONS ------- -v:: ---verbose=:: +--verbose:: Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index abc9b5d833128..f0da8cf63e9a9 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different. Filter out events for these pids and for 'trace' itself (comma separated list). -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. --no-inherit:: Child tasks do not inherit counters. -- GitLab From dacfc08dcafa7d443ab339592999e37bbb8a3ef0 Mon Sep 17 00:00:00 2001 From: Antonio Terceiro Date: Wed, 24 Feb 2021 10:00:46 -0300 Subject: [PATCH 240/839] perf build: Fix ccache usage in $(CC) when generating arch errno table This was introduced by commit e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table"). Assuming the first word of $(CC) is the actual compiler breaks usage like CC="ccache gcc": the script ends up calling ccache directly with gcc arguments, what fails. Instead of getting the first word, just remove from $(CC) any word that starts with a "-". This maintains the spirit of the original patch, while not breaking ccache users. Fixes: e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table") Signed-off-by: Antonio Terceiro Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: He Zhe Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210224130046.346977-1-antonio.terceiro@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 536f6f90af92a..f6e609673de2b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh -- GitLab From 31bf4e7cb61363b87f1606ec8efb71eebd6393cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:09 +0100 Subject: [PATCH 241/839] perf daemon: Fix control fifo permissions Add proper mode for mkfifo calls to get read and write permissions for user. We can't use O_RDWR in here, changing to standard permission value. Fixes: 6a6d1804a190 ("perf daemon: Set control fifo for session") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: John Garry Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 617feaf020f65..8f0ed2e592808 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session, dup2(fd, 2); close(fd); - if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) { perror("failed: create control fifo"); return -1; } - if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) { perror("failed: create ack fifo"); return -1; } -- GitLab From 36bc511f63fd21c0c44f973c6d064c1228ba15ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:10 +0100 Subject: [PATCH 242/839] perf daemon: Fix running test for non root user John reported that the daemon test is not working for non root user. Changing the tests configurations so it's allowed to run under normal user. Fixes: 2291bb915b55 ("perf tests: Add daemon 'list' command test") Reported-by: John Garry Signed-off-by: Jiri Olsa Tested-by: John Garry Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index e5b824dd08d9d..5ad3ca8d681b7 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -140,10 +140,10 @@ test_list() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -159,14 +159,14 @@ EOF # check 1st session # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0 local line=`perf daemon --config ${config} -x: | head -2 | tail -1` - check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \ + check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \ ${base}/session-size/output ${base}/session-size/control \ ${base}/session-size/ack "0" # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control \ ${base}/session-time/ack "0" @@ -190,10 +190,10 @@ test_reconfig() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -204,7 +204,7 @@ EOF # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` @@ -215,10 +215,10 @@ EOF base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF # TEST 1 - change config @@ -238,7 +238,7 @@ EOF # check reconfigured 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \ + check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" # TEST 2 - empty config @@ -309,10 +309,10 @@ test_stop() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -361,7 +361,7 @@ test_signal() base=BASE [session-test] -run = -e cpu-clock --switch-output +run = -e cpu-clock --switch-output -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -400,10 +400,10 @@ test_ping() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -439,7 +439,7 @@ test_lock() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} -- GitLab From 84ea603650ec41273cc97d50eb01feed8e6baa2e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:23:15 +0100 Subject: [PATCH 243/839] perf tools: Fix event's PMU name parsing Jin Yao reported parser error for software event: # perf stat -e software/r1a/ -a -- sleep 1 event syntax error: 'software/r1a/' \___ parser error This happens after commit 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time"), where new software-gt-awake-time event's non-pmu-event-style makes event parser conflict with software PMU. If we allow PE_PMU_EVENT_PRE to be parsed as PMU name, we fix the conflict and the following character '/' for PMU or '-' for non-pmu-event-style event allows parser to decide what even is specified. Fixes: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") Reported-by: Jin Yao Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Chris Wilson Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122315.63471-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d5b6aff82f217..d57ac86ce7ca2 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list, %type PE_EVENT_NAME %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type PE_DRV_CFG_TERM +%type event_pmu_name %destructor { free ($$); } %type event_term %destructor { parse_events_term__delete ($$); } @@ -272,8 +273,11 @@ event_def: event_pmu | event_legacy_raw sep_dc | event_bpf_file +event_pmu_name: +PE_NAME | PE_PMU_EVENT_PRE + event_pmu: -PE_NAME opt_pmu_config +event_pmu_name opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; -- GitLab From b0faef924d21d0a4592ec81c4bc2b4badc35a343 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:03:59 +0900 Subject: [PATCH 244/839] perf test: Fix cpu and thread map leaks in basic mmap test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. # perf test -v 4 4: Read samples using the mmap interface : --- start --- test child forked, pid 139782 mmap size 528384B ================================================================= ==139782==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f1f76daee8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x564ba21a0fea in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x564ba21a1a0f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x564ba21a21cf in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x564ba21a21cf in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x564ba1e48298 in test__basic_mmap tests/mmap-basic.c:55 #6 0x564ba1e278fb in run_test tests/builtin-test.c:428 #7 0x564ba1e278fb in test_and_print tests/builtin-test.c:458 #8 0x564ba1e29a53 in __cmd_test tests/builtin-test.c:679 #9 0x564ba1e29a53 in cmd_test tests/builtin-test.c:825 #10 0x564ba1e95cb4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x564ba1d1fa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x564ba1d1fa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x564ba1d1fa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f1f768e4d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Read samples using the mmap interface: FAILED! failed to open shell test directory: /home/namhyung/libexec/perf-core/tests/shell Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Mark Rutland Cc: Stephane Eranian Cc: Ian Rogers Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Ingo Molnar Cc: Leo Yan Cc: Andi Kleen Cc: Alexander Shishkin Link: https://lore.kernel.org/r/20210301140409.184570-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 57093aeacc6f4..73ae8f7aa0660 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -158,8 +158,6 @@ out_init: out_delete_evlist: evlist__delete(evlist); - cpus = NULL; - threads = NULL; out_free_cpus: perf_cpu_map__put(cpus); out_free_threads: -- GitLab From 09a61c8f86aee7b9c514c6906244a22ec37ef028 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:00 +0900 Subject: [PATCH 245/839] perf test: Fix a memory leak in attr test The get_argv_exec_path() returns a dynamic memory so it should be freed after use. $ perf test -v 17 ... ==141682==ERROR: LeakSanitizer: detected memory leaks Direct leak of 33 byte(s) in 1 object(s) allocated from: #0 0x7f09107d2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f091035f6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 33 byte(s) leaked in 1 allocation(s). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index ec972e0892abe..dd39ce9b02778 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) struct stat st; char path_perf[PATH_MAX]; char path_dir[PATH_MAX]; + char *exec_path; /* First try development tree tests. */ if (!lstat("./tests", &st)) return run_dir("./tests", "./perf"); + exec_path = get_argv_exec_path(); + if (exec_path == NULL) + return -1; + /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); + snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); + free(exec_path); if (!lstat(path_dir, &st) && !lstat(path_perf, &st)) -- GitLab From 83d25ccde591fe2356ba336e994b190361158b1e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:01 +0900 Subject: [PATCH 246/839] perf test: Fix cpu and thread map leaks in task_exit test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 24 24: Number of exit events of a simple workload : --- start --- test child forked, pid 145915 mmap size 528384B ================================================================= ==145915==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fc44e50d1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x561cf50f4d2e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x561cf4eeb949 in thread_map__new_by_tid util/thread_map.c:63 #3 0x561cf4db7fd2 in test__task_exit tests/task-exit.c:74 #4 0x561cf4d798fb in run_test tests/builtin-test.c:428 #5 0x561cf4d798fb in test_and_print tests/builtin-test.c:458 #6 0x561cf4d7ba53 in __cmd_test tests/builtin-test.c:679 #7 0x561cf4d7ba53 in cmd_test tests/builtin-test.c:825 #8 0x561cf4de7d04 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x561cf4c71a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x561cf4c71a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x561cf4c71a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fc44e042d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Number of exit events of a simple workload: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bbf94e4aa1450..4c2969db59b07 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { pr_debug("Couldn't run the workload!\n"); @@ -137,7 +134,7 @@ out_init: if (retry_count++ > 1000) { pr_debug("Failed after retrying 1000 times\n"); err = -1; - goto out_free_maps; + goto out_delete_evlist; } goto retry; @@ -148,10 +145,9 @@ out_init: err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } -- GitLab From 97ab7c524fdcaf3098997f81bdf9d01157816f30 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:02 +0900 Subject: [PATCH 247/839] perf test: Fix cpu and thread map leaks in sw_clock_freq test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 25 25: Software clock events period values : --- start --- test child forked, pid 149154 mmap size 528384B mmap size 528384B ================================================================= ==149154==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fef5cd071f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x56260d5e8b8e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x56260d3df7a9 in thread_map__new_by_tid util/thread_map.c:63 #3 0x56260d2ac6b2 in __test__sw_clock_freq tests/sw-clock.c:65 #4 0x56260d26d8fb in run_test tests/builtin-test.c:428 #5 0x56260d26d8fb in test_and_print tests/builtin-test.c:458 #6 0x56260d26fa53 in __cmd_test tests/builtin-test.c:679 #7 0x56260d26fa53 in cmd_test tests/builtin-test.c:825 #8 0x56260d2dbb64 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x56260d165a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x56260d165a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x56260d165a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fef5c83cd09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Software clock events period values : FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index a49c9e23053bd..74988846be1da 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; + struct perf_cpu_map *cpus = NULL; + struct perf_thread_map *threads = NULL; struct mmap *md; attr.sample_freq = 500; @@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - if (evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -129,10 +126,9 @@ out_init: err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } -- GitLab From e06c3ca4922ccf24bd36c007a87f193b235cee93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:03 +0900 Subject: [PATCH 248/839] perf test: Fix cpu and thread map leaks in code_reading test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. Note that this test still has memory leaks in DSOs so it still fails even after this change. I'll take a look at that too. # perf test -v 26 26: Object code reading : --- start --- test child forked, pid 154184 Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols Parsing event 'cycles' mmap size 528384B ... ================================================================= ==154184==ERROR: LeakSanitizer: detected memory leaks Direct leak of 439 byte(s) in 1 object(s) allocated from: #0 0x7fcb66e77037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x55ad9b7e821e in dso__new_id util/dso.c:1256 #2 0x55ad9b8cfd4a in __machine__addnew_vdso util/vdso.c:132 #3 0x55ad9b8cfd4a in machine__findnew_vdso util/vdso.c:347 #4 0x55ad9b845b7e in map__new util/map.c:176 #5 0x55ad9b8415a2 in machine__process_mmap2_event util/machine.c:1787 #6 0x55ad9b8fab16 in perf_tool__process_synth_event util/synthetic-events.c:64 #7 0x55ad9b8fab16 in perf_event__synthesize_mmap_events util/synthetic-events.c:499 #8 0x55ad9b8fbfdf in __event__synthesize_thread util/synthetic-events.c:741 #9 0x55ad9b8ff3e3 in perf_event__synthesize_thread_map util/synthetic-events.c:833 #10 0x55ad9b738585 in do_test_code_reading tests/code-reading.c:608 #11 0x55ad9b73b25d in test__code_reading tests/code-reading.c:722 #12 0x55ad9b6f28fb in run_test tests/builtin-test.c:428 #13 0x55ad9b6f28fb in test_and_print tests/builtin-test.c:458 #14 0x55ad9b6f4a53 in __cmd_test tests/builtin-test.c:679 #15 0x55ad9b6f4a53 in cmd_test tests/builtin-test.c:825 #16 0x55ad9b760cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #17 0x55ad9b5eaa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #18 0x55ad9b5eaa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #19 0x55ad9b5eaa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #20 0x7fcb669acd09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 471 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Object code reading: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 280f0348a09c0..2fdc7b2f996ed 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore) out_put: thread__put(thread); out_err: - - if (evlist) { - evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); - } + evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); machine__delete_threads(machine); machine__delete(machine); -- GitLab From f2c3202ba0c7746c50c71c14d1ab977d929c0a27 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:04 +0900 Subject: [PATCH 249/839] perf test: Fix cpu and thread map leaks in keep_tracking test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. $ perf test -v 28 28: Use a dummy software event to keep tracking: --- start --- test child forked, pid 156810 mmap size 528384B ================================================================= ==156810==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f637d2bce8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55cc6295cffa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55cc6295da1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55cc6295e1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55cc6295e1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55cc626287cf in test__keep_tracking tests/keep-tracking.c:84 #6 0x55cc625e38fb in run_test tests/builtin-test.c:428 #7 0x55cc625e38fb in test_and_print tests/builtin-test.c:458 #8 0x55cc625e5a53 in __cmd_test tests/builtin-test.c:679 #9 0x55cc625e5a53 in cmd_test tests/builtin-test.c:825 #10 0x55cc62651cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55cc624dba88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55cc624dba88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55cc624dba88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f637cdf2d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Use a dummy software event to keep tracking: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/keep-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e6f1b2a38e032..a0438b0f08052 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -154,10 +154,9 @@ out_err: if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } -- GitLab From 953e7b5960f1cf0825da60dbdc762e19b127a94c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:05 +0900 Subject: [PATCH 250/839] perf test: Fix cpu and thread map leaks in switch_tracking test The evlist and cpu/thread maps should be released together. Otherwise the following error was reported by Asan. $ perf test -v 35 35: Track with sched_switch : --- start --- test child forked, pid 159287 Using CPUID GenuineIntel-6-8E-C mmap size 528384B 1295 events recorded ================================================================= ==159287==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7fa28d9a2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x5652f5a5affa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x5652f5a5ba1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x5652f5a5c1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x5652f5a5c1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x5652f5723bbf in test__switch_tracking tests/switch-tracking.c:350 #6 0x5652f56e18fb in run_test tests/builtin-test.c:428 #7 0x5652f56e18fb in test_and_print tests/builtin-test.c:458 #8 0x5652f56e3a53 in __cmd_test tests/builtin-test.c:679 #9 0x5652f56e3a53 in cmd_test tests/builtin-test.c:825 #10 0x5652f574fcc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x5652f55d9a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x5652f55d9a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x5652f55d9a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7fa28d4d8d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Track with sched_switch: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/switch-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 15a2ab765d890..3ebaa758df77e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -574,10 +574,9 @@ out: if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; -- GitLab From 4be42882e1f9c8a2d7d7bc066f420418f45b566c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:06 +0900 Subject: [PATCH 251/839] perf test: Fix a thread map leak in thread_map_synthesize test It missed to call perf_thread_map__put() after using the map. $ perf test -v 43 43: Synthesize thread map : --- start --- test child forked, pid 162640 ================================================================= ==162640==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fd48cdaa1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x563e6d5f8d0e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x563e6d3ef69a in thread_map__new_by_pid util/thread_map.c:46 #3 0x563e6d2cec90 in test__thread_map_synthesize tests/thread-map.c:97 #4 0x563e6d27d8fb in run_test tests/builtin-test.c:428 #5 0x563e6d27d8fb in test_and_print tests/builtin-test.c:458 #6 0x563e6d27fa53 in __cmd_test tests/builtin-test.c:679 #7 0x563e6d27fa53 in cmd_test tests/builtin-test.c:825 #8 0x563e6d2ebce4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x563e6d175a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x563e6d175a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x563e6d175a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fd48c8dfd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 8224 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Synthesize thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 28f51c4bd373c..9e1cf11149efa 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __ TEST_ASSERT_VAL("failed to synthesize map", !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL)); + perf_thread_map__put(threads); return 0; } -- GitLab From 641b6250337027311a09009e18264bb65c4d521c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:07 +0900 Subject: [PATCH 252/839] perf test: Fix a memory leak in thread_map_remove test The str should be freed after creating a thread map. Also change the open-coded thread map deletion to a call to perf_thread_map__put(). $ perf test -v 44 44: Remove thread map : --- start --- test child forked, pid 165536 2 threads: 165535, 165536 1 thread: 165536 0 thread: ================================================================= ==165536==ERROR: LeakSanitizer: detected memory leaks Direct leak of 14 byte(s) in 1 object(s) allocated from: #0 0x7f54453ffe8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f5444f8c6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 14 byte(s) leaked in 1 allocation(s). test child finished with 1 ---- end ---- Remove thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 9e1cf11149efa..d1e208b4a571c 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -110,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb { struct perf_thread_map *threads; char *str; - int i; TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); threads = thread_map__new_str(str, NULL, 0, false); + free(str); TEST_ASSERT_VAL("failed to allocate thread_map", threads); @@ -142,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to not remove thread", thread_map__remove(threads, 0)); - for (i = 0; i < threads->nr; i++) - zfree(&threads->map[i].comm); - - free(threads); + perf_thread_map__put(threads); return 0; } -- GitLab From 690d91f5ec388448f6c2e9e3a8b3da856f400311 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:08 +0900 Subject: [PATCH 253/839] perf test: Fix cpu map leaks in cpu_map_print test It should be released after printing the map. $ perf test -v 52 52: Print cpu map : --- start --- test child forked, pid 172233 ================================================================= ==172233==ERROR: LeakSanitizer: detected memory leaks Direct leak of 156 byte(s) in 1 object(s) allocated from: #0 0x7fc472518e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55e63b378f7a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55e63b37a05c in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:237 #3 0x55e63b056d16 in cpu_map_print tests/cpumap.c:102 #4 0x55e63b056d16 in test__cpu_map_print tests/cpumap.c:120 #5 0x55e63afff8fb in run_test tests/builtin-test.c:428 #6 0x55e63afff8fb in test_and_print tests/builtin-test.c:458 #7 0x55e63b001a53 in __cmd_test tests/builtin-test.c:679 #8 0x55e63b001a53 in cmd_test tests/builtin-test.c:825 #9 0x55e63b06dc44 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x55e63aef7a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x55e63aef7a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x55e63aef7a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fc47204ed09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 448 byte(s) leaked in 7 allocation(s). test child finished with 1 ---- end ---- Print cpu map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/cpumap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 29c793ac7d108..0472b110fe651 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -106,6 +106,8 @@ static int cpu_map_print(const char *str) return -1; cpu_map__snprint(map, buf, sizeof(buf)); + perf_cpu_map__put(map); + return !strcmp(buf, str); } -- GitLab From 846580c235b3e2625ed494f654a28d235976d3b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:09 +0900 Subject: [PATCH 254/839] perf test: Fix cpu and thread map leaks in perf_time_to_tsc test It should release the maps at the end. $ perf test -v 71 71: Convert perf time to TSC : --- start --- test child forked, pid 178744 mmap size 528384B 1st event perf time 59207256505278 tsc 13187166645142 rdtsc time 59207256542151 tsc 13187166723020 2nd event perf time 59207256543749 tsc 13187166726393 ================================================================= ==178744==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7faf601f9e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55b620cfc00a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55b620cfca2f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55b620cfd1ef in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55b620cfd1ef in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55b6209ef1b2 in test__perf_time_to_tsc tests/perf-time-to-tsc.c:73 #6 0x55b6209828fb in run_test tests/builtin-test.c:428 #7 0x55b6209828fb in test_and_print tests/builtin-test.c:458 #8 0x55b620984a53 in __cmd_test tests/builtin-test.c:679 #9 0x55b620984a53 in cmd_test tests/builtin-test.c:825 #10 0x55b6209f0cd4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55b62087aa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55b62087aa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55b62087aa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7faf5fd2fd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Convert perf time to TSC: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 7cff02664d0e6..680c3cffb1286 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -167,6 +167,8 @@ next_event: out_err: evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } -- GitLab From 743108e1048ee73e0eda394597c1fc2ea46a599b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 16:44:14 -0300 Subject: [PATCH 255/839] tools headers: Update syscall.tbl files to support mount_setattr To pick the changes from: 9caccd41541a6f7d ("fs: introduce MOUNT_ATTR_IDMAP") This adds this new syscall to the tables used by tools such as 'perf trace', so that one can specify it by name and have it filtered, etc. Addressing these perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/YD6Wsxr9ByUbab/a@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 16 ++++++++++++++++ .../perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + .../perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 4 files changed, 19 insertions(+) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index dd8306ea336c1..e6524ead2b7b9 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_MOUNT_H #define _UAPI_LINUX_MOUNT_H +#include + /* * These are the fs-independent mount-flags: up to 32 flags are supported * @@ -117,5 +119,19 @@ enum fsconfig_command { #define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 96b2157f03719..0b2480cf3e479 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -521,3 +521,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d443423495e56..3abef2144dac7 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 78672124d28be..7bf01cbe582f0 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -363,6 +363,7 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr # # Due to a historical design error, certain syscalls are numbered differently -- GitLab From 6c0afc579aff90e84736d35ee35a1945ec0f279f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:10:52 -0300 Subject: [PATCH 256/839] tools headers UAPI: Update tools' copy of linux/coresight-pmu.h To get the changes in these commits: 88f11864cf1d1324 ("coresight: etm-perf: Support PID tracing for kernel at EL2") 53abf3fe83175626 ("coresight: etm-perf: Clarify comment on perf options") This will possibly be used in patches lined up for v5.13. And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/coresight-pmu.h' differs from latest version at 'include/linux/coresight-pmu.h' diff -u tools/include/linux/coresight-pmu.h include/linux/coresight-pmu.h Cc: Greg Kroah-Hartman Cc: Leo Yan Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index b0e35eec6499b..4ac5c081af93d 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -10,17 +10,27 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 -/* ETMv3.5/PTM's ETMCR config bit */ -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 +/* + * Below are the definition of bit offsets for perf option, and works as + * arbitrary values for all ETM versions. + * + * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, + * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and + * directly use below macros as config bits. + */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 +#define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { -- GitLab From 1a9bcadd0058a3e81c1beca48e5e08dee9446a01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:16:17 -0300 Subject: [PATCH 257/839] tools headers cpufeatures: Sync with the kernel sources To pick the changes from: 3b9c723ed7cfa4e1 ("KVM: SVM: Add support for SVM instruction address check change") b85a0425d8056f3b ("Enumerate AVX Vector Neural Network instructions") fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Borislav Petkov Cc: Kyung Min Park Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Wei Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 84b887825f126..cc96e26d69f7a 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -236,8 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -294,6 +292,7 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -337,6 +336,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ @@ -385,6 +385,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ -- GitLab From 33dc525f93216bc83935ce98518644def04d6c54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:20:08 -0300 Subject: [PATCH 258/839] tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources To pick the changes in: fe6b6bc802b40081 ("KVM: VMX: Enable bus lock VM exit") That makes 'perf kvm-stat' aware of this new BUS_LOCK exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Chenyi Qiang Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/arch/x86/include/uapi/asm/vmx.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 8e76d3701db3f..5a3022c8af82b 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -112,6 +112,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index ada955c5ebb60..b8e650a985e35 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -89,6 +89,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -150,7 +151,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } -- GitLab From 034f7ee130c19b7b04347238395cff1f402198c3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 28 Jan 2021 09:34:17 +0800 Subject: [PATCH 259/839] perf stat: Fix wrong skipping for per-die aggregation Uncore becomes die-scope on Xeon Cascade Lake-AP and perf has supported --per-die aggregation yet. One issue is found in check_per_pkg() for uncore events running on AP system. On cascade Lake-AP, we have: S0-D0 S0-D1 S1-D0 S1-D1 But in check_per_pkg(), S0-D1 and S1-D1 are skipped because the mask bits for S0 and S1 have been set for S0-D0 and S1-D0. It doesn't check die_id. So the counting for S0-D1 and S1-D1 are set to zero. That's not correct. root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001460963 S0-D0 1 1317376 Bytes llc_misses.mem_read 1.001460963 S0-D1 1 998016 Bytes llc_misses.mem_read 1.001460963 S1-D0 1 970496 Bytes llc_misses.mem_read 1.001460963 S1-D1 1 1291264 Bytes llc_misses.mem_read 2.003488021 S0-D0 1 1082048 Bytes llc_misses.mem_read 2.003488021 S0-D1 1 1919040 Bytes llc_misses.mem_read 2.003488021 S1-D0 1 890752 Bytes llc_misses.mem_read 2.003488021 S1-D1 1 2380800 Bytes llc_misses.mem_read 3.005613270 S0-D0 1 1126080 Bytes llc_misses.mem_read 3.005613270 S0-D1 1 2898176 Bytes llc_misses.mem_read 3.005613270 S1-D0 1 870912 Bytes llc_misses.mem_read 3.005613270 S1-D1 1 3388608 Bytes llc_misses.mem_read 4.007627598 S0-D0 1 1124608 Bytes llc_misses.mem_read 4.007627598 S0-D1 1 3884416 Bytes llc_misses.mem_read 4.007627598 S1-D0 1 921088 Bytes llc_misses.mem_read 4.007627598 S1-D1 1 4451840 Bytes llc_misses.mem_read 5.001479927 S0-D0 1 963328 Bytes llc_misses.mem_read 5.001479927 S0-D1 1 4831936 Bytes llc_misses.mem_read 5.001479927 S1-D0 1 895104 Bytes llc_misses.mem_read 5.001479927 S1-D1 1 5496640 Bytes llc_misses.mem_read From above output, we can see S0-D1 and S1-D1 don't report the interval values, they are continued to grow. That's because check_per_pkg() wrongly decides to use zero counts for S0-D1 and S1-D1. So in check_per_pkg(), we should use hashmap(socket,die) to decide if the cpu counts needs to skip. Only considering socket is not enough. Now with this patch, root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001586691 S0-D0 1 1229440 Bytes llc_misses.mem_read 1.001586691 S0-D1 1 976832 Bytes llc_misses.mem_read 1.001586691 S1-D0 1 938304 Bytes llc_misses.mem_read 1.001586691 S1-D1 1 1227328 Bytes llc_misses.mem_read 2.003776312 S0-D0 1 1586752 Bytes llc_misses.mem_read 2.003776312 S0-D1 1 875392 Bytes llc_misses.mem_read 2.003776312 S1-D0 1 855616 Bytes llc_misses.mem_read 2.003776312 S1-D1 1 949376 Bytes llc_misses.mem_read 3.006512788 S0-D0 1 1338880 Bytes llc_misses.mem_read 3.006512788 S0-D1 1 920064 Bytes llc_misses.mem_read 3.006512788 S1-D0 1 877184 Bytes llc_misses.mem_read 3.006512788 S1-D1 1 1020736 Bytes llc_misses.mem_read 4.008895291 S0-D0 1 926592 Bytes llc_misses.mem_read 4.008895291 S0-D1 1 906368 Bytes llc_misses.mem_read 4.008895291 S1-D0 1 892224 Bytes llc_misses.mem_read 4.008895291 S1-D1 1 987712 Bytes llc_misses.mem_read 5.001590993 S0-D0 1 962624 Bytes llc_misses.mem_read 5.001590993 S0-D1 1 912512 Bytes llc_misses.mem_read 5.001590993 S1-D0 1 891200 Bytes llc_misses.mem_read 5.001590993 S1-D1 1 978432 Bytes llc_misses.mem_read On no-die system, die_id is 0, actually it's hashmap(socket,0), original behavior is not changed. Reported-by: Ying Huang Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Cc: Ying Huang Link: http://lore.kernel.org/lkml/20210128013417.25597-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 18 +++++++++++- tools/perf/util/evsel.h | 4 ++- tools/perf/util/python-ext-sources | 1 + tools/perf/util/stat.c | 47 ++++++++++++++++++++++++------ 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1bf76864c4f26..7ecbc8e2fbfa8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,6 +46,7 @@ #include "string2.h" #include "memswap.h" #include "util.h" +#include "hashmap.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include @@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); - zfree(&evsel->per_pkg_mask); + evsel__zero_per_pkg(evsel); + hashmap__free(evsel->per_pkg_mask); + evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); } @@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) return store_evsel_ids(evsel, evlist); } + +void evsel__zero_per_pkg(struct evsel *evsel) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (evsel->per_pkg_mask) { + hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt) + free((char *)cur->key); + + hashmap__clear(evsel->per_pkg_mask); + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4e8e49fb7e9de..6026487353dd8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -19,6 +19,7 @@ struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; struct target; +struct hashmap; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -112,7 +113,7 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; - unsigned long *per_pkg_mask; + struct hashmap *per_pkg_mask; struct evsel *leader; struct list_head config_terms; int err; @@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +void evsel__zero_per_pkg(struct evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 71b753523fac0..845dd46e3c618 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -36,3 +36,4 @@ util/symbol_fprintf.c util/units.c util/affinity.c util/rwsem.c +util/hashmap.c diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5d8af29447f44..c400f8dde017b 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -13,6 +13,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" +#include "hashmap.h" #include void update_stats(struct stats *stats, u64 val) @@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) } } -static void zero_per_pkg(struct evsel *counter) +static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, cpu__max_cpu()); + uint64_t *key = (uint64_t *) __key; + + return *key & 0xffffffff; +} + +static bool pkg_id_equal(const void *__key1, const void *__key2, + void *ctx __maybe_unused) +{ + uint64_t *key1 = (uint64_t *) __key1; + uint64_t *key2 = (uint64_t *) __key2; + + return *key1 == *key2; } static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, int cpu, bool *skip) { - unsigned long *mask = counter->per_pkg_mask; + struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); - int s; + int s, d, ret = 0; + uint64_t *key; *skip = false; @@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter, return 0; if (!mask) { - mask = zalloc(cpu__max_cpu()); + mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); if (!mask) return -ENOMEM; @@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter, if (s < 0) return -1; - *skip = test_and_set_bit(s, mask) == 1; - return 0; + /* + * On multi-die system, die_id > 0. On no-die system, die_id = 0. + * We use hashmap(socket, die) to check the used socket+die pair. + */ + d = cpu_map__get_die(cpus, cpu, NULL).die; + if (d < 0) + return -1; + + key = malloc(sizeof(*key)); + if (!key) + return -ENOMEM; + + *key = (uint64_t)d << 32 | s; + if (hashmap__find(mask, (void *)key, NULL)) + *skip = true; + else + ret = hashmap__add(mask, (void *)key, (void *)1); + + return ret; } static int @@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, } if (counter->per_pkg) - zero_per_pkg(counter); + evsel__zero_per_pkg(counter); ret = process_counter_maps(config, counter); if (ret) -- GitLab From e2a99c9a9aa02ddc7c08d5089ef140965879f8f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:47 +0900 Subject: [PATCH 260/839] libperf: Add perf_evlist__reset_id_hash() Add the perf_evlist__reset_id_hash() function as an internal function so that it can be called by perf to reset the hash table. This is necessary for 'perf stat' to run the workload multiple times. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 13 +++++++++---- tools/lib/perf/include/internal/evlist.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 17465d454a0e3..a0aaf385cbb58 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -26,13 +26,10 @@ void perf_evlist__init(struct perf_evlist *evlist) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; fdarray__init(&evlist->pollfd, 64); + perf_evlist__reset_id_hash(evlist); } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist, hlist_add_head(&sid->node, &evlist->heads[hash]); } +void perf_evlist__reset_id_hash(struct perf_evlist *evlist) +{ + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); +} + void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id) diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 2d0fa02b036f6..212c29063ad42 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, int fd); +void perf_evlist__reset_id_hash(struct perf_evlist *evlist); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ -- GitLab From 513068f2b1fe39a60d89f6f8afbdd79c2534889c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:48 +0900 Subject: [PATCH 261/839] perf stat: Fix use-after-free when -r option is used I got a segfault when using -r option with event groups. The option makes it run the workload multiple times and it will reuse the evlist and evsel for each run. While most of resources are allocated and freed properly, the id hash in the evlist was not and it resulted in the bug. You can see it with the address sanitizer like below: $ perf stat -r 100 -e '{cycles,instructions}' true ================================================================= ==693052==ERROR: AddressSanitizer: heap-use-after-free on address 0x6080000003d0 at pc 0x558c57732835 bp 0x7fff1526adb0 sp 0x7fff1526ada8 WRITE of size 8 at 0x6080000003d0 thread T0 #0 0x558c57732834 in hlist_add_head /home/namhyung/project/linux/tools/include/linux/list.h:644 #1 0x558c57732834 in perf_evlist__id_hash /home/namhyung/project/linux/tools/lib/perf/evlist.c:237 #2 0x558c57732834 in perf_evlist__id_add /home/namhyung/project/linux/tools/lib/perf/evlist.c:244 #3 0x558c57732834 in perf_evlist__id_add_fd /home/namhyung/project/linux/tools/lib/perf/evlist.c:285 #4 0x558c5747733e in store_evsel_ids util/evsel.c:2765 #5 0x558c5747733e in evsel__store_ids util/evsel.c:2782 #6 0x558c5730b717 in __run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:895 #7 0x558c5730b717 in run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:1014 #8 0x558c5730b717 in cmd_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:2446 #9 0x558c57427c24 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x558c572b1a48 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x558c572b1a48 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x558c572b1a48 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fcadb9f7d09 in __libc_start_main ../csu/libc-start.c:308 #14 0x558c572b60f9 in _start (/home/namhyung/project/linux/tools/perf/perf+0x45d0f9) Actually the nodes in the hash table are struct perf_stream_id and they were freed in the previous run. Fix it by resetting the hash. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5121b4db66fe5..882cd1f721d95 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist) perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); } + perf_evlist__reset_id_hash(&evlist->core); } static int evlist__create_syswide_maps(struct evlist *evlist) -- GitLab From bd57a9f33abc0adede5bafa06b2f1af3de03190d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 Feb 2021 16:14:38 +0900 Subject: [PATCH 262/839] perf daemon: Fix compile error with Asan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm seeing a build failure when build with address sanitizer. It seems we could write to the name[100] if the var is longer. $ make EXTRA_CFLAGS=-fsanitize=address ... CC builtin-daemon.o In function ‘get_session_name’, inlined from ‘session_config’ at builtin-daemon.c:164:6, inlined from ‘server_config’ at builtin-daemon.c:223:10: builtin-daemon.c:155:11: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] 155 | *session = 0; | ~~~~~~~~~^~~ builtin-daemon.c: In function ‘server_config’: builtin-daemon.c:162:7: note: at offset 100 to object ‘name’ with size 100 declared here 162 | char name[100]; | ^~~~ Fixes: c0666261ff38 ("perf daemon: Add config file support") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210224071438.686677-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 8f0ed2e592808..ace8772a4f038 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va struct daemon_session *session; char name[100]; - if (get_session_name(var, name, sizeof(name))) + if (get_session_name(var, name, sizeof(name) - 1)) return -EINVAL; var = strchr(var, '.'); -- GitLab From ec4d0a7680c793ef68d47507fcec245019ee6f33 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Fri, 19 Feb 2021 11:09:32 -0500 Subject: [PATCH 263/839] perf archive: Fix filtering of empty build-ids A non-existent build-id used to be treated as all-zero SHA-1 hash. Build-ids are now variable width. A non-existent build-id is an empty string and "perf buildid-list" pads this with spaces. This is true even when using old perf.data files recorded from older versions of perf; "perf buildid-list" never reports an all-zero hash anymore. This fixes "perf-archive" to skip missing build-ids by skipping lines that start with a padding space rather than with zeroes. Signed-off-by: Nicholas Fraser Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Huw Davies Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ulrich Czekalla Link: https://lore.kernel.org/r/442bffc7-ac5c-0975-b876-a549efce2413@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 0cfb3e2cefef4..133f0eddbcc46 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -20,9 +20,8 @@ else fi BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) -NOBUILDID=0000000000000000000000000000000000000000 -perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS +perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS if [ ! -s $BUILDIDS ] ; then echo "perf archive: no build-ids found" rm $BUILDIDS || true -- GitLab From a8146d66ab0184ad1728eaeb59cfdf256f4b8fbf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:24 -0800 Subject: [PATCH 264/839] perf test: Fix sample-parsing failure on non-x86 platforms Executing 'perf test 27' fails on s390: [root@t35lp46 perf]# ./perf test -Fv 27 27: Sample parsing --- start --- ---- end ---- Sample parsing: FAILED! [root@t35lp46 perf]# The commit fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") changes the ins_lat to a model-specific variable only for X86, but perf test still verify the variable in the generic test. Remove the ins_lat check in the generic test. The following patch will add it in the X86 specific test. Fixes: fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") Reported-by: Thomas Richter Signed-off-by: Kan Liang Tested-by: Thomas Richter Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 0dbe3aa998536..8fd8a4ef97da1 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_WEIGHT) COMP(weight); - if (type & PERF_SAMPLE_WEIGHT_STRUCT) - COMP(ins_lat); - if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); @@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .cgroup = 114, .data_page_size = 115, .code_page_size = 116, - .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, -- GitLab From 7d9d4c6edba93cd96899affe2fc60c3341df152c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:25 -0800 Subject: [PATCH 265/839] perf test: Support the ins_lat check in the X86 specific test The ins_lat of PERF_SAMPLE_WEIGHT_STRUCT stands for the instruction latency, which is only available for X86. Add a X86 specific test for the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT type. The test__x86_sample_parsing() uses the same way as the test__sample_parsing() to verify a sample type. Since the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT are the only X86 specific sample type for now, the test__x86_sample_parsing() only verify the PERF_SAMPLE_WEIGHT_STRUCT type. Other sample types are still verified in the generic test. $ perf test 77 -v 77: x86 Sample parsing : --- start --- test child forked, pid 102370 test child finished with 0 ---- end ---- x86 Sample parsing: Ok Signed-off-by: Kan Liang Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 4 + tools/perf/arch/x86/tests/sample-parsing.c | 121 +++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 tools/perf/arch/x86/tests/sample-parsing.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a54b94f1c25b..0e20f3dc69f3c 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); +int test__x86_sample_parsing(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 36d4f248b51db..28d793390198f 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o +perf-y += sample-parsing.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index bc25d727b4e9a..71aa67367ad6b 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -30,6 +30,10 @@ struct test arch_tests[] = { .func = test__bp_modify, }, #endif + { + .desc = "x86 Sample parsing", + .func = test__x86_sample_parsing, + }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c new file mode 100644 index 0000000000000..c92db87e4479e --- /dev/null +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include "event.h" +#include "evsel.h" +#include "debug.h" +#include "util/synthetic-events.h" + +#include "tests/tests.h" +#include "arch-tests.h" + +#define COMP(m) do { \ + if (s1->m != s2->m) { \ + pr_debug("Samples differ at '"#m"'\n"); \ + return false; \ + } \ +} while (0) + +static bool samples_same(const struct perf_sample *s1, + const struct perf_sample *s2, + u64 type) +{ + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + COMP(ins_lat); + + return true; +} + +static int do_test(u64 sample_type) +{ + struct evsel evsel = { + .needs_swap = false, + .core = { + . attr = { + .sample_type = sample_type, + .read_format = 0, + }, + }, + }; + union perf_event *event; + struct perf_sample sample = { + .weight = 101, + .ins_lat = 102, + }; + struct perf_sample sample_out; + size_t i, sz, bufsz; + int err, ret = -1; + + sz = perf_event__sample_event_size(&sample, sample_type, 0); + bufsz = sz + 4096; /* Add a bit for overrun checking */ + event = malloc(bufsz); + if (!event) { + pr_debug("malloc failed\n"); + return -1; + } + + memset(event, 0xff, bufsz); + event->header.type = PERF_RECORD_SAMPLE; + event->header.misc = 0; + event->header.size = sz; + + err = perf_event__synthesize_sample(event, sample_type, 0, &sample); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "perf_event__synthesize_sample", sample_type, err); + goto out_free; + } + + /* The data does not contain 0xff so we use that to check the size */ + for (i = bufsz; i > 0; i--) { + if (*(i - 1 + (u8 *)event) != 0xff) + break; + } + if (i != sz) { + pr_debug("Event size mismatch: actual %zu vs expected %zu\n", + i, sz); + goto out_free; + } + + evsel.sample_size = __evsel__sample_size(sample_type); + + err = evsel__parse_sample(&evsel, event, &sample_out); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "evsel__parse_sample", sample_type, err); + goto out_free; + } + + if (!samples_same(&sample, &sample_out, sample_type)) { + pr_debug("parsing failed for sample_type %#"PRIx64"\n", + sample_type); + goto out_free; + } + + ret = 0; +out_free: + free(event); + + return ret; +} + +/** + * test__x86_sample_parsing - test X86 specific sample parsing + * + * This function implements a test that synthesizes a sample event, parses it + * and then checks that the parsed sample matches the original sample. If the + * test passes %0 is returned, otherwise %-1 is returned. + * + * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. + * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. + */ +int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return do_test(PERF_SAMPLE_WEIGHT_STRUCT); +} -- GitLab From c1f272df510c6b1db68ca6597724d17b557d1407 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Mar 2021 09:51:52 -0300 Subject: [PATCH 266/839] perf tests x86: Move insn.h include to make sure it finds stddef.h In some versions of alpine Linux the perf build is broken since commit 1d509f2a6ebca1ae ("x86/insn: Support big endian cross-compiles"): In file included from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/asm/byteorder.h:5, from arch/x86/util/../../../../arch/x86/include/asm/insn.h:10, from arch/x86/util/archinsn.c:2: /usr/include/linux/swab.h:161:8: error: unknown type name '__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) So move the inclusion of arch/x86/include/asm/insn.h to later in the places where linux/stddef.h (that conditionally defines __always_inline) to workaround this problem on Alpine Linux 3.9 to 3.11, 3.12 onwards works. Cc: Josh Poimboeuf Cc: Martin Schwidefsky Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/insn-x86.c | 2 +- tools/perf/arch/x86/util/archinsn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index f782ef8c5982f..4f75ae9901403 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../../../../arch/x86/include/asm/insn.h" #include #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "../../../../arch/x86/include/asm/insn.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 3e6791531ca5a..34d600c510440 100644 --- a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../../../arch/x86/include/asm/insn.h" #include "archinsn.h" #include "event.h" #include "machine.h" #include "thread.h" #include "symbol.h" +#include "../../../../arch/x86/include/asm/insn.h" void arch_fetch_insn(struct perf_sample *sample, struct thread *thread, -- GitLab From 6740a4e70e5d1b9d8e7fe41fd46dd5656d65dadf Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 Mar 2021 11:59:58 +0530 Subject: [PATCH 267/839] perf report: Fix -F for branch & mem modes perf report fails to add valid additional fields with -F when used with branch or mem modes. Fix it. Before patch: $ perf record -b $ perf report -b -F +srcline_from --stdio Error: Invalid --fields key: `srcline_from' After patch: $ perf report -b -F +srcline_from --stdio # Samples: 8K of event 'cycles' # Event count (approx.): 8784 ... Committer notes: There was an inversion: when looking at branch stack dimensions (keys) it was checking if the sort mode was 'mem', not 'branch'. Fixes: aa6b3c99236b ("perf report: Make -F more strict like -s") Reported-by: Athira Jajeev Signed-off-by: Ravi Bangoria Reviewed-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Athira Jajeev Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210304062958.85465-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0d5ad42812b9d..552b590485bf1 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__MEMORY) + if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; return __sort_dimension__add_output(list, sd); @@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; return __sort_dimension__add_output(list, sd); -- GitLab From 77d02bd00cea9f1a87afe58113fa75b983d6c23a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2021 10:02:09 -0300 Subject: [PATCH 268/839] perf map: Tighten snprintf() string precision to pass gcc check on some 32-bit arches Noticed on a debian:experimental mips and mipsel cross build build environment: perfbuilder@ec265a086e9b:~$ mips-linux-gnu-gcc --version | head -1 mips-linux-gnu-gcc (Debian 10.2.1-3) 10.2.1 20201224 perfbuilder@ec265a086e9b:~$ CC /tmp/build/perf/util/map.o util/map.c: In function 'map__new': util/map.c:109:5: error: '%s' directive output may be truncated writing between 1 and 2147483645 bytes into a region of size 4096 [-Werror=format-truncation=] 109 | "%s/platforms/%s/arch-%s/usr/lib/%s", | ^~ In file included from /usr/mips-linux-gnu/include/stdio.h:867, from util/symbol.h:11, from util/map.c:2: /usr/mips-linux-gnu/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 32 or more bytes (assuming 4294967321) into a destination of size 4096 67 | return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | __bos (__s), __fmt, __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Since we have the lenghts for what lands in that place, use it to give the compiler more info and make it happy. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 692e56dc832e7..fbc40a2c17d4d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (strstarts(filename, "/system/lib/")) { char *ndk, *app; const char *arch; - size_t ndk_length; - size_t app_length; + int ndk_length, app_length; ndk = getenv("NDK_ROOT"); app = getenv("APP_PLATFORM"); @@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (new_length > PATH_MAX) return false; snprintf(newfilename, new_length, - "%s/platforms/%s/arch-%s/usr/lib/%s", - ndk, app, arch, libname); + "%.*s/platforms/%.*s/arch-%s/usr/lib/%s", + ndk_length, ndk, app_length, app, arch, libname); return true; } -- GitLab From 86a19008af5d88d5d523dbfe9b6ede11473e9a7f Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 2 Mar 2021 15:41:20 +0100 Subject: [PATCH 269/839] perf trace: Fix race in signal handling Since a lot of stuff happens before the SIGINT signal handler is registered (scanning /proc/*, etc.), on bigger systems, such as Cavium Sabre CN99xx, it may happen that first interrupt signal is lost and perf isn't correctly terminated. The reproduction code might look like the following: perf trace -a & PERF_PID=$! sleep 4 kill -INT $PERF_PID The issue has been found on a CN99xx machine with RHEL-8 and the patch fixes it by registering the signal handlers earlier in the init stage. Suggested-by: Jiri Olsa Signed-off-by: Michael Petlan Tested-by: Michael Petlan Cc: Jiri Olsa Link: https://lore.kernel.org/lkml/YEJnaMzH2ctp3PPx@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 85b6a46e85b68..7ec18ff57fc4a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__config(evlist, &trace->opts, &callchain_param); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - if (forks) { err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL); if (err < 0) { @@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv) signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); trace.evlist = evlist__new(); trace.sctbl = syscalltbl__new(); -- GitLab From 6fc5baf5471700fd613f0b4e52ab4563f1942b78 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Sat, 6 Feb 2021 23:08:29 +0800 Subject: [PATCH 270/839] perf cs-etm: Fix bitmap for option When set option with macros ETM_OPT_CTXTID and ETM_OPT_TS, it wrongly takes these two values (14 and 28 prespectively) as bit masks, but actually both are the offset for bits. But this doesn't lead to further failure due to the AND logic operation will be always true for ETM_OPT_CTXTID / ETM_OPT_TS. This patch defines new independent macros (rather than using the "config" bits) for requesting the "contextid" and "timestamp" for cs_etm_set_option(). Signed-off-by: Suzuki Poulouse Reviewed-by: Mike Leach Cc: Al Grant Cc: Daniel Kiss Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Jonathan Corbet Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: http://lore.kernel.org/lkml/20210206150833.42120-5-leo.yan@linaro.org [ Extract the change as a separate patch for easier review ] Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bd446aba64f72..c25c878fd06c9 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -156,6 +156,10 @@ out: return err; } +#define ETM_SET_OPT_CTXTID (1 << 0) +#define ETM_SET_OPT_TS (1 << 1) +#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS) + static int cs_etm_set_option(struct auxtrace_record *itr, struct evsel *evsel, u32 option) { @@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & ETM_SET_OPT_CTXTID) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & ETM_SET_OPT_TS) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(ETM_SET_OPT_MASK)) /* Nothing else is currently supported */ goto out; } @@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); if (err) goto out; } -- GitLab From d30881f573e565ebb5dbb50b31ed6106b5c81328 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Feb 2021 21:02:07 -0500 Subject: [PATCH 271/839] nfsd: Don't keep looking up unhashed files in the nfsd file cache If a file is unhashed, then we're going to reject it anyway and retry, so make sure we skip it when we're doing the RCU lockless lookup. This avoids a number of unnecessary nfserr_jukebox returns from nfsd_file_acquire() Fixes: 65294c1f2c5e ("nfsd: add a new struct file caching facility to nfsd") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 53fcbf79bdca3..7629248fdd532 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -898,6 +898,8 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, continue; if (!nfsd_match_cred(nf->nf_cred, current_cred())) continue; + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) + continue; if (nfsd_file_get(nf) != NULL) return nf; } -- GitLab From 6820bf77864d5894ff67b5c00d7dba8f92011e3d Mon Sep 17 00:00:00 2001 From: Timo Rothenpieler Date: Tue, 23 Feb 2021 00:36:19 +0100 Subject: [PATCH 272/839] svcrdma: disable timeouts on rdma backchannel This brings it in line with the regular tcp backchannel, which also has all those timeouts disabled. Prevents the backchannel from timing out, getting some async operations like server side copying getting stuck indefinitely on the client side. Signed-off-by: Timo Rothenpieler Fixes: 5d252f90a800 ("svcrdma: Add class for RDMA backwards direction transport") Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 4a1edbb4028ef..9150df35fb6f2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -252,9 +252,9 @@ xprt_setup_rdma_bc(struct xprt_create *args) xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); xprt_set_connected(xprt); - xprt->bind_timeout = RPCRDMA_BIND_TO; - xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; - xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; + xprt->bind_timeout = 0; + xprt->reestablish_timeout = 0; + xprt->idle_timeout = 0; xprt->prot = XPRT_TRANSPORT_BC_RDMA; xprt->ops = &xprt_rdma_bc_procs; -- GitLab From 7005227369079963d25fb2d5d736d0feb2c44cf6 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Fri, 19 Feb 2021 16:56:10 -0500 Subject: [PATCH 273/839] fs: nfsd: fix kconfig dependency warning for NFSD_V4 When NFSD_V4 is enabled and CRYPTO is disabled, Kbuild gives the following warning: WARNING: unmet direct dependencies detected for CRYPTO_SHA256 Depends on [n]: CRYPTO [=n] Selected by [y]: - NFSD_V4 [=y] && NETWORK_FILESYSTEMS [=y] && NFSD [=y] && PROC_FS [=y] WARNING: unmet direct dependencies detected for CRYPTO_MD5 Depends on [n]: CRYPTO [=n] Selected by [y]: - NFSD_V4 [=y] && NETWORK_FILESYSTEMS [=y] && NFSD [=y] && PROC_FS [=y] This is because NFSD_V4 selects CRYPTO_MD5 and CRYPTO_SHA256, without depending on or selecting CRYPTO, despite those config options being subordinate to CRYPTO. Signed-off-by: Julian Braha Signed-off-by: Chuck Lever --- fs/nfsd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 821e5913faee4..d6cff5fbe705b 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,6 +73,7 @@ config NFSD_V4 select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS + select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 select GRACE_PERIOD -- GitLab From bfdd89f232aa2de5a4b3fc985cba894148b830a8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 24 Feb 2021 13:39:50 -0500 Subject: [PATCH 274/839] nfsd: don't abort copies early The typical result of the backwards comparison here is that the source server in a server-to-server copy will return BAD_STATEID within a few seconds of the copy starting, instead of giving the copy a full lease period, so the copy_file_range() call will end up unnecessarily returning a short read. Fixes: 624322f1adc5 "NFSD add COPY_NOTIFY operation" Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 423fd6683f3ae..61552e89bd89d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5389,7 +5389,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time > cutoff) + cps->cpntf_time < cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); -- GitLab From c7de87ff9dac5f396f62d584f3908f80ddc0e07b Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Fri, 26 Feb 2021 09:38:20 -0500 Subject: [PATCH 275/839] NFSD: Repair misuse of sv_lock in 5.10.16-rt30. [ This problem is in mainline, but only rt has the chops to be able to detect it. ] Lockdep reports a circular lock dependency between serv->sv_lock and softirq_ctl.lock on system shutdown, when using a kernel built with CONFIG_PREEMPT_RT=y, and a nfs mount exists. This is due to the definition of spin_lock_bh on rt: local_bh_disable(); rt_spin_lock(lock); which forces a softirq_ctl.lock -> serv->sv_lock dependency. This is not a problem as long as _every_ lock of serv->sv_lock is a: spin_lock_bh(&serv->sv_lock); but there is one of the form: spin_lock(&serv->sv_lock); This is what is causing the circular dependency splat. The spin_lock() grabs the lock without first grabbing softirq_ctl.lock via local_bh_disable. If later on in the critical region, someone does a local_bh_disable, we get a serv->sv_lock -> softirq_ctrl.lock dependency established. Deadlock. Fix is to make serv->sv_lock be locked with spin_lock_bh everywhere, no exceptions. [ OK ] Stopped target NFS client services. Stopping Logout off all iSCSI sessions on shutdown... Stopping NFS server and services... [ 109.442380] [ 109.442385] ====================================================== [ 109.442386] WARNING: possible circular locking dependency detected [ 109.442387] 5.10.16-rt30 #1 Not tainted [ 109.442389] ------------------------------------------------------ [ 109.442390] nfsd/1032 is trying to acquire lock: [ 109.442392] ffff994237617f60 ((softirq_ctrl.lock).lock){+.+.}-{2:2}, at: __local_bh_disable_ip+0xd9/0x270 [ 109.442405] [ 109.442405] but task is already holding lock: [ 109.442406] ffff994245cb00b0 (&serv->sv_lock){+.+.}-{0:0}, at: svc_close_list+0x1f/0x90 [ 109.442415] [ 109.442415] which lock already depends on the new lock. [ 109.442415] [ 109.442416] [ 109.442416] the existing dependency chain (in reverse order) is: [ 109.442417] [ 109.442417] -> #1 (&serv->sv_lock){+.+.}-{0:0}: [ 109.442421] rt_spin_lock+0x2b/0xc0 [ 109.442428] svc_add_new_perm_xprt+0x42/0xa0 [ 109.442430] svc_addsock+0x135/0x220 [ 109.442434] write_ports+0x4b3/0x620 [ 109.442438] nfsctl_transaction_write+0x45/0x80 [ 109.442440] vfs_write+0xff/0x420 [ 109.442444] ksys_write+0x4f/0xc0 [ 109.442446] do_syscall_64+0x33/0x40 [ 109.442450] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 109.442454] [ 109.442454] -> #0 ((softirq_ctrl.lock).lock){+.+.}-{2:2}: [ 109.442457] __lock_acquire+0x1264/0x20b0 [ 109.442463] lock_acquire+0xc2/0x400 [ 109.442466] rt_spin_lock+0x2b/0xc0 [ 109.442469] __local_bh_disable_ip+0xd9/0x270 [ 109.442471] svc_xprt_do_enqueue+0xc0/0x4d0 [ 109.442474] svc_close_list+0x60/0x90 [ 109.442476] svc_close_net+0x49/0x1a0 [ 109.442478] svc_shutdown_net+0x12/0x40 [ 109.442480] nfsd_destroy+0xc5/0x180 [ 109.442482] nfsd+0x1bc/0x270 [ 109.442483] kthread+0x194/0x1b0 [ 109.442487] ret_from_fork+0x22/0x30 [ 109.442492] [ 109.442492] other info that might help us debug this: [ 109.442492] [ 109.442493] Possible unsafe locking scenario: [ 109.442493] [ 109.442493] CPU0 CPU1 [ 109.442494] ---- ---- [ 109.442495] lock(&serv->sv_lock); [ 109.442496] lock((softirq_ctrl.lock).lock); [ 109.442498] lock(&serv->sv_lock); [ 109.442499] lock((softirq_ctrl.lock).lock); [ 109.442501] [ 109.442501] *** DEADLOCK *** [ 109.442501] [ 109.442501] 3 locks held by nfsd/1032: [ 109.442503] #0: ffffffff93b49258 (nfsd_mutex){+.+.}-{3:3}, at: nfsd+0x19a/0x270 [ 109.442508] #1: ffff994245cb00b0 (&serv->sv_lock){+.+.}-{0:0}, at: svc_close_list+0x1f/0x90 [ 109.442512] #2: ffffffff93a81b20 (rcu_read_lock){....}-{1:2}, at: rt_spin_lock+0x5/0xc0 [ 109.442518] [ 109.442518] stack backtrace: [ 109.442519] CPU: 0 PID: 1032 Comm: nfsd Not tainted 5.10.16-rt30 #1 [ 109.442522] Hardware name: Supermicro X9DRL-3F/iF/X9DRL-3F/iF, BIOS 3.2 09/22/2015 [ 109.442524] Call Trace: [ 109.442527] dump_stack+0x77/0x97 [ 109.442533] check_noncircular+0xdc/0xf0 [ 109.442546] __lock_acquire+0x1264/0x20b0 [ 109.442553] lock_acquire+0xc2/0x400 [ 109.442564] rt_spin_lock+0x2b/0xc0 [ 109.442570] __local_bh_disable_ip+0xd9/0x270 [ 109.442573] svc_xprt_do_enqueue+0xc0/0x4d0 [ 109.442577] svc_close_list+0x60/0x90 [ 109.442581] svc_close_net+0x49/0x1a0 [ 109.442585] svc_shutdown_net+0x12/0x40 [ 109.442588] nfsd_destroy+0xc5/0x180 [ 109.442590] nfsd+0x1bc/0x270 [ 109.442595] kthread+0x194/0x1b0 [ 109.442600] ret_from_fork+0x22/0x30 [ 109.518225] nfsd: last server has exited, flushing export cache [ OK ] Stopped NFSv4 ID-name mapping service. [ OK ] Stopped GSSAPI Proxy Daemon. [ OK ] Stopped NFS Mount Daemon. [ OK ] Stopped NFS status monitor for NFSv2/3 locking.. Fixes: 719f8bcc883e ("svcrpc: fix xpt_list traversal locking on shutdown") Signed-off-by: Joe Korty Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dcc50ae545506..3cdd71a8df1e7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1060,7 +1060,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st struct svc_xprt *xprt; int ret = 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; @@ -1068,7 +1068,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); return ret; } -- GitLab From f1442d6349a2e7bb7a6134791bdc26cb776c79af Mon Sep 17 00:00:00 2001 From: Daniel Kobras Date: Sat, 27 Feb 2021 00:04:37 +0100 Subject: [PATCH 276/839] sunrpc: fix refcount leak for rpc auth modules If an auth module's accept op returns SVC_CLOSE, svc_process_common() enters a call path that does not call svc_authorise() before leaving the function, and thus leaks a reference on the auth module's refcount. Hence, make sure calls to svc_authenticate() and svc_authorise() are paired for all call paths, to make sure rpc auth modules can be unloaded. Signed-off-by: Daniel Kobras Fixes: 4d712ef1db05 ("svcauth_gss: Close connection when dropping an incoming message") Link: https://lore.kernel.org/linux-nfs/3F1B347F-B809-478F-A1E9-0BE98E22B0F0@oracle.com/T/#t Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 61fb8a18552cf..d76dc9d95d163 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1413,7 +1413,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) sendit: if (svc_authorise(rqstp)) - goto close; + goto close_xprt; return 1; /* Caller can now send it */ release_dropit: @@ -1425,6 +1425,8 @@ release_dropit: return 0; close: + svc_authorise(rqstp); +close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) svc_close_xprt(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); @@ -1433,7 +1435,7 @@ release_dropit: err_short_len: svc_printk(rqstp, "short len %zd, dropping request\n", argv->iov_len); - goto close; + goto close_xprt; err_bad_rpc: serv->sv_stats->rpcbadfmt++; -- GitLab From 0ddc942394013f08992fc379ca04cffacbbe3dae Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Mar 2021 10:48:38 -0500 Subject: [PATCH 277/839] rpc: fix NULL dereference on kmalloc failure I think this is unlikely but possible: svc_authenticate sets rq_authop and calls svcauth_gss_accept. The kmalloc(sizeof(*svcdata), GFP_KERNEL) fails, leaving rq_auth_data NULL, and returning SVC_DENIED. This causes svc_process_common to go to err_bad_auth, and eventually call svc_authorise. That calls ->release == svcauth_gss_release, which tries to dereference rq_auth_data. Signed-off-by: J. Bruce Fields Link: https://lore.kernel.org/linux-nfs/3F1B347F-B809-478F-A1E9-0BE98E22B0F0@oracle.com/T/#t Signed-off-by: Chuck Lever --- net/sunrpc/auth_gss/svcauth_gss.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index bd4678db9d76b..6dff64374bfe1 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1825,11 +1825,14 @@ static int svcauth_gss_release(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; - struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct rpc_gss_wire_cred *gc; struct xdr_buf *resbuf = &rqstp->rq_res; int stat = -EINVAL; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); + if (!gsd) + goto out; + gc = &gsd->clcred; if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ @@ -1870,10 +1873,10 @@ out_err: if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; - if (gsd->rsci) + if (gsd && gsd->rsci) { cache_put(&gsd->rsci->h, sn->rsc_cache); - gsd->rsci = NULL; - + gsd->rsci = NULL; + } return stat; } -- GitLab From 9e9888a0fe97b9501a40f717225d2bef7100a2c1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 5 Mar 2021 10:21:05 +0100 Subject: [PATCH 278/839] efi: stub: omit SetVirtualAddressMap() if marked unsupported in RT_PROP table The EFI_RT_PROPERTIES_TABLE contains a mask of runtime services that are available after ExitBootServices(). This mostly does not concern the EFI stub at all, given that it runs before that. However, there is one call that is made at runtime, which is the call to SetVirtualAddressMap() (which is not even callable at boot time to begin with) So add the missing handling of the RT_PROP table to ensure that we only call SetVirtualAddressMap() if it is not being advertised as unsupported by the firmware. Cc: # v5.10+ Tested-by: Shawn Guo Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index ec2f3985bef35..26e69788f27a4 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -96,6 +96,18 @@ static void install_memreserve_table(void) efi_err("Failed to install memreserve config table!\n"); } +static u32 get_supported_rt_services(void) +{ + const efi_rt_properties_table_t *rt_prop_table; + u32 supported = EFI_RT_SUPPORTED_ALL; + + rt_prop_table = get_efi_config_table(EFI_RT_PROPERTIES_TABLE_GUID); + if (rt_prop_table) + supported &= rt_prop_table->runtime_services_supported; + + return supported; +} + /* * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint * that is described in the PE/COFF header. Most of the code is the same @@ -250,6 +262,10 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, (prop_tbl->memory_protection_attribute & EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA); + /* force efi_novamap if SetVirtualAddressMap() is unsupported */ + efi_novamap |= !(get_supported_rt_services() & + EFI_RT_SUPPORTED_SET_VIRTUAL_ADDRESS_MAP); + /* hibernation expects the runtime regions to stay in the same place */ if (!IS_ENABLED(CONFIG_HIBERNATION) && !efi_nokaslr && !flat_va_mapping) { /* -- GitLab From eb602521f43876b3f76c4686de596c9804977228 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 4 Mar 2021 09:28:57 -0500 Subject: [PATCH 279/839] gfs2: make function gfs2_make_fs_ro() to void type It fixes the following warning detected by coccinelle: ./fs/gfs2/super.c:592:5-10: Unneeded variable: "error". Return "0" on line 628 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 4 +--- fs/gfs2/super.c | 10 ++-------- fs/gfs2/super.h | 2 +- fs/gfs2/util.c | 2 +- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 74c7d01723b95..aa4136055a83c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1539,9 +1539,7 @@ static int gfs2_reconfigure(struct fs_context *fc) return -EINVAL; if (fc->sb_flags & SB_RDONLY) { - error = gfs2_make_fs_ro(sdp); - if (error) - errorfc(fc, "unable to remount read-only"); + gfs2_make_fs_ro(sdp); } else { error = gfs2_make_fs_rw(sdp); if (error) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 861ed5fe02a59..97076d3f562f9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -587,9 +587,8 @@ out: * Returns: errno */ -int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +void gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - int error = 0; int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); gfs2_flush_delete_work(sdp); @@ -624,8 +623,6 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) if (!log_write_allowed) sdp->sd_vfs->s_flags |= SB_RDONLY; - - return error; } /** @@ -637,7 +634,6 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) static void gfs2_put_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; - int error; struct gfs2_jdesc *jd; /* No more recovery requests */ @@ -658,9 +654,7 @@ restart: spin_unlock(&sdp->sd_jindex_spin); if (!sb_rdonly(sb)) { - error = gfs2_make_fs_ro(sdp); - if (error) - gfs2_io_error(sdp); + gfs2_make_fs_ro(sdp); } WARN_ON(gfs2_withdrawing(sdp)); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 08e502dec7ecb..ec4affb33ed53 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -34,7 +34,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, struct gfs2_inode **ipp); extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp); +extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp); extern void gfs2_online_uevent(struct gfs2_sbd *sdp); extern int gfs2_statfs_init(struct gfs2_sbd *sdp); extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 8d3c670c990fd..58743315cda9f 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -156,7 +156,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) ret = 0; } if (!ret) - ret = gfs2_make_fs_ro(sdp); + gfs2_make_fs_ro(sdp); gfs2_freeze_unlock(&freeze_gh); } -- GitLab From 1a5a2cfd34c17db73c53ef127272c8c1ae220485 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 25 Feb 2021 11:11:09 -0500 Subject: [PATCH 280/839] gfs2: fix use-after-free in trans_drain This patch adds code to function trans_drain to remove drained bd elements from the ail lists, if queued, before freeing the bd. If we don't remove the bd from the ail, function ail_drain will try to reference the bd after it has been freed by trans_drain. Thanks to Andy Price for his analysis of the problem. Reported-by: Andy Price Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 4 ++++ fs/gfs2/trans.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 16937ebb2a3e3..760af666576cc 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -998,12 +998,16 @@ static void trans_drain(struct gfs2_trans *tr) while (!list_empty(head)) { bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); + if (!list_empty(&bd->bd_ail_st_list)) + gfs2_remove_from_ail(bd); kmem_cache_free(gfs2_bufdata_cachep, bd); } head = &tr->tr_databuf; while (!list_empty(head)) { bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); + if (!list_empty(&bd->bd_ail_st_list)) + gfs2_remove_from_ail(bd); kmem_cache_free(gfs2_bufdata_cachep, bd); } } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ab96cf0bf26be..63fec11ef2ce3 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -169,6 +169,8 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, bd->bd_bh = bh; bd->bd_gl = gl; INIT_LIST_HEAD(&bd->bd_list); + INIT_LIST_HEAD(&bd->bd_ail_st_list); + INIT_LIST_HEAD(&bd->bd_ail_gl_list); bh->b_private = bd; return bd; } -- GitLab From 2941267bd3dad018de1d51fe2cd996b7bc1e5a5d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:11 +0000 Subject: [PATCH 281/839] io_uring: make del_task_file more forgiving Rework io_uring_del_task_file(), so it accepts an index to delete, and it's not necessarily have to be in the ->xa. Infer file from xa_erase() to maintain a single origin of truth. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d51c6ba9268bd..00a736867b764 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8785,15 +8785,18 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) /* * Remove this io_uring_file -> task mapping. */ -static void io_uring_del_task_file(struct file *file) +static void io_uring_del_task_file(unsigned long index) { struct io_uring_task *tctx = current->io_uring; + struct file *file; + + file = xa_erase(&tctx->xa, index); + if (!file) + return; if (tctx->last == file) tctx->last = NULL; - file = xa_erase(&tctx->xa, (unsigned long)file); - if (file) - fput(file); + fput(file); } static void io_uring_clean_tctx(struct io_uring_task *tctx) @@ -8802,7 +8805,7 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) unsigned long index; xa_for_each(&tctx->xa, index, file) - io_uring_del_task_file(file); + io_uring_del_task_file(index); if (tctx->io_wq) { io_wq_put_and_exit(tctx->io_wq); tctx->io_wq = NULL; -- GitLab From 13bf43f5f4739739751c0049a1582610c283bdde Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:12 +0000 Subject: [PATCH 282/839] io_uring: introduce ctx to tctx back map For each pair tcxt-ctx create an object and chain it into ctx, so we have a way to traverse all tctx that are using current ctx. Preparation patch, will be used later. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 58 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 00a736867b764..9a2cff0662e01 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -454,6 +454,7 @@ struct io_ring_ctx { /* Keep this last, we don't need it for the fast path */ struct work_struct exit_work; + struct list_head tctx_list; }; /* @@ -805,6 +806,13 @@ struct io_kiocb { struct io_wq_work work; }; +struct io_tctx_node { + struct list_head ctx_node; + struct task_struct *task; + struct file *file; + struct io_ring_ctx *ctx; +}; + struct io_defer_entry { struct list_head list; struct io_kiocb *req; @@ -1144,6 +1152,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->rsrc_ref_list); INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); init_llist_head(&ctx->rsrc_put_llist); + INIT_LIST_HEAD(&ctx->tctx_list); INIT_LIST_HEAD(&ctx->submit_state.comp.free_list); INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list); return ctx; @@ -8748,6 +8757,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; int ret; if (unlikely(!tctx)) { @@ -8760,13 +8770,25 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) void *old = xa_load(&tctx->xa, (unsigned long)file); if (!old) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + node->ctx = ctx; + node->file = file; + node->task = current; + get_file(file); ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, - file, GFP_KERNEL)); + node, GFP_KERNEL)); if (ret) { fput(file); + kfree(node); return ret; } + + mutex_lock(&ctx->uring_lock); + list_add(&node->ctx_node, &ctx->tctx_list); + mutex_unlock(&ctx->uring_lock); } tctx->last = file; } @@ -8788,23 +8810,31 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) static void io_uring_del_task_file(unsigned long index) { struct io_uring_task *tctx = current->io_uring; - struct file *file; + struct io_tctx_node *node; - file = xa_erase(&tctx->xa, index); - if (!file) + node = xa_erase(&tctx->xa, index); + if (!node) return; - if (tctx->last == file) + WARN_ON_ONCE(current != node->task); + WARN_ON_ONCE(list_empty(&node->ctx_node)); + + mutex_lock(&node->ctx->uring_lock); + list_del(&node->ctx_node); + mutex_unlock(&node->ctx->uring_lock); + + if (tctx->last == node->file) tctx->last = NULL; - fput(file); + fput(node->file); + kfree(node); } static void io_uring_clean_tctx(struct io_uring_task *tctx) { - struct file *file; + struct io_tctx_node *node; unsigned long index; - xa_for_each(&tctx->xa, index, file) + xa_for_each(&tctx->xa, index, node) io_uring_del_task_file(index); if (tctx->io_wq) { io_wq_put_and_exit(tctx->io_wq); @@ -8815,13 +8845,13 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) void __io_uring_files_cancel(struct files_struct *files) { struct io_uring_task *tctx = current->io_uring; - struct file *file; + struct io_tctx_node *node; unsigned long index; /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - xa_for_each(&tctx->xa, index, file) - io_uring_cancel_task_requests(file->private_data, files); + xa_for_each(&tctx->xa, index, node) + io_uring_cancel_task_requests(node->ctx, files); atomic_dec(&tctx->in_idle); if (files) @@ -8884,11 +8914,11 @@ void __io_uring_task_cancel(void) atomic_inc(&tctx->in_idle); if (tctx->sqpoll) { - struct file *file; + struct io_tctx_node *node; unsigned long index; - xa_for_each(&tctx->xa, index, file) - io_uring_cancel_sqpoll(file->private_data); + xa_for_each(&tctx->xa, index, node) + io_uring_cancel_sqpoll(node->ctx); } do { -- GitLab From d56d938b4bef3e1421a42023cdcd6e13c1f50831 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:13 +0000 Subject: [PATCH 283/839] io_uring: do ctx initiated file note removal Another preparation patch. When full quiesce is done on ctx exit, use task_work infra to remove corresponding to the ctx io_uring->xa entries. For that we use the back tctx map. Also use ->in_idle to prevent removing it while we traversing ->xa on cancellation, just ignore it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9a2cff0662e01..8a4ab86ae64f8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -987,6 +987,7 @@ static const struct io_op_def io_op_defs[] = { [IORING_OP_UNLINKAT] = {}, }; +static void io_uring_del_task_file(unsigned long index); static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files); @@ -8536,10 +8537,33 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) return executed; } +struct io_tctx_exit { + struct callback_head task_work; + struct completion completion; + unsigned long index; +}; + +static void io_tctx_exit_cb(struct callback_head *cb) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_exit *work; + + work = container_of(cb, struct io_tctx_exit, task_work); + /* + * When @in_idle, we're in cancellation and it's racy to remove the + * node. It'll be removed by the end of cancellation, just ignore it. + */ + if (!atomic_read(&tctx->in_idle)) + io_uring_del_task_file(work->index); + complete(&work->completion); +} + static void io_ring_exit_work(struct work_struct *work) { - struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, - exit_work); + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); + struct io_tctx_exit exit; + struct io_tctx_node *node; + int ret; /* * If we're doing polled IO and end up having requests being @@ -8550,6 +8574,26 @@ static void io_ring_exit_work(struct work_struct *work) do { io_uring_try_cancel_requests(ctx, NULL, NULL); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); + + mutex_lock(&ctx->uring_lock); + while (!list_empty(&ctx->tctx_list)) { + node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, + ctx_node); + exit.index = (unsigned long)node->file; + init_completion(&exit.completion); + init_task_work(&exit.task_work, io_tctx_exit_cb); + ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); + if (WARN_ON_ONCE(ret)) + continue; + wake_up_process(node->task); + + mutex_unlock(&ctx->uring_lock); + wait_for_completion(&exit.completion); + cond_resched(); + mutex_lock(&ctx->uring_lock); + } + mutex_unlock(&ctx->uring_lock); + io_ring_ctx_free(ctx); } -- GitLab From eebd2e37e662617a6b8041db75205f0a262ce870 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:14 +0000 Subject: [PATCH 284/839] io_uring: don't take task ring-file notes With ->flush() gone we're now leaving all uring file notes until the task dies/execs, so the ctx will not be freed until all tasks that have ever submit a request die. It was nicer with flush but not much, we could have locked as described ctx in many cases. Now we guarantee that ctx outlives all tctx in a sense that io_ring_exit_work() waits for all tctxs to drop their corresponding enties in ->xa, and ctx won't go away until then. Hence, additional io_uring file reference (a.k.a. task file notes) are not needed anymore. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a4ab86ae64f8..f448213267c84 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8821,11 +8821,9 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) node->file = file; node->task = current; - get_file(file); ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, node, GFP_KERNEL)); if (ret) { - fput(file); kfree(node); return ret; } @@ -8856,6 +8854,8 @@ static void io_uring_del_task_file(unsigned long index) struct io_uring_task *tctx = current->io_uring; struct io_tctx_node *node; + if (!tctx) + return; node = xa_erase(&tctx->xa, index); if (!node) return; @@ -8869,7 +8869,6 @@ static void io_uring_del_task_file(unsigned long index) if (tctx->last == node->file) tctx->last = NULL; - fput(node->file); kfree(node); } -- GitLab From baf186c4d345f5a105e63df01100936ad622f369 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:15 +0000 Subject: [PATCH 285/839] io_uring: index io_uring->xa by ctx not file We don't use task file notes anymore, and no need left in indexing task->io_uring->xa by file, and replace it with ctx. It's better design-wise, especially since we keep a dangling file, and so have to keep an eye on not dereferencing it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 +++++++++++------------- include/linux/io_uring.h | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f448213267c84..01a7fa4a4889e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -809,7 +809,6 @@ struct io_kiocb { struct io_tctx_node { struct list_head ctx_node; struct task_struct *task; - struct file *file; struct io_ring_ctx *ctx; }; @@ -8540,7 +8539,7 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) struct io_tctx_exit { struct callback_head task_work; struct completion completion; - unsigned long index; + struct io_ring_ctx *ctx; }; static void io_tctx_exit_cb(struct callback_head *cb) @@ -8554,7 +8553,7 @@ static void io_tctx_exit_cb(struct callback_head *cb) * node. It'll be removed by the end of cancellation, just ignore it. */ if (!atomic_read(&tctx->in_idle)) - io_uring_del_task_file(work->index); + io_uring_del_task_file((unsigned long)work->ctx); complete(&work->completion); } @@ -8579,7 +8578,7 @@ static void io_ring_exit_work(struct work_struct *work) while (!list_empty(&ctx->tctx_list)) { node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, ctx_node); - exit.index = (unsigned long)node->file; + exit.ctx = ctx; init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); @@ -8798,7 +8797,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, /* * Note that this task has used io_uring. We use it for cancelation purposes. */ -static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) +static int io_uring_add_task_file(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; struct io_tctx_node *node; @@ -8810,18 +8809,17 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) return ret; tctx = current->io_uring; } - if (tctx->last != file) { - void *old = xa_load(&tctx->xa, (unsigned long)file); + if (tctx->last != ctx) { + void *old = xa_load(&tctx->xa, (unsigned long)ctx); if (!old) { node = kmalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; node->ctx = ctx; - node->file = file; node->task = current; - ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, node, GFP_KERNEL)); if (ret) { kfree(node); @@ -8832,7 +8830,7 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) list_add(&node->ctx_node, &ctx->tctx_list); mutex_unlock(&ctx->uring_lock); } - tctx->last = file; + tctx->last = ctx; } /* @@ -8867,7 +8865,7 @@ static void io_uring_del_task_file(unsigned long index) list_del(&node->ctx_node); mutex_unlock(&node->ctx->uring_lock); - if (tctx->last == node->file) + if (tctx->last == node->ctx) tctx->last = NULL; kfree(node); } @@ -9166,7 +9164,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } submitted = to_submit; } else if (to_submit) { - ret = io_uring_add_task_file(ctx, f.file); + ret = io_uring_add_task_file(ctx); if (unlikely(ret)) goto out; mutex_lock(&ctx->uring_lock); @@ -9375,7 +9373,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) if (fd < 0) return fd; - ret = io_uring_add_task_file(ctx, file); + ret = io_uring_add_task_file(ctx); if (ret) { put_unused_fd(fd); return ret; diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 7cb7bd0e334c3..9761a0ec9f95c 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -18,7 +18,7 @@ struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; - struct file *last; + void *last; void *io_wq; struct percpu_counter inflight; atomic_t in_idle; -- GitLab From b5bb3a24f69da92e0ec2a301452364333e45be03 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:16 +0000 Subject: [PATCH 286/839] io_uring: warn when ring exit takes too long We use system_unbound_wq to run io_ring_exit_work(), so it's hard to monitor whether removal hang or not. Add WARN_ONCE to catch hangs. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 01a7fa4a4889e..945e54690b811 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8560,6 +8560,7 @@ static void io_tctx_exit_cb(struct callback_head *cb) static void io_ring_exit_work(struct work_struct *work) { struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); + unsigned long timeout = jiffies + HZ * 60 * 5; struct io_tctx_exit exit; struct io_tctx_node *node; int ret; @@ -8572,10 +8573,14 @@ static void io_ring_exit_work(struct work_struct *work) */ do { io_uring_try_cancel_requests(ctx, NULL, NULL); + + WARN_ON_ONCE(time_after(jiffies, timeout)); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); mutex_lock(&ctx->uring_lock); while (!list_empty(&ctx->tctx_list)) { + WARN_ON_ONCE(time_after(jiffies, timeout)); + node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, ctx_node); exit.ctx = ctx; -- GitLab From 1b00764f09b6912d25e188d972a7764a457926ba Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:17 +0000 Subject: [PATCH 287/839] io_uring: cancel reqs of all iowq's on ring exit io_ring_exit_work() have to cancel all requests, including those staying in io-wq, however it tries only cancellation of current tctx, which is NULL. If we've got task==NULL, use the ctx-to-tctx map to go over all tctx/io-wq and try cancellations on them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 945e54690b811..8c74c77999601 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8688,19 +8688,55 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } +static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + +static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) +{ + struct io_tctx_node *node; + enum io_wq_cancel cret; + bool ret = false; + + mutex_lock(&ctx->uring_lock); + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + /* + * io_wq will stay alive while we hold uring_lock, because it's + * killed after ctx nodes, which requires to take the lock. + */ + if (!tctx || !tctx->io_wq) + continue; + cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } + mutex_unlock(&ctx->uring_lock); + + return ret; +} + static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) { struct io_task_cancel cancel = { .task = task, .files = files, }; - struct task_struct *tctx_task = task ?: current; - struct io_uring_task *tctx = tctx_task->io_uring; + struct io_uring_task *tctx = task ? task->io_uring : NULL; while (1) { enum io_wq_cancel cret; bool ret = false; - if (tctx && tctx->io_wq) { + if (!task) { + ret |= io_uring_try_cancel_iowq(ctx); + } else if (tctx && tctx->io_wq) { + /* + * Cancels requests of all rings, not only @ctx, but + * it's fine as the task is in exit/exec. + */ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, &cancel, true); ret |= (cret != IO_WQ_CANCEL_NOTFOUND); -- GitLab From 678eeba481d8c161203382832a4379d507050aed Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:18 +0000 Subject: [PATCH 288/839] io-wq: warn on creating manager while exiting Add a simple warning making sure that nobody tries to create a new manager while we're under IO_WQ_BIT_EXIT. That can potentially happen due to racy work submission after final put. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 1bfdb86336e49..1ab9324e602f5 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -774,6 +774,8 @@ static int io_wq_fork_manager(struct io_wq *wq) if (wq->manager) return 0; + WARN_ON_ONCE(test_bit(IO_WQ_BIT_EXIT, &wq->state)); + init_completion(&wq->worker_done); atomic_set(&wq->worker_refs, 1); tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE); -- GitLab From 7c30f36a98ae488741178d69662e4f2baa53e7f6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 7 Mar 2021 11:54:28 +0100 Subject: [PATCH 289/839] io_uring: run __io_sq_thread() with the initial creds from io_uring_setup() With IORING_SETUP_ATTACH_WQ we should let __io_sq_thread() use the initial creds from each ctx. Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8c74c77999601..4d3333ca27a3a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -380,6 +380,7 @@ struct io_ring_ctx { /* Only used for accounting purposes */ struct mm_struct *mm_account; + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ struct wait_queue_head sqo_sq_wait; @@ -6719,7 +6720,13 @@ static int io_sq_thread(void *data) sqt_spin = false; cap_entries = !list_is_singular(&sqd->ctx_list); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + const struct cred *creds = NULL; + + if (ctx->sq_creds != current_cred()) + creds = override_creds(ctx->sq_creds); ret = __io_sq_thread(ctx, cap_entries); + if (creds) + revert_creds(creds); if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list))) sqt_spin = true; } @@ -7152,6 +7159,8 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx) io_put_sq_data(sqd); ctx->sq_data = NULL; + if (ctx->sq_creds) + put_cred(ctx->sq_creds); } } @@ -7890,6 +7899,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, goto err; } + ctx->sq_creds = get_current_cred(); ctx->sq_data = sqd; io_sq_thread_park(sqd); mutex_lock(&sqd->ctx_lock); -- GitLab From 041474885e9707a38fad081abe30159eb6d463f9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 7 Mar 2021 11:54:29 +0100 Subject: [PATCH 290/839] io_uring: kill io_sq_thread_fork() and return -EOWNERDEAD if the sq_thread is gone This brings the behavior back in line with what 5.11 and earlier did, and this is no longer needed with the improved handling of creds not needing to do unshare(). Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4d3333ca27a3a..7cf96be691d89 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -336,7 +336,6 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; - unsigned int sqo_exec: 1; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -6786,7 +6785,6 @@ static int io_sq_thread(void *data) sqd->thread = NULL; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - ctx->sqo_exec = 1; io_ring_set_wakeup_flag(ctx); } @@ -7846,26 +7844,6 @@ void __io_uring_free(struct task_struct *tsk) tsk->io_uring = NULL; } -static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx) -{ - struct task_struct *tsk; - int ret; - - clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - reinit_completion(&sqd->parked); - ctx->sqo_exec = 0; - sqd->task_pid = current->pid; - tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); - if (IS_ERR(tsk)) - return PTR_ERR(tsk); - ret = io_uring_alloc_task_context(tsk, ctx); - if (ret) - set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - sqd->thread = tsk; - wake_up_new_task(tsk); - return ret; -} - static int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p) { @@ -9199,13 +9177,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); - if (unlikely(ctx->sqo_exec)) { - ret = io_sq_thread_fork(ctx->sq_data, ctx); - if (ret) - goto out; - ctx->sqo_exec = 0; - } ret = -EOWNERDEAD; + if (unlikely(ctx->sq_data->thread == NULL)) { + goto out; + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { -- GitLab From 9f377622a484de0818c49ee01e0ab4eedf6acd81 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 6 Mar 2021 12:04:38 +0800 Subject: [PATCH 291/839] erofs: fix bio->bi_max_vecs behavior change Martin reported an issue that directory read could be hung on the latest -rc kernel with some certain image. The root cause is that commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs is set as actual allocated vector number rather than the requested number now. Let's avoid using .bi_max_vecs completely instead. Link: https://lore.kernel.org/r/20210306040438.8084-1-hsiangkao@aol.com Reported-by: Martin DEVERA Reviewed-by: Chao Yu [ Gao Xiang: note that <= 5.11 kernels are not impacted. ] Signed-off-by: Gao Xiang --- fs/erofs/data.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index f88851c5c250b..1249e74b3bf04 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -129,6 +129,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, struct page *page, erofs_off_t *last_block, unsigned int nblocks, + unsigned int *eblks, bool ra) { struct inode *const inode = mapping->host; @@ -145,8 +146,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio, /* note that for readpage case, bio also equals to NULL */ if (bio && - /* not continuous */ - *last_block + 1 != current_block) { + (*last_block + 1 != current_block || !*eblks)) { submit_bio_retry: submit_bio(bio); bio = NULL; @@ -216,7 +216,8 @@ submit_bio_retry: if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE)) nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE); - bio = bio_alloc(GFP_NOIO, bio_max_segs(nblocks)); + *eblks = bio_max_segs(nblocks); + bio = bio_alloc(GFP_NOIO, *eblks); bio->bi_end_io = erofs_readendio; bio_set_dev(bio, sb->s_bdev); @@ -229,16 +230,8 @@ submit_bio_retry: /* out of the extent or bio is full */ if (err < PAGE_SIZE) goto submit_bio_retry; - + --*eblks; *last_block = current_block; - - /* shift in advance in case of it followed by too many gaps */ - if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { - /* err should reassign to 0 after submitting */ - err = 0; - goto submit_bio_out; - } - return bio; err_out: @@ -252,7 +245,6 @@ has_updated: /* if updated manually, continuous pages has a gap */ if (bio) -submit_bio_out: submit_bio(bio); return err ? ERR_PTR(err) : NULL; } @@ -264,23 +256,26 @@ submit_bio_out: static int erofs_raw_access_readpage(struct file *file, struct page *page) { erofs_off_t last_block; + unsigned int eblks; struct bio *bio; trace_erofs_readpage(page, true); bio = erofs_read_raw_page(NULL, page->mapping, - page, &last_block, 1, false); + page, &last_block, 1, &eblks, false); if (IS_ERR(bio)) return PTR_ERR(bio); - DBG_BUGON(bio); /* since we have only one bio -- must be NULL */ + if (bio) + submit_bio(bio); return 0; } static void erofs_raw_access_readahead(struct readahead_control *rac) { erofs_off_t last_block; + unsigned int eblks; struct bio *bio = NULL; struct page *page; @@ -291,7 +286,7 @@ static void erofs_raw_access_readahead(struct readahead_control *rac) prefetchw(&page->flags); bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block, - readahead_count(rac), true); + readahead_count(rac), &eblks, true); /* all the page errors are ignored when readahead */ if (IS_ERR(bio)) { @@ -305,7 +300,6 @@ static void erofs_raw_access_readahead(struct readahead_control *rac) put_page(page); } - /* the rare case (end in gaps) */ if (bio) submit_bio(bio); } -- GitLab From 46eb1701c046cc18c032fa68f3c8ccbf24483ee4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Tue, 23 Feb 2021 17:02:40 +0100 Subject: [PATCH 292/839] hrtimer: Update softirq_expires_next correctly after __hrtimer_get_next_event() hrtimer_force_reprogram() and hrtimer_interrupt() invokes __hrtimer_get_next_event() to find the earliest expiry time of hrtimer bases. __hrtimer_get_next_event() does not update cpu_base::[softirq_]_expires_next to preserve reprogramming logic. That needs to be done at the callsites. hrtimer_force_reprogram() updates cpu_base::softirq_expires_next only when the first expiring timer is a softirq timer and the soft interrupt is not activated. That's wrong because cpu_base::softirq_expires_next is left stale when the first expiring timer of all bases is a timer which expires in hard interrupt context. hrtimer_interrupt() does never update cpu_base::softirq_expires_next which is wrong too. That becomes a problem when clock_settime() sets CLOCK_REALTIME forward and the first soft expiring timer is in the CLOCK_REALTIME_SOFT base. Setting CLOCK_REALTIME forward moves the clock MONOTONIC based expiry time of that timer before the stale cpu_base::softirq_expires_next. cpu_base::softirq_expires_next is cached to make the check for raising the soft interrupt fast. In the above case the soft interrupt won't be raised until clock monotonic reaches the stale cpu_base::softirq_expires_next value. That's incorrect, but what's worse it that if the softirq timer becomes the first expiring timer of all clock bases after the hard expiry timer has been handled the reprogramming of the clockevent from hrtimer_interrupt() will result in an interrupt storm. That happens because the reprogramming does not use cpu_base::softirq_expires_next, it uses __hrtimer_get_next_event() which returns the actual expiry time. Once clock MONOTONIC reaches cpu_base::softirq_expires_next the soft interrupt is raised and the storm subsides. Change the logic in hrtimer_force_reprogram() to evaluate the soft and hard bases seperately, update softirq_expires_next and handle the case when a soft expiring timer is the first of all bases by comparing the expiry times and updating the required cpu base fields. Split this functionality into a separate function to be able to use it in hrtimer_interrupt() as well without copy paste. Fixes: 5da70160462e ("hrtimer: Implement support for softirq based hrtimers") Reported-by: Mikael Beckius Suggested-by: Thomas Gleixner Tested-by: Mikael Beckius Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210223160240.27518-1-anna-maria@linutronix.de --- kernel/time/hrtimer.c | 60 ++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 743c852e10f23..788b9d137de4c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -546,8 +546,11 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, } /* - * Recomputes cpu_base::*next_timer and returns the earliest expires_next but - * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. + * Recomputes cpu_base::*next_timer and returns the earliest expires_next + * but does not set cpu_base::*expires_next, that is done by + * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating + * cpu_base::*expires_next right away, reprogramming logic would no longer + * work. * * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, * those timers will get run whenever the softirq gets handled, at the end of @@ -588,6 +591,37 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_ return expires_next; } +static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) +{ + ktime_t expires_next, soft = KTIME_MAX; + + /* + * If the soft interrupt has already been activated, ignore the + * soft bases. They will be handled in the already raised soft + * interrupt. + */ + if (!cpu_base->softirq_activated) { + soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + /* + * Update the soft expiry time. clock_settime() might have + * affected it. + */ + cpu_base->softirq_expires_next = soft; + } + + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * If a softirq timer is expiring first, update cpu_base->next_timer + * and program the hardware with the soft expiry time. + */ + if (expires_next > soft) { + cpu_base->next_timer = cpu_base->softirq_next_timer; + expires_next = soft; + } + + return expires_next; +} + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; @@ -628,23 +662,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - /* - * Find the current next expiration time. - */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); - - if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { - /* - * When the softirq is activated, hrtimer has to be - * programmed with the first hard hrtimer because soft - * timer interrupt could occur too late. - */ - if (cpu_base->softirq_activated) - expires_next = __hrtimer_get_next_event(cpu_base, - HRTIMER_ACTIVE_HARD); - else - cpu_base->softirq_expires_next = expires_next; - } + expires_next = hrtimer_update_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -1644,8 +1662,8 @@ retry: __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); - /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + /* Reevaluate the clock bases for the [soft] next expiry */ + expires_next = hrtimer_update_next_event(cpu_base); /* * Store the new expiry value so the migration code can verify * against it. -- GitLab From eba8e1af5a61e61e5d77e1dfe1e8e20735ebc9c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Feb 2021 19:52:20 +0100 Subject: [PATCH 293/839] s390/time,idle: get rid of unsigned long long Get rid of unsigned long long, and use unsigned long instead everywhere. The usage of unsigned long long is a leftover from 31 bit kernel support. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/idle.h | 12 ++++++------ arch/s390/include/asm/timex.h | 36 +++++++++++++++++------------------ arch/s390/kernel/idle.c | 12 ++++++------ arch/s390/kernel/time.c | 28 +++++++++++++-------------- arch/s390/kvm/interrupt.c | 2 +- drivers/s390/cio/device_fsm.c | 2 +- 6 files changed, 46 insertions(+), 46 deletions(-) diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index b04f6a794cdfb..5cea629c548e2 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -14,12 +14,12 @@ struct s390_idle_data { seqcount_t seqcount; - unsigned long long idle_count; - unsigned long long idle_time; - unsigned long long clock_idle_enter; - unsigned long long clock_idle_exit; - unsigned long long timer_idle_enter; - unsigned long long timer_idle_exit; + unsigned long idle_count; + unsigned long idle_time; + unsigned long clock_idle_enter; + unsigned long clock_idle_exit; + unsigned long timer_idle_enter; + unsigned long timer_idle_exit; unsigned long mt_cycles_enter[8]; }; diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index c4e23e9256654..f6326c6d2abe8 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -98,10 +98,10 @@ extern unsigned char ptff_function_mask[16]; /* Query TOD offset result */ struct ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; + unsigned long physical_clock; + unsigned long tod_offset; + unsigned long logical_tod_offset; + unsigned long tod_epoch_difference; } __packed; static inline int ptff_query(unsigned int nr) @@ -151,9 +151,9 @@ struct ptff_qui { rc; \ }) -static inline unsigned long long local_tick_disable(void) +static inline unsigned long local_tick_disable(void) { - unsigned long long old; + unsigned long old; old = S390_lowcore.clock_comparator; S390_lowcore.clock_comparator = clock_comparator_max; @@ -161,7 +161,7 @@ static inline unsigned long long local_tick_disable(void) return old; } -static inline void local_tick_enable(unsigned long long comp) +static inline void local_tick_enable(unsigned long comp) { S390_lowcore.clock_comparator = comp; set_clock_comparator(S390_lowcore.clock_comparator); @@ -169,9 +169,9 @@ static inline void local_tick_enable(unsigned long long comp) #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -typedef unsigned long long cycles_t; +typedef unsigned long cycles_t; -static inline unsigned long long get_tod_clock(void) +static inline unsigned long get_tod_clock(void) { union tod_clock clk; @@ -179,10 +179,10 @@ static inline unsigned long long get_tod_clock(void) return clk.tod; } -static inline unsigned long long get_tod_clock_fast(void) +static inline unsigned long get_tod_clock_fast(void) { #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - unsigned long long clk; + unsigned long clk; asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; @@ -208,9 +208,9 @@ extern union tod_clock tod_clock_base; * Therefore preemption must be disabled, otherwise the returned * value is not guaranteed to be monotonic. */ -static inline unsigned long long get_tod_clock_monotonic(void) +static inline unsigned long get_tod_clock_monotonic(void) { - unsigned long long tod; + unsigned long tod; preempt_disable_notrace(); tod = get_tod_clock() - tod_clock_base.tod; @@ -237,7 +237,7 @@ static inline unsigned long long get_tod_clock_monotonic(void) * -> ns = (th * 125) + ((tl * 125) >> 9); * */ -static inline unsigned long long tod_to_ns(unsigned long long todval) +static inline unsigned long tod_to_ns(unsigned long todval) { return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } @@ -249,10 +249,10 @@ static inline unsigned long long tod_to_ns(unsigned long long todval) * * Returns: true if a is later than b */ -static inline int tod_after(unsigned long long a, unsigned long long b) +static inline int tod_after(unsigned long a, unsigned long b) { if (MACHINE_HAS_SCC) - return (long long) a > (long long) b; + return (long) a > (long) b; return a > b; } @@ -263,10 +263,10 @@ static inline int tod_after(unsigned long long a, unsigned long long b) * * Returns: true if a is later than b */ -static inline int tod_after_eq(unsigned long long a, unsigned long long b) +static inline int tod_after_eq(unsigned long a, unsigned long b) { if (MACHINE_HAS_SCC) - return (long long) a >= (long long) b; + return (long) a >= (long) b; return a >= b; } diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 812073ea073ee..4bf1ee293f2b3 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -47,7 +47,7 @@ void account_idle_time_irq(void) void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); - unsigned long long idle_time; + unsigned long idle_time; unsigned long psw_mask; /* Wait for external, I/O or machine check interrupt. */ @@ -73,7 +73,7 @@ static ssize_t show_idle_count(struct device *dev, struct device_attribute *attr, char *buf) { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; + unsigned long idle_count; unsigned int seq; do { @@ -82,14 +82,14 @@ static ssize_t show_idle_count(struct device *dev, if (READ_ONCE(idle->clock_idle_enter)) idle_count++; } while (read_seqcount_retry(&idle->seqcount, seq)); - return sprintf(buf, "%llu\n", idle_count); + return sprintf(buf, "%lu\n", idle_count); } DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; + unsigned long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned int seq; @@ -109,14 +109,14 @@ static ssize_t show_idle_time(struct device *dev, } } idle_time += in_idle; - return sprintf(buf, "%llu\n", idle_time >> 12); + return sprintf(buf, "%lu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit, in_idle; + unsigned long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 06bcfa6366384..165da961f9019 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -68,10 +68,10 @@ EXPORT_SYMBOL(s390_epoch_delta_notifier); unsigned char ptff_function_mask[16]; -static unsigned long long lpar_offset; -static unsigned long long initial_leap_seconds; -static unsigned long long tod_steering_end; -static long long tod_steering_delta; +static unsigned long lpar_offset; +static unsigned long initial_leap_seconds; +static unsigned long tod_steering_end; +static long tod_steering_delta; /* * Get time offsets with PTFF @@ -96,7 +96,7 @@ void __init time_early_init(void) /* get initial leap seconds */ if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) - initial_leap_seconds = (unsigned long long) + initial_leap_seconds = (unsigned long) ((long) qui.old_leap * 4096000000L); } @@ -222,7 +222,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, static u64 read_tod_clock(struct clocksource *cs) { - unsigned long long now, adj; + unsigned long now, adj; preempt_disable(); /* protect from changes to steering parameters */ now = get_tod_clock(); @@ -362,7 +362,7 @@ static inline int check_sync_clock(void) * Apply clock delta to the global data structures. * This is called once on the CPU that performed the clock sync. */ -static void clock_sync_global(unsigned long long delta) +static void clock_sync_global(unsigned long delta) { unsigned long now, adj; struct ptff_qto qto; @@ -378,7 +378,7 @@ static void clock_sync_global(unsigned long long delta) -(adj >> 15) : (adj >> 15); tod_steering_delta += delta; if ((abs(tod_steering_delta) >> 48) != 0) - panic("TOD clock sync offset %lli is too large to drift\n", + panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); vdso_data->arch_data.tod_steering_end = tod_steering_end; @@ -394,7 +394,7 @@ static void clock_sync_global(unsigned long long delta) * Apply clock delta to the per-CPU data structures of this CPU. * This is called for each online CPU after the call to clock_sync_global. */ -static void clock_sync_local(unsigned long long delta) +static void clock_sync_local(unsigned long delta) { /* Add the delta to the clock comparator. */ if (S390_lowcore.clock_comparator != clock_comparator_max) { @@ -418,7 +418,7 @@ static void __init time_init_wq(void) struct clock_sync_data { atomic_t cpus; int in_sync; - unsigned long long clock_delta; + unsigned long clock_delta; }; /* @@ -538,7 +538,7 @@ static int stpinfo_valid(void) static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; - unsigned long long clock_delta, flags; + u64 clock_delta, flags; static int first; int rc; @@ -720,8 +720,8 @@ static ssize_t ctn_id_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ret = sprintf(buf, "%016lx\n", + *(unsigned long *) stp_info.ctnid); mutex_unlock(&stp_mutex); return ret; } @@ -794,7 +794,7 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev, if (!stzi.lsoib.p) return sprintf(buf, "0,0\n"); - return sprintf(buf, "%llu,%d\n", + return sprintf(buf, "%lu,%d\n", tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, stzi.lsoib.nlso - stzi.lsoib.also); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e3183bd059107..d548d60caed25 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1287,7 +1287,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu) /* already expired? */ if (cputm >> 63) return 0; - return min(sltime, tod_to_ns(cputm)); + return min_t(u64, sltime, tod_to_ns(cputm)); } } else if (cpu_timer_interrupts_enabled(vcpu)) { sltime = kvm_s390_get_cpu_timer(vcpu); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 6420b197bb050..05e136cfb8be7 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -47,7 +47,7 @@ static void ccw_timeout_log(struct ccw_device *cdev) orb = &private->orb; cc = stsch(sch->schid, &schib); - printk(KERN_WARNING "cio: ccw device timeout occurred at %llx, " + printk(KERN_WARNING "cio: ccw device timeout occurred at %lx, " "device information:\n", get_tod_clock()); printk(KERN_WARNING "cio: orb:\n"); print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, -- GitLab From f9d8cbf33e9fceee671a49760cdcfa4be6a55102 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 12:47:41 +0100 Subject: [PATCH 294/839] s390/topology: remove always false if check The cpumask being checked in cpu_group_map() must have at least one cpu set; therefore remove the check. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/topology.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e7ce447651b90..bfcc327acc6b2 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -76,8 +76,6 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c } info = info->next; } - if (cpumask_empty(&mask)) - cpumask_copy(&mask, cpumask_of(cpu)); break; case TOPOLOGY_MODE_PACKAGE: cpumask_copy(&mask, cpu_present_mask); -- GitLab From c41b20de1a7c9a41ceab293a6f08927312ada679 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 25 Feb 2021 14:28:52 +0100 Subject: [PATCH 295/839] s390/cpumf: remove 60 seconds read limit Remove the 60 seconds read interval limit. Do not impose any limit at all and allow read of complete counter sets. Signed-off-by: Thomas Richter Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_diag.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index db4877bbb9aa5..5eebc912df5a9 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -29,9 +29,7 @@ #include #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ -#define CF_DIAG_MIN_INTERVAL 60 /* Minimum counter set read */ /* interval in seconds */ -static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL; static unsigned int cf_diag_cpu_speed; static debug_info_t *cf_diag_dbg; @@ -729,7 +727,6 @@ static DEFINE_MUTEX(cf_diag_ctrset_mutex); static struct cf_diag_ctrset { unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ - time64_t lastread; /* Epoch counter set last read */ } cf_diag_ctrset; static void cf_diag_ctrset_clear(void) @@ -866,27 +863,16 @@ static int cf_diag_all_read(unsigned long arg) { struct cf_diag_call_on_cpu_parm p; cpumask_var_t mask; - time64_t now; - int rc = 0; + int rc; debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - now = ktime_get_seconds(); - if (cf_diag_ctrset.lastread + cf_diag_interval > now) { - debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld " - " lastread %lld\n", __func__, now, - cf_diag_ctrset.lastread); - rc = -EAGAIN; - goto out; - } else { - cf_diag_ctrset.lastread = now; - } + p.sets = cf_diag_ctrset.ctrset; cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); rc = cf_diag_all_copy(arg, mask); -out: free_cpumask_var(mask); debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); return rc; -- GitLab From 7449ca87312a5b0390b765be65a126e6e5451026 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 26 Feb 2021 11:21:05 +0100 Subject: [PATCH 296/839] s390/zcore: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. See commit 7dd541a3fb34 ("s390: no need to check return value of debugfs_create functions"). Signed-off-by: Alexander Egorenkov Signed-off-by: Heiko Carstens --- drivers/s390/char/zcore.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1515fdc3c1abd..1bf507c00eb69 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -293,28 +293,12 @@ static int __init zcore_init(void) goto fail; zcore_dir = debugfs_create_dir("zcore" , NULL); - if (!zcore_dir) { - rc = -ENOMEM; - goto fail; - } zcore_reipl_file = debugfs_create_file("reipl", S_IRUSR, zcore_dir, NULL, &zcore_reipl_fops); - if (!zcore_reipl_file) { - rc = -ENOMEM; - goto fail_dir; - } zcore_hsa_file = debugfs_create_file("hsa", S_IRUSR|S_IWUSR, zcore_dir, NULL, &zcore_hsa_fops); - if (!zcore_hsa_file) { - rc = -ENOMEM; - goto fail_reipl_file; - } - return 0; -fail_reipl_file: - debugfs_remove(zcore_reipl_file); -fail_dir: - debugfs_remove(zcore_dir); + return 0; fail: diag308(DIAG308_REL_HSA, NULL); return rc; -- GitLab From dabdfac0e85c8c1e811b10c08742f49285e78a17 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 25 Feb 2021 14:28:52 +0100 Subject: [PATCH 297/839] s390/zcore: release dump save area on restart or power down The zFCP/NVMe standalone dumper is supposed to release the dump save area resource as soon as possible but might fail to do so, for instance, if it crashes. To avoid this situation, register a reboot notifier and ensure the dump save area resource is released on reboot or power down. Signed-off-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- drivers/s390/char/zcore.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1bf507c00eb69..bd3c724bf695f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -238,6 +239,28 @@ static int __init zcore_reipl_init(void) return 0; } +static int zcore_reboot_and_on_panic_handler(struct notifier_block *self, + unsigned long event, + void *data) +{ + if (hsa_available) + release_hsa(); + + return NOTIFY_OK; +} + +static struct notifier_block zcore_reboot_notifier = { + .notifier_call = zcore_reboot_and_on_panic_handler, + /* we need to be notified before reipl and kdump */ + .priority = INT_MAX, +}; + +static struct notifier_block zcore_on_panic_notifier = { + .notifier_call = zcore_reboot_and_on_panic_handler, + /* we need to be notified before reipl and kdump */ + .priority = INT_MAX, +}; + static int __init zcore_init(void) { unsigned char arch; @@ -298,6 +321,9 @@ static int __init zcore_init(void) zcore_hsa_file = debugfs_create_file("hsa", S_IRUSR|S_IWUSR, zcore_dir, NULL, &zcore_hsa_fops); + register_reboot_notifier(&zcore_reboot_notifier); + atomic_notifier_chain_register(&panic_notifier_list, &zcore_on_panic_notifier); + return 0; fail: diag308(DIAG308_REL_HSA, NULL); -- GitLab From 46b635b6abcf2ee106d36f2e84e942f56030c8f5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 26 Feb 2021 15:00:56 +0100 Subject: [PATCH 298/839] s390/cpumf: rename header file to hwctrset.h Signed-off-by: Thomas Richter Suggested-by: Hendrick Brueckner Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/uapi/asm/{perf_cpum_cf_diag.h => hwctrset.h} | 0 arch/s390/kernel/perf_cpum_cf_diag.c | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/s390/include/uapi/asm/{perf_cpum_cf_diag.h => hwctrset.h} (100%) diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/hwctrset.h similarity index 100% rename from arch/s390/include/uapi/asm/perf_cpum_cf_diag.h rename to arch/s390/include/uapi/asm/hwctrset.h diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5eebc912df5a9..bc302b86ce28f 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -26,7 +26,7 @@ #include #include -#include +#include #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ /* interval in seconds */ -- GitLab From 51c44babdc19aaf882e1213325a0ba291573308f Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 1 Mar 2021 20:01:33 +0800 Subject: [PATCH 299/839] s390/cio: return -EFAULT if copy_to_user() fails The copy_to_user() function returns the number of bytes remaining to be copied, but we want to return -EFAULT if the copy doesn't complete. Fixes: e01bcdd61320 ("vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl") Signed-off-by: Wang Qing Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/1614600093-13992-1-git-send-email-wangqing@vivo.com Signed-off-by: Heiko Carstens --- drivers/s390/cio/vfio_ccw_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 68106be4ba7a1..557d0b888cd3e 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -543,7 +543,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_GET_REGION_INFO: { @@ -561,7 +561,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_GET_IRQ_INFO: { -- GitLab From 942df4be7ab40195e2a839e9de81951a5862bc5b Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 1 Mar 2021 20:08:21 +0800 Subject: [PATCH 300/839] s390/crypto: return -EFAULT if copy_to_user() fails The copy_to_user() function returns the number of bytes remaining to be copied, but we want to return -EFAULT if the copy doesn't complete. Fixes: e06670c5fe3b ("s390: vfio-ap: implement VFIO_DEVICE_GET_INFO ioctl") Signed-off-by: Wang Qing Reviewed-by: Tony Krowiak Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/1614600502-16714-1-git-send-email-wangqing@vivo.com Signed-off-by: Heiko Carstens --- drivers/s390/crypto/vfio_ap_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 41fc2e4135fe1..1ffdd411201cd 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1286,7 +1286,7 @@ static int vfio_ap_mdev_get_device_info(unsigned long arg) info.num_regions = 0; info.num_irqs = 0; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, -- GitLab From 1c0a9c7997325ef7a8f71fca2e1e6091e757c94b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 25 Feb 2021 14:15:36 +0800 Subject: [PATCH 301/839] s390/cpumf: remove unneeded semicolon Fix the following coccicheck warnings: ./arch/s390/kernel/perf_cpum_cf.c:272:2-3: Unneeded semicolon. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/1614233736-87331-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 0eb1d1cc53a88..b3beef64d3d48 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -269,7 +269,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) case CPUMF_CTR_SET_MAX: /* The counter could not be associated to a counter set */ return -EINVAL; - }; + } /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { -- GitLab From db232eb42c21b7bca8e46c7e74676754b8fe06ea Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 Aug 2020 21:56:16 -0700 Subject: [PATCH 302/839] s390/tty3270: avoid comma separated statements Use semicolons and braces. Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/9988babd9cca4ac841961d9f0bbf5e49caa87659.1598331149.git.joe@perches.com Signed-off-by: Heiko Carstens --- drivers/s390/char/tty3270.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 15692449a1c3f..307a80f85c07d 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -424,8 +424,10 @@ tty3270_update(struct timer_list *t) * last output position matches the start address * of this line. */ - if (s->string[1] == sba[0] && s->string[2] == sba[1]) - str += 3, len -= 3; + if (s->string[1] == sba[0] && s->string[2] == sba[1]) { + str += 3; + len -= 3; + } if (raw3270_request_add_data(wrq, str, len) != 0) break; list_del_init(&s->update); -- GitLab From d9c48a948d29bcb22f4fe61a81b718ef6de561a0 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Mon, 1 Mar 2021 19:33:24 +0100 Subject: [PATCH 303/839] s390/cio: return -EFAULT if copy_to_user() fails Fixes: 120e214e504f ("vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls") Signed-off-by: Eric Farman Signed-off-by: Heiko Carstens --- drivers/s390/cio/vfio_ccw_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 557d0b888cd3e..767ac41686fe2 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -582,7 +582,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (info.count == -1) return -EINVAL; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_SET_IRQS: { -- GitLab From d0ed78e1780eb3738f9c106fbaff6a1181017cd3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Mar 2021 21:02:49 +0100 Subject: [PATCH 304/839] s390,alpha: make TMPFS_INODE64 available again Both s390 and alpha have been switched to 64-bit ino_t with commit 96c0a6a72d18 ("s390,alpha: switch to 64-bit ino_t"). Therefore enable TMPFS_INODE64 for both architectures again. Cc: Richard Henderson Cc: Ivan Kokshaysky Link: https://lore.kernel.org/linux-mm/YCV7QiyoweJwvN+m@osiris/ Signed-off-by: Heiko Carstens --- fs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/Kconfig b/fs/Kconfig index 462253ae483a3..a55bda4233bbe 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -203,7 +203,7 @@ config TMPFS_XATTR config TMPFS_INODE64 bool "Use 64-bit ino_t by default in tmpfs" - depends on TMPFS && 64BIT && !(S390 || ALPHA) + depends on TMPFS && 64BIT default n help tmpfs has historically used only inode numbers as wide as an unsigned -- GitLab From d50aa69d36be43fa8927fd8ef305c4af88b6b450 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Mar 2021 21:48:16 +0100 Subject: [PATCH 305/839] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 16 +++------------- arch/s390/configs/defconfig | 11 ++--------- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 02056b024091e..dc0b69058ac47 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -275,9 +275,9 @@ CONFIG_IP_VS_DH=m CONFIG_IP_VS_SH=m CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_TWOS=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m @@ -298,7 +298,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -481,7 +480,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set @@ -581,7 +579,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -635,6 +632,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -714,12 +712,8 @@ CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -731,7 +725,6 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4=m @@ -796,12 +789,9 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y -CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y -CONFIG_DEBUG_VM_RB=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -838,6 +828,7 @@ CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_DEBUG_ENTRY=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -861,4 +852,3 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m -CONFIG_DEBUG_ENTRY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index bac721a501da1..320379da96d97 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -266,9 +266,9 @@ CONFIG_IP_VS_DH=m CONFIG_IP_VS_SH=m CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_TWOS=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m @@ -289,7 +289,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m @@ -473,7 +472,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CADENCE is not set @@ -573,7 +571,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -623,6 +620,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -703,12 +701,8 @@ CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m @@ -720,7 +714,6 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index acf982a2ae4c5..039fd499b4a93 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -61,7 +61,6 @@ CONFIG_RAW_DRIVER=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set -# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -- GitLab From 78c7cccaab9d5f9ead44579d79dd7d13a05aec7e Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 26 Feb 2021 09:40:47 +0100 Subject: [PATCH 306/839] s390: remove IBM_PARTITION and CONFIGFS_FS from zfcpdump defconfig Remove by zfcpdump unused CONFIG_IBM_PARTITION and CONFIG_CONFIGFS_FS. Signed-off-by: Alexander Egorenkov Reviewed-by: Steffen Maier Signed-off-by: Heiko Carstens --- arch/s390/configs/zfcpdump_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 039fd499b4a93..76123a4b26ab0 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -26,7 +26,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set # CONFIG_GCC_PLUGINS is not set CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set @@ -64,7 +63,6 @@ CONFIG_RAW_DRIVER=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" -- GitLab From 7a05293af39fc716d0f51c0164cbb727302396a2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 3 Mar 2021 19:33:05 +0000 Subject: [PATCH 307/839] MIPS: boot/compressed: Copy DTB to aligned address Since 5.12-rc1, the Device Tree blob must now be properly aligned. Therefore, the decompress routine must be careful to copy the blob at the next aligned address after the kernel image. This commit fixes the kernel sometimes not booting with a Device Tree blob appended to it. Fixes: 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") Signed-off-by: Paul Cercueil Acked-by: Rob Herring Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 8 ++++++++ arch/mips/kernel/vmlinux.lds.S | 2 ++ 2 files changed, 10 insertions(+) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index e3946b06e840a..3d70d15ada286 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -14,6 +14,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -120,6 +121,13 @@ void decompress_kernel(unsigned long boot_heap_start) /* last four bytes is always image size in little endian */ image_size = get_unaligned_le32((void *)&__image_end - 4); + /* The device tree's address must be properly aligned */ + image_size = ALIGN(image_size, STRUCT_ALIGNMENT); + + puts("Copy device tree to address "); + puthex(VMLINUX_LOAD_ADDRESS_ULL + image_size); + puts("\n"); + /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, __appended_dtb, dtb_size); diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index c1c345be04ffd..4b4e39b7c79bc 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS } #ifdef CONFIG_MIPS_ELF_APPENDED_DTB + STRUCT_ALIGN(); .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { *(.appended_dtb) KEEP(*(.appended_dtb)) @@ -172,6 +173,7 @@ SECTIONS #endif #ifdef CONFIG_MIPS_RAW_APPENDED_DTB + STRUCT_ALIGN(); __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -- GitLab From 02fc409540303801994d076fcdb7064bd634dbf3 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 28 Feb 2021 09:33:19 +0800 Subject: [PATCH 308/839] cpufreq: qcom-hw: fix dereferencing freed memory 'data' Commit 67fc209b527d ("cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks") introduces an issue of dereferencing freed memory 'data'. Fix it. Fixes: 67fc209b527d ("cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Shawn Guo Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index d3c23447b892d..bee5d67a8227c 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -374,7 +374,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) error: kfree(data); unmap_base: - iounmap(data->base); + iounmap(base); release_region: release_mem_region(res->start, resource_size(res)); return ret; -- GitLab From 536eb97abeba857126ad055de5923fa592acef25 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Mar 2021 10:04:23 +0000 Subject: [PATCH 309/839] cpufreq: qcom-hw: Fix return value check in qcom_cpufreq_hw_cpu_init() In case of error, the function ioremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: 67fc209b527d ("cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Acked-by: Shawn Guo Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index bee5d67a8227c..f86859bf76f11 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -317,9 +317,9 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) } base = ioremap(res->start, resource_size(res)); - if (IS_ERR(base)) { + if (!base) { dev_err(dev, "failed to map resource %pR\n", res); - ret = PTR_ERR(base); + ret = -ENOMEM; goto release_region; } -- GitLab From fbb31cb805fd3574d3be7defc06a7fd2fd9af7d2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 18 Feb 2021 22:23:26 +0000 Subject: [PATCH 310/839] cpufreq: blacklist Arm Vexpress platforms in cpufreq-dt-platdev Add "arm,vexpress" to cpufreq-dt-platdev blacklist since the actual scaling is handled by the firmware cpufreq drivers(scpi, scmi and vexpress-spc). Signed-off-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3ba2f716fe978..5e07065ec22f7 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id whitelist[] __initconst = { static const struct of_device_id blacklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "arm,vexpress", }, + { .compatible = "calxeda,highbank", }, { .compatible = "calxeda,ecx-2000", }, -- GitLab From 6c810cf20feef0d4338e9b424ab7f2644a8b353e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 3 Mar 2021 02:16:04 +0100 Subject: [PATCH 311/839] crypto: mips/poly1305 - enable for all MIPS processors The MIPS Poly1305 implementation is generic MIPS code written such as to support down to the original MIPS I and MIPS III ISA for the 32-bit and 64-bit variant respectively. Lift the current limitation then to enable code for MIPSr1 ISA or newer processors only and have it available for all MIPS processors. Signed-off-by: Maciej W. Rozycki Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") Cc: stable@vger.kernel.org # v5.5+ Acked-by: Jason A. Donenfeld Signed-off-by: Thomas Bogendoerfer --- arch/mips/crypto/Makefile | 4 ++-- crypto/Kconfig | 2 +- drivers/net/Kconfig | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 8e1deaf00e0c0..5e4105cccf9fa 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o poly1305-mips-y := poly1305-core.o poly1305-glue.o -perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 -perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 +perlasm-flavour-$(CONFIG_32BIT) := o32 +perlasm-flavour-$(CONFIG_64BIT) := 64 quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) diff --git a/crypto/Kconfig b/crypto/Kconfig index 15c9c28d9f533..5809cc198fa7c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -767,7 +767,7 @@ config CRYPTO_POLY1305_X86_64 config CRYPTO_POLY1305_MIPS tristate "Poly1305 authenticator algorithm (MIPS optimized)" - depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + depends on MIPS select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_MD4 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b09bed554f266..bcd31f458d1ac 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -94,7 +94,7 @@ config WIREGUARD select CRYPTO_BLAKE2S_ARM if ARM select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 - select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + select CRYPTO_POLY1305_MIPS if MIPS help WireGuard is a secure, fast, and easy to use replacement for IPSec that uses modern cryptography and clever networking tricks. It's -- GitLab From 542104ee0cafe789cc07291b71818c143df8d623 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 24 Feb 2021 19:53:16 +0100 Subject: [PATCH 312/839] selftests: gpio: update .gitignore The executable that we build for GPIO selftests was renamed to gpio-mockup-cdev. Let's update .gitignore so that we don't show it as an untracked file. Fixes: 8bc395a6a2e2 ("selftests: gpio: rework and simplify test implementation") Signed-off-by: Bartosz Golaszewski Reviewed-by: Kent Gibson --- tools/testing/selftests/gpio/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore index 4c69408f3e840..a4969f7ee020d 100644 --- a/tools/testing/selftests/gpio/.gitignore +++ b/tools/testing/selftests/gpio/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -gpio-mockup-chardev +gpio-mockup-cdev -- GitLab From a6112998ee45514def58200a7914706c0703f3d7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 1 Mar 2021 10:05:18 +0100 Subject: [PATCH 313/839] gpio: fix NULL-deref-on-deregistration regression Fix a NULL-pointer deference when deregistering the gpio character device that was introduced by the recent stub-driver hack. When the new "driver" is unbound as part of deregistration, driver core clears the driver-data pointer which is used to retrieve the struct gpio_device in its release callback. Fix this by using container_of() in the release callback as should have been done all along. Fixes: 4731210c09f5 ("gpiolib: Bind gpio_device to a driver to enable fw_devlink=on by default") Cc: Saravana Kannan Cc: Greg Kroah-Hartman Reported-by: syzbot+d27b4c8adbbff70fbfde@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index adf55db080d86..6e0572515d029 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodevice_release(struct device *dev) { - struct gpio_device *gdev = dev_get_drvdata(dev); + struct gpio_device *gdev = container_of(dev, struct gpio_device, dev); list_del(&gdev->list); ida_free(&gpio_ida, gdev->id); @@ -605,7 +605,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, goto err_free_ida; device_initialize(&gdev->dev); - dev_set_drvdata(&gdev->dev, gdev); if (gc->parent && gc->parent->driver) gdev->owner = gc->parent->driver->owner; else if (gc->owner) -- GitLab From cf25ef6b631c6fc6c0435fc91eba8734cca20511 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 1 Mar 2021 10:05:19 +0100 Subject: [PATCH 314/839] gpio: fix gpio-device list corruption Make sure to hold the gpio_lock when removing the gpio device from the gpio_devices list (when dropping the last reference) to avoid corrupting the list when there are concurrent accesses. Fixes: ff2b13592299 ("gpio: make the gpiochip a real device") Cc: stable@vger.kernel.org # 4.6 Reviewed-by: Saravana Kannan Signed-off-by: Johan Hovold Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6e0572515d029..4253837f870b1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -475,8 +475,12 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodevice_release(struct device *dev) { struct gpio_device *gdev = container_of(dev, struct gpio_device, dev); + unsigned long flags; + spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); + spin_unlock_irqrestore(&gpio_lock, flags); + ida_free(&gpio_ida, gdev->id); kfree_const(gdev->label); kfree(gdev->descs); -- GitLab From 6e5d5791730b55a1f987e1db84b078b91eb49e99 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 23 Feb 2021 16:35:58 +0800 Subject: [PATCH 315/839] gpiolib: acpi: Add missing IRQF_ONESHOT fixed the following coccicheck: ./drivers/gpio/gpiolib-acpi.c:176:7-27: ERROR: Threaded IRQ with no primary handler requested without IRQF_ONESHOT Make sure threaded IRQs without a primary handler are always request with IRQF_ONESHOT Reported-by: Abaci Robot Signed-off-by: Yang Li Acked-by: Andy Shevchenko Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index e37a57d0a2f07..86efa2d9bf7ff 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -174,7 +174,7 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, int ret, value; ret = request_threaded_irq(event->irq, NULL, event->handler, - event->irqflags, "ACPI:Event", event); + event->irqflags | IRQF_ONESHOT, "ACPI:Event", event); if (ret) { dev_err(acpi_gpio->chip->parent, "Failed to setup interrupt handler for %d\n", -- GitLab From 62d5247d239d4b48762192a251c647d7c997616a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:18 +0200 Subject: [PATCH 316/839] gpiolib: acpi: Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk On some systems the ACPI tables has wrong pin number and instead of having a relative one it provides an absolute one in the global GPIO number space. Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk to cope with such cases. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 7 ++++++- include/linux/gpio/consumer.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 86efa2d9bf7ff..0fa0127d50ec4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -677,6 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (!lookup->desc) { const struct acpi_resource_gpio *agpio = &ares->data.gpio; bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; + struct gpio_desc *desc; u16 pin_index; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint) @@ -689,8 +690,12 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (pin_index >= agpio->pin_table_length) return 1; - lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr, + if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER) + desc = gpio_to_desc(agpio->pin_table[pin_index]); + else + desc = acpi_get_gpiod(agpio->resource_source.string_ptr, agpio->pin_table[pin_index]); + lookup->desc = desc; lookup->info.pin_config = agpio->pin_config; lookup->info.debounce = agpio->debounce_timeout; lookup->info.gpioint = gpioint; diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ef49307611d21..c73b25bc92134 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -674,6 +674,8 @@ struct acpi_gpio_mapping { * get GpioIo type explicitly, this quirk may be used. */ #define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1) +/* Use given pin as an absolute GPIO number in the system */ +#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER BIT(2) unsigned int quirks; }; -- GitLab From 809390219fb9c2421239afe5c9eb862d73978ba0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:19 +0200 Subject: [PATCH 317/839] gpiolib: acpi: Allow to find GpioInt() resource by name and index Currently only search by index is supported. However, in some cases we might need to pass the quirks to the acpi_dev_gpio_irq_get(). For this, split out acpi_dev_gpio_irq_get_by() and replace acpi_dev_gpio_irq_get() by calling above with NULL for name parameter. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 12 ++++++++---- include/linux/acpi.h | 10 ++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 0fa0127d50ec4..1aacd2a5a1fd5 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -945,8 +945,9 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, } /** - * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number + * acpi_dev_gpio_irq_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from + * @name: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * * If the device has one or more GpioInt resources, this function can be @@ -956,9 +957,12 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. * + * The function takes optional @name parameter. If the resource has a property + * name, then only those will be taken into account. + * * Return: Linux IRQ number (> %0) on success, negative errno on failure. */ -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { int idx, i; unsigned int irq_flags; @@ -968,7 +972,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) struct acpi_gpio_info info; struct gpio_desc *desc; - desc = acpi_get_gpiod_by_index(adev, NULL, i, &info); + desc = acpi_get_gpiod_by_index(adev, name, i, &info); /* Ignore -EPROBE_DEFER, it only matters if idx matches */ if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) @@ -1013,7 +1017,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } return -ENOENT; } -EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get); +EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get_by); static acpi_status acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9f432411e9883..fcdaab7239167 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1079,19 +1079,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } -static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, + const char *name, int index) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +{ + return acpi_dev_gpio_irq_get_by(adev, NULL, index); +} + /* Device properties */ #ifdef CONFIG_ACPI -- GitLab From eb441337c7147514ab45036cadf09c3a71e4ce31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:20 +0200 Subject: [PATCH 318/839] gpio: pca953x: Set IRQ type when handle Intel Galileo Gen 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") indeliberately made a regression on how IRQ line from GPIO I²C expander is handled. I.e. it reveals that the quirk for Intel Galileo Gen 2 misses the part of setting IRQ type which previously was predefined by gpio-dwapb driver. Now, we have to reorganize the approach to call necessary parts, which can be done via ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk. Without this fix and with above mentioned change the kernel hangs on the first IRQ event with: gpio gpiochip3: Persistence not supported for GPIO 1 irq 32, desc: 62f8fb50, depth: 0, count: 0, unhandled: 0 ->handle_irq(): 41c7b0ab, handle_bad_irq+0x0/0x40 ->irq_data.chip(): e03f1e72, 0xc2539218 ->action(): 0ecc7e6f ->action->handler(): 8a3db21e, irq_default_primary_handler+0x0/0x10 IRQ_NOPROBE set unexpected IRQ trap at vector 20 Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Reviewed-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 78 +++++++++++-------------------------- 1 file changed, 23 insertions(+), 55 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 5ea09fd015444..c91d056515966 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -113,8 +113,29 @@ MODULE_DEVICE_TABLE(i2c, pca953x_id); #ifdef CONFIG_GPIO_PCA953X_IRQ #include -#include -#include + +static const struct acpi_gpio_params pca953x_irq_gpios = { 0, 0, true }; + +static const struct acpi_gpio_mapping pca953x_acpi_irq_gpios[] = { + { "irq-gpios", &pca953x_irq_gpios, 1, ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER }, + { } +}; + +static int pca953x_acpi_get_irq(struct device *dev) +{ + int ret; + + ret = devm_acpi_dev_add_driver_gpios(dev, pca953x_acpi_irq_gpios); + if (ret) + dev_warn(dev, "can't add GPIO ACPI mapping\n"); + + ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0); + if (ret < 0) + return ret; + + dev_info(dev, "ACPI interrupt quirk (IRQ %d)\n", ret); + return ret; +} static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = { { @@ -133,59 +154,6 @@ static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = { }, {} }; - -#ifdef CONFIG_ACPI -static int pca953x_acpi_get_pin(struct acpi_resource *ares, void *data) -{ - struct acpi_resource_gpio *agpio; - int *pin = data; - - if (acpi_gpio_get_irq_resource(ares, &agpio)) - *pin = agpio->pin_table[0]; - return 1; -} - -static int pca953x_acpi_find_pin(struct device *dev) -{ - struct acpi_device *adev = ACPI_COMPANION(dev); - int pin = -ENOENT, ret; - LIST_HEAD(r); - - ret = acpi_dev_get_resources(adev, &r, pca953x_acpi_get_pin, &pin); - acpi_dev_free_resource_list(&r); - if (ret < 0) - return ret; - - return pin; -} -#else -static inline int pca953x_acpi_find_pin(struct device *dev) { return -ENXIO; } -#endif - -static int pca953x_acpi_get_irq(struct device *dev) -{ - int pin, ret; - - pin = pca953x_acpi_find_pin(dev); - if (pin < 0) - return pin; - - dev_info(dev, "Applying ACPI interrupt quirk (GPIO %d)\n", pin); - - if (!gpio_is_valid(pin)) - return -EINVAL; - - ret = gpio_request(pin, "pca953x interrupt"); - if (ret) - return ret; - - ret = gpio_to_irq(pin); - - /* When pin is used as an IRQ, no need to keep it requested */ - gpio_free(pin); - - return ret; -} #endif static const struct acpi_device_id pca953x_acpi_ids[] = { -- GitLab From b41ba2ec54a70908067034f139aa23d0dd2985ce Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Mar 2021 14:02:40 +0200 Subject: [PATCH 319/839] gpiolib: Read "gpio-line-names" from a firmware node On STM32MP1, the GPIO banks are subnodes of pin-controller@50002000, see arch/arm/boot/dts/stm32mp151.dtsi. The driver for pin-controller@50002000 is in drivers/pinctrl/stm32/pinctrl-stm32.c and iterates over all of its DT subnodes when registering each GPIO bank gpiochip. Each gpiochip has: - gpio_chip.parent = dev, where dev is the device node of the pin controller - gpio_chip.of_node = np, which is the OF node of the GPIO bank Therefore, dev_fwnode(chip->parent) != of_fwnode_handle(chip.of_node), i.e. pin-controller@50002000 != pin-controller@50002000/gpio@5000*000. The original code behaved correctly, as it extracted the "gpio-line-names" from of_fwnode_handle(chip.of_node) = pin-controller@50002000/gpio@5000*000. To achieve the same behaviour, read property from the firmware node. Fixes: 7cba1a4d5e162 ("gpiolib: generalize devprop_gpiochip_set_names() for device properties") Reported-by: Marek Vasut Reported-by: Roman Guskov Signed-off-by: Andy Shevchenko Tested-by: Marek Vasut Reviewed-by: Marek Vasut Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4253837f870b1..7ec0822c05055 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -367,22 +367,18 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned - * names belong to the underlying software node and should not be released + * names belong to the underlying firmware node and should not be released * by the caller. */ static int devprop_gpiochip_set_names(struct gpio_chip *chip) { struct gpio_device *gdev = chip->gpiodev; - struct device *dev = chip->parent; + struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); const char **names; int ret, i; int count; - /* GPIO chip may not have a parent device whose properties we inspect. */ - if (!dev) - return 0; - - count = device_property_string_array_count(dev, "gpio-line-names"); + count = fwnode_property_string_array_count(fwnode, "gpio-line-names"); if (count < 0) return 0; @@ -396,7 +392,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return -ENOMEM; - ret = device_property_read_string_array(dev, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, count); if (ret < 0) { dev_warn(&gdev->dev, "failed to read GPIO line names\n"); -- GitLab From 781e14eaa7d168dc07d2a2eea5c55831a5bb46f3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 10 Feb 2021 16:06:33 +0200 Subject: [PATCH 320/839] thunderbolt: Initialize HopID IDAs in tb_switch_alloc() If there is a failure before the tb_switch_add() is called the switch object is released by tb_switch_release() but at that point HopID IDAs have not yet been initialized. So we see splat like this: BUG: spinlock bad magic on CPU#2, kworker/u8:5/115 ... Workqueue: thunderbolt0 tb_handle_hotplug Call Trace: dump_stack+0x97/0xdc ? spin_bug+0x9a/0xa7 do_raw_spin_lock+0x68/0x98 _raw_spin_lock_irqsave+0x3f/0x5d ida_destroy+0x4f/0x127 tb_switch_release+0x6d/0xfd device_release+0x2c/0x7d kobject_put+0x9b/0xbc tb_handle_hotplug+0x278/0x452 process_one_work+0x1db/0x396 worker_thread+0x216/0x375 kthread+0x14d/0x155 ? pr_cont_work+0x58/0x58 ? kthread_blkcg+0x2e/0x2e ret_from_fork+0x1f/0x40 Fix this by always initializing HopID IDAs in tb_switch_alloc(). Fixes: 0b2863ac3cfd ("thunderbolt: Add functions for allocating and releasing HopIDs") Cc: stable@vger.kernel.org Reported-by: Chiranjeevi Rapolu Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b63fecca6c2a1..2a95b4ce06c0b 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -768,12 +768,6 @@ static int tb_init_port(struct tb_port *port) tb_dump_port(port->sw->tb, &port->config); - /* Control port does not need HopID allocation */ - if (port->port) { - ida_init(&port->in_hopids); - ida_init(&port->out_hopids); - } - INIT_LIST_HEAD(&port->list); return 0; @@ -1842,10 +1836,8 @@ static void tb_switch_release(struct device *dev) dma_port_free(sw->dma_port); tb_switch_for_each_port(sw, port) { - if (!port->disabled) { - ida_destroy(&port->in_hopids); - ida_destroy(&port->out_hopids); - } + ida_destroy(&port->in_hopids); + ida_destroy(&port->out_hopids); } kfree(sw->uuid); @@ -2025,6 +2017,12 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, struct device *parent, /* minimum setup for tb_find_cap and tb_drom_read to work */ sw->ports[i].sw = sw; sw->ports[i].port = i; + + /* Control port does not need HopID allocation */ + if (i) { + ida_init(&sw->ports[i].in_hopids); + ida_init(&sw->ports[i].out_hopids); + } } ret = tb_switch_find_vse_cap(sw, TB_VSE_CAP_PLUG_EVENTS); -- GitLab From c94732bda079ee66b5c3904cbb628d0cb218ab39 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 10 Dec 2020 14:57:10 +0200 Subject: [PATCH 321/839] thunderbolt: Increase runtime PM reference count on DP tunnel discovery If the driver is unbound and then bound back it goes over the topology and figure out the existing tunnels. However, if it finds DP tunnel it should make sure the domain does not runtime suspend as otherwise it will tear down the DP tunnel unexpectedly. Fixes: 6ac6faee5d7d ("thunderbolt: Add runtime PM for Software CM") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 1f000ac1728b9..c348b1fc0efc3 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -138,6 +138,10 @@ static void tb_discover_tunnels(struct tb_switch *sw) parent->boot = true; parent = tb_switch_parent(parent); } + } else if (tb_tunnel_is_dp(tunnel)) { + /* Keep the domain from powering down */ + pm_runtime_get_sync(&tunnel->src_port->sw->dev); + pm_runtime_get_sync(&tunnel->dst_port->sw->dev); } list_add_tail(&tunnel->list, &tcm->tunnel_list); -- GitLab From df304c2d0dfd63c40561a8107a217e84fc3515e8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Mar 2021 13:49:26 +0000 Subject: [PATCH 322/839] arm64: cpufeatures: Fix handling of CONFIG_CMDLINE for idreg overrides The built-in kernel commandline (CONFIG_CMDLINE) can be configured in three different ways: 1. CMDLINE_FORCE: Use CONFIG_CMDLINE instead of any bootloader args 2. CMDLINE_EXTEND: Append the bootloader args to CONFIG_CMDLINE 3. CMDLINE_FROM_BOOTLOADER: Only use CONFIG_CMDLINE if there aren't any bootloader args. The early cmdline parsing to detect idreg overrides gets (2) and (3) slightly wrong: in the case of (2) the bootloader args are parsed first and in the case of (3) the CMDLINE is always parsed. Fix these issues by moving the bootargs parsing out into a helper function and following the same logic as that used by the EFI stub. Reviewed-by: Marc Zyngier Fixes: 33200303553d ("arm64: cpufeature: Add an early command-line cpufeature override facility") Link: https://lore.kernel.org/r/20210303134927.18975-2-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 44 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index dffb16682330e..cc071712c6f9e 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -163,33 +163,39 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) } while (1); } -static __init void parse_cmdline(void) +static __init const u8 *get_bootargs_cmdline(void) { - if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { - const u8 *prop; - void *fdt; - int node; + const u8 *prop; + void *fdt; + int node; - fdt = get_early_fdt_ptr(); - if (!fdt) - goto out; + fdt = get_early_fdt_ptr(); + if (!fdt) + return NULL; - node = fdt_path_offset(fdt, "/chosen"); - if (node < 0) - goto out; + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return NULL; - prop = fdt_getprop(fdt, node, "bootargs", NULL); - if (!prop) - goto out; + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + return NULL; - __parse_cmdline(prop, true); + return strlen(prop) ? prop : NULL; +} - if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND)) - return; +static __init void parse_cmdline(void) +{ + const u8 *prop = get_bootargs_cmdline(); + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + !prop) { + __parse_cmdline(CONFIG_CMDLINE, true); } -out: - __parse_cmdline(CONFIG_CMDLINE, true); + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop) + __parse_cmdline(prop, true); } /* Keep checkers quiet */ -- GitLab From cae118b6acc309539b33339e846cbb19187c164c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Mar 2021 13:49:27 +0000 Subject: [PATCH 323/839] arm64: Drop support for CMDLINE_EXTEND The documented behaviour for CMDLINE_EXTEND is that the arguments from the bootloader are appended to the built-in kernel command line. This also matches the option parsing behaviour for the EFI stub and early ID register overrides. Bizarrely, the fdt behaviour is the other way around: appending the built-in command line to the bootloader arguments, resulting in a command-line that doesn't necessarily line-up with the parsing order and definitely doesn't line-up with the documented behaviour. As it turns out, there is a proposal [1] to replace CMDLINE_EXTEND with CMDLINE_PREPEND and CMDLINE_APPEND options which should hopefully make the intended behaviour much clearer. While we wait for those to land, drop CMDLINE_EXTEND for now as there appears to be little enthusiasm for changing the current FDT behaviour. [1] https://lore.kernel.org/lkml/20190319232448.45964-2-danielwa@cisco.com/ Cc: Max Uvarov Cc: Rob Herring Cc: Ard Biesheuvel Cc: Marc Zyngier Cc: Doug Anderson Cc: Tyler Hicks Cc: Frank Rowand Cc: Catalin Marinas Link: https://lore.kernel.org/r/CAL_JsqJX=TCCs7=gg486r9TN4NYscMTCLNfqJF9crskKPq-bTg@mail.gmail.com Link: https://lore.kernel.org/r/20210303134927.18975-3-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 ------ arch/arm64/kernel/idreg-override.c | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1f212b47a48a1..f15418332d169 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1855,12 +1855,6 @@ config CMDLINE_FROM_BOOTLOADER the boot loader doesn't provide any, the default kernel command string provided in CMDLINE will be used. -config CMDLINE_EXTEND - bool "Extend bootloader kernel arguments" - help - The command-line arguments provided by the boot loader will be - appended to the default kernel command string. - config CMDLINE_FORCE bool "Always use the default kernel command string" help diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index cc071712c6f9e..83f1c4b92095e 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -188,11 +188,8 @@ static __init void parse_cmdline(void) { const u8 *prop = get_bootargs_cmdline(); - if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || - IS_ENABLED(CONFIG_CMDLINE_FORCE) || - !prop) { + if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop) __parse_cmdline(CONFIG_CMDLINE, true); - } if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop) __parse_cmdline(prop, true); -- GitLab From 07fb6dc327f108937881a096ec6e367a07a7395d Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 1 Mar 2021 10:36:32 +0530 Subject: [PATCH 324/839] arm64/mm: Drop redundant ARCH_WANT_HUGE_PMD_SHARE There is already an ARCH_WANT_HUGE_PMD_SHARE which is being selected for applicable configurations. Hence just drop the other redundant entry. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1614575192-21307-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f15418332d169..6f36732dc11a5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1055,8 +1055,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_HUGE_PMD_SHARE - config ARCH_HAS_CACHE_LINE_SIZE def_bool y -- GitLab From 79cc2ed5a716544621b11a3f90550e5c7d314306 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 1 Mar 2021 16:55:14 +0530 Subject: [PATCH 325/839] arm64/mm: Drop THP conditionality from FORCE_MAX_ZONEORDER Currently without THP being enabled, MAX_ORDER via FORCE_MAX_ZONEORDER gets reduced to 11, which falls below HUGETLB_PAGE_ORDER for certain 16K and 64K page size configurations. This is problematic which throws up the following warning during boot as pageblock_order via HUGETLB_PAGE_ORDER order exceeds MAX_ORDER. WARNING: CPU: 7 PID: 127 at mm/vmstat.c:1092 __fragmentation_index+0x58/0x70 Modules linked in: CPU: 7 PID: 127 Comm: kswapd0 Not tainted 5.12.0-rc1-00005-g0221e3101a1 #237 Hardware name: linux,dummy-virt (DT) pstate: 20400005 (nzCv daif +PAN -UAO -TCO BTYPE=--) pc : __fragmentation_index+0x58/0x70 lr : fragmentation_index+0x88/0xa8 sp : ffff800016ccfc00 x29: ffff800016ccfc00 x28: 0000000000000000 x27: ffff800011fd4000 x26: 0000000000000002 x25: ffff800016ccfda0 x24: 0000000000000002 x23: 0000000000000640 x22: ffff0005ffcb5b18 x21: 0000000000000002 x20: 000000000000000d x19: ffff0005ffcb3980 x18: 0000000000000004 x17: 0000000000000001 x16: 0000000000000019 x15: ffff800011ca7fb8 x14: 00000000000002b3 x13: 0000000000000000 x12: 00000000000005e0 x11: 0000000000000003 x10: 0000000000000080 x9 : ffff800011c93948 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000007000 x5 : 0000000000007944 x4 : 0000000000000032 x3 : 000000000000001c x2 : 000000000000000b x1 : ffff800016ccfc10 x0 : 000000000000000d Call trace: __fragmentation_index+0x58/0x70 compaction_suitable+0x58/0x78 wakeup_kcompactd+0x8c/0xd8 balance_pgdat+0x570/0x5d0 kswapd+0x1e0/0x388 kthread+0x154/0x158 ret_from_fork+0x10/0x30 This solves the problem via keeping FORCE_MAX_ZONEORDER unchanged with or without THP on 16K and 64K page size configurations, making sure that the HUGETLB_PAGE_ORDER (and pageblock_order) would never exceed MAX_ORDER. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614597914-28565-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f36732dc11a5..5656e7aacd698 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1155,8 +1155,8 @@ config XEN config FORCE_MAX_ZONEORDER int - default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) - default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE) + default "14" if ARM64_64K_PAGES + default "12" if ARM64_16K_PAGES default "11" help The kernel memory allocator divides physically contiguous memory -- GitLab From 78a81d88f60ba773cbe890205e1ee67f00502948 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:12 +0100 Subject: [PATCH 326/839] x86/sev-es: Introduce ip_within_syscall_gap() helper Introduce a helper to check whether an exception came from the syscall gap and use it in the SEV-ES code. Extend the check to also cover the compatibility SYSCALL entry path. Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # 5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-2-joro@8bytes.org --- arch/x86/entry/entry_64_compat.S | 2 ++ arch/x86/include/asm/proto.h | 1 + arch/x86/include/asm/ptrace.h | 15 +++++++++++++++ arch/x86/kernel/traps.c | 3 +-- 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 541fdaf640453..0051cf5c792d1 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat) /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ pushq %r8 /* pt_regs->sp */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 2c35f1c01a2df..b6a9d51d1d791 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void); void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); +void entry_SYSCALL_compat_safe_stack(void); void entry_INT80_compat(void); #ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d8324a2366961..409f661481e11 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -94,6 +94,8 @@ struct pt_regs { #include #endif +#include + struct cpuinfo_x86; struct task_struct; @@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs) #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp + +static inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); +#endif + + return ret; +} #endif static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7f5aec758f0ee..ac1874a2a70e8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * In the SYSCALL entry path the RSP value comes from user-space - don't * trust it and switch to the current kernel stack */ - if (regs->ip >= (unsigned long)entry_SYSCALL_64 && - regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) { + if (ip_within_syscall_gap(regs)) { sp = this_cpu_read(cpu_current_top_of_stack); goto sync; } -- GitLab From ee2e3f50629f17b0752b55b2566c15ce8dafb557 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 6 Mar 2021 11:10:10 +0100 Subject: [PATCH 327/839] mount: fix mounting of detached mounts onto targets that reside on shared mounts Creating a series of detached mounts, attaching them to the filesystem, and unmounting them can be used to trigger an integer overflow in ns->mounts causing the kernel to block any new mounts in count_mounts() and returning ENOSPC because it falsely assumes that the maximum number of mounts in the mount namespace has been reached, i.e. it thinks it can't fit the new mounts into the mount namespace anymore. Depending on the number of mounts in your system, this can be reproduced on any kernel that supportes open_tree() and move_mount() by compiling and running the following program: /* SPDX-License-Identifier: LGPL-2.1+ */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include /* open_tree() */ #ifndef OPEN_TREE_CLONE #define OPEN_TREE_CLONE 1 #endif #ifndef OPEN_TREE_CLOEXEC #define OPEN_TREE_CLOEXEC O_CLOEXEC #endif #ifndef __NR_open_tree #if defined __alpha__ #define __NR_open_tree 538 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_open_tree 4428 #endif #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ #define __NR_open_tree 6428 #endif #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ #define __NR_open_tree 5428 #endif #elif defined __ia64__ #define __NR_open_tree (428 + 1024) #else #define __NR_open_tree 428 #endif #endif /* move_mount() */ #ifndef MOVE_MOUNT_F_EMPTY_PATH #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ #endif #ifndef __NR_move_mount #if defined __alpha__ #define __NR_move_mount 539 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_move_mount 4429 #endif #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ #define __NR_move_mount 6429 #endif #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ #define __NR_move_mount 5429 #endif #elif defined __ia64__ #define __NR_move_mount (428 + 1024) #else #define __NR_move_mount 429 #endif #endif static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) { return syscall(__NR_open_tree, dfd, filename, flags); } static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) { return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); } static bool is_shared_mountpoint(const char *path) { bool shared = false; FILE *f = NULL; char *line = NULL; int i; size_t len = 0; f = fopen("/proc/self/mountinfo", "re"); if (!f) return 0; while (getline(&line, &len, f) > 0) { char *slider1, *slider2; for (slider1 = line, i = 0; slider1 && i < 4; i++) slider1 = strchr(slider1 + 1, ' '); if (!slider1) continue; slider2 = strchr(slider1 + 1, ' '); if (!slider2) continue; *slider2 = '\0'; if (strcmp(slider1 + 1, path) == 0) { /* This is the path. Is it shared? */ slider1 = strchr(slider2 + 1, ' '); if (slider1 && strstr(slider1, "shared:")) { shared = true; break; } } } fclose(f); free(line); return shared; } static void usage(void) { const char *text = "mount-new [--recursive] \n"; fprintf(stderr, "%s", text); _exit(EXIT_SUCCESS); } #define exit_usage(format, ...) \ ({ \ fprintf(stderr, format "\n", ##__VA_ARGS__); \ usage(); \ }) #define exit_log(format, ...) \ ({ \ fprintf(stderr, format "\n", ##__VA_ARGS__); \ exit(EXIT_FAILURE); \ }) static const struct option longopts[] = { {"help", no_argument, 0, 'a'}, { NULL, no_argument, 0, 0 }, }; int main(int argc, char *argv[]) { int exit_code = EXIT_SUCCESS, index = 0; int dfd, fd_tree, new_argc, ret; char *base_dir; char *const *new_argv; char target[PATH_MAX]; while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) { switch (ret) { case 'a': /* fallthrough */ default: usage(); } } new_argv = &argv[optind]; new_argc = argc - optind; if (new_argc < 1) exit_usage("Missing base directory\n"); base_dir = new_argv[0]; if (*base_dir != '/') exit_log("Please specify an absolute path"); /* Ensure that target is a shared mountpoint. */ if (!is_shared_mountpoint(base_dir)) exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir); dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC); if (dfd < 0) exit_log("%m - Failed to open base directory \"%s\"", base_dir); ret = mkdirat(dfd, "detached-move-mount", 0755); if (ret < 0) exit_log("%m - Failed to create required temporary directories"); ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir); if (ret < 0 || (size_t)ret >= sizeof(target)) exit_log("%m - Failed to assemble target path"); /* * Having a mount table with 10000 mounts is already quite excessive * and shoult account even for weird test systems. */ for (size_t i = 0; i < 10000; i++) { fd_tree = sys_open_tree(dfd, "detached-move-mount", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); if (fd_tree < 0) { fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd); exit_code = EXIT_FAILURE; break; } ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH); if (ret < 0) { if (errno == ENOSPC) fprintf(stderr, "%m - Buggy mount counting"); else fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd); exit_code = EXIT_FAILURE; break; } close(fd_tree); ret = umount2(target, MNT_DETACH); if (ret < 0) { fprintf(stderr, "%m - Failed to unmount %s", target); exit_code = EXIT_FAILURE; break; } } (void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR); close(dfd); exit(exit_code); } and wait for the kernel to refuse any new mounts by returning ENOSPC. How many iterations are needed depends on the number of mounts in your system. Assuming you have something like 50 mounts on a standard system it should be almost instantaneous. The root cause of this is that detached mounts aren't handled correctly when source and target mount are identical and reside on a shared mount causing a broken mount tree where the detached source itself is propagated which propagation prevents for regular bind-mounts and new mounts. This ultimately leads to a miscalculation of the number of mounts in the mount namespace. Detached mounts created via open_tree(fd, path, OPEN_TREE_CLONE) are essentially like an unattached new mount, or an unattached bind-mount. They can then later on be attached to the filesystem via move_mount() which calls into attach_recursive_mount(). Part of attaching it to the filesystem is making sure that mounts get correctly propagated in case the destination mountpoint is MS_SHARED, i.e. is a shared mountpoint. This is done by calling into propagate_mnt() which walks the list of peers calling propagate_one() on each mount in this list making sure it receives the propagation event. The propagate_one() functions thereby skips both new mounts and bind mounts to not propagate them "into themselves". Both are identified by checking whether the mount is already attached to any mount namespace in mnt->mnt_ns. The is what the IS_MNT_NEW() helper is responsible for. However, detached mounts have an anonymous mount namespace attached to them stashed in mnt->mnt_ns which means that IS_MNT_NEW() doesn't realize they need to be skipped causing the mount to propagate "into itself" breaking the mount table and causing a disconnect between the number of mounts recorded as being beneath or reachable from the target mountpoint and the number of mounts actually recorded/counted in ns->mounts ultimately causing an overflow which in turn prevents any new mounts via the ENOSPC issue. So teach propagation to handle detached mounts by making it aware of them. I've been tracking this issue down for the last couple of days and then verifying that the fix is correct by unmounting everything in my current mount table leaving only /proc and /sys mounted and running the reproducer above overnight verifying the number of mounts counted in ns->mounts. With this fix the counts are correct and the ENOSPC issue can't be reproduced. This change will only have an effect on mounts created with the new mount API since detached mounts cannot be created with the old mount API so regressions are extremely unlikely. Link: https://lore.kernel.org/r/20210306101010.243666-1-christian.brauner@ubuntu.com Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around") Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Cc: Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/pnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pnode.h b/fs/pnode.h index 26f74e092bd98..988f1aa9b02ae 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_NEW(m) (!(m)->mnt_ns) +#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns)) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) -- GitLab From 28e96c1693ec1cdc963807611f8b5ad400431e82 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Mar 2021 17:07:26 +0100 Subject: [PATCH 328/839] ALSA: hda: Drop the BATCH workaround for AMD controllers The commit c02f77d32d2c ("ALSA: hda - Workaround for crackled sound on AMD controller (1022:1457)") introduced a few workarounds for the recent AMD HD-audio controller, and one of them is the forced BATCH PCM mode so that PulseAudio avoids the timer-based scheduling. This was thought to cover for some badly working applications, but this actually worsens for more others. In total, this wasn't a good idea to enforce it. This is a partial revert of the commit above for dropping the PCM BATCH enforcement part to recover from the regression again. Fixes: c02f77d32d2c ("ALSA: hda - Workaround for crackled sound on AMD controller (1022:1457)") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=195303 Cc: Link: https://lore.kernel.org/r/20210308160726.22930-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_controller.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 9087981cd1f70..ca2f2ecd14888 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -609,13 +609,6 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) 20, 178000000); - /* by some reason, the playback stream stalls on PulseAudio with - * tsched=1 when a capture stream triggers. Until we figure out the - * real cause, disable tsched mode by telling the PCM info flag. - */ - if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND) - runtime->hw.info |= SNDRV_PCM_INFO_BATCH; - if (chip->align_buffer_size) /* constrain buffer sizes to be multiple of 128 bytes. This is more efficient in terms of memory -- GitLab From a0590473c5e6c4ef17c3132ad08fbad170f72d55 Mon Sep 17 00:00:00 2001 From: Timo Rothenpieler Date: Tue, 23 Feb 2021 15:19:01 +0100 Subject: [PATCH 329/839] nfs: fix PNFS_FLEXFILE_LAYOUT Kconfig default This follows what was done in 8c2fabc6542d9d0f8b16bd1045c2eda59bdcde13. With the default being m, it's impossible to build the module into the kernel. Signed-off-by: Timo Rothenpieler Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e2a488d403a61..14a72224b6571 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -127,7 +127,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate depends on NFS_V4_1 && NFS_V3 - default m + default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" -- GitLab From 6654111c893fec1516d83046d2b237e83e0d5967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 7 Mar 2021 19:23:01 +0100 Subject: [PATCH 330/839] MIPS: vmlinux.lds.S: align raw appended dtb to 8 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devicetree specification requires 8-byte alignment in memory. This is now enforced by libfdt since commit 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") which included the upstream commit 5e735860c478 ("libfdt: Check for 8-byte address alignment in fdt_ro_probe_()"). This broke the MIPS raw appended DTBs which would be appended to the image immediately following the initramfs section. This ends with a 32bit size, resulting in a 4-byte alignment of the DTB. Fix by padding with zeroes to 8-bytes when MIPS_RAW_APPENDED_DTB is defined. Fixes: 79edff12060f ("scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9") Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Bjørn Mork Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/vmlinux.lds.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 4b4e39b7c79bc..1234834cc4c44 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -173,7 +173,11 @@ SECTIONS #endif #ifdef CONFIG_MIPS_RAW_APPENDED_DTB - STRUCT_ALIGN(); + .fill : { + FILL(0); + BYTE(0); + . = ALIGN(8); + } __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -- GitLab From eeb0753ba27b26f609e61f9950b14f1b934fe429 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Mar 2021 10:54:57 +0530 Subject: [PATCH 331/839] arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory pfn_valid() validates a pfn but basically it checks for a valid struct page backing for that pfn. It should always return positive for memory ranges backed with struct page mapping. But currently pfn_valid() fails for all ZONE_DEVICE based memory types even though they have struct page mapping. pfn_valid() asserts that there is a memblock entry for a given pfn without MEMBLOCK_NOMAP flag being set. The problem with ZONE_DEVICE based memory is that they do not have memblock entries. Hence memblock_is_map_memory() will invariably fail via memblock_search() for a ZONE_DEVICE based address. This eventually fails pfn_valid() which is wrong. memblock_is_map_memory() needs to be skipped for such memory ranges. As ZONE_DEVICE memory gets hotplugged into the system via memremap_pages() called from a driver, their respective memory sections will not have SECTION_IS_EARLY set. Normal hotplug memory will never have MEMBLOCK_NOMAP set in their memblock regions. Because the flag MEMBLOCK_NOMAP was specifically designed and set for firmware reserved memory regions. memblock_is_map_memory() can just be skipped as its always going to be positive and that will be an optimization for the normal hotplug memory. Like ZONE_DEVICE based memory, all normal hotplugged memory too will not have SECTION_IS_EARLY set for their sections Skipping memblock_is_map_memory() for all non early memory sections would fix pfn_valid() problem for ZONE_DEVICE based memory and also improve its performance for normal hotplug memory as well. Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Robin Murphy Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: David Hildenbrand Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support") Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614921898-4099-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0ace5e68efba0..5920c527845a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -230,6 +230,18 @@ int pfn_valid(unsigned long pfn) if (!valid_section(__pfn_to_section(pfn))) return 0; + + /* + * ZONE_DEVICE memory does not have the memblock entries. + * memblock_is_map_memory() check for ZONE_DEVICE based + * addresses will always fail. Even the normal hotplugged + * memory will never have MEMBLOCK_NOMAP flag set in their + * memblock entries. Skip memblock search for all non early + * memory sections covering all of hotplug memory including + * both normal and ZONE_DEVICE based. + */ + if (!early_section(__pfn_to_section(pfn))) + return pfn_section_valid(__pfn_to_section(pfn), pfn); #endif return memblock_is_map_memory(addr); } -- GitLab From 093bbe211ea566fa828536275e09ee9d75df1f25 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Mar 2021 10:54:58 +0530 Subject: [PATCH 332/839] arm64/mm: Reorganize pfn_valid() There are multiple instances of pfn_to_section_nr() and __pfn_to_section() when CONFIG_SPARSEMEM is enabled. This can be optimized if memory section is fetched earlier. This replaces the open coded PFN and ADDR conversion with PFN_PHYS() and PHYS_PFN() helpers. While there, also add a comment. This does not cause any functional change. Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1614921898-4099-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5920c527845a0..3685e12aba9b6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -219,16 +219,26 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) int pfn_valid(unsigned long pfn) { - phys_addr_t addr = pfn << PAGE_SHIFT; + phys_addr_t addr = PFN_PHYS(pfn); - if ((addr >> PAGE_SHIFT) != pfn) + /* + * Ensure the upper PAGE_SHIFT bits are clear in the + * pfn. Else it might lead to false positives when + * some of the upper bits are set, but the lower bits + * match a valid pfn. + */ + if (PHYS_PFN(addr) != pfn) return 0; #ifdef CONFIG_SPARSEMEM +{ + struct mem_section *ms; + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - if (!valid_section(__pfn_to_section(pfn))) + ms = __pfn_to_section(pfn); + if (!valid_section(ms)) return 0; /* @@ -240,8 +250,9 @@ int pfn_valid(unsigned long pfn) * memory sections covering all of hotplug memory including * both normal and ZONE_DEVICE based. */ - if (!early_section(__pfn_to_section(pfn))) - return pfn_section_valid(__pfn_to_section(pfn), pfn); + if (!early_section(ms)) + return pfn_section_valid(ms, pfn); +} #endif return memblock_is_map_memory(addr); } -- GitLab From ad3dbe35c833c2d4d0bbf3f04c785d32f931e7c9 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Mon, 8 Mar 2021 12:12:13 -0600 Subject: [PATCH 333/839] NFS: Correct size calculation for create reply length CREATE requests return a post_op_fh3, rather than nfs_fh3. The post_op_fh3 includes an extra word to indicate 'handle_follows'. Without that additional word, create fails when full 64-byte filehandles are in use. Add NFS3_post_op_fh_sz, and correct the size calculation for NFS3_createres_sz. Signed-off-by: Frank Sorenson Signed-off-by: Anna Schumaker --- fs/nfs/nfs3xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ca10072644ff2..ed1c83738c30d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -36,6 +36,7 @@ #define NFS3_pagepad_sz (1) /* Page padding */ #define NFS3_fhandle_sz (1+16) #define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_post_op_fh_sz (1+NFS3_fh_sz) #define NFS3_sattr_sz (15) #define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) #define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) @@ -73,7 +74,7 @@ #define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz) #define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) -#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz) -- GitLab From bf9279cd63dcc144b2a3c4c76d8b6b4c30b05c22 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Mar 2021 10:14:48 +0100 Subject: [PATCH 334/839] net: dsa: bcm_sf2: simplify optional reset handling As of commit bb475230b8e5 ("reset: make optional functions really optional"), the reset framework API calls use NULL pointers to describe optional, non-present reset controls. This allows to unconditionally return errors from devm_reset_control_get_optional_exclusive. Signed-off-by: Philipp Zabel Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 5ee8103b8e9c7..f277df922fcde 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -406,7 +406,7 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) /* The watchdog reset does not work on 7278, we need to hit the * "external" reset line through the reset controller. */ - if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev)) { + if (priv->type == BCM7278_DEVICE_ID) { ret = reset_control_assert(priv->rcdev); if (ret) return ret; @@ -1265,7 +1265,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev, "switch"); - if (PTR_ERR(priv->rcdev) == -EPROBE_DEFER) + if (IS_ERR(priv->rcdev)) return PTR_ERR(priv->rcdev); /* Auto-detection using standard registers will not work, so @@ -1426,7 +1426,7 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) bcm_sf2_mdio_unregister(priv); clk_disable_unprepare(priv->clk_mdiv); clk_disable_unprepare(priv->clk); - if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev)) + if (priv->type == BCM7278_DEVICE_ID) reset_control_assert(priv->rcdev); return 0; -- GitLab From a4813dc7baa4898f66c84ef68274bbbd1a0ae224 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Sun, 7 Mar 2021 02:50:28 +0530 Subject: [PATCH 335/839] net: ethernet: chelsio: inline_crypto: Mundane typos fixed throughout the file chcr_ktls.c Mundane typos fixes throughout the file. s/establised/established/ s/availbale/available/ s/vaues/values/ s/Incase/In case/ Signed-off-by: Bhaskar Chowdhury Signed-off-by: David S. Miller --- .../ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 46a809f2aeca2..169e10c913780 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -672,7 +672,7 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (tx_info->pending_close) { spin_unlock(&tx_info->lock); if (!status) { - /* it's a late success, tcb status is establised, + /* it's a late success, tcb status is established, * mark it close. */ chcr_ktls_mark_tcb_close(tx_info); @@ -930,7 +930,7 @@ chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len) } /* - * chcr_ktls_check_tcp_options: To check if there is any TCP option availbale + * chcr_ktls_check_tcp_options: To check if there is any TCP option available * other than timestamp. * @skb - skb contains partial record.. * return: 1 / 0 @@ -1115,7 +1115,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb, } if (unlikely(credits < ETHTXQ_STOP_THRES)) { - /* Credits are below the threshold vaues, stop the queue after + /* Credits are below the threshold values, stop the queue after * injecting the Work Request for this packet. */ chcr_eth_txq_stop(q); @@ -2006,7 +2006,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* TCP segments can be in received either complete or partial. * chcr_end_part_handler will handle cases if complete record or end - * part of the record is received. Incase of partial end part of record, + * part of the record is received. In case of partial end part of record, * we will send the complete record again. */ -- GitLab From 492bbe7f8a43ff20bb9bfc6b98220dcfb7e5992f Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Sat, 6 Mar 2021 14:12:31 -0800 Subject: [PATCH 336/839] net: usb: cdc_ncm: emit dev_err on error paths Several error paths in bind/probe code will only emit output using dev_dbg. But if we are going to fail the bind/probe, emit related output with "err" priority. Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 4087c9e337819..8acf301154282 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -851,17 +851,17 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* check if we got everything */ if (!ctx->data) { - dev_dbg(&intf->dev, "CDC Union missing and no IAD found\n"); + dev_err(&intf->dev, "CDC Union missing and no IAD found\n"); goto error; } if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) { if (!ctx->mbim_desc) { - dev_dbg(&intf->dev, "MBIM functional descriptor missing\n"); + dev_err(&intf->dev, "MBIM functional descriptor missing\n"); goto error; } } else { if (!ctx->ether_desc || !ctx->func_desc) { - dev_dbg(&intf->dev, "NCM or ECM functional descriptors missing\n"); + dev_err(&intf->dev, "NCM or ECM functional descriptors missing\n"); goto error; } } @@ -870,7 +870,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ if (ctx->data != ctx->control) { temp = usb_driver_claim_interface(driver, ctx->data, dev); if (temp) { - dev_dbg(&intf->dev, "failed to claim data intf\n"); + dev_err(&intf->dev, "failed to claim data intf\n"); goto error; } } @@ -926,7 +926,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ if (ctx->ether_desc) { temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); if (temp) { - dev_dbg(&intf->dev, "failed to get mac address\n"); + dev_err(&intf->dev, "failed to get mac address\n"); goto error2; } dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr); -- GitLab From 4d8c79b7e9ff05030aad68421f7584b129933ba6 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Sat, 6 Mar 2021 14:12:32 -0800 Subject: [PATCH 337/839] net: usb: log errors to dmesg/syslog Errors in protocol should be logged when the driver aborts operations. If the driver can carry on and "humor" the device, then emitting the message as debug output level is fine. Signed-off-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index b4c8080e6f87a..f4f37ecfed587 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -887,7 +887,7 @@ int usbnet_open (struct net_device *net) // insist peer be connected if (info->check_connect && (retval = info->check_connect (dev)) < 0) { - netif_dbg(dev, ifup, dev->net, "can't open; %d\n", retval); + netif_err(dev, ifup, dev->net, "can't open; %d\n", retval); goto done; } -- GitLab From 143c253f42bad20357e7e4432087aca747c43384 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 7 Mar 2021 00:40:12 -0800 Subject: [PATCH 338/839] net: hisilicon: hns: fix error return code of hns_nic_clear_all_rx_fetch() When hns_assemble_skb() returns NULL to skb, no error return code of hns_nic_clear_all_rx_fetch() is assigned. To fix this bug, ret is assigned with -ENOMEM in this case. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 5d7824d2b4d47..c66a7a51198ee 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1663,8 +1663,10 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev) for (j = 0; j < fetch_num; j++) { /* alloc one skb and init */ skb = hns_assemble_skb(ndev); - if (!skb) + if (!skb) { + ret = -ENOMEM; goto out; + } rd = &tx_ring_data(priv, skb->queue_mapping); hns_nic_net_xmit_hw(ndev, skb, rd); -- GitLab From 62765d39553cfd1ad340124fe1e280450e8c89e2 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 7 Mar 2021 01:12:56 -0800 Subject: [PATCH 339/839] net: wan: fix error return code of uhdlc_init() When priv->rx_skbuff or priv->tx_skbuff is NULL, no error return code of uhdlc_init() is assigned. To fix this bug, ret is assigned with -ENOMEM in these cases. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index dca97cd7c4e75..7eac6a3e1cdee 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -204,14 +204,18 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) priv->rx_skbuff = kcalloc(priv->rx_ring_size, sizeof(*priv->rx_skbuff), GFP_KERNEL); - if (!priv->rx_skbuff) + if (!priv->rx_skbuff) { + ret = -ENOMEM; goto free_ucc_pram; + } priv->tx_skbuff = kcalloc(priv->tx_ring_size, sizeof(*priv->tx_skbuff), GFP_KERNEL); - if (!priv->tx_skbuff) + if (!priv->tx_skbuff) { + ret = -ENOMEM; goto free_rx_skbuff; + } priv->skb_curtx = 0; priv->skb_dirtytx = 0; -- GitLab From 03cbb87054c17b50a6ead63ed3ab02e094a785b1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 7 Mar 2021 12:21:56 +0200 Subject: [PATCH 340/839] net: dsa: fix switchdev objects on bridge master mistakenly being applied on ports Tobias reports that after the blamed patch, VLAN objects being added to a bridge device are being added to all slave ports instead (swp2, swp3). ip link add br0 type bridge vlan_filtering 1 ip link set swp2 master br0 ip link set swp3 master br0 bridge vlan add dev br0 vid 100 self This is because the fix was too broad: we made dsa_port_offloads_netdev say "yes, I offload the br0 bridge" for all slave ports, but we didn't add the checks whether the switchdev object was in fact meant for the physical port or for the bridge itself. So we are reacting on events in a way in which we shouldn't. The reason why the fix was too broad is because the question itself, "does this DSA port offload this netdev", was too broad in the first place. The solution is to disambiguate the question and separate it into two different functions, one to be called for each switchdev attribute / object that has an orig_dev == net_bridge (dsa_port_offloads_bridge), and the other for orig_dev == net_bridge_port (*_offloads_bridge_port). In the case of VLAN objects on the bridge interface, this solves the problem because we know that VLAN objects are per bridge port and not per bridge. And when orig_dev is equal to the net_bridge, we offload it as a bridge, but not as a bridge port; that's how we are able to skip reacting on those events. Note that this is compatible with future plans to have explicit offloading of VLAN objects on the bridge interface as a bridge port (in DSA, this signifies that we should add that VLAN towards the CPU port). Fixes: 99b8202b179f ("net: dsa: fix SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING getting ignored") Reported-by: Tobias Waldekranz Signed-off-by: Vladimir Oltean Reviewed-by: Tobias Waldekranz Tested-by: Tobias Waldekranz Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 25 +++++++++++--------- net/dsa/slave.c | 59 +++++++++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2eeaa42f2e083..9d4b0e9b1aa1e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -230,8 +230,8 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; -static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, - struct net_device *dev) +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + struct net_device *dev) { /* Switchdev offloading can be configured on: */ @@ -241,12 +241,6 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, */ return true; - if (dp->bridge_dev == dev) - /* DSA ports connected to a bridge, and event was emitted - * for the bridge. - */ - return true; - if (dp->lag_dev == dev) /* DSA ports connected to a bridge via a LAG */ return true; @@ -254,14 +248,23 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, return false; } +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dp->bridge_dev == bridge_dev; +} + /* Returns true if any port of this tree offloads the given net_device */ -static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst, - struct net_device *dev) +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + struct net_device *dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_offloads_netdev(dp, dev)) + if (dsa_port_offloads_bridge_port(dp, dev)) return true; return false; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 491e3761b5f4a..992fcab4b5529 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -278,28 +278,43 @@ static int dsa_slave_port_attr_set(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); int ret; - if (!dsa_port_offloads_netdev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_set_state(dp, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, extack); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); break; default: @@ -341,9 +356,6 @@ static int dsa_slave_vlan_add(struct net_device *dev, struct switchdev_obj_port_vlan vlan; int err; - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) { NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); return 0; @@ -391,27 +403,36 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; + /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_add(dev, obj, extack); break; case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj)); break; case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_add_ring_role(dp, SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; @@ -431,9 +452,6 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct switchdev_obj_port_vlan *vlan; int err; - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) return 0; @@ -459,27 +477,36 @@ static int dsa_slave_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; + /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_del(dev, obj); break; case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj)); break; case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_del_ring_role(dp, SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; @@ -2298,7 +2325,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, * other ports bridged with the LAG should be able to * autonomously forward towards it. */ - if (dsa_tree_offloads_netdev(dp->ds->dst, dev)) + if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev)) return NOTIFY_DONE; } -- GitLab From ac88c531a5b38877eba2365a3f28f0c8b513dc33 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 7 Mar 2021 13:17:47 +0000 Subject: [PATCH 341/839] net: davicom: Fix regulator not turned off on failed probe When the probe fails or requests to be defered, we must disable the regulator that was previously enabled. Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Paul Cercueil Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 3fdc70dab5c14..ae744826bb9e2 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1449,7 +1449,7 @@ dm9000_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to request reset gpio %d: %d\n", reset_gpios, ret); - return -ENODEV; + goto out_regulator_disable; } /* According to manual PWRST# Low Period Min 1ms */ @@ -1461,8 +1461,10 @@ dm9000_probe(struct platform_device *pdev) if (!pdata) { pdata = dm9000_parse_dt(&pdev->dev); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out_regulator_disable; + } } /* Init network device */ @@ -1703,6 +1705,10 @@ out: dm9000_release_board(pdev, db); free_netdev(ndev); +out_regulator_disable: + if (!IS_ERR(power)) + regulator_disable(power); + return ret; } -- GitLab From cf9e60aa69ae6c40d3e3e4c94dd6c8de31674e9b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 7 Mar 2021 13:17:48 +0000 Subject: [PATCH 342/839] net: davicom: Fix regulator not turned off on driver removal We must disable the regulator that was enabled in the probe function. Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Paul Cercueil Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index ae744826bb9e2..a95e95ce94386 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -133,6 +133,8 @@ struct board_info { u32 wake_state; int ip_summed; + + struct regulator *power_supply; }; /* debug code */ @@ -1481,6 +1483,8 @@ dm9000_probe(struct platform_device *pdev) db->dev = &pdev->dev; db->ndev = ndev; + if (!IS_ERR(power)) + db->power_supply = power; spin_lock_init(&db->lock); mutex_init(&db->addr_lock); @@ -1766,10 +1770,13 @@ static int dm9000_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); + struct board_info *dm = to_dm9000_board(ndev); unregister_netdev(ndev); - dm9000_release_board(pdev, netdev_priv(ndev)); + dm9000_release_board(pdev, dm); free_netdev(ndev); /* free device structure */ + if (dm->power_supply) + regulator_disable(dm->power_supply); dev_dbg(&pdev->dev, "released and freed device\n"); return 0; -- GitLab From 2e2696223676d56db1a93acfca722c1b96cd552d Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 7 Mar 2021 13:17:49 +0000 Subject: [PATCH 343/839] net: davicom: Use platform_get_irq_optional() The second IRQ line really is optional, so use platform_get_irq_optional() to obtain it. Signed-off-by: Paul Cercueil Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index a95e95ce94386..252adfa5d837b 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1507,7 +1507,7 @@ dm9000_probe(struct platform_device *pdev) goto out; } - db->irq_wake = platform_get_irq(pdev, 1); + db->irq_wake = platform_get_irq_optional(pdev, 1); if (db->irq_wake >= 0) { dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake); -- GitLab From 1b2395dfff5bb40228a187f21f577cd90673d344 Mon Sep 17 00:00:00 2001 From: Alex Marginean Date: Sun, 7 Mar 2021 15:23:38 +0200 Subject: [PATCH 344/839] net: enetc: set MAC RX FIFO to recommended value On LS1028A, the MAC RX FIFO defaults to the value 2, which is too high and may lead to RX lock-up under traffic at a rate higher than 6 Gbps. Set it to 1 instead, as recommended by the hardware design team and by later versions of the ENETC block guide. Signed-off-by: Alex Marginean Reviewed-by: Claudiu Manoil Reviewed-by: Jason Liu Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_hw.h | 2 ++ drivers/net/ethernet/freescale/enetc/enetc_pf.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index de0d20b0f489c..00938f7960a43 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -234,6 +234,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_MAXFRM 0x8014 #define ENETC_SET_TX_MTU(val) ((val) << 16) #define ENETC_SET_MAXFRM(val) ((val) & 0xffff) +#define ENETC_PM0_RX_FIFO 0x801c +#define ENETC_PM0_RX_FIFO_VAL 1 #define ENETC_PM_IMDIO_BASE 0x8030 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index ca02f033bea21..224fc37a6757c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -490,6 +490,12 @@ static void enetc_configure_port_mac(struct enetc_hw *hw) enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); + + /* On LS1028A, the MAC RX FIFO defaults to 2, which is too high + * and may lead to RX lock-up under traffic. Set it to 1 instead, + * as recommended by the hardware team. + */ + enetc_port_wr(hw, ENETC_PM0_RX_FIFO, ENETC_PM0_RX_FIFO_VAL); } static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) -- GitLab From 29d98f54a4fe1b6a9089bec8715a1b89ff9ad59c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 7 Mar 2021 15:23:39 +0200 Subject: [PATCH 345/839] net: enetc: allow hardware timestamping on TX queues with tc-etf enabled The txtime is passed to the driver in skb->skb_mstamp_ns, which is actually in a union with skb->tstamp (the place where software timestamps are kept). Since commit b50a5c70ffa4 ("net: allow simultaneous SW and HW transmit timestamping"), __sock_recv_timestamp has some logic for making sure that the two calls to skb_tstamp_tx: skb_tx_timestamp(skb) # Software timestamp in the driver -> skb_tstamp_tx(skb, NULL) and skb_tstamp_tx(skb, &shhwtstamps) # Hardware timestamp in the driver will both do the right thing and in a race-free manner, meaning that skb_tx_timestamp will deliver a cmsg with the software timestamp only, and skb_tstamp_tx with a non-NULL hwtstamps argument will deliver a cmsg with the hardware timestamp only. Why are races even possible? Well, because although the software timestamp skb->tstamp is private per skb, the hardware timestamp skb_hwtstamps(skb) lives in skb_shinfo(skb), an area which is shared between skbs and their clones. And skb_tstamp_tx works by cloning the packets when timestamping them, therefore attempting to perform hardware timestamping on an skb's clone will also change the hardware timestamp of the original skb. And the original skb might have been yet again cloned for software timestamping, at an earlier stage. So the logic in __sock_recv_timestamp can't be as simple as saying "does this skb have a hardware timestamp? if yes I'll send the hardware timestamp to the socket, otherwise I'll send the software timestamp", precisely because the hardware timestamp is shared. Instead, it's quite the other way around: __sock_recv_timestamp says "does this skb have a software timestamp? if yes, I'll send the software timestamp, otherwise the hardware one". This works because the software timestamp is not shared with clones. But that means we have a problem when we attempt hardware timestamping with skbs that don't have the skb->tstamp == 0. __sock_recv_timestamp will say "oh, yeah, this must be some sort of odd clone" and will not deliver the hardware timestamp to the socket. And this is exactly what is happening when we have txtime enabled on the socket: as mentioned, that is put in a union with skb->tstamp, so it is quite easy to mistake it. Do what other drivers do (intel igb/igc) and write zero to skb->tstamp before taking the hardware timestamp. It's of no use to us now (we're already on the TX confirmation path). Fixes: 0d08c9ec7d6e ("enetc: add support time specific departure base on the qos etf") Cc: Vinicius Costa Gomes Signed-off-by: Vladimir Oltean Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 30d7d4e83900b..09471329f3a36 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -344,6 +344,12 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + /* Ensure skb_mstamp_ns, which might have been populated with + * the txtime, is not mistaken for a software timestamp, + * because this will prevent the dispatch of our hardware + * timestamp to the socket. + */ + skb->tstamp = ktime_set(0, 0); skb_tstamp_tx(skb, &shhwtstamps); } } -- GitLab From 2055a99da8a253a357bdfd359b3338ef3375a26c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 7 Mar 2021 19:11:02 -0800 Subject: [PATCH 346/839] net: bonding: fix error return code of bond_neigh_init() When slave is NULL or slave_ops->ndo_neigh_setup is NULL, no error return code of bond_neigh_init() is assigned. To fix this bug, ret is assigned with -EINVAL in these cases. Fixes: 9e99bfefdbce ("bonding: fix bond_neigh_init()") Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 74cbbb22470b5..456315bef3a8b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3978,11 +3978,15 @@ static int bond_neigh_init(struct neighbour *n) rcu_read_lock(); slave = bond_first_slave_rcu(bond); - if (!slave) + if (!slave) { + ret = -EINVAL; goto out; + } slave_ops = slave->dev->netdev_ops; - if (!slave_ops->ndo_neigh_setup) + if (!slave_ops->ndo_neigh_setup) { + ret = -EINVAL; goto out; + } /* TODO: find another way [1] to implement this. * Passing a zeroed structure is fragile, -- GitLab From 69dd4503a7e6bae3389b8e028e5768008be8f2d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Feb 2021 15:36:07 +0100 Subject: [PATCH 347/839] irqdomain: Remove debugfs_file from struct irq_domain There's no need to keep around a dentry pointer to a simple file that debugfs itself can look up when we need to remove it from the system. So simplify the code by deleting the variable and cleaning up the logic around the debugfs file. Cc: Marc Zyngier Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/YCvYV53ZdzQSWY6w@kroah.com --- include/linux/irqdomain.h | 4 ---- kernel/irq/irqdomain.c | 9 ++++----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 42d196805f587..33cacc8af26da 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -150,7 +150,6 @@ struct irq_domain_chip_generic; * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains - * @debugfs_file: dentry for the domain debugfs file * * Revmap data, used internally by irq_domain * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that @@ -174,9 +173,6 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS - struct dentry *debugfs_file; -#endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 288151393a065..d10ab1d689d58 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1898,16 +1898,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); static void debugfs_add_domain_dir(struct irq_domain *d) { - if (!d->name || !domain_dir || d->debugfs_file) + if (!d->name || !domain_dir) return; - d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d, - &irq_domain_debug_fops); + debugfs_create_file(d->name, 0444, domain_dir, d, + &irq_domain_debug_fops); } static void debugfs_remove_domain_dir(struct irq_domain *d) { - debugfs_remove(d->debugfs_file); - d->debugfs_file = NULL; + debugfs_remove(debugfs_lookup(d->name, domain_dir)); } void __init irq_domain_debugfs_init(struct dentry *root) -- GitLab From 3e895f4cbd158c31f1295d097a73ea4fe50f88f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Feb 2021 18:10:35 +0000 Subject: [PATCH 348/839] ARM: ep93xx: Select GENERIC_IRQ_MULTI_HANDLER directly ep93xx currently relies of CONFIG_ARM_VIC to select GENERIC_IRQ_MULTI_HANDLER. Given that this is logically a platform architecture property, add the selection of GENERIC_IRQ_MULTI_HANDLER at the platform level. Further patches will remove the selection from the irqchip side. Reported-by: Marc Rutland Signed-off-by: Marc Zyngier Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: Catalin Marinas Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 853aab5ab327a..5da96f5df48f3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -348,6 +348,7 @@ config ARCH_EP93XX select ARM_AMBA imply ARM_PATCH_PHYS_VIRT select ARM_VIC + select GENERIC_IRQ_MULTI_HANDLER select AUTO_ZRELADDR select CLKDEV_LOOKUP select CLKSRC_MMIO -- GitLab From a79f7051cccb6f3bcd3d2a0a058c7d5c79bb0371 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Feb 2021 14:28:00 +0000 Subject: [PATCH 349/839] irqchip: Do not blindly select CONFIG_GENERIC_IRQ_MULTI_HANDLER Implementing CONFIG_GENERIC_IRQ_MULTI_HANDLER is a decision that is made at the architecture level, and shouldn't involve the irqchip at all (we even provide a fallback helper when the option isn't selected). Drop all instances of such selection from non-arch code. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210217142800.2547737-1-maz@kernel.org Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: Catalin Marinas Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier --- drivers/irqchip/Kconfig | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index e74fa206240a1..15536e321df55 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -8,7 +8,6 @@ config IRQCHIP config ARM_GIC bool select IRQ_DOMAIN_HIERARCHY - select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_EFFECTIVE_AFF_MASK config ARM_GIC_PM @@ -33,7 +32,6 @@ config GIC_NON_BANKED config ARM_GIC_V3 bool - select GENERIC_IRQ_MULTI_HANDLER select IRQ_DOMAIN_HIERARCHY select PARTITION_PERCPU select GENERIC_IRQ_EFFECTIVE_AFF_MASK @@ -64,7 +62,6 @@ config ARM_NVIC config ARM_VIC bool select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER config ARM_VIC_NR int @@ -99,14 +96,12 @@ config ATMEL_AIC_IRQ bool select GENERIC_IRQ_CHIP select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER select SPARSE_IRQ config ATMEL_AIC5_IRQ bool select GENERIC_IRQ_CHIP select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER select SPARSE_IRQ config I8259 @@ -153,7 +148,6 @@ config DW_APB_ICTL config FARADAY_FTINTC010 bool select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER select SPARSE_IRQ config HISILICON_IRQ_MBIGEN @@ -169,7 +163,6 @@ config IMGPDC_IRQ config IXP4XX_IRQ bool select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER select SPARSE_IRQ config MADERA_IRQ @@ -186,7 +179,6 @@ config CLPS711X_IRQCHIP bool depends on ARCH_CLPS711X select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER select SPARSE_IRQ default y @@ -205,7 +197,6 @@ config OMAP_IRQCHIP config ORION_IRQCHIP bool select IRQ_DOMAIN - select GENERIC_IRQ_MULTI_HANDLER config PIC32_EVIC bool -- GitLab From f0940f4b3284a00f38a5d42e6067c2aaa20e1f2e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 3 Mar 2021 08:47:16 -0500 Subject: [PATCH 350/839] SUNRPC: Set memalloc_nofs_save() for sync tasks We could recurse into NFS doing memory reclaim while sending a sync task, which might result in a deadlock. Set memalloc_nofs_save for sync task execution. Fixes: a1231fda7e94 ("SUNRPC: Set memalloc_nofs_save() on all rpciod/xprtiod jobs") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- net/sunrpc/sched.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cf702a5f7fe5d..39ed0e0afe6d9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -963,8 +963,11 @@ void rpc_execute(struct rpc_task *task) rpc_set_active(task); rpc_make_runnable(rpciod_workqueue, task); - if (!is_async) + if (!is_async) { + unsigned int pflags = memalloc_nofs_save(); __rpc_execute(task); + memalloc_nofs_restore(pflags); + } } static void rpc_async_schedule(struct work_struct *work) -- GitLab From 82e7ca1334ab16e2e04fafded1cab9dfcdc11b40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Mar 2021 14:42:51 -0500 Subject: [PATCH 351/839] NFS: Don't revalidate the directory permissions on a lookup failure There should be no reason to expect the directory permissions to change just because the directory contents changed or a negative lookup timed out. So let's avoid doing a full call to nfs_mark_for_revalidate() in that case. Furthermore, if this is a negative dentry, and we haven't actually done a new lookup, then we have no reason yet to believe the directory has changed at all. So let's remove the gratuitous directory inode invalidation altogether when called from nfs_lookup_revalidate_negative(). Reported-by: Geert Jansen Fixes: 5ceb9d7fdaaf ("NFS: Refactor nfs_lookup_revalidate()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 19a9f434442ff..08b162de627fd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1401,6 +1401,15 @@ out_force: goto out; } +static void nfs_mark_dir_for_revalidate(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + spin_unlock(&inode->i_lock); +} + /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. @@ -1435,7 +1444,6 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, __func__, dentry); return 1; case 0: - nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); @@ -1525,6 +1533,13 @@ out: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); + + /* + * If the lookup failed despite the dentry change attribute being + * a match, then we should revalidate the directory cache. + */ + if (!ret && nfs_verify_change_attribute(dir, dentry->d_time)) + nfs_mark_dir_for_revalidate(dir); return nfs_lookup_revalidate_done(dir, dentry, inode, ret); } @@ -1567,7 +1582,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, error = nfs_lookup_verify_inode(inode, flags); if (error) { if (error == -ESTALE) - nfs_zap_caches(dir); + nfs_mark_dir_for_revalidate(dir); goto out_bad; } nfs_advise_use_readdirplus(dir); @@ -2064,7 +2079,6 @@ out: dput(parent); return d; out_error: - nfs_mark_for_revalidate(dir); d = ERR_PTR(error); goto out; } -- GitLab From 47397915ede0192235474b145ebcd81b37b03624 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Mar 2021 14:42:52 -0500 Subject: [PATCH 352/839] NFS: Don't gratuitously clear the inode cache when lookup failed The fact that the lookup revalidation failed, does not mean that the inode contents have changed. Fixes: 5ceb9d7fdaaf ("NFS: Refactor nfs_lookup_revalidate()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 08b162de627fd..a91f324cca49f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1444,18 +1444,14 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, __func__, dentry); return 1; case 0: - if (inode && S_ISDIR(inode->i_mode)) { - /* Purge readdir caches. */ - nfs_zap_caches(inode); - /* - * We can't d_drop the root of a disconnected tree: - * its d_hash is on the s_anon list and d_drop() would hide - * it from shrink_dcache_for_unmount(), leading to busy - * inodes on unmount and further oopses. - */ - if (IS_ROOT(dentry)) - return 1; - } + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (inode && IS_ROOT(dentry)) + return 1; dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", __func__, dentry); return 0; -- GitLab From fd6d3feed041e96b84680d0bfc1e7abc8f65de92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Mar 2021 14:42:53 -0500 Subject: [PATCH 353/839] NFS: Clean up function nfs_mark_dir_for_revalidate() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 +--- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a91f324cca49f..02ac982846f44 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1403,10 +1403,8 @@ out_force: static void nfs_mark_dir_for_revalidate(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE); spin_unlock(&inode->i_lock); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 749bbea14d99a..d21bfaac10b0d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -207,7 +207,7 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) } #endif -static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) +void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 25fb43b69e5a4..7b644d6c09e4b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -411,7 +411,8 @@ extern int nfs_write_inode(struct inode *, struct writeback_control *); extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); -void nfs_zap_acl_cache(struct inode *inode); +extern void nfs_zap_acl_cache(struct inode *inode); +extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); -- GitLab From ac46b3d768e4c2754f7b191b81e1bea582e11907 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Mar 2021 14:42:54 -0500 Subject: [PATCH 354/839] NFS: Fix open coded versions of nfs_set_cache_invalid() nfs_set_cache_invalid() has code to handle delegations, and other optimisations, so let's use it when appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 20 ++++++++++---------- fs/nfs/inode.c | 4 ++-- fs/nfs/unlink.c | 6 +++--- fs/nfs/write.c | 8 ++++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 02ac982846f44..fc4f490f2d785 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -81,8 +81,9 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA | - NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid(dir, + NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED); list_add(&ctx->list, &nfsi->open_files); spin_unlock(&dir->i_lock); return ctx; @@ -1700,10 +1701,9 @@ static void nfs_drop_nlink(struct inode *inode) if (inode->i_nlink > 0) drop_nlink(inode); NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_OTHER - | NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid( + inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); } @@ -1715,7 +1715,7 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { if (S_ISDIR(inode->i_mode)) /* drop any readdir cache as it could easily be old */ - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { nfs_complete_unlink(dentry, inode); @@ -2481,9 +2481,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, if (error == 0) { spin_lock(&old_inode->i_lock); NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter(); - NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME - | NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_REVAL_FORCED); spin_unlock(&old_inode->i_lock); } out: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d21bfaac10b0d..eb1ae77f411ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1067,8 +1067,8 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA | - NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index b27ebdccef703..5fa11e1aca4c2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -500,9 +500,9 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME - | NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); d_move(dentry, sdentry); break; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 82bdcb982186f..f05a90338a76f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -303,9 +303,9 @@ static void nfs_set_pageerror(struct address_space *mapping) nfs_zap_mapping(mapping->host, mapping); /* Force file size revalidation */ spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED | - NFS_INO_REVAL_PAGECACHE | - NFS_INO_INVALID_SIZE; + nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_SIZE); spin_unlock(&inode->i_lock); } @@ -1604,7 +1604,7 @@ static int nfs_writeback_done(struct rpc_task *task, /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); spin_unlock(&inode->i_lock); } return 0; -- GitLab From b6f80a2ebb97f184c4679518ac83074598bf9bf4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Mar 2021 14:42:55 -0500 Subject: [PATCH 355/839] NFS: Fix open coded versions of nfs_set_cache_invalid() in NFSv4 nfs_set_cache_invalid() has code to handle delegations, and other optimisations, so let's use it when appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 1 + fs/nfs/nfs42proc.c | 12 +++++++----- fs/nfs/nfs4proc.c | 28 ++++++++++++---------------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index eb1ae77f411ad..a7fb076a5f44b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -229,6 +229,7 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode); } +EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); /* * Invalidate the local caches diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index f3fd935620fcb..094024b0aca19 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -357,13 +357,15 @@ static ssize_t _nfs42_proc_copy(struct file *src, truncate_pagecache_range(dst_inode, pos_dst, pos_dst + res->write_res.count); spin_lock(&dst_inode->i_lock); - NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE | - NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE | - NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA); + nfs_set_cache_invalid( + dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED | + NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR | + NFS_INO_INVALID_DATA); spin_unlock(&dst_inode->i_lock); spin_lock(&src_inode->i_lock); - NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE | - NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME); + nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE | + NFS_INO_REVAL_FORCED | + NFS_INO_INVALID_ATIME); spin_unlock(&src_inode->i_lock); status = res->write_res.count; out: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 74bc5120013d2..6d8fc56e5f458 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1169,14 +1169,14 @@ int nfs4_call_sync(struct rpc_clnt *clnt, static void nfs4_inc_nlink_locked(struct inode *inode) { - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); inc_nlink(inode); } static void nfs4_dec_nlink_locked(struct inode *inode) { - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); drop_nlink(inode); } @@ -1187,35 +1187,31 @@ nfs4_update_changeattr_locked(struct inode *inode, { struct nfs_inode *nfsi = NFS_I(inode); - nfsi->cache_validity |= NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_MTIME - | cache_validity; + cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; } else { if (S_ISDIR(inode->i_mode)) { - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + cache_validity |= NFS_INO_INVALID_DATA; nfs_force_lookup_revalidate(inode); } else { if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + cache_validity |= NFS_INO_REVAL_PAGECACHE; } if (cinfo->before != inode_peek_iversion_raw(inode)) - nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL | - NFS_INO_INVALID_XATTR; + cache_validity |= NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR; } inode_set_iversion_raw(inode, cinfo->after); nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); + nfs_set_cache_invalid(inode, cache_validity); nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; - - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - nfs_fscache_invalidate(inode); } void @@ -5915,9 +5911,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl * so mark the attribute cache invalid. */ spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_CTIME - | NFS_INO_REVAL_FORCED; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); -- GitLab From 27ab92d9996e4e003a726d22c56d780a1655d6b4 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 8 Mar 2021 10:00:04 +0100 Subject: [PATCH 356/839] mptcp: fix length of ADD_ADDR with port sub-option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in current Linux, MPTCP peers advertising endpoints with port numbers use a sub-option length that wrongly accounts for the trailing TCP NOP. Also, receivers will only process incoming ADD_ADDR with port having such wrong sub-option length. Fix this, making ADD_ADDR compliant to RFC8684 §3.4.1. this can be verified running tcpdump on the kselftests artifacts: unpatched kernel: [root@bottarga mptcp]# tcpdump -tnnr unpatched.pcap | grep add-addr reading from file unpatched.pcap, link-type LINUX_SLL (Linux cooked v1), snapshot length 65535 IP 10.0.1.1.10000 > 10.0.1.2.53078: Flags [.], ack 101, win 509, options [nop,nop,TS val 214459678 ecr 521312851,mptcp add-addr v1 id 1 a00:201:2774:2d88:7436:85c3:17fd:101], length 0 IP 10.0.1.2.53078 > 10.0.1.1.10000: Flags [.], ack 101, win 502, options [nop,nop,TS val 521312852 ecr 214459678,mptcp add-addr[bad opt]] patched kernel: [root@bottarga mptcp]# tcpdump -tnnr patched.pcap | grep add-addr reading from file patched.pcap, link-type LINUX_SLL (Linux cooked v1), snapshot length 65535 IP 10.0.1.1.10000 > 10.0.1.2.38178: Flags [.], ack 101, win 509, options [nop,nop,TS val 3728873902 ecr 2732713192,mptcp add-addr v1 id 1 10.0.2.1:10100 hmac 0xbccdfcbe59292a1f,nop,nop], length 0 IP 10.0.1.2.38178 > 10.0.1.1.10000: Flags [.], ack 101, win 502, options [nop,nop,TS val 2732713195 ecr 3728873902,mptcp add-addr v1-echo id 1 10.0.2.1:10100,nop,nop], length 0 Fixes: 22fb85ffaefb ("mptcp: add port support for ADD_ADDR suboption writing") CC: stable@vger.kernel.org # 5.11+ Reviewed-by: Mat Martineau Acked-and-tested-by: Geliang Tang Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 91827d9497669..e21a5bc36cf08 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -52,14 +52,15 @@ #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 -#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20 +#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 -#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12 +#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 -#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32 +#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 -#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 -#define TCPOLEN_MPTCP_PORT_LEN 4 +#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 +#define TCPOLEN_MPTCP_PORT_LEN 2 +#define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 @@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; + /* account for 2 trailing 'nop' options */ if (port) - len += TCPOLEN_MPTCP_PORT_LEN; + len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } -- GitLab From 179d0ba0c454057a65929c46af0d6ad986754781 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 8 Mar 2021 01:13:55 -0800 Subject: [PATCH 357/839] net: qrtr: fix error return code of qrtr_sendmsg() When sock_alloc_send_skb() returns NULL to skb, no error return code of qrtr_sendmsg() is assigned. To fix this bug, rc is assigned with -ENOMEM in this case. Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets") Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 82d2eb8c21d13..edb6ac17cecab 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) plen = (len + 3) & ~3; skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); - if (!skb) + if (!skb) { + rc = -ENOMEM; goto out_node; + } skb_reserve(skb, QRTR_HDR_MAX_SIZE); -- GitLab From 1019d7923d9d4cc878a1a85d4fc2d6619cfe1a6a Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 7 Mar 2021 22:25:28 -0500 Subject: [PATCH 358/839] atm: fix a typo in the struct description phy_data means private PHY data not date Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 60cd25c0461b8..9b02961d65ee6 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -151,7 +151,7 @@ struct atm_dev { const char *type; /* device type name */ int number; /* device index */ void *dev_data; /* per-device data */ - void *phy_data; /* private PHY date */ + void *phy_data; /* private PHY data */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ struct list_head lecs; /* LECS ATM addresses learned via ILMI */ -- GitLab From 3153724fc084d8ef640c611f269ddfb576d1dcb1 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 7 Mar 2021 22:25:29 -0500 Subject: [PATCH 359/839] atm: uPD98402: fix incorrect allocation dev->dev_data is set in zatm.c, calling zatm_start() will overwrite this dev->dev_data in uPD98402_start() and a subsequent PRIV(dev)->lock (i.e dev->phy_data->lock) will result in a null-ptr-dereference. I believe this is a typo and what it actually want to do is to allocate phy_data instead of dev_data. Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- drivers/atm/uPD98402.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c index 7850758b5bb82..239852d855589 100644 --- a/drivers/atm/uPD98402.c +++ b/drivers/atm/uPD98402.c @@ -211,7 +211,7 @@ static void uPD98402_int(struct atm_dev *dev) static int uPD98402_start(struct atm_dev *dev) { DPRINTK("phy_start\n"); - if (!(dev->dev_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) return -ENOMEM; spin_lock_init(&PRIV(dev)->lock); memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats)); -- GitLab From 4416e98594dc04590ebc498fc4e530009535c511 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 7 Mar 2021 22:25:30 -0500 Subject: [PATCH 360/839] atm: idt77252: fix null-ptr-dereference this one is similar to the phy_data allocation fix in uPD98402, the driver allocate the idt77105_priv and store to dev_data but later dereference using dev->dev_data, which will cause null-ptr-dereference. fix this issue by changing dev_data to phy_data so that PRIV(dev) can work correctly. Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- drivers/atm/idt77105.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index 3c081b6171a8f..bfca7b8a6f31e 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -262,7 +262,7 @@ static int idt77105_start(struct atm_dev *dev) { unsigned long flags; - if (!(dev->dev_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) return -ENOMEM; PRIV(dev)->dev = dev; spin_lock_irqsave(&idt77105_priv_lock, flags); @@ -337,7 +337,7 @@ static int idt77105_stop(struct atm_dev *dev) else idt77105_all = walk->next; dev->phy = NULL; - dev->dev_data = NULL; + dev->phy_data = NULL; kfree(walk); break; } -- GitLab From bf1bc694b6b0cf49756cb06f8f38501b9b2c5527 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 8 Mar 2021 12:00:47 -0300 Subject: [PATCH 361/839] cifs: print MIDs in decimal notation The MIDs are mostly printed as decimal, so let's make it consistent. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Aurelien Aptel Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 2 +- fs/cifs/connect.c | 4 ++-- fs/cifs/smb2misc.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 3aedc484e4401..88a7958170ee0 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -207,7 +207,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) from_kuid(&init_user_ns, cfile->uid), cfile->dentry); #ifdef CONFIG_CIFS_DEBUG2 - seq_printf(m, " 0x%llx\n", cfile->fid.mid); + seq_printf(m, " %llu\n", cfile->fid.mid); #else seq_printf(m, "\n"); #endif /* CIFS_DEBUG2 */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 68642e3d42705..eec8a2052da22 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -741,7 +741,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid); + cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid); kref_get(&mid_entry->refcount); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); @@ -752,7 +752,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) /* now walk dispose list and issue callbacks */ list_for_each_safe(tmp, tmp2, &dispose_list) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid); + cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); cifs_mid_q_entry_release(mid_entry); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 60d4bd1eae2b3..6e0ea19e710b4 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work) int rc; if (cancelled->mid) - cifs_tcon_dbg(VFS, "Close unmatched open for MID:%llx\n", + cifs_tcon_dbg(VFS, "Close unmatched open for MID:%llu\n", cancelled->mid); else cifs_tcon_dbg(VFS, "Close interrupted close\n"); -- GitLab From e3d100eae44b42f309c1366efb8397368f1cf8ed Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 8 Mar 2021 12:00:48 -0300 Subject: [PATCH 362/839] cifs: change noisy error message to FYI A customer has reported that their dmesg were being flooded by CIFS: VFS: \\server Cancelling wait for mid xxx cmd: a CIFS: VFS: \\server Cancelling wait for mid yyy cmd: b CIFS: VFS: \\server Cancelling wait for mid zzz cmd: c because some processes that were performing statfs(2) on the share had been interrupted due to their automount setup when certain users logged in and out. Change it to FYI as they should be mostly informative rather than error messages. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Aurelien Aptel Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e90a1d1380b06..15b72d2e8713d 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1207,7 +1207,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } if (rc != 0) { for (; i < num_rqst; i++) { - cifs_server_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", + cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); spin_lock(&GlobalMid_Lock); -- GitLab From 14302ee3301b3a77b331cc14efb95bf7184c73cc Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 8 Mar 2021 12:00:49 -0300 Subject: [PATCH 363/839] cifs: return proper error code in statfs(2) In cifs_statfs(), if server->ops->queryfs is not NULL, then we should use its return value rather than always returning 0. Instead, use rc variable as it is properly set to 0 in case there is no server->ops->queryfs. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Aurelien Aptel Reviewed-by: Ronnie Sahlberg CC: Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d43e935d2df43..099ad9f3660bb 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -290,7 +290,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); - return 0; + return rc; } static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) -- GitLab From 04ad69c342fc4de5bd23be9ef15ea7574fb1a87e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 8 Mar 2021 12:00:50 -0300 Subject: [PATCH 364/839] cifs: do not send close in compound create+close requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of interrupted syscalls, prevent sending CLOSE commands for compound CREATE+CLOSE requests by introducing an CIFS_CP_CREATE_CLOSE_OP flag to indicate lower layers that it should not send a CLOSE command to the MIDs corresponding the compound CREATE+CLOSE request. A simple reproducer: #!/bin/bash mount //server/share /mnt -o username=foo,password=*** tc qdisc add dev eth0 root netem delay 450ms stat -f /mnt &>/dev/null & pid=$! sleep 0.01 kill $pid tc qdisc del dev eth0 root umount /mnt Before patch: ... 6 0.256893470 192.168.122.2 → 192.168.122.15 SMB2 402 Create Request File: ;GetInfo Request FS_INFO/FileFsFullSizeInformation;Close Request 7 0.257144491 192.168.122.15 → 192.168.122.2 SMB2 498 Create Response File: ;GetInfo Response;Close Response 9 0.260798209 192.168.122.2 → 192.168.122.15 SMB2 146 Close Request File: 10 0.260841089 192.168.122.15 → 192.168.122.2 SMB2 130 Close Response, Error: STATUS_FILE_CLOSED Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel CC: Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 19 ++++++++++--------- fs/cifs/smb2inode.c | 1 + fs/cifs/smb2misc.c | 8 ++++---- fs/cifs/smb2ops.c | 10 +++++----- fs/cifs/smb2proto.h | 3 +-- fs/cifs/transport.c | 2 +- 6 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 3de3c5908a72d..31fc8695abd6d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -257,7 +257,7 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); - int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *); + int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache); @@ -1705,16 +1705,17 @@ static inline bool is_retryable_error(int error) #define CIFS_NO_RSP_BUF 0x040 /* no response buffer required */ /* Type of request operation */ -#define CIFS_ECHO_OP 0x080 /* echo request */ -#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ -#define CIFS_NEG_OP 0x0200 /* negotiate request */ +#define CIFS_ECHO_OP 0x080 /* echo request */ +#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ +#define CIFS_NEG_OP 0x0200 /* negotiate request */ +#define CIFS_CP_CREATE_CLOSE_OP 0x0400 /* compound create+close request */ /* Lower bitmask values are reserved by others below. */ -#define CIFS_SESS_OP 0x2000 /* session setup request */ -#define CIFS_OP_MASK 0x2380 /* mask request type */ +#define CIFS_SESS_OP 0x2000 /* session setup request */ +#define CIFS_OP_MASK 0x2780 /* mask request type */ -#define CIFS_HAS_CREDITS 0x0400 /* already has credits */ -#define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ -#define CIFS_NO_SRV_RSP 0x1000 /* there is no server response */ +#define CIFS_HAS_CREDITS 0x0400 /* already has credits */ +#define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ +#define CIFS_NO_SRV_RSP 0x1000 /* there is no server response */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1f900b81c34ae..a718dc77e604e 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -358,6 +358,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, if (cfile) goto after_close; /* Close */ + flags |= CIFS_CP_CREATE_CLOSE_OP; rqst[num_rqst].rq_iov = &vars->close_iov[0]; rqst[num_rqst].rq_nvec = 1; rc = SMB2_close_init(tcon, server, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6e0ea19e710b4..b50164e2c88dc 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -844,14 +844,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, } int -smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) +smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer; - struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct smb2_sync_hdr *sync_hdr = mid->resp_buf; + struct smb2_create_rsp *rsp = mid->resp_buf; struct cifs_tcon *tcon; int rc; - if (sync_hdr->Command != SMB2_CREATE || + if ((mid->optype & CIFS_CP_CREATE_CLOSE_OP) || sync_hdr->Command != SMB2_CREATE || sync_hdr->Status != STATUS_SUCCESS) return 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f5087295424c6..9bae7e8deb09e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1195,7 +1195,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, struct TCP_Server_Info *server = cifs_pick_channel(ses); __le16 *utf16_path = NULL; int ea_name_len = strlen(ea_name); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; int len; struct smb_rqst rqst[3]; int resp_buftype[3]; @@ -1573,7 +1573,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct smb_query_info qi; struct smb_query_info __user *pqi; int rc = 0; - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb2_query_info_rsp *qi_rsp = NULL; struct smb2_ioctl_rsp *io_rsp = NULL; void *buffer = NULL; @@ -2577,7 +2577,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = cifs_pick_channel(ses); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; @@ -2975,7 +2975,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, unsigned int sub_offset; unsigned int print_len; unsigned int print_offset; - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; @@ -3157,7 +3157,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 9565e27681a54..a2eb34a8d9c91 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -246,8 +246,7 @@ extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid); -extern int smb2_handle_cancelled_mid(char *buffer, - struct TCP_Server_Info *server); +extern int smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server); void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 15b72d2e8713d..007d99437c771 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -101,7 +101,7 @@ static void _cifs_mid_q_entry_release(struct kref *refcount) if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && midEntry->mid_state == MID_RESPONSE_RECEIVED && server->ops->handle_cancelled_mid) - server->ops->handle_cancelled_mid(midEntry->resp_buf, server); + server->ops->handle_cancelled_mid(midEntry, server); midEntry->mid_state = MID_FREE; atomic_dec(&midCount); -- GitLab From f15c5c11abfbf8909eb30598315ecbec2311cfdc Mon Sep 17 00:00:00 2001 From: Simeon Simeonoff Date: Mon, 8 Mar 2021 20:48:35 +0200 Subject: [PATCH 365/839] ALSA: hda/ca0132: Add Sound BlasterX AE-5 Plus support The new AE-5 Plus model has a different Subsystem ID compared to the non-plus model. Adding the new id to the list of quirks. Signed-off-by: Simeon Simeonoff Cc: Link: https://lore.kernel.org/r/998cafbe10b648f724ee33570553f2d780a38963.camel@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index c966f49fa942a..b2b620f6c8320 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1309,6 +1309,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5), + SND_PCI_QUIRK(0x1102, 0x0191, "Sound Blaster AE-5 Plus", QUIRK_AE5), SND_PCI_QUIRK(0x1102, 0x0081, "Sound Blaster AE-7", QUIRK_AE7), {} }; -- GitLab From 30dea07180de3aa0ad613af88431ef4e34b5ef68 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 9 Mar 2021 01:30:36 +0300 Subject: [PATCH 366/839] ALSA: usb-audio: fix NULL ptr dereference in usb_audio_probe syzbot reported null pointer dereference in usb_audio_probe. The problem was in case, when quirk == NULL. It's not an error condition, so quirk must be checked before dereferencing. Call Trace: usb_probe_interface+0x315/0x7f0 drivers/usb/core/driver.c:396 really_probe+0x291/0xe60 drivers/base/dd.c:554 driver_probe_device+0x26b/0x3d0 drivers/base/dd.c:740 __device_attach_driver+0x1d1/0x290 drivers/base/dd.c:846 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:431 __device_attach+0x228/0x4a0 drivers/base/dd.c:914 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:491 device_add+0xbdb/0x1db0 drivers/base/core.c:3242 usb_set_configuration+0x113f/0x1910 drivers/usb/core/message.c:2164 usb_generic_driver_probe+0xba/0x100 drivers/usb/core/generic.c:238 usb_probe_device+0xd9/0x2c0 drivers/usb/core/driver.c:293 really_probe+0x291/0xe60 drivers/base/dd.c:554 driver_probe_device+0x26b/0x3d0 drivers/base/dd.c:740 __device_attach_driver+0x1d1/0x290 drivers/base/dd.c:846 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:431 __device_attach+0x228/0x4a0 drivers/base/dd.c:914 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:491 device_add+0xbdb/0x1db0 drivers/base/core.c:3242 usb_new_device.cold+0x721/0x1058 drivers/usb/core/hub.c:2555 hub_port_connect drivers/usb/core/hub.c:5223 [inline] hub_port_connect_change drivers/usb/core/hub.c:5363 [inline] port_event drivers/usb/core/hub.c:5509 [inline] hub_event+0x2357/0x4320 drivers/usb/core/hub.c:5591 process_one_work+0x98d/0x1600 kernel/workqueue.c:2275 worker_thread+0x64c/0x1120 kernel/workqueue.c:2421 kthread+0x3b1/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 Reported-by: syzbot+719da9b149a931f5143f@syzkaller.appspotmail.com Fixes: 9799110825db ("ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend()") Signed-off-by: Pavel Skripkin Cc: Link: https://lore.kernel.org/r/f1ebad6e721412843bd1b12584444c0a63c6b2fb.1615242183.git.paskripkin@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/card.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 08c794883299e..3fd1743513b5e 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -830,7 +830,8 @@ static int usb_audio_probe(struct usb_interface *intf, snd_media_device_create(chip, intf); } - chip->quirk_type = quirk->type; + if (quirk) + chip->quirk_type = quirk->type; usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; -- GitLab From c5aa956eaeb05fe87e33433d7fd9f5e4d23c7416 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 9 Mar 2021 01:30:57 +0300 Subject: [PATCH 367/839] ALSA: usb-audio: fix use after free in usb_audio_disconnect The problem was in wrong "if" placement. chip->quirk_type is freed in snd_card_free_when_closed(), but inside if statement it's accesed. Fixes: 9799110825db ("ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend()") Signed-off-by: Pavel Skripkin Cc: Link: https://lore.kernel.org/r/16da19126ff461e5e64a9aec648cce28fb8ed73e.1615242183.git.paskripkin@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/card.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 3fd1743513b5e..b6f4c0848e661 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -907,6 +907,9 @@ static void usb_audio_disconnect(struct usb_interface *intf) } } + if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + usb_enable_autosuspend(interface_to_usbdev(intf)); + chip->num_interfaces--; if (chip->num_interfaces <= 0) { usb_chip[chip->index] = NULL; @@ -915,9 +918,6 @@ static void usb_audio_disconnect(struct usb_interface *intf) } else { mutex_unlock(®ister_mutex); } - - if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) - usb_enable_autosuspend(interface_to_usbdev(intf)); } /* lock the shutdown (disconnect) task and autoresume */ -- GitLab From ca667a33207daeaf9c62b106815728718def60ec Mon Sep 17 00:00:00 2001 From: Karan Singhal Date: Tue, 16 Feb 2021 11:03:10 -0500 Subject: [PATCH 368/839] USB: serial: cp210x: add ID for Acuity Brands nLight Air Adapter IDs of nLight Air Adapter, Acuity Brands, Inc.: vid: 10c4 pid: 88d8 Signed-off-by: Karan Singhal Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 9e1c609792fb0..afbc4f4ea21fe 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -145,6 +145,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88D8) }, /* Acuity Brands nLight Air Adapter */ { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */ { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ -- GitLab From 42213a0190b535093a604945db05a4225bf43885 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 23 Feb 2021 17:44:18 +0100 Subject: [PATCH 369/839] USB: serial: cp210x: add some more GE USB IDs GE CS1000 has some more custom USB IDs for CP2102N; add them to the driver to have working auto-probing. Signed-off-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index afbc4f4ea21fe..a373cd63b3a44 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -202,6 +202,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ + { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */ + { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ -- GitLab From f164f5d8a70245217bee89b246dc4fcfececa526 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Feb 2021 11:08:26 +0100 Subject: [PATCH 370/839] USB: serial: xr: fix NULL-deref on disconnect Claiming the sibling control interface is a bit more involved and specifically requires adding support to USB-serial core for managing either interface being unbound first, something which could otherwise lead to a NULL-pointer dereference. Similarly, additional infrastructure is also needed to handle suspend properly. Since the driver currently isn't actually using the control interface, we can defer this for now by simply not claiming the control interface. Fixes: c2d405aa86b4 ("USB: serial: add MaxLinear/Exar USB to Serial driver") Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Reviewed-by: Greg Kroah-Hartman Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold --- drivers/usb/serial/xr_serial.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/usb/serial/xr_serial.c b/drivers/usb/serial/xr_serial.c index 483d07dee19de..0ca04906da4bf 100644 --- a/drivers/usb/serial/xr_serial.c +++ b/drivers/usb/serial/xr_serial.c @@ -545,37 +545,13 @@ static void xr_close(struct usb_serial_port *port) static int xr_probe(struct usb_serial *serial, const struct usb_device_id *id) { - struct usb_driver *driver = serial->type->usb_driver; - struct usb_interface *control_interface; - int ret; - /* Don't bind to control interface */ if (serial->interface->cur_altsetting->desc.bInterfaceNumber == 0) return -ENODEV; - /* But claim the control interface during data interface probe */ - control_interface = usb_ifnum_to_if(serial->dev, 0); - if (!control_interface) - return -ENODEV; - - ret = usb_driver_claim_interface(driver, control_interface, NULL); - if (ret) { - dev_err(&serial->interface->dev, "Failed to claim control interface\n"); - return ret; - } - return 0; } -static void xr_disconnect(struct usb_serial *serial) -{ - struct usb_driver *driver = serial->type->usb_driver; - struct usb_interface *control_interface; - - control_interface = usb_ifnum_to_if(serial->dev, 0); - usb_driver_release_interface(driver, control_interface); -} - static const struct usb_device_id id_table[] = { { USB_DEVICE(0x04e2, 0x1410) }, /* XR21V141X */ { } @@ -590,7 +566,6 @@ static struct usb_serial_driver xr_device = { .id_table = id_table, .num_ports = 1, .probe = xr_probe, - .disconnect = xr_disconnect, .open = xr_open, .close = xr_close, .break_ctl = xr_break_ctl, -- GitLab From 5563b3b6420362c8a1f468ca04afe6d5f0a8d0a3 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Mon, 1 Mar 2021 17:16:12 -0300 Subject: [PATCH 371/839] USB: serial: ch341: add new Product ID Add PID for CH340 that's found on cheap programmers. The driver works flawlessly as soon as the new PID (0x9986) is added to it. These look like ANU232MI but ship with a ch341 inside. They have no special identifiers (mine only has the string "DB9D20130716" printed on the PCB and nothing identifiable on the packaging. The merchant i bought it from doesn't sell these anymore). the lsusb -v output is: Bus 001 Device 009: ID 9986:7523 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 1.10 bDeviceClass 255 Vendor Specific Class bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 8 idVendor 0x9986 idProduct 0x7523 bcdDevice 2.54 iManufacturer 0 iProduct 0 iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0027 bNumInterfaces 1 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 96mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 1 bInterfaceProtocol 2 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0008 1x 8 bytes bInterval 1 Signed-off-by: Niv Sardi Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 8d997b71056f6..2db917eab7995 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -86,6 +86,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, { USB_DEVICE(0x4348, 0x5523) }, + { USB_DEVICE(0x9986, 0x7523) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); -- GitLab From cfdc67acc785e01a8719eeb7012709d245564701 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 2 Mar 2021 02:01:52 +0300 Subject: [PATCH 372/839] USB: serial: io_edgeport: fix memory leak in edge_startup sysbot found memory leak in edge_startup(). The problem was that when an error was received from the usb_submit_urb(), nothing was cleaned up. Reported-by: syzbot+59f777bdcbdd7eea5305@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Fixes: 6e8cf7751f9f ("USB: add EPIC support to the io_edgeport driver") Cc: stable@vger.kernel.org # 2.6.21: c5c0c55598ce Signed-off-by: Johan Hovold --- drivers/usb/serial/io_edgeport.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index a493670c06e61..68401adcffde5 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -3003,26 +3003,32 @@ static int edge_startup(struct usb_serial *serial) response = -ENODEV; } - usb_free_urb(edge_serial->interrupt_read_urb); - kfree(edge_serial->interrupt_in_buffer); - - usb_free_urb(edge_serial->read_urb); - kfree(edge_serial->bulk_in_buffer); - - kfree(edge_serial); - - return response; + goto error; } /* start interrupt read for this edgeport this interrupt will * continue as long as the edgeport is connected */ response = usb_submit_urb(edge_serial->interrupt_read_urb, GFP_KERNEL); - if (response) + if (response) { dev_err(ddev, "%s - Error %d submitting control urb\n", __func__, response); + + goto error; + } } return response; + +error: + usb_free_urb(edge_serial->interrupt_read_urb); + kfree(edge_serial->interrupt_in_buffer); + + usb_free_urb(edge_serial->read_urb); + kfree(edge_serial->bulk_in_buffer); + + kfree(edge_serial); + + return response; } -- GitLab From 673433e7c288927f7244658788f203c660d7a6f6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 7 Mar 2021 17:20:13 +0000 Subject: [PATCH 373/839] dt-bindings/irq: Add compatible string for the JZ4760B Add the ingenic,jz4760b-intc compatible string with a fallback to the ingenic,jz4760-intc compatible string. Signed-off-by: Paul Cercueil Acked-by: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210307172014.73481-1-paul@crapouillou.net --- .../devicetree/bindings/interrupt-controller/ingenic,intc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml index 0a046be8d1cda..0358a7739c8e2 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml @@ -23,6 +23,7 @@ properties: - enum: - ingenic,jz4775-intc - ingenic,jz4770-intc + - ingenic,jz4760b-intc - const: ingenic,jz4760-intc - items: - const: ingenic,x1000-intc -- GitLab From 5fbecd2389f48e1415799c63130d0cdce1cf3f60 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 7 Mar 2021 17:20:14 +0000 Subject: [PATCH 374/839] irqchip/ingenic: Add support for the JZ4760 Add support for the interrupt controller found in the JZ4760 SoC, which works exactly like the one in the JZ4770. Signed-off-by: Paul Cercueil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210307172014.73481-2-paul@crapouillou.net --- drivers/irqchip/irq-ingenic-tcu.c | 1 + drivers/irqchip/irq-ingenic.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c index 7a7222d4c19c0..b938d1d04d96e 100644 --- a/drivers/irqchip/irq-ingenic-tcu.c +++ b/drivers/irqchip/irq-ingenic-tcu.c @@ -179,5 +179,6 @@ err_free_tcu: } IRQCHIP_DECLARE(jz4740_tcu_irq, "ingenic,jz4740-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(jz4725b_tcu_irq, "ingenic,jz4725b-tcu", ingenic_tcu_irq_init); +IRQCHIP_DECLARE(jz4760_tcu_irq, "ingenic,jz4760-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(jz4770_tcu_irq, "ingenic,jz4770-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(x1000_tcu_irq, "ingenic,x1000-tcu", ingenic_tcu_irq_init); diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c index b61a8901ef722..ea36bb00be80b 100644 --- a/drivers/irqchip/irq-ingenic.c +++ b/drivers/irqchip/irq-ingenic.c @@ -155,6 +155,7 @@ static int __init intc_2chip_of_init(struct device_node *node, { return ingenic_intc_of_init(node, 2); } +IRQCHIP_DECLARE(jz4760_intc, "ingenic,jz4760-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4770_intc, "ingenic,jz4770-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4775_intc, "ingenic,jz4775-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4780_intc, "ingenic,jz4780-intc", intc_2chip_of_init); -- GitLab From dbaee836d60a8e1b03e7d53a37893235662ba124 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 5 Mar 2021 12:21:24 -0800 Subject: [PATCH 375/839] KVM: arm64: Don't use cbz/adr with external symbols allmodconfig + CONFIG_LTO_CLANG_THIN=y fails to build due to following linker errors: ld.lld: error: irqbypass.c:(function __guest_enter: .text+0x21CC): relocation R_AARCH64_CONDBR19 out of range: 2031220 is not in [-1048576, 1048575]; references hyp_panic >>> defined in vmlinux.o ld.lld: error: irqbypass.c:(function __guest_enter: .text+0x21E0): relocation R_AARCH64_ADR_PREL_LO21 out of range: 2031200 is not in [-1048576, 1048575]; references hyp_panic >>> defined in vmlinux.o This is because with LTO, the compiler ends up placing hyp_panic() more than 1MB away from __guest_enter(). Use an unconditional branch and adr_l instead to fix the issue. Link: https://github.com/ClangBuiltLinux/linux/issues/1317 Reported-by: Nathan Chancellor Suggested-by: Marc Zyngier Suggested-by: Ard Biesheuvel Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Will Deacon Tested-by: Nathan Chancellor Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210305202124.3768527-1-samitolvanen@google.com --- arch/arm64/kvm/hyp/entry.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 0c66a1d408fd7..e831d3dfd50d7 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) // If the hyp context is loaded, go straight to hyp_panic get_loaded_vcpu x0, x1 - cbz x0, hyp_panic + cbnz x0, 1f + b hyp_panic +1: // The hyp context is saved so make sure it is restored to allow // hyp_panic to run at hyp and, subsequently, panic to run in the host. // This makes use of __guest_exit to avoid duplication but sets the @@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) // current state is saved to the guest context but it will only be // accurate if the guest had been completely restored. adr_this_cpu x0, kvm_hyp_ctxt, x1 - adr x1, hyp_panic + adr_l x1, hyp_panic str x1, [x0, #CPU_XREG_OFFSET(30)] get_vcpu_ptr x1, x0 -- GitLab From 774514bf977377c9137640a0310bd64eed0f7323 Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Thu, 25 Feb 2021 15:54:54 +0100 Subject: [PATCH 376/839] mmc: mmci: Add MMC_CAP_NEED_RSP_BUSY for the stm32 variants An issue has been observed on STM32MP157C-EV1 board, with an erase command with secure erase argument, ending up waiting for ~4 hours before timeout. The requested busy timeout from the mmc core ends up with 14784000ms (~4 hours), but the supported host->max_busy_timeout is 86767ms, which leads to that the core switch to use an R1 response in favor of the R1B and polls for busy with the host->card_busy() ops. In this case the polling doesn't work as expected, as we never detects that the card stops signaling busy, which leads to the following message: mmc1: Card stuck being busy! __mmc_poll_for_busy The problem boils done to that the stm32 variants can't use R1 responses in favor of R1B responses, as it leads to an internal state machine in the controller to get stuck. To continue to process requests, it would need to be reset. To fix this problem, let's set MMC_CAP_NEED_RSP_BUSY for the stm32 variant, which prevent the mmc core from switching to R1 responses. Additionally, let's cap the cmd->busy_timeout to the host->max_busy_timeout, thus rely on 86767ms to be sufficient (~66 seconds was need for this test case). Fixes: 94fe2580a2f3 ("mmc: core: Enable erase/discard/trim support for all mmc hosts") Signed-off-by: Yann Gautier Link: https://lore.kernel.org/r/20210225145454.12780-1-yann.gautier@foss.st.com Cc: stable@vger.kernel.org [Ulf: Simplified the code and extended the commit message] Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 17dbc81c221eb..984d350551567 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1242,7 +1242,11 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c) if (!cmd->busy_timeout) cmd->busy_timeout = 10 * MSEC_PER_SEC; - clks = (unsigned long long)cmd->busy_timeout * host->cclk; + if (cmd->busy_timeout > host->mmc->max_busy_timeout) + clks = (unsigned long long)host->mmc->max_busy_timeout * host->cclk; + else + clks = (unsigned long long)cmd->busy_timeout * host->cclk; + do_div(clks, MSEC_PER_SEC); writel_relaxed(clks, host->base + MMCIDATATIMER); } @@ -2151,6 +2155,10 @@ static int mmci_probe(struct amba_device *dev, mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY; } + /* Variants with mandatory busy timeout in HW needs R1B responses. */ + if (variant->busy_timeout) + mmc->caps |= MMC_CAP_NEED_RSP_BUSY; + /* Prepare a CMD12 - needed to clear the DPSM on some variants. */ host->stop_abort.opcode = MMC_STOP_TRANSMISSION; host->stop_abort.arg = 0; -- GitLab From 66fbacccbab91e6e55d9c8f1fc0910a8eb6c81f7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 3 Mar 2021 11:26:14 +0200 Subject: [PATCH 377/839] mmc: core: Fix partition switch time for eMMC Avoid the following warning by always defining partition switch time: [ 3.209874] mmc1: unspecified timeout for CMD6 - use generic [ 3.222780] ------------[ cut here ]------------ [ 3.233363] WARNING: CPU: 1 PID: 111 at drivers/mmc/core/mmc_ops.c:575 __mmc_switch+0x200/0x204 Reported-by: Paul Fertser Fixes: 1c447116d017 ("mmc: mmc: Fix partition switch timeout for some eMMCs") Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/168bbfd6-0c5b-5ace-ab41-402e7937c46e@intel.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 0d80b72ddde81..8741271d39712 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -423,10 +423,6 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) /* EXT_CSD value is in units of 10ms, but we store in ms */ card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME]; - /* Some eMMC set the value too low so set a minimum */ - if (card->ext_csd.part_time && - card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME) - card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -616,6 +612,17 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->ext_csd.data_sector_size = 512; } + /* + * GENERIC_CMD6_TIME is to be used "unless a specific timeout is defined + * when accessing a specific field", so use it here if there is no + * PARTITION_SWITCH_TIME. + */ + if (!card->ext_csd.part_time) + card->ext_csd.part_time = card->ext_csd.generic_cmd6_time; + /* Some eMMC set the value too low so set a minimum */ + if (card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME) + card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME; + /* eMMC v5 or later */ if (card->ext_csd.rev >= 7) { memcpy(card->ext_csd.fwrev, &ext_csd[EXT_CSD_FIRMWARE_VERSION], -- GitLab From f06391c45e83f9a731045deb23df7cc3814fd795 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 3 Mar 2021 11:42:48 -0600 Subject: [PATCH 378/839] mmc: cqhci: Fix random crash when remove mmc module/card [ 6684.493350] Unable to handle kernel paging request at virtual address ffff800011c5b0f0 [ 6684.498531] mmc0: card 0001 removed [ 6684.501556] Mem abort info: [ 6684.509681] ESR = 0x96000047 [ 6684.512786] EC = 0x25: DABT (current EL), IL = 32 bits [ 6684.518394] SET = 0, FnV = 0 [ 6684.521707] EA = 0, S1PTW = 0 [ 6684.524998] Data abort info: [ 6684.528236] ISV = 0, ISS = 0x00000047 [ 6684.532986] CM = 0, WnR = 1 [ 6684.536129] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000081b22000 [ 6684.543923] [ffff800011c5b0f0] pgd=00000000bffff003, p4d=00000000bffff003, pud=00000000bfffe003, pmd=00000000900e1003, pte=0000000000000000 [ 6684.557915] Internal error: Oops: 96000047 [#1] PREEMPT SMP [ 6684.564240] Modules linked in: sdhci_esdhc_imx(-) sdhci_pltfm sdhci cqhci mmc_block mmc_core fsl_jr_uio caam_jr caamkeyblob_desc caamhash_desc caamalg_desc crypto_engine rng_core authenc libdes crct10dif_ce flexcan can_dev caam error [last unloaded: mmc_core] [ 6684.587281] CPU: 0 PID: 79138 Comm: kworker/0:3H Not tainted 5.10.9-01410-g3ba33182767b-dirty #10 [ 6684.596160] Hardware name: Freescale i.MX8DXL EVK (DT) [ 6684.601320] Workqueue: kblockd blk_mq_run_work_fn [ 6684.606094] pstate: 40000005 (nZcv daif -PAN -UAO -TCO BTYPE=--) [ 6684.612286] pc : cqhci_request+0x148/0x4e8 [cqhci] ^GMessage from syslogd@ at Thu Jan 1 01:51:24 1970 ...[ 6684.617085] lr : cqhci_request+0x314/0x4e8 [cqhci] [ 6684.626734] sp : ffff80001243b9f0 [ 6684.630049] x29: ffff80001243b9f0 x28: ffff00002c3dd000 [ 6684.635367] x27: 0000000000000001 x26: 0000000000000001 [ 6684.640690] x25: ffff00002c451000 x24: 000000000000000f [ 6684.646007] x23: ffff000017e71c80 x22: ffff00002c451000 [ 6684.651326] x21: ffff00002c0f3550 x20: ffff00002c0f3550 [ 6684.656651] x19: ffff000017d46880 x18: ffff00002cea1500 [ 6684.661977] x17: 0000000000000000 x16: 0000000000000000 [ 6684.667294] x15: 000001ee628e3ed1 x14: 0000000000000278 [ 6684.672610] x13: 0000000000000001 x12: 0000000000000001 [ 6684.677927] x11: 0000000000000000 x10: 0000000000000000 [ 6684.683243] x9 : 000000000000002b x8 : 0000000000001000 [ 6684.688560] x7 : 0000000000000010 x6 : ffff00002c0f3678 [ 6684.693886] x5 : 000000000000000f x4 : ffff800011c5b000 [ 6684.699211] x3 : 000000000002d988 x2 : 0000000000000008 [ 6684.704537] x1 : 00000000000000f0 x0 : 0002d9880008102f [ 6684.709854] Call trace: [ 6684.712313] cqhci_request+0x148/0x4e8 [cqhci] [ 6684.716803] mmc_cqe_start_req+0x58/0x68 [mmc_core] [ 6684.721698] mmc_blk_mq_issue_rq+0x460/0x810 [mmc_block] [ 6684.727018] mmc_mq_queue_rq+0x118/0x2b0 [mmc_block] The problem occurs when cqhci_request() get called after cqhci_disable() as it leads to access of allocated memory that has already been freed. Let's fix the problem by calling cqhci_disable() a bit later in the remove path. Signed-off-by: Frank Li Diagnosed-by: Adrian Hunter Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20210303174248.542175-1-Frank.Li@nxp.com Fixes: f690f4409ddd ("mmc: mmc: Enable CQE's") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index c2e70b757dd12..4383c262b3f5a 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -399,11 +399,6 @@ void mmc_remove_card(struct mmc_card *card) mmc_remove_card_debugfs(card); #endif - if (host->cqe_enabled) { - host->cqe_ops->cqe_disable(host); - host->cqe_enabled = false; - } - if (mmc_card_present(card)) { if (mmc_host_is_spi(card->host)) { pr_info("%s: SPI card removed\n", @@ -416,6 +411,10 @@ void mmc_remove_card(struct mmc_card *card) of_node_put(card->dev.of_node); } + if (host->cqe_enabled) { + host->cqe_ops->cqe_disable(host); + host->cqe_enabled = false; + } + put_device(&card->dev); } - -- GitLab From bd67b711bfaa02cf19e88aa2d9edae5c1c1d2739 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 8 Mar 2021 10:24:47 +0100 Subject: [PATCH 379/839] MIPS: kernel: Reserve exception base early to prevent corruption BMIPS is one of the few platforms that do change the exception base. After commit 2dcb39645441 ("memblock: do not start bottom-up allocations with kernel_end") we started seeing BMIPS boards fail to boot with the built-in FDT being corrupted. Before the cited commit, early allocations would be in the [kernel_end, RAM_END] range, but after commit they would be within [RAM_START + PAGE_SIZE, RAM_END]. The custom exception base handler that is installed by bmips_ebase_setup() done for BMIPS5000 CPUs ends-up trampling on the memory region allocated by unflatten_and_copy_device_tree() thus corrupting the FDT used by the kernel. To fix this, we need to perform an early reservation of the custom exception space. Additional we reserve the first 4k (1k for R3k) for either normal exception vector space (legacy CPUs) or special vectors like cache exceptions. Huge thanks to Serge for analysing and proposing a solution to this issue. Fixes: 2dcb39645441 ("memblock: do not start bottom-up allocations with kernel_end") Reported-by: Kamal Dasu Debugged-by: Serge Semin Acked-by: Mike Rapoport Tested-by: Florian Fainelli Reviewed-by: Serge Semin Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/traps.h | 3 +++ arch/mips/kernel/cpu-probe.c | 6 ++++++ arch/mips/kernel/cpu-r3k-probe.c | 3 +++ arch/mips/kernel/traps.c | 10 +++++----- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h index 6aa8f126a43d8..b710e76c9c658 100644 --- a/arch/mips/include/asm/traps.h +++ b/arch/mips/include/asm/traps.h @@ -24,8 +24,11 @@ extern void (*board_ebase_setup)(void); extern void (*board_cache_error_setup)(void); extern int register_nmi_notifier(struct notifier_block *nb); +extern void reserve_exception_space(phys_addr_t addr, unsigned long size); extern char except_vec_nmi[]; +#define VECTORSPACING 0x100 /* for EI/VI mode */ + #define nmi_notifier(fn, pri) \ ({ \ static struct notifier_block fn##_nb = { \ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9a89637b4ecfa..b71892064f273 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "fpu-probe.h" @@ -1628,6 +1629,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_BMIPS3300; __cpu_name[cpu] = "Broadcom BMIPS3300"; set_elf_platform(cpu, "bmips3300"); + reserve_exception_space(0x400, VECTORSPACING * 64); break; case PRID_IMP_BMIPS43XX: { int rev = c->processor_id & PRID_REV_MASK; @@ -1638,6 +1640,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS4380"; set_elf_platform(cpu, "bmips4380"); c->options |= MIPS_CPU_RIXI; + reserve_exception_space(0x400, VECTORSPACING * 64); } else { c->cputype = CPU_BMIPS4350; __cpu_name[cpu] = "Broadcom BMIPS4350"; @@ -1654,6 +1657,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS5000"; set_elf_platform(cpu, "bmips5000"); c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI; + reserve_exception_space(0x1000, VECTORSPACING * 64); break; } } @@ -2133,6 +2137,8 @@ void cpu_probe(void) if (cpu == 0) __ua_limit = ~((1ull << cpu_vmbits) - 1); #endif + + reserve_exception_space(0, 0x1000); } void cpu_report(void) diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index abdbbe8c5a43a..af654771918cd 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fpu-probe.h" @@ -158,6 +159,8 @@ void cpu_probe(void) cpu_set_fpu_opts(c); else cpu_set_nofpu_opts(c); + + reserve_exception_space(0, 0x400); } void cpu_report(void) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e0352958e2f72..808b8b61ded15 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2009,13 +2009,16 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs) nmi_exit(); } -#define VECTORSPACING 0x100 /* for EI/VI mode */ - unsigned long ebase; EXPORT_SYMBOL_GPL(ebase); unsigned long exception_handlers[32]; unsigned long vi_handlers[64]; +void reserve_exception_space(phys_addr_t addr, unsigned long size) +{ + memblock_reserve(addr, size); +} + void __init *set_except_vector(int n, void *addr) { unsigned long handler = (unsigned long) addr; @@ -2367,10 +2370,7 @@ void __init trap_init(void) if (!cpu_has_mips_r2_r6) { ebase = CAC_BASE; - ebase_pa = virt_to_phys((void *)ebase); vec_size = 0x400; - - memblock_reserve(ebase_pa, vec_size); } else { if (cpu_has_veic || cpu_has_vint) vec_size = 0x200 + VECTORSPACING*64; -- GitLab From cea15316ceee2d4a51dfdecd79e08a438135416c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 4 Mar 2021 07:34:11 +0530 Subject: [PATCH 380/839] powerpc/64s: Fix instruction encoding for lis in ppc_function_entry() 'lis r2,N' is 'addis r2,0,N' and the instruction encoding in the macro LIS_R2 is incorrect (it currently maps to 'addis r0,r2,N'). Fix the same. Fixes: c71b7eff426f ("powerpc: Add ABIv2 support to ppc_function_entry") Cc: stable@vger.kernel.org # v3.16+ Reported-by: Jiri Olsa Signed-off-by: Naveen N. Rao Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210304020411.16796-1-naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/code-patching.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index eacc9102c2515..d5b3c3bb95b40 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,7 +73,7 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c020000UL +#define LIS_R2 0x3c400000UL #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL -- GitLab From 545ac14c16b5dbd909d5a90ddf5b5a629a40fa94 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:13 +0100 Subject: [PATCH 381/839] x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack The code in the NMI handler to adjust the #VC handler IST stack is needed in case an NMI hits when the #VC handler is still using its IST stack. But the check for this condition also needs to look if the regs->sp value is trusted, meaning it was not set by user-space. Extend the check to not use regs->sp when the NMI interrupted user-space code or the SYSCALL gap. Fixes: 315562c9af3d5 ("x86/sev-es: Adjust #VC IST Stack on entering NMI handler") Reported-by: Andy Lutomirski Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # 5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-3-joro@8bytes.org --- arch/x86/kernel/sev-es.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 84c1821819afb..301f20f6d4dd0 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu) cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); } -static __always_inline bool on_vc_stack(unsigned long sp) +static __always_inline bool on_vc_stack(struct pt_regs *regs) { + unsigned long sp = regs->sp; + + /* User-mode RSP is not trusted */ + if (user_mode(regs)) + return false; + + /* SYSCALL gap still has user-mode RSP */ + if (ip_within_syscall_gap(regs)) + return false; + return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); } @@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs) old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); /* Make room on the IST stack */ - if (on_vc_stack(regs->sp)) + if (on_vc_stack(regs)) new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist); else new_ist = old_ist - sizeof(old_ist); -- GitLab From 62441a1fb53263bda349b6e5997c3cc5c120d89e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:15 +0100 Subject: [PATCH 382/839] x86/sev-es: Correctly track IRQ states in runtime #VC handler Call irqentry_nmi_enter()/irqentry_nmi_exit() in the #VC handler to correctly track the IRQ state during its execution. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Reported-by: Andy Lutomirski Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-5-joro@8bytes.org --- arch/x86/kernel/sev-es.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 301f20f6d4dd0..c3fd8fa79838e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -1258,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { struct sev_es_runtime_data *data = this_cpu_read(runtime_data); + irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; - lockdep_assert_irqs_disabled(); - /* * Handle #DB before calling into !noinstr code to avoid recursive #DB. */ @@ -1273,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) return; } + irq_state = irqentry_nmi_enter(regs); + lockdep_assert_irqs_disabled(); instrumentation_begin(); /* @@ -1335,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) out: instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); return; -- GitLab From bffe30dd9f1f3b2608a87ac909a224d6be472485 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Mar 2021 15:17:16 +0100 Subject: [PATCH 383/839] x86/sev-es: Use __copy_from_user_inatomic() The #VC handler must run in atomic context and cannot sleep. This is a problem when it tries to fetch instruction bytes from user-space via copy_from_user(). Introduce a insn_fetch_from_user_inatomic() helper which uses __copy_from_user_inatomic() to safely copy the instruction bytes to kernel memory in the #VC handler. Fixes: 5e3427a7bc432 ("x86/sev-es: Handle instruction fetches from user-space") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210303141716.29223-6-joro@8bytes.org --- arch/x86/include/asm/insn-eval.h | 2 + arch/x86/kernel/sev-es.c | 2 +- arch/x86/lib/insn-eval.c | 66 +++++++++++++++++++++++++------- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index a0f839aa144d9..98b4dae5e8bc8 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); bool insn_decode(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index c3fd8fa79838e..04a780abb512d 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -258,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) int res; if (user_mode(ctxt->regs)) { - res = insn_fetch_from_user(ctxt->regs, buffer); + res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); if (!res) { ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 4229950a5d78c..bb0b3fe1e0a02 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } +static unsigned long insn_get_effective_ip(struct pt_regs *regs) +{ + unsigned long seg_base = 0; + + /* + * If not in user-space long mode, a custom code segment could be in + * use. This is true in protected mode (if the process defined a local + * descriptor table), or virtual-8086 mode. In most of the cases + * seg_base will be zero as in USER_CS. + */ + if (!user_64bit_mode(regs)) { + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); + if (seg_base == -1L) + return 0; + } + + return seg_base + regs->ip; +} + /** * insn_fetch_from_user() - Copy instruction bytes from user-space memory * @regs: Structure with register values as seen when entering kernel mode @@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { - unsigned long seg_base = 0; + unsigned long ip; int not_copied; - /* - * If not in user-space long mode, a custom code segment could be in - * use. This is true in protected mode (if the process defined a local - * descriptor table), or virtual-8086 mode. In most of the cases - * seg_base will be zero as in USER_CS. - */ - if (!user_64bit_mode(regs)) { - seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); - if (seg_base == -1L) - return 0; - } + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; + + not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); + return MAX_INSN_SIZE - not_copied; +} + +/** + * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory + * while in atomic code + * @regs: Structure with register values as seen when entering kernel mode + * @buf: Array to store the fetched instruction + * + * Gets the linear address of the instruction and copies the instruction bytes + * to the buf. This function must be used in atomic context. + * + * Returns: + * + * Number of instruction bytes copied. + * + * 0 if nothing was copied. + */ +int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) +{ + unsigned long ip; + int not_copied; + + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; - not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), - MAX_INSN_SIZE); + not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } -- GitLab From 86c83365ab76e4b43cedd3ce07a07d32a4dc79ba Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 8 Mar 2021 17:10:23 +0100 Subject: [PATCH 384/839] arm64: kasan: fix page_alloc tagging with DEBUG_VIRTUAL When CONFIG_DEBUG_VIRTUAL is enabled, the default page_to_virt() macro implementation from include/linux/mm.h is used. That definition doesn't account for KASAN tags, which leads to no tags on page_alloc allocations. Provide an arm64-specific definition for page_to_virt() when CONFIG_DEBUG_VIRTUAL is enabled that takes care of KASAN tags. Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Cc: Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/4b55b35202706223d3118230701c6a59749d9b72.1615219501.git.andreyknvl@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c759faf7a1ff9..0aabc3be9a759 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -328,6 +328,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) #define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else #define page_to_virt(x) ({ \ -- GitLab From 4aa5e002034f0701c3335379fd6c22d7f3338cce Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Mar 2021 10:51:51 -0500 Subject: [PATCH 385/839] Revert "nfsd4: remove check_conflicting_opens warning" This reverts commit 50747dd5e47b "nfsd4: remove check_conflicting_opens warning", as a prerequisite for reverting 94415b06eb8a, which has a serious bug. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 61552e89bd89d..482e241c96391 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4952,6 +4952,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, writes--; if (fp->fi_fds[O_RDWR]) writes--; + WARN_ON_ONCE(writes < 0); if (writes > 0) return -EAGAIN; spin_lock(&fp->fi_lock); -- GitLab From 6ee65a773096ab3f39d9b00311ac983be5bdeb7c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Mar 2021 10:52:29 -0500 Subject: [PATCH 386/839] Revert "nfsd4: a client's own opens needn't prevent delegations" This reverts commit 94415b06eb8aed13481646026dc995f04a3a534a. That commit claimed to allow a client to get a read delegation when it was the only writer. Actually it allowed a client to get a read delegation when *any* client has a write open! The main problem is that it's depending on nfs4_clnt_odstate structures that are actually only maintained for pnfs exports. This causes clients to miss writes performed by other clients, even when there have been intervening closes and opens, violating close-to-open cache consistency. We can do this a different way, but first we should just revert this. I've added pynfs 4.1 test DELEG19 to test for this, as I should have done originally! Cc: stable@vger.kernel.org Reported-by: Timo Rothenpieler Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/locks.c | 3 --- fs/nfsd/nfs4state.c | 54 ++++++++++++--------------------------------- 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 99ca97e81b7a9..6125d2de39b8b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1808,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if (flags & FL_DELEG) - /* We leave these checks to the caller. */ - return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 482e241c96391..97447a64bad0b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4940,32 +4940,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return fl; } -static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, - struct nfs4_file *fp) -{ - struct nfs4_clnt_odstate *co; - struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino = locks_inode(f); - int writes = atomic_read(&ino->i_writecount); - - if (fp->fi_fds[O_WRONLY]) - writes--; - if (fp->fi_fds[O_RDWR]) - writes--; - WARN_ON_ONCE(writes < 0); - if (writes > 0) - return -EAGAIN; - spin_lock(&fp->fi_lock); - list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { - if (co->co_client != clp) { - spin_unlock(&fp->fi_lock); - return -EAGAIN; - } - } - spin_unlock(&fp->fi_lock); - return 0; -} - static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) @@ -4985,12 +4959,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, nf = find_readable_file(fp); if (!nf) { - /* - * We probably could attempt another open and get a read - * delegation, but for now, don't bother until the - * client actually sends us one. - */ - return ERR_PTR(-EAGAIN); + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5020,19 +4991,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (!fl) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) { - locks_free_lock(fl); - goto out_clnt_odstate; - } status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL); if (fl) locks_free_lock(fl); if (status) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) - goto out_clnt_odstate; spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5114,6 +5077,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: goto out_no_deleg; -- GitLab From 5808fecc572391867fcd929662b29c12e6d08d81 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Tue, 9 Mar 2021 09:29:11 -0800 Subject: [PATCH 387/839] iomap: Fix negative assignment to unsigned sis->pages in iomap_swapfile_activate In case if isi.nr_pages is 0, we are making sis->pages (which is unsigned int) a huge value in iomap_swapfile_activate() by assigning -1. This could cause a kernel crash in kernel v4.18 (with below signature). Or could lead to unknown issues on latest kernel if the fake big swap gets used. Fix this issue by returning -EINVAL in case of nr_pages is 0, since it is anyway a invalid swapfile. Looks like this issue will be hit when we have pagesize < blocksize type of configuration. I was able to hit the issue in case of a tiny swap file with below test script. https://raw.githubusercontent.com/riteshharjani/LinuxStudy/master/scripts/swap-issue.sh kernel crash analysis on v4.18 ============================== On v4.18 kernel, it causes a kernel panic, since sis->pages becomes a huge value and isi.nr_extents is 0. When 0 is returned it is considered as a swapfile over NFS and SWP_FILE is set (sis->flags |= SWP_FILE). Then when swapoff was getting called it was calling a_ops->swap_deactivate() if (sis->flags & SWP_FILE) is true. Since a_ops->swap_deactivate() is NULL in case of XFS, it causes below panic. Panic signature on v4.18 kernel: ======================================= root@qemu:/home/qemu# [ 8291.723351] XFS (loop2): Unmounting Filesystem [ 8292.123104] XFS (loop2): Mounting V5 Filesystem [ 8292.132451] XFS (loop2): Ending clean mount [ 8292.263362] Adding 4294967232k swap on /mnt1/test/swapfile. Priority:-2 extents:1 across:274877906880k [ 8292.277834] Unable to handle kernel paging request for instruction fetch [ 8292.278677] Faulting instruction address: 0x00000000 cpu 0x19: Vector: 400 (Instruction Access) at [c0000009dd5b7ad0] pc: 0000000000000000 lr: c0000000003eb9dc: destroy_swap_extents+0xfc/0x120 sp: c0000009dd5b7d50 msr: 8000000040009033 current = 0xc0000009b6710080 paca = 0xc00000003ffcb280 irqmask: 0x03 irq_happened: 0x01 pid = 5604, comm = swapoff Linux version 4.18.0 (riteshh@xxxxxxx) (gcc version 8.4.0 (Ubuntu 8.4.0-1ubuntu1~18.04)) #57 SMP Wed Mar 3 01:33:04 CST 2021 enter ? for help [link register ] c0000000003eb9dc destroy_swap_extents+0xfc/0x120 [c0000009dd5b7d50] c0000000025a7058 proc_poll_event+0x0/0x4 (unreliable) [c0000009dd5b7da0] c0000000003f0498 sys_swapoff+0x3f8/0x910 [c0000009dd5b7e30] c00000000000bbe4 system_call+0x5c/0x70 Exception: c01 (System Call) at 00007ffff7d208d8 Signed-off-by: Ritesh Harjani [djwong: rework the comment to provide more details] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/swapfile.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c index a648dbf6991e4..a5e478de14174 100644 --- a/fs/iomap/swapfile.c +++ b/fs/iomap/swapfile.c @@ -170,6 +170,16 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, return ret; } + /* + * If this swapfile doesn't contain even a single page-aligned + * contiguous range of blocks, reject this useless swapfile to + * prevent confusion later on. + */ + if (isi.nr_pages == 0) { + pr_warn("swapon: Cannot find a single usable page in file.\n"); + return -EINVAL; + } + *pagespan = 1 + isi.highest_ppage - isi.lowest_ppage; sis->max = isi.nr_pages; sis->pages = isi.nr_pages - 1; -- GitLab From b5a08423da9da59c7f38ed8dbb6dd6cbbe9024a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 Mar 2021 09:32:52 -0800 Subject: [PATCH 388/839] xfs: fix quota accounting when a mount is idmapped Nowadays, we indirectly use the idmap-aware helper functions in the VFS to set the initial uid and gid of a file being created. Unfortunately, we didn't convert the quota code, which means we attach the wrong dquots to files created on an idmapped mount. Fixes: f736d93d76d3 ("xfs: support idmapped mounts") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Reviewed-by: Dave Chinner --- fs/xfs/xfs_inode.c | 14 ++++++++------ fs/xfs/xfs_symlink.c | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 46a861d55e487..f93370bd7b1e3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1007,9 +1007,10 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; @@ -1157,9 +1158,10 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 1379013d74b88..7f368b10ded1b 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -182,7 +182,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) -- GitLab From 01dc9262ff5797b675c32c0c6bc682777d23de05 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Mar 2021 16:45:05 +0000 Subject: [PATCH 389/839] KVM: arm64: Ensure I-cache isolation between vcpus of a same VM It recently became apparent that the ARMv8 architecture has interesting rules regarding attributes being used when fetching instructions if the MMU is off at Stage-1. In this situation, the CPU is allowed to fetch from the PoC and allocate into the I-cache (unless the memory is mapped with the XN attribute at Stage-2). If we transpose this to vcpus sharing a single physical CPU, it is possible for a vcpu running with its MMU off to influence another vcpu running with its MMU on, as the latter is expected to fetch from the PoU (and self-patching code doesn't flush below that level). In order to solve this, reuse the vcpu-private TLB invalidation code to apply the same policy to the I-cache, nuking it every time the vcpu runs on a physical CPU that ran another vcpu of the same VM in the past. This involve renaming __kvm_tlb_flush_local_vmid() to __kvm_flush_cpu_context(), and inserting a local i-cache invalidation there. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Acked-by: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210303164505.68492-1-maz@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/kvm/arm.c | 7 ++++++- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +++--- arch/arm64/kvm/hyp/nvhe/tlb.c | 3 ++- arch/arm64/kvm/hyp/vhe/tlb.c | 3 ++- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9c0e396dd03ff..a7ab84f781f73 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -47,7 +47,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 +#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 @@ -183,10 +183,10 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) extern void __kvm_flush_vm_context(void); +extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); -extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bb85da1d58803..a391b984dd051 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* + * We guarantee that both TLBs and I-cache are private to each + * vcpu. If detecting that a vcpu from the same VM has + * previously run on the same physical CPU, call into the + * hypervisor code to nuke the relevant contexts. + * * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. */ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); + kvm_call_hyp(__kvm_flush_cpu_context, mmu); *last_ran = vcpu->vcpu_id; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8f129968204ef..936328207bde0 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); } -static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); + __kvm_flush_cpu_context(kern_hyp_va(mmu)); } static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) @@ -115,7 +115,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), - HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_enable_ssbs), HANDLE_FUNC(__vgic_v3_get_gic_config), diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index fbde89a2c6e83..229b06748c208 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index fd7895945bbc6..66f17349f0c36 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); -- GitLab From 614c9750173e412663728215152cc6d12bcb3425 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 9 Mar 2021 09:41:14 -0500 Subject: [PATCH 390/839] NFSD: fix dest to src mount in inter-server COPY A cleanup of the inter SSC copy needs to call fput() of the source file handle to make sure that file structure is freed as well as drop the reference on the superblock to unmount the source server. Fixes: 36e1e5ba90fb ("NFSD: Fix use-after-free warning when doing inter-server copy") Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever Tested-by: Dai Ngo --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index acdb3cd806a15..dd9f38d072dd6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1302,7 +1302,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { nfs42_ssc_close(src->nf_file); - /* 'src' is freed by nfsd4_do_async_copy */ + fput(src->nf_file); nfsd_file_put(dst); mntput(ss_mnt); } -- GitLab From 53cb245454df5b13d7063162afd7a785aed6ebf2 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 15 Jan 2021 18:43:56 +0100 Subject: [PATCH 391/839] NFSv4.2: fix return value of _nfs4_get_security_label() An xattr 'get' handler is expected to return the length of the value on success, yet _nfs4_get_security_label() (and consequently also nfs4_xattr_get_nfs4_label(), which is used as an xattr handler) returns just 0 on success. Fix this by returning label.len instead, which contains the length of the result. Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS") Signed-off-by: Ondrej Mosnacek Reviewed-by: James Morris Reviewed-by: Paul Moore Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d8fc56e5f458..1002c4f66f3fa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5965,7 +5965,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - return 0; + return label.len; } static int nfs4_get_security_label(struct inode *inode, void *buf, -- GitLab From f7d9d4854519fdf4d45c70a4d953438cd88e7e58 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 7 Mar 2021 03:33:07 -0800 Subject: [PATCH 392/839] net: lapbether: Remove netif_start_queue / netif_stop_queue For the devices in this driver, the default qdisc is "noqueue", because their "tx_queue_len" is 0. In function "__dev_queue_xmit" in "net/core/dev.c", devices with the "noqueue" qdisc are specially handled. Packets are transmitted without being queued after a "dev->flags & IFF_UP" check. However, it's possible that even if this check succeeds, "ops->ndo_stop" may still have already been called. This is because in "__dev_close_many", "ops->ndo_stop" is called before clearing the "IFF_UP" flag. If we call "netif_stop_queue" in "ops->ndo_stop", then it's possible in "__dev_queue_xmit", it sees the "IFF_UP" flag is present, and then it checks "netif_xmit_stopped" and finds that the queue is already stopped. In this case, it will complain that: "Virtual device ... asks to queue packet!" To prevent "__dev_queue_xmit" from generating this complaint, we should not call "netif_stop_queue" in "ops->ndo_stop". We also don't need to call "netif_start_queue" in "ops->ndo_open", because after a netdev is allocated and registered, the "__QUEUE_STATE_DRV_XOFF" flag is initially not set, so there is no need to call "netif_start_queue" to clear it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xie He Acked-by: Martin Schiller Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 605fe555e157d..c3372498f4f15 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -292,7 +292,6 @@ static int lapbeth_open(struct net_device *dev) return -ENODEV; } - netif_start_queue(dev); return 0; } @@ -300,8 +299,6 @@ static int lapbeth_close(struct net_device *dev) { int err; - netif_stop_queue(dev); - if ((err = lapb_unregister(dev)) != LAPB_OK) pr_err("lapb_unregister error: %d\n", err); -- GitLab From 993bdde94547887faaad4a97f0b0480a6da271c3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 28 Feb 2021 15:10:25 +0900 Subject: [PATCH 393/839] kbuild: add image_name to no-sync-config-targets 'make image_name' needs include/config/auto.conf to show the correct output because KBUILD_IMAGE depends on CONFIG options, but should not attempt to resync the configuration. Signed-off-by: Masahiro Yamada --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 31dcdb3d61fa5..3f9f44eac27f5 100644 --- a/Makefile +++ b/Makefile @@ -264,7 +264,8 @@ no-dot-config-targets := $(clean-targets) \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ outputmakefile -no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease +no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \ + image_name single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ config-build := -- GitLab From b3d9fc1436808a4ef9927e558b3415e728e710c5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 3 Mar 2021 11:43:14 +0100 Subject: [PATCH 394/839] kbuild: dummy-tools: fix inverted tests for gcc There is a test in Kconfig which takes inverted value of a compiler check: * config CC_HAS_INT128 def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) This results in CC_HAS_INT128 not being in super-config generated by dummy-tools. So take this into account in the gcc script. Signed-off-by: Jiri Slaby Signed-off-by: Masahiro Yamada --- scripts/dummy-tools/gcc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 5c113cad56017..0d0589cf8184e 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -85,3 +85,8 @@ if arg_contain -print-file-name=plugin "$@"; then echo $plugin_dir exit 0 fi + +# inverted return value +if arg_contain -D__SIZEOF_INT128__=0 "$@"; then + exit 1 +fi -- GitLab From 1f09af062556f0610c08e2f3d680a8b8bc40dd48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Rosenkr=C3=A4nzer?= Date: Tue, 2 Mar 2021 23:12:11 +0100 Subject: [PATCH 395/839] kbuild: Fix ld-version.sh script if LLD was built with LLD_VENDOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If LLD was built with -DLLD_VENDOR="xyz", ld.lld --version output will prefix LLD_VENDOR. Since LLD_VENDOR can contain spaces, the LLD identifier isn't guaranteed to be $2 either. Adjust the version checker to handle such versions of lld. Link: https://lore.kernel.org/lkml/20210302221211.1620858-1-bero@lindev.ch/ Signed-off-by: Bernhard Rosenkränzer [masahiro yamada: refactor the code] Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor --- scripts/ld-version.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh index a463273509b56..30debf78aa090 100755 --- a/scripts/ld-version.sh +++ b/scripts/ld-version.sh @@ -44,14 +44,20 @@ if [ "$1" = GNU -a "$2" = ld ]; then elif [ "$1" = GNU -a "$2" = gold ]; then echo "gold linker is not supported as it is not capable of linking the kernel proper." >&2 exit 1 -elif [ "$1" = LLD ]; then - version=$2 - min_version=$lld_min_version - name=LLD - disp_name=LLD else - echo "$orig_args: unknown linker" >&2 - exit 1 + while [ $# -gt 1 -a "$1" != "LLD" ]; do + shift + done + + if [ "$1" = LLD ]; then + version=$2 + min_version=$lld_min_version + name=LLD + disp_name=LLD + else + echo "$orig_args: unknown linker" >&2 + exit 1 + fi fi # Some distributions append a package release number, as in 2.34-4.fc32 -- GitLab From eeb05595d22c19c8f814ff893dcf88ec277a2365 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Mar 2021 12:35:01 +0000 Subject: [PATCH 396/839] umem: fix error return code in mm_pci_probe() Fix to return negative error code -ENOMEM from the blk_alloc_queue() and dma_alloc_coherent() error handling cases instead of 0, as done elsewhere in this function. Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210308123501.2573816-1-weiyongjun1@huawei.com Signed-off-by: Jens Axboe --- drivers/block/umem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 982732dbe82e6..664280f23bee1 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -877,6 +877,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) if (card->mm_pages[0].desc == NULL || card->mm_pages[1].desc == NULL) { dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n"); + ret = -ENOMEM; goto failed_alloc; } reset_page(&card->mm_pages[0]); @@ -888,8 +889,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) spin_lock_init(&card->lock); card->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!card->queue) + if (!card->queue) { + ret = -ENOMEM; goto failed_alloc; + } tasklet_init(&card->tasklet, process_page, (unsigned long)card); -- GitLab From 7aed41cff35a9aaf3431b8c0c23daa7d8bb77cd3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Mar 2021 17:53:15 +1100 Subject: [PATCH 397/839] powerpc/64s: Use symbolic macros for function entry encoding In ppc_function_entry() we look for a specific set of instructions by masking the instructions and comparing with a known value. Currently those known values are just literal hex values, and we recently discovered one of them was wrong. Instead construct the values using the existing constants we have for defining various fields of instructions. Suggested-by: Christophe Leroy Signed-off-by: Michael Ellerman Acked-by: Naveen N. Rao Link: https://lore.kernel.org/r/20210309071544.515303-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/code-patching.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index d5b3c3bb95b40..f1d029bf906e5 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,9 +73,10 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c400000UL -#define ADDIS_R2_R12 0x3c4c0000UL -#define ADDI_R2_R2 0x38420000UL +#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2)) +#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12)) +#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2)) + static inline unsigned long ppc_function_entry(void *func) { -- GitLab From 73ac79881804eed2e9d76ecdd1018037f8510cb1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 8 Mar 2021 18:55:30 +1000 Subject: [PATCH 398/839] powerpc: Fix inverted SET_FULL_REGS bitop This bit operation was inverted and set the low bit rather than cleared it, breaking the ability to ptrace non-volatile GPRs after exec. Fix. Only affects 64e and 32-bit. Fixes: feb9df3462e6 ("powerpc/64s: Always has full regs, so remove remnant checks") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210308085530.3191843-1-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 975ba260006a4..1499e928ea6a6 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -195,7 +195,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x11 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #endif #define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) #define NV_REG_POISON 0xdeadbeefdeadbeefUL @@ -210,7 +210,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x1F #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -- GitLab From c080a173301ffc62cb6c76308c803c7fee05517a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 25 Feb 2021 14:09:59 +1100 Subject: [PATCH 399/839] powerpc/64s/exception: Clean up a missed SRR specifier Nick's patch cleaning up the SRR specifiers in exception-64s.S missed a single instance of EXC_HV_OR_STD. Clean that up. Caught by clang's integrated assembler. Fixes: 3f7fbd97d07d ("powerpc/64s/exception: Clean up SRR specifiers") Signed-off-by: Daniel Axtens Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210225031006.1204774-2-dja@axtens.net --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 60d3051a8bc8a..8082b690e8746 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -466,7 +466,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) ld r10,PACAKMSR(r13) /* get MSR value for kernel */ /* MSR[RI] is clear iff using SRR regs */ - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION xori r10,r10,MSR_RI END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) -- GitLab From 286a8624d7f9c6505cd568d947772eb59646514b Mon Sep 17 00:00:00 2001 From: George McCollister Date: Mon, 8 Mar 2021 17:38:22 -0600 Subject: [PATCH 400/839] net: dsa: xrs700x: check if partner is same as port in hsr join Don't assign dp to partner if it's the same port that xrs700x_hsr_join was called with. The partner port is supposed to be the other port in the HSR/PRP redundant pair not the same port. This fixes an issue observed in testing where forwarding between redundant HSR ports on this switch didn't work depending on the order the ports were added to the hsr device. Fixes: bd62e6f5e6a9 ("net: dsa: xrs700x: add HSR offloading support") Signed-off-by: George McCollister Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/xrs700x/xrs700x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index f025f968f96da..fde6e99274b60 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -528,7 +528,10 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port, return -EOPNOTSUPP; dsa_hsr_foreach_port(dp, ds, hsr) { - partner = dp; + if (dp->index != port) { + partner = dp; + break; + } } /* We can't enable redundancy on the switch until both @@ -582,7 +585,10 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port, unsigned int val; dsa_hsr_foreach_port(dp, ds, hsr) { - partner = dp; + if (dp->index != port) { + partner = dp; + break; + } } if (!partner) -- GitLab From 924a9bc362a5223cd448ca08c3dde21235adc310 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Tue, 9 Mar 2021 12:31:00 +0100 Subject: [PATCH 401/839] net: check if protocol extracted by virtio_net_hdr_set_proto is correct For gso packets, virtio_net_hdr_set_proto sets the protocol (if it isn't set) based on the type in the virtio net hdr, but the skb could contain anything since it could come from packet_snd through a raw socket. If there is a mismatch between what virtio_net_hdr_set_proto sets and the actual protocol, then the skb could be handled incorrectly later on. An example where this poses an issue is with the subsequent call to skb_flow_dissect_flow_keys_basic which relies on skb->protocol being set correctly. A specially crafted packet could fool skb_flow_dissect_flow_keys_basic preventing EINVAL to be returned. Avoid blindly trusting the information provided by the virtio net header by checking that the protocol in the packet actually matches the protocol set by virtio_net_hdr_set_proto. Note that since the protocol is only checked if skb->dev implements header_ops->parse_protocol, packets from devices without the implementation are not checked at this stage. Fixes: 9274124f023b ("net: stricter validation of untrusted gso packets") Signed-off-by: Balazs Nemeth Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index e8a924eeea3d0..6b5fcfa1e5553 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (gso_type && skb->network_header) { struct flow_keys_basic keys; - if (!skb->protocol) + if (!skb->protocol) { + __be16 protocol = dev_parse_header_protocol(skb); + virtio_net_hdr_set_proto(skb, hdr); + if (protocol && protocol != skb->protocol) + return -EINVAL; + } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, -- GitLab From d348ede32e99d3a04863e9f9b28d224456118c27 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Tue, 9 Mar 2021 12:31:01 +0100 Subject: [PATCH 402/839] net: avoid infinite loop in mpls_gso_segment when mpls_hlen == 0 A packet with skb_inner_network_header(skb) == skb_network_header(skb) and ETH_P_MPLS_UC will prevent mpls_gso_segment from pulling any headers from the packet. Subsequently, the call to skb_mac_gso_segment will again call mpls_gso_segment with the same packet leading to an infinite loop. In addition, ensure that the header length is a multiple of four, which should hold irrespective of the number of stacked labels. Signed-off-by: Balazs Nemeth Acked-by: Willem de Bruijn Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/mpls_gso.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index b1690149b6fa0..1482259de9b5d 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include #include #include +#include static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); + if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) + goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) goto out; -- GitLab From e7a36d27f6b9f389e41d8189a8a08919c6835732 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 9 Mar 2021 17:52:18 +0100 Subject: [PATCH 403/839] s390/qeth: fix memory leak after failed TX Buffer allocation When qeth_alloc_qdio_queues() fails to allocate one of the buffers that back an Output Queue, the 'out_freeoutqbufs' path will free all previously allocated buffers for this queue. But it misses to free the half-finished queue struct itself. Move the buffer allocation into qeth_alloc_output_queue(), and deal with such errors internally. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 35 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b71b8902d1c49..f7bc0ca6909b8 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2634,15 +2634,28 @@ static void qeth_free_output_queue(struct qeth_qdio_out_q *q) static struct qeth_qdio_out_q *qeth_alloc_output_queue(void) { struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL); + unsigned int i; if (!q) return NULL; - if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) { - kfree(q); - return NULL; + if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) + goto err_qdio_bufs; + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { + if (qeth_init_qdio_out_buf(q, i)) + goto err_out_bufs; } + return q; + +err_out_bufs: + while (i > 0) + kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]); + qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); +err_qdio_bufs: + kfree(q); + return NULL; } static void qeth_tx_completion_timer(struct timer_list *timer) @@ -2655,7 +2668,7 @@ static void qeth_tx_completion_timer(struct timer_list *timer) static int qeth_alloc_qdio_queues(struct qeth_card *card) { - int i, j; + unsigned int i; QETH_CARD_TEXT(card, 2, "allcqdbf"); @@ -2689,13 +2702,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) queue->coalesce_usecs = QETH_TX_COALESCE_USECS; queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES; queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; - - /* give outbound qeth_qdio_buffers their qdio_buffers */ - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { - WARN_ON(queue->bufs[j]); - if (qeth_init_qdio_out_buf(queue, j)) - goto out_freeoutqbufs; - } } /* completion */ @@ -2704,13 +2710,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) return 0; -out_freeoutqbufs: - while (j > 0) { - --j; - kmem_cache_free(qeth_qdio_outbuf_cache, - card->qdio.out_qs[i]->bufs[j]); - card->qdio.out_qs[i]->bufs[j] = NULL; - } out_freeoutq: while (i > 0) { qeth_free_output_queue(card->qdio.out_qs[--i]); -- GitLab From c20383ad1656b0f6354dd50e4acd894f9d94090d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 9 Mar 2021 17:52:19 +0100 Subject: [PATCH 404/839] s390/qeth: improve completion of pending TX buffers The current design attaches a pending TX buffer to a custom single-linked list, which is anchored at the buffer's slot on the TX ring. The buffer is then checked for final completion whenever this slot is processed during a subsequent TX NAPI poll cycle. But if there's insufficient traffic on the ring, we might never make enough progress to get back to this ring slot and discover the pending buffer's final TX completion. In particular if this missing TX completion blocks the application from sending further traffic. So convert the custom single-linked list code to a per-queue list_head, and scan this list on every TX NAPI cycle. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +- drivers/s390/net/qeth_core_main.c | 69 +++++++++++++------------------ 2 files changed, 30 insertions(+), 42 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index a1da83b0b0ef3..91acff493612a 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -436,7 +436,7 @@ struct qeth_qdio_out_buffer { int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER]; struct qeth_qdio_out_q *q; - struct qeth_qdio_out_buffer *next_pending; + struct list_head list_entry; }; struct qeth_card; @@ -500,6 +500,7 @@ struct qeth_qdio_out_q { struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qdio_outbuf_state *bufstates; /* convenience pointer */ + struct list_head pending_bufs; struct qeth_out_q_stats stats; spinlock_t lock; unsigned int priority; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f7bc0ca6909b8..3763cd6d14f8f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -73,8 +73,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card); static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, struct qeth_qdio_out_buffer *buf, enum iucv_tx_notify notification); -static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, - int budget); static void qeth_close_dev_handler(struct work_struct *work) { @@ -465,41 +463,6 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, return n; } -static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, - int forced_cleanup) -{ - if (q->card->options.cq != QETH_CQ_ENABLED) - return; - - if (q->bufs[bidx]->next_pending != NULL) { - struct qeth_qdio_out_buffer *head = q->bufs[bidx]; - struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending; - - while (c) { - if (forced_cleanup || - atomic_read(&c->state) == QETH_QDIO_BUF_EMPTY) { - struct qeth_qdio_out_buffer *f = c; - - QETH_CARD_TEXT(f->q->card, 5, "fp"); - QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f); - /* release here to avoid interleaving between - outbound tasklet and inbound tasklet - regarding notifications and lifecycle */ - qeth_tx_complete_buf(c, forced_cleanup, 0); - - c = f->next_pending; - WARN_ON_ONCE(head->next_pending != f); - head->next_pending = c; - kmem_cache_free(qeth_qdio_outbuf_cache, f); - } else { - head = c; - c = c->next_pending; - } - - } - } -} - static void qeth_qdio_handle_aob(struct qeth_card *card, unsigned long phys_aob_addr) { @@ -537,7 +500,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, qeth_notify_skbs(buffer->q, buffer, notification); /* Free dangling allocations. The attached skbs are handled by - * qeth_cleanup_handled_pending(). + * qeth_tx_complete_pending_bufs(). */ for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); @@ -1488,14 +1451,35 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); } +static void qeth_tx_complete_pending_bufs(struct qeth_card *card, + struct qeth_qdio_out_q *queue, + bool drain) +{ + struct qeth_qdio_out_buffer *buf, *tmp; + + list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) { + if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) { + QETH_CARD_TEXT(card, 5, "fp"); + QETH_CARD_TEXT_(card, 5, "%lx", (long) buf); + + qeth_tx_complete_buf(buf, drain, 0); + + list_del(&buf->list_entry); + kmem_cache_free(qeth_qdio_outbuf_cache, buf); + } + } +} + static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free) { int j; + qeth_tx_complete_pending_bufs(q->card, q, true); + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (!q->bufs[j]) continue; - qeth_cleanup_handled_pending(q, j, 1); + qeth_clear_output_buffer(q, q->bufs[j], true, 0); if (free) { kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); @@ -2615,7 +2599,6 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) skb_queue_head_init(&newbuf->skb_list); lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key); newbuf->q = q; - newbuf->next_pending = q->bufs[bidx]; atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY); q->bufs[bidx] = newbuf; return 0; @@ -2697,6 +2680,7 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) card->qdio.out_qs[i] = queue; queue->card = card; queue->queue_no = i; + INIT_LIST_HEAD(&queue->pending_bufs); spin_lock_init(&queue->lock); timer_setup(&queue->timer, qeth_tx_completion_timer, 0); queue->coalesce_usecs = QETH_TX_COALESCE_USECS; @@ -6106,6 +6090,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, qeth_schedule_recovery(card); } + list_add(&buffer->list_entry, + &queue->pending_bufs); /* Skip clearing the buffer: */ return; case QETH_QDIO_BUF_QAOB_OK: @@ -6161,6 +6147,8 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget) unsigned int bytes = 0; int completed; + qeth_tx_complete_pending_bufs(card, queue, false); + if (qeth_out_queue_is_empty(queue)) { napi_complete(napi); return 0; @@ -6193,7 +6181,6 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget) qeth_handle_send_error(card, buffer, error); qeth_iqd_tx_complete(queue, bidx, error, budget); - qeth_cleanup_handled_pending(queue, bidx, false); } netdev_tx_completed_queue(txq, packets, bytes); -- GitLab From 3e83d467a08e25b27c44c885f511624a71c84f7c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 9 Mar 2021 17:52:20 +0100 Subject: [PATCH 405/839] s390/qeth: schedule TX NAPI on QAOB completion When a QAOB notifies us that a pending TX buffer has been delivered, the actual TX completion processing by qeth_tx_complete_pending_bufs() is done within the context of a TX NAPI instance. We shouldn't rely on this instance being scheduled by some other TX event, but just do it ourselves. qeth_qdio_handle_aob() is called from qeth_poll(), ie. our main NAPI instance. To avoid touching the TX queue's NAPI instance before/after it is (un-)registered, reorder the code in qeth_open() and qeth_stop() accordingly. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3763cd6d14f8f..d0a56afec0280 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -470,6 +470,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, struct qaob *aob; struct qeth_qdio_out_buffer *buffer; enum iucv_tx_notify notification; + struct qeth_qdio_out_q *queue; unsigned int i; aob = (struct qaob *) phys_to_virt(phys_aob_addr); @@ -512,7 +513,9 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, buffer->is_header[i] = 0; } + queue = buffer->q; atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY); + napi_schedule(&queue->napi); break; default: WARN_ON_ONCE(1); @@ -7235,9 +7238,7 @@ int qeth_open(struct net_device *dev) card->data.state = CH_STATE_UP; netif_tx_start_all_queues(dev); - napi_enable(&card->napi); local_bh_disable(); - napi_schedule(&card->napi); if (IS_IQD(card)) { struct qeth_qdio_out_q *queue; unsigned int i; @@ -7249,8 +7250,12 @@ int qeth_open(struct net_device *dev) napi_schedule(&queue->napi); } } + + napi_enable(&card->napi); + napi_schedule(&card->napi); /* kick-start the NAPI softirq: */ local_bh_enable(); + return 0; } EXPORT_SYMBOL_GPL(qeth_open); @@ -7260,6 +7265,11 @@ int qeth_stop(struct net_device *dev) struct qeth_card *card = dev->ml_priv; QETH_CARD_TEXT(card, 4, "qethstop"); + + napi_disable(&card->napi); + cancel_delayed_work_sync(&card->buffer_reclaim_work); + qdio_stop_irq(CARD_DDEV(card)); + if (IS_IQD(card)) { struct qeth_qdio_out_q *queue; unsigned int i; @@ -7280,10 +7290,6 @@ int qeth_stop(struct net_device *dev) netif_tx_disable(dev); } - napi_disable(&card->napi); - cancel_delayed_work_sync(&card->buffer_reclaim_work); - qdio_stop_irq(CARD_DDEV(card)); - return 0; } EXPORT_SYMBOL_GPL(qeth_stop); -- GitLab From 7eefda7f353ef86ad82a2dc8329e8a3538c08ab6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 9 Mar 2021 17:52:21 +0100 Subject: [PATCH 406/839] s390/qeth: fix notification for pending buffers during teardown The cited commit reworked the state machine for pending TX buffers. In qeth_iqd_tx_complete() it turned PENDING into a transient state, and uses NEED_QAOB for buffers that get parked while waiting for their QAOB completion. But it missed to adjust the check in qeth_tx_complete_buf(). So if qeth_tx_complete_pending_bufs() is called during teardown to drain the parked TX buffers, we no longer raise a notification for af_iucv. Instead of updating the checked state, just move this code into qeth_tx_complete_pending_bufs() itself. This also gets rid of the special-case in the common TX completion path. Fixes: 8908f36d20d8 ("s390/qeth: fix af_iucv notification race") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index d0a56afec0280..a814698387bc3 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1390,9 +1390,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, struct qeth_qdio_out_q *queue = buf->q; struct sk_buff *skb; - if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) - qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); - /* Empty buffer? */ if (buf->next_element_to_fill == 0) return; @@ -1465,6 +1462,9 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card, QETH_CARD_TEXT(card, 5, "fp"); QETH_CARD_TEXT_(card, 5, "%lx", (long) buf); + if (drain) + qeth_notify_skbs(queue, buf, + TX_NOTIFY_GENERALERROR); qeth_tx_complete_buf(buf, drain, 0); list_del(&buf->list_entry); -- GitLab From bd73758803c2eedc037c2268b65a19542a832594 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 08:39:39 +0000 Subject: [PATCH 407/839] powerpc: Fix missing declaration of [en/dis]able_kernel_vsx() Add stub instances of enable_kernel_vsx() and disable_kernel_vsx() when CONFIG_VSX is not set, to avoid following build failure. CC [M] drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o In file included from ./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:29, from ./drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services.h:37, from drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:27: drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c: In function 'dcn_bw_apply_registry_override': ./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:64:3: error: implicit declaration of function 'enable_kernel_vsx'; did you mean 'enable_kernel_fp'? [-Werror=implicit-function-declaration] 64 | enable_kernel_vsx(); \ | ^~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:640:2: note: in expansion of macro 'DC_FP_START' 640 | DC_FP_START(); | ^~~~~~~~~~~ ./drivers/gpu/drm/amd/amdgpu/../display/dc/os_types.h:75:3: error: implicit declaration of function 'disable_kernel_vsx'; did you mean 'disable_kernel_fp'? [-Werror=implicit-function-declaration] 75 | disable_kernel_vsx(); \ | ^~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.c:676:2: note: in expansion of macro 'DC_FP_END' 676 | DC_FP_END(); | ^~~~~~~~~ cc1: some warnings being treated as errors make[5]: *** [drivers/gpu/drm/amd/amdgpu/../display/dc/calcs/dcn_calcs.o] Error 1 This works because the caller is checking if VSX is available using cpu_has_feature(): #define DC_FP_START() { \ if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ preempt_disable(); \ enable_kernel_vsx(); \ } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ preempt_disable(); \ enable_kernel_altivec(); \ } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ preempt_disable(); \ enable_kernel_fp(); \ } \ When CONFIG_VSX is not selected, cpu_has_feature(CPU_FTR_VSX_COMP) constant folds to 'false' so the call to enable_kernel_vsx() is discarded and the build succeeds. Fixes: 16a9dea110a6 ("amdgpu: Enable initial DCN support on POWER") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Geert Uytterhoeven Reported-by: kernel test robot Signed-off-by: Christophe Leroy [mpe: Incorporate some discussion comments into the change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8d7d285a027e9d21f5ff7f850fa71a2655b0c4af.1615279170.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/switch_to.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index fdab934283721..9d1fbd8be1c74 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -71,6 +71,16 @@ static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } +#else +static inline void enable_kernel_vsx(void) +{ + BUILD_BUG(); +} + +static inline void disable_kernel_vsx(void) +{ + BUILD_BUG(); +} #endif #ifdef CONFIG_SPE -- GitLab From e5e8b80d352ec999d2bba3ea584f541c83f4ca3f Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 28 Feb 2021 22:48:16 -0700 Subject: [PATCH 408/839] sparc64: Fix opcode filtering in handling of no fault loads is_no_fault_exception() has two bugs which were discovered via random opcode testing with stress-ng. Both are caused by improper filtering of opcodes. The first bug can be triggered by a floating point store with a no-fault ASI, for instance "sta %f0, [%g0] #ASI_PNF", opcode C1A01040. The code first tests op3[5] (0x1000000), which denotes a floating point instruction, and then tests op3[2] (0x200000), which denotes a store instruction. But these bits are not mutually exclusive, and the above mentioned opcode has both bits set. The intent is to filter out stores, so the test for stores must be done first in order to have any effect. The second bug can be triggered by a floating point load with one of the invalid ASI values 0x8e or 0x8f, which pass this check in is_no_fault_exception(): if ((asi & 0xf2) == ASI_PNF) An example instruction is "ldqa [%l7 + %o7] #ASI 0x8f, %f38", opcode CF95D1EF. Asi values greater than 0x8b (ASI_SNFL) are fatal in handle_ldf_stq(), and is_no_fault_exception() must not allow these invalid asi values to make it that far. In both of these cases, handle_ldf_stq() reacts by calling sun4v_data_access_exception() or spitfire_data_access_exception(), which call is_no_fault_exception() and results in an infinite recursion. Signed-off-by: Rob Gardner Tested-by: Anatoly Pugachev Signed-off-by: David S. Miller --- arch/sparc/kernel/traps_64.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d92e5eaa4c1d7..a850dccd78ea1 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs) asi = (regs->tstate >> 24); /* saved %asi */ else asi = (insn >> 5); /* immediate asi */ - if ((asi & 0xf2) == ASI_PNF) { - if (insn & 0x1000000) { /* op3[5:4]=3 */ - handle_ldf_stq(insn, regs); - return true; - } else if (insn & 0x200000) { /* op3[2], stores */ + if ((asi & 0xf6) == ASI_PNF) { + if (insn & 0x200000) /* op3[2], stores */ return false; - } - handle_ld_nf(insn, regs); + if (insn & 0x1000000) /* op3[5:4]=3 (fp) */ + handle_ldf_stq(insn, regs); + else + handle_ld_nf(insn, regs); return true; } } -- GitLab From 69264b4a43aff7307283e2bae29e9305ab6b7d47 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 8 Mar 2021 09:51:26 +0000 Subject: [PATCH 409/839] sparc: sparc64_defconfig: remove duplicate CONFIGs After my patch there is CONFIG_ATA defined twice. Remove the duplicate one. Same problem for CONFIG_HAPPYMEAL, except I added as builtin for boot test with NFS. Reported-by: Stephen Rothwell Fixes: a57cdeb369ef ("sparc: sparc64_defconfig: add necessary configs for qemu") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- arch/sparc/configs/sparc64_defconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 148f44b33890e..12a4fb0bd52aa 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -93,7 +93,7 @@ CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_MII=m CONFIG_SUNLANCE=m -CONFIG_HAPPYMEAL=m +CONFIG_HAPPYMEAL=y CONFIG_SUNGEM=m CONFIG_SUNVNET=m CONFIG_LDMVSW=m @@ -234,9 +234,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRC16=m CONFIG_LIBCRC32C=m CONFIG_VCC=m -CONFIG_ATA=y CONFIG_PATA_CMD64X=y -CONFIG_HAPPYMEAL=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_DEVTMPFS=y -- GitLab From 6dd4879f59b0a0679ed8c3ebaff3d79f37930778 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 6 Mar 2021 06:48:01 +0100 Subject: [PATCH 410/839] RISC-V: correct enum sbi_ext_rfence_fid The constants in enum sbi_ext_rfence_fid should match the SBI specification. See https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.adoc#78-function-listing | Function Name | FID | EID | sbi_remote_fence_i | 0 | 0x52464E43 | sbi_remote_sfence_vma | 1 | 0x52464E43 | sbi_remote_sfence_vma_asid | 2 | 0x52464E43 | sbi_remote_hfence_gvma_vmid | 3 | 0x52464E43 | sbi_remote_hfence_gvma | 4 | 0x52464E43 | sbi_remote_hfence_vvma_asid | 5 | 0x52464E43 | sbi_remote_hfence_vvma | 6 | 0x52464E43 Fixes: ecbacc2a3efd ("RISC-V: Add SBI v0.2 extension definitions") Reported-by: Sean Anderson Signed-off-by: Heinrich Schuchardt Reviewed-by: Anup Patel Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 99895d9c3bddb..d7027411dde83 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -51,10 +51,10 @@ enum sbi_ext_rfence_fid { SBI_EXT_RFENCE_REMOTE_FENCE_I = 0, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, - SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, + SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, }; enum sbi_ext_hsm_fid { -- GitLab From 030f1dfa855054db5d845eca7f04c8cfda1c9f51 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:24 +0800 Subject: [PATCH 411/839] riscv: traps: Fix no prototype warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix all W=1 compilation warnings:'no previous prototype for' in arch/riscv/kernel/traps.c: arch/riscv/kernel/traps.c:96:15: warning: no previous prototype for ‘do_trap_unknown’ [-Wmissing-prototypes] 96 | DO_ERROR_INFO(do_trap_unknown, | ^~~~~~~~~~~~~~~ arch/riscv/kernel/traps.c:91:27: note: in definition of macro ‘DO_ERROR_INFO’ 91 | asmlinkage __visible void name(struct pt_regs *regs) \ | ^~~~ arch/riscv/kernel/traps.c:98:15: warning: no previous prototype for ‘do_trap_insn_misaligned’ [-Wmissing-prototypes] 98 | DO_ERROR_INFO(do_trap_insn_misaligned, | ^~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/kernel/traps.c:91:27: note: in definition of macro ‘DO_ERROR_INFO’ 91 | asmlinkage __visible void name(struct pt_regs *regs) \ | ^~~~ arch/riscv/kernel/traps.c:100:15: warning: no previous prototype for ‘do_trap_insn_fault’ [-Wmissing-prototypes] ... Reported-by: Hulk Robot Signed-off-by: Nanyong Sun [Palmer: fix checkpatch warnings] Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm-prototypes.h | 16 ++++++++++++++++ arch/riscv/kernel/traps.c | 1 + 2 files changed, 17 insertions(+) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 27e005fca5849..2a652b0c987d5 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -9,4 +9,20 @@ long long __lshrti3(long long a, int b); long long __ashrti3(long long a, int b); long long __ashlti3(long long a, int b); + +#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs) + +DECLARE_DO_ERROR_INFO(do_trap_unknown); +DECLARE_DO_ERROR_INFO(do_trap_insn_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_insn_fault); +DECLARE_DO_ERROR_INFO(do_trap_insn_illegal); +DECLARE_DO_ERROR_INFO(do_trap_load_fault); +DECLARE_DO_ERROR_INFO(do_trap_load_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_store_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_store_fault); +DECLARE_DO_ERROR_INFO(do_trap_ecall_u); +DECLARE_DO_ERROR_INFO(do_trap_ecall_s); +DECLARE_DO_ERROR_INFO(do_trap_ecall_m); +DECLARE_DO_ERROR_INFO(do_trap_break); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 3ed2c23601a02..0879b5df11b92 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include -- GitLab From 004570c3796bfe454a9cdfb9ab5d3ea48371fe48 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:25 +0800 Subject: [PATCH 412/839] riscv: irq: Fix no prototype warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following W=1 kernel compilation warning: arch/riscv/kernel/irq.c:19:13: warning: no previous prototype for ‘init_IRQ’ [-Wmissing-prototypes] 19 | void __init init_IRQ(void) | ^~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/irq.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 9807ad164015e..e4c435509983e 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -12,4 +12,6 @@ #include +extern void __init init_IRQ(void); + #endif /* _ASM_RISCV_IRQ_H */ -- GitLab From 56a6c37f6e3994cba01609768f5a215c85bd2f85 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:26 +0800 Subject: [PATCH 413/839] riscv: sbi: Fix comment of __sbi_set_timer_v01 Fix the comment of __sbi_set_timer_v01, the function name in comment is missing '__' Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f4a7db3d309e6..d3bf756321a5b 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -116,7 +116,7 @@ void sbi_clear_ipi(void) EXPORT_SYMBOL(sbi_clear_ipi); /** - * sbi_set_timer_v01() - Program the timer for next timer event. + * __sbi_set_timer_v01() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. * * Return: None -- GitLab From e06f4ce1d4c63799eff9d3544b3f7468d5409f3e Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:27 +0800 Subject: [PATCH 414/839] riscv: ptrace: Fix no prototype warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following W=1 kernel compilation warnings: arch/riscv/kernel/ftrace.c:186:6: warning: no previous prototype for ‘prepare_ftrace_return’ [-Wmissing-prototypes] 186 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | ^~~~~~~~~~~~~~~~~~~~~ arch/riscv/kernel/ptrace.c:239:15: warning: no previous prototype for ‘do_syscall_trace_enter’ [-Wmissing-prototypes] 239 | __visible int do_syscall_trace_enter(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~~ arch/riscv/kernel/ptrace.c:262:16: warning: no previous prototype for ‘do_syscall_trace_exit’ [-Wmissing-prototypes] 262 | __visible void do_syscall_trace_exit(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ptrace.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index cb4abb639e8d6..09ad4e923510a 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -119,6 +119,11 @@ extern int regs_query_register_offset(const char *name); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer); +int do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_exit(struct pt_regs *regs); + /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten -- GitLab From db2a8f9256e9a2a931edb83622d81ca73c6c8c6a Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:28 +0800 Subject: [PATCH 415/839] riscv: time: Fix no prototype for time_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following W=1 compilation warning: arch/riscv/kernel/time.c:16:13: warning: no previous prototype for ‘time_init’ [-Wmissing-prototypes] 16 | void __init time_init(void) | ^~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/timex.h | 2 ++ arch/riscv/kernel/time.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 81de51e6aa32b..507cae273bc62 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -88,4 +88,6 @@ static inline int read_current_timer(unsigned long *timer_val) return 0; } +extern void time_init(void); + #endif /* _ASM_RISCV_TIMEX_H */ diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 8a5cf99c07762..1b432264f7ef8 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -9,6 +9,7 @@ #include #include #include +#include unsigned long riscv_timebase; EXPORT_SYMBOL_GPL(riscv_timebase); -- GitLab From a6a58ecf98c3f6d95123ee3e66ccb6f7672c6e68 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:29 +0800 Subject: [PATCH 416/839] riscv: syscall_table: Reduce W=1 compilation warnings noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building riscv syscall table with W=1 throws the warning as following pattern: arch/riscv/kernel/syscall_table.c:14:36: warning: initialized field overwritten [-Woverride-init] 14 | #define __SYSCALL(nr, call) [nr] = (call), | ^ ./include/uapi/asm-generic/unistd.h:29:37: note: in expansion of macro ‘__SYSCALL’ 29 | #define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys) | ^~~~~~~~~ ./include/uapi/asm-generic/unistd.h:34:1: note: in expansion of macro ‘__SC_COMP’ 34 | __SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup) | ^~~~~~~~~ arch/riscv/kernel/syscall_table.c:14:36: note: (near initialization for ‘sys_call_table[0]’) 14 | #define __SYSCALL(nr, call) [nr] = (call), | ^ ./include/uapi/asm-generic/unistd.h:29:37: note: in expansion of macro ‘__SYSCALL’ 29 | #define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys) | ^~~~~~~~~ ./include/uapi/asm-generic/unistd.h:34:1: note: in expansion of macro ‘__SC_COMP’ 34 | __SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup) | ^~~~~~~~~ arch/riscv/kernel/syscall_table.c:14:36: warning: initialized field overwritten [-Woverride-init] ... Since we intentionally build the syscall tables this way,ignore the warning. Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3dc0abde988a2..647a47f5484a1 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -8,6 +8,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) endif +CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) extra-y += head.o extra-y += vmlinux.lds -- GitLab From 86b276c1ddedfbcc0be708e73d82ce1fb2298768 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:30 +0800 Subject: [PATCH 417/839] riscv: process: Fix no prototype for show_regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include header file to fix the following W=1 compilation warning: arch/riscv/kernel/process.c:78:6: warning: no previous prototype for ‘show_regs’ [-Wmissing-prototypes] 78 | void show_regs(struct pt_regs *regs) | ^~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/process.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 6f728e731bedf..f9cd57c9c67d2 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- GitLab From 288f6775a08913e9cb5f5ae0a43c105b725be0c8 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:31 +0800 Subject: [PATCH 418/839] riscv: ftrace: Use ftrace_get_regs helper Use ftrace_get_regs() helper call to get pt_regs from ftrace_regs struct, this makes the code simpler. Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/ftrace.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index e6372490aa0ba..2dfb33fdac746 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -4,37 +4,39 @@ /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe *p; + struct pt_regs *regs; struct kprobe_ctlblk *kcb; p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) return; + regs = ftrace_get_regs(fregs); kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { - unsigned long orig_ip = instruction_pointer(&(regs->regs)); + unsigned long orig_ip = instruction_pointer(regs); - instruction_pointer_set(&(regs->regs), ip); + instruction_pointer_set(regs, ip); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) { + if (!p->pre_handler || !p->pre_handler(p, regs)) { /* * Emulate singlestep (and also recover regs->pc) * as if there is a nop */ - instruction_pointer_set(&(regs->regs), + instruction_pointer_set(regs, (unsigned long)p->addr + MCOUNT_INSN_SIZE); if (unlikely(p->post_handler)) { kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, &(regs->regs), 0); + p->post_handler(p, regs, 0); } - instruction_pointer_set(&(regs->regs), orig_ip); + instruction_pointer_set(regs, orig_ip); } /* -- GitLab From 0d7588ab9ef98bad3b52ad0b91291e8258853cc1 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Fri, 5 Mar 2021 19:33:32 +0800 Subject: [PATCH 419/839] riscv: process: Fix no prototype for arch_dup_task_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following W=1 compilation warning: arch/riscv/kernel/process.c:114:5: warning: no previous prototype for ‘arch_dup_task_struct’ [-Wmissing-prototypes] 114 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | ^~~~~~~~~~~~~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Nanyong Sun Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3a240037bde26..021ed64ee608f 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -71,6 +71,7 @@ int riscv_of_processor_hartid(struct device_node *node); int riscv_of_parent_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #endif /* __ASSEMBLY__ */ -- GitLab From 16db6b532fa4e0397bf33e04368408fd15f0dd90 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 16 Feb 2021 21:49:30 +0200 Subject: [PATCH 420/839] habanalabs: mark hl_eq_inc_ptr() as static hl_eq_inc_ptr() is not called from anywhere outside irq.c so mark it as static Reported-by: kernel test robot Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index de53fb5f978a8..44a0522b59b95 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -47,7 +47,7 @@ inline u32 hl_cq_inc_ptr(u32 ptr) * Increment ptr by 1. If it reaches the number of event queue * entries, set it to 0 */ -inline u32 hl_eq_inc_ptr(u32 ptr) +static inline u32 hl_eq_inc_ptr(u32 ptr) { ptr++; if (unlikely(ptr == HL_EQ_LENGTH)) -- GitLab From bd0c48e53d2fadcc7f62056c46a05718370b7939 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Feb 2021 16:08:28 +0100 Subject: [PATCH 421/839] drivers: habanalabs: remove unused dentry pointer for debugfs files The dentry for the created debugfs file was being saved, but never used anywhere. As the pointer isn't needed for anything, and the debugfs files are being properly removed by removing the parent directory, remove the saved pointer as well, saving a tiny bit of memory and logic. Cc: Oded Gabbay Cc: Arnd Bergmann Cc: Tomer Tayar Cc: Moti Haimovski Cc: Omer Shpigelman Cc: Ofir Bitton Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 5 +---- drivers/misc/habanalabs/common/habanalabs.h | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index df847a6d19f41..9f19bee7b5922 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -992,7 +992,6 @@ void hl_debugfs_add_device(struct hl_device *hdev) struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; int count = ARRAY_SIZE(hl_debugfs_list); struct hl_debugfs_entry *entry; - struct dentry *ent; int i; dev_entry->hdev = hdev; @@ -1105,13 +1104,11 @@ void hl_debugfs_add_device(struct hl_device *hdev) &hl_security_violations_fops); for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { - - ent = debugfs_create_file(hl_debugfs_list[i].name, + debugfs_create_file(hl_debugfs_list[i].name, 0444, dev_entry->root, entry, &hl_debugfs_fops); - entry->dent = ent; entry->info_ent = &hl_debugfs_list[i]; entry->dev_entry = dev_entry; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d933878b24d13..4b321e4f8059f 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1465,12 +1465,10 @@ struct hl_info_list { /** * struct hl_debugfs_entry - debugfs dentry wrapper. - * @dent: base debugfs entry structure. * @info_ent: dentry realted ops. * @dev_entry: ASIC specific debugfs manager. */ struct hl_debugfs_entry { - struct dentry *dent; const struct hl_info_list *info_ent; struct hl_dbg_device_entry *dev_entry; }; -- GitLab From 27ac5aada024e0821c86540ad18f37edadd77d5e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 19 Feb 2021 14:05:33 +0200 Subject: [PATCH 422/839] habanalabs: Call put_pid() when releasing control device The refcount of the "hl_fpriv" structure is not used for the control device, and thus hl_hpriv_put() is not called when releasing this device. This results with no call to put_pid(), so add it explicitly in hl_device_release_ctrl(). Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 15fcb5c31c4b5..9ecd805f0e887 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -117,6 +117,8 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); + put_pid(hpriv->taskpid); + kfree(hpriv); return 0; -- GitLab From ffd123fe839700366ea79b19ac3683bf56817372 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 1 Feb 2021 19:44:34 +0200 Subject: [PATCH 423/839] habanalabs: Disable file operations after device is removed A device can be removed from the PCI subsystem while a process holds the file descriptor opened. In such a case, the driver attempts to kill the process, but as it is still possible that the process will be alive after this step, the device removal will complete, and we will end up with a process object that points to a device object which was already released. To prevent the usage of this released device object, disable the following file operations for this process object, and avoid the cleanup steps when the file descriptor is eventually closed. The latter is just a best effort, as memory leak will occur. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 40 ++++++++++++++++--- .../misc/habanalabs/common/habanalabs_ioctl.c | 12 ++++++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 9ecd805f0e887..334009e838236 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -93,12 +93,19 @@ void hl_hpriv_put(struct hl_fpriv *hpriv) static int hl_device_release(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; + + filp->private_data = NULL; + + if (!hdev) { + pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); + put_pid(hpriv->taskpid); + return 0; + } hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; - hl_hpriv_put(hpriv); return 0; @@ -107,16 +114,19 @@ static int hl_device_release(struct inode *inode, struct file *filp) static int hl_device_release_ctrl(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev; + struct hl_device *hdev = hpriv->hdev; filp->private_data = NULL; - hdev = hpriv->hdev; + if (!hdev) { + pr_err("Closing FD after device was removed\n"); + goto out; + } mutex_lock(&hdev->fpriv_list_lock); list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); - +out: put_pid(hpriv->taskpid); kfree(hpriv); @@ -136,8 +146,14 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) static int hl_mmap(struct file *filp, struct vm_area_struct *vma) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; unsigned long vm_pgoff; + if (!hdev) { + pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n"); + return -ENODEV; + } + vm_pgoff = vma->vm_pgoff; vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); @@ -885,6 +901,16 @@ wait_for_processes: return -EBUSY; } +static void device_disable_open_processes(struct hl_device *hdev) +{ + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + hpriv->hdev = NULL; + mutex_unlock(&hdev->fpriv_list_lock); +} + /* * hl_device_reset - reset the device * @@ -1558,8 +1584,10 @@ void hl_device_fini(struct hl_device *hdev) HL_PENDING_RESET_LONG_SEC); rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); - if (rc) + if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); + device_disable_open_processes(hdev); + } hl_cb_pool_fini(hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 03af61cecd37b..083a30969c5f3 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -5,6 +5,8 @@ * All Rights Reserved. */ +#define pr_fmt(fmt) "habanalabs: " fmt + #include #include "habanalabs.h" @@ -682,6 +684,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); + if (!hdev) { + pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); + return -ENODEV; + } + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { ioctl = &hl_ioctls[nr]; } else { @@ -700,6 +707,11 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); + if (!hdev) { + pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); + return -ENODEV; + } + if (nr == _IOC_NR(HL_IOCTL_INFO)) { ioctl = &hl_ioctls_control[nr]; } else { -- GitLab From 15097e9338ed3de2f5c5904d3dc776ef1b650edc Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Sun, 28 Feb 2021 11:06:14 +0200 Subject: [PATCH 424/839] habanalabs: fix debugfs address translation when user uses virtual addresses to access dram through debugfs, driver translate this address to physical and use it for the access through the pcie bar. in case dram page size is different than the dmmu page size, we need to have special treatment for adding the page offset to the actual address, which is to use the dram page size mask to fetch the page offset from the virtual address, instead of the dmmu last hop shift. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu/mmu.c | 38 ++++++++++++++++-------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 71703a32350f6..93c9e5f587e1a 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -499,18 +499,32 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, else /* HL_VA_RANGE_TYPE_DRAM */ p = &prop->dmmu; - /* - * find the correct hop shift field in hl_mmu_properties structure - * in order to determine the right maks for the page offset. - */ - hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift); - p = (char *)p + hop0_shift_off; - p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); - hop_shift = *(u64 *)p; - offset_mask = (1ull << hop_shift) - 1; - addr_mask = ~(offset_mask); - *phys_addr = (tmp_phys_addr & addr_mask) | - (virt_addr & offset_mask); + if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && + !is_power_of_2(prop->dram_page_size)) { + u32 bit; + u64 page_offset_mask; + u64 phys_addr_mask; + + bit = __ffs64((u64)prop->dram_page_size); + page_offset_mask = ((1ull << bit) - 1); + phys_addr_mask = ~page_offset_mask; + *phys_addr = (tmp_phys_addr & phys_addr_mask) | + (virt_addr & page_offset_mask); + } else { + /* + * find the correct hop shift field in hl_mmu_properties + * structure in order to determine the right masks + * for the page offset. + */ + hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift); + p = (char *)p + hop0_shift_off; + p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); + hop_shift = *(u64 *)p; + offset_mask = (1ull << hop_shift) - 1; + addr_mask = ~(offset_mask); + *phys_addr = (tmp_phys_addr & addr_mask) | + (virt_addr & offset_mask); + } } int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) -- GitLab From 51f24030358bdeeb9e75a38618dd029c5a53beeb Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Sun, 21 Feb 2021 21:43:38 +0800 Subject: [PATCH 425/839] cpu/hotplug: Fix build error of using {add,remove}_cpu() with !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") introduced {add,remove}_cpu() usage and it hit below error with !CONFIG_SMP: ../drivers/virt/acrn/hsm.c: In function ‘remove_cpu_store’: ../drivers/virt/acrn/hsm.c:389:3: error: implicit declaration of function ‘remove_cpu’; [-Werror=implicit-function-declaration] remove_cpu(cpu); ../drivers/virt/acrn/hsm.c:402:2: error: implicit declaration of function ‘add_cpu’; [-Werror=implicit-function-declaration] add_cpu(cpu); Add add_cpu() function prototypes with !CONFIG_SMP and remove_cpu() with !CONFIG_HOTPLUG_CPU for such usage. Fixes: 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Qais Yousef Reported-by: Randy Dunlap Reviewed-by: Qais Yousef Acked-by: Randy Dunlap # build-tested Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210221134339.57851-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3aaa0687e8df6..94a578a962022 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,6 +108,8 @@ static inline void cpu_maps_update_done(void) { } +static inline int add_cpu(unsigned int cpu) { return 0;} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -137,6 +139,7 @@ static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ -- GitLab From e54b78886949e16301e8ac3cc4b2b43969bfe5fa Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Sun, 21 Feb 2021 21:43:39 +0800 Subject: [PATCH 426/839] virt: acrn: Make remove_cpu sysfs invisible with !CONFIG_HOTPLUG_CPU Without cpu hotplug support, vCPU cannot be removed from a Service VM. Don't expose remove_cpu sysfs when CONFIG_HOTPLUG_CPU disabled. Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Qais Yousef Acked-by: Randy Dunlap # build-tested Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210221134339.57851-2-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/hsm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index 1f6b7c54a1a48..6996ea6219e53 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -404,6 +404,14 @@ fail_remove: } static DEVICE_ATTR_WO(remove_cpu); +static umode_t acrn_attr_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_remove_cpu.attr) + return IS_ENABLED(CONFIG_HOTPLUG_CPU) ? a->mode : 0; + + return a->mode; +} + static struct attribute *acrn_attrs[] = { &dev_attr_remove_cpu.attr, NULL @@ -411,6 +419,7 @@ static struct attribute *acrn_attrs[] = { static struct attribute_group acrn_attr_group = { .attrs = acrn_attrs, + .is_visible = acrn_attr_visible, }; static const struct attribute_group *acrn_attr_groups[] = { -- GitLab From dcf9625f2adf33cf3ea14c72b436b7c212807e51 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Sun, 21 Feb 2021 21:33:06 +0800 Subject: [PATCH 427/839] virt: acrn: Use vfs_poll() instead of f_op->poll() Use a more advanced function vfs_poll() in acrn_irqfd_assign(). At the same time, modify the definition of events. Signed-off-by: Yejune Deng Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210221133306.33530-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/irqfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c index a8766d528e292..98d6e9b18f9ef 100644 --- a/drivers/virt/acrn/irqfd.c +++ b/drivers/virt/acrn/irqfd.c @@ -112,7 +112,7 @@ static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args) { struct eventfd_ctx *eventfd = NULL; struct hsm_irqfd *irqfd, *tmp; - unsigned int events; + __poll_t events; struct fd f; int ret = 0; @@ -158,7 +158,7 @@ static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args) mutex_unlock(&vm->irqfds_lock); /* Check the pending event in this stage */ - events = f.file->f_op->poll(f.file, &irqfd->pt); + events = vfs_poll(f.file, &irqfd->pt); if (events & POLLIN) acrn_irqfd_inject(irqfd); -- GitLab From 7c36194558cf49a86a53b5f60db8046c5e3013ae Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Mon, 22 Feb 2021 13:06:07 -0500 Subject: [PATCH 428/839] staging: rtl8192e: fix kconfig dependency on CRYPTO When RTLLIB_CRYPTO_TKIP is enabled and CRYPTO is disabled, Kbuild gives the following warning: WARNING: unmet direct dependencies detected for CRYPTO_MICHAEL_MIC Depends on [n]: CRYPTO [=n] Selected by [m]: - RTLLIB_CRYPTO_TKIP [=m] && STAGING [=y] && RTLLIB [=m] WARNING: unmet direct dependencies detected for CRYPTO_LIB_ARC4 Depends on [n]: CRYPTO [=n] Selected by [m]: - RTLLIB_CRYPTO_TKIP [=m] && STAGING [=y] && RTLLIB [=m] - RTLLIB_CRYPTO_WEP [=m] && STAGING [=y] && RTLLIB [=m] This is because RTLLIB_CRYPTO_TKIP selects CRYPTO_MICHAEL_MIC and CRYPTO_LIB_ARC4, without depending on or selecting CRYPTO, despite those config options being subordinate to CRYPTO. Acked-by: Randy Dunlap Signed-off-by: Julian Braha Link: https://lore.kernel.org/r/20210222180607.399753-1-julianbraha@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8192e/Kconfig b/drivers/staging/rtl8192e/Kconfig index 963a2ffbc1fb0..39f5a6a7346a5 100644 --- a/drivers/staging/rtl8192e/Kconfig +++ b/drivers/staging/rtl8192e/Kconfig @@ -27,6 +27,7 @@ config RTLLIB_CRYPTO_CCMP config RTLLIB_CRYPTO_TKIP tristate "Support for rtllib TKIP crypto" depends on RTLLIB + select CRYPTO select CRYPTO_LIB_ARC4 select CRYPTO_MICHAEL_MIC default y -- GitLab From d660f4f42ccea50262c6ee90c8e7ad19a69fb225 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Feb 2021 11:45:59 +0300 Subject: [PATCH 429/839] staging: rtl8712: unterminated string leads to read overflow The memdup_user() function does not necessarily return a NUL terminated string so this can lead to a read overflow. Switch from memdup_user() to strndup_user() to fix this bug. Fixes: c6dc001f2add ("staging: r8712u: Merging Realtek's latest (v2.6.6). Various fixes.") Cc: stable Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YDYSR+1rj26NRhvb@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl871x_ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c index 81de5a9e6b679..60dd798a6e515 100644 --- a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c +++ b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c @@ -924,7 +924,7 @@ static int r871x_wx_set_priv(struct net_device *dev, struct iw_point *dwrq = (struct iw_point *)awrq; len = dwrq->length; - ext = memdup_user(dwrq->pointer, len); + ext = strndup_user(dwrq->pointer, len); if (IS_ERR(ext)) return PTR_ERR(ext); -- GitLab From 8687bf9ef9551bcf93897e33364d121667b1aadf Mon Sep 17 00:00:00 2001 From: Lee Gibson Date: Fri, 26 Feb 2021 14:51:57 +0000 Subject: [PATCH 430/839] staging: rtl8192e: Fix possible buffer overflow in _rtl92e_wx_set_scan Function _rtl92e_wx_set_scan calls memcpy without checking the length. A user could control that length and trigger a buffer overflow. Fix by checking the length is within the maximum allowed size. Reviewed-by: Dan Carpenter Signed-off-by: Lee Gibson Cc: stable Link: https://lore.kernel.org/r/20210226145157.424065-1-leegib@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtl8192e/rtl_wx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 16bcee13f64b5..407effde5e71a 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -406,9 +406,10 @@ static int _rtl92e_wx_set_scan(struct net_device *dev, struct iw_scan_req *req = (struct iw_scan_req *)b; if (req->essid_len) { - ieee->current_network.ssid_len = req->essid_len; - memcpy(ieee->current_network.ssid, req->essid, - req->essid_len); + int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + + ieee->current_network.ssid_len = len; + memcpy(ieee->current_network.ssid, req->essid, len); } } -- GitLab From b93c1e3981af19527beee1c10a2bef67a228c48c Mon Sep 17 00:00:00 2001 From: Lee Gibson Date: Mon, 1 Mar 2021 13:26:48 +0000 Subject: [PATCH 431/839] staging: rtl8712: Fix possible buffer overflow in r8712_sitesurvey_cmd Function r8712_sitesurvey_cmd calls memcpy without checking the length. A user could control that length and trigger a buffer overflow. Fix by checking the length is within the maximum allowed size. Signed-off-by: Lee Gibson Link: https://lore.kernel.org/r/20210301132648.420296-1-leegib@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl871x_cmd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/rtl871x_cmd.c b/drivers/staging/rtl8712/rtl871x_cmd.c index 18116469bd316..75716f59044d9 100644 --- a/drivers/staging/rtl8712/rtl871x_cmd.c +++ b/drivers/staging/rtl8712/rtl871x_cmd.c @@ -192,8 +192,10 @@ u8 r8712_sitesurvey_cmd(struct _adapter *padapter, psurveyPara->ss_ssidlen = 0; memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1); if (pssid && pssid->SsidLength) { - memcpy(psurveyPara->ss_ssid, pssid->Ssid, pssid->SsidLength); - psurveyPara->ss_ssidlen = cpu_to_le32(pssid->SsidLength); + int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE); + + memcpy(psurveyPara->ss_ssid, pssid->Ssid, len); + psurveyPara->ss_ssidlen = cpu_to_le32(len); } set_fwstate(pmlmepriv, _FW_UNDER_SURVEY); r8712_enqueue_cmd(pcmdpriv, ph2c); -- GitLab From e163b9823a0b08c3bb8dc4f5b4b5c221c24ec3e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Mar 2021 14:19:39 +0300 Subject: [PATCH 432/839] staging: ks7010: prevent buffer overflow in ks_wlan_set_scan() The user can specify a "req->essid_len" of up to 255 but if it's over IW_ESSID_MAX_SIZE (32) that can lead to memory corruption. Fixes: 13a9930d15b4 ("staging: ks7010: add driver from Nanonote extra-repository") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/YD4fS8+HmM/Qmrw6@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ks7010/ks_wlan_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index dc09cc6e1c478..09e7b4cd0138c 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -1120,6 +1120,7 @@ static int ks_wlan_set_scan(struct net_device *dev, { struct ks_wlan_private *priv = netdev_priv(dev); struct iw_scan_req *req = NULL; + int len; if (priv->sleep_mode == SLP_SLEEP) return -EPERM; @@ -1129,8 +1130,9 @@ static int ks_wlan_set_scan(struct net_device *dev, if (wrqu->data.length == sizeof(struct iw_scan_req) && wrqu->data.flags & IW_SCAN_THIS_ESSID) { req = (struct iw_scan_req *)extra; - priv->scan_ssid_len = req->essid_len; - memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len); + len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + priv->scan_ssid_len = len; + memcpy(priv->scan_ssid, req->essid, len); } else { priv->scan_ssid_len = 0; } -- GitLab From 25317f428a78fde71b2bf3f24d05850f08a73a52 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:42 +0000 Subject: [PATCH 433/839] staging: comedi: addi_apci_1032: Fix endian problem for COS sample The Change-Of-State (COS) subdevice supports Comedi asynchronous commands to read 16-bit change-of-state values. However, the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian architectures, it will copy 2 bytes from the wrong end of the 32-bit integer. Fix it by transferring the value via a 16-bit integer. Fixes: 6bb45f2b0c86 ("staging: comedi: addi_apci_1032: use comedi_buf_write_samples()") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-2-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/addi_apci_1032.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1032.c b/drivers/staging/comedi/drivers/addi_apci_1032.c index 35b75f0c9200b..81a246fbcc01f 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1032.c +++ b/drivers/staging/comedi/drivers/addi_apci_1032.c @@ -260,6 +260,7 @@ static irqreturn_t apci1032_interrupt(int irq, void *d) struct apci1032_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; unsigned int ctrl; + unsigned short val; /* check interrupt is from this device */ if ((inl(devpriv->amcc_iobase + AMCC_OP_REG_INTCSR) & @@ -275,7 +276,8 @@ static irqreturn_t apci1032_interrupt(int irq, void *d) outl(ctrl & ~APCI1032_CTRL_INT_ENA, dev->iobase + APCI1032_CTRL_REG); s->state = inl(dev->iobase + APCI1032_STATUS_REG) & 0xffff; - comedi_buf_write_samples(s, &s->state, 1); + val = s->state; + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); /* enable the interrupt */ -- GitLab From ac0bbf55ed3be75fde1f8907e91ecd2fd589bde3 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:43 +0000 Subject: [PATCH 434/839] staging: comedi: addi_apci_1500: Fix endian problem for command sample The digital input subdevice supports Comedi asynchronous commands that read interrupt status information. This uses 16-bit Comedi samples (of which only the bottom 8 bits contain status information). However, the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit variable `unsigned int status`. On a bigendian machine, this will copy 2 bytes from the wrong end of the variable. Fix it by changing the type of the variable to `unsigned short`. Fixes: a8c66b684efa ("staging: comedi: addi_apci_1500: rewrite the subdevice support functions") Cc: #4.0+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-3-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- .../staging/comedi/drivers/addi_apci_1500.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/staging/comedi/drivers/addi_apci_1500.c b/drivers/staging/comedi/drivers/addi_apci_1500.c index 11efb21555e39..b04c15dcfb575 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1500.c +++ b/drivers/staging/comedi/drivers/addi_apci_1500.c @@ -208,7 +208,7 @@ static irqreturn_t apci1500_interrupt(int irq, void *d) struct comedi_device *dev = d; struct apci1500_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; - unsigned int status = 0; + unsigned short status = 0; unsigned int val; val = inl(devpriv->amcc + AMCC_OP_REG_INTCSR); @@ -238,14 +238,14 @@ static irqreturn_t apci1500_interrupt(int irq, void *d) * * Mask Meaning * ---------- ------------------------------------------ - * 0x00000001 Event 1 has occurred - * 0x00000010 Event 2 has occurred - * 0x00000100 Counter/timer 1 has run down (not implemented) - * 0x00001000 Counter/timer 2 has run down (not implemented) - * 0x00010000 Counter 3 has run down (not implemented) - * 0x00100000 Watchdog has run down (not implemented) - * 0x01000000 Voltage error - * 0x10000000 Short-circuit error + * 0b00000001 Event 1 has occurred + * 0b00000010 Event 2 has occurred + * 0b00000100 Counter/timer 1 has run down (not implemented) + * 0b00001000 Counter/timer 2 has run down (not implemented) + * 0b00010000 Counter 3 has run down (not implemented) + * 0b00100000 Watchdog has run down (not implemented) + * 0b01000000 Voltage error + * 0b10000000 Short-circuit error */ comedi_buf_write_samples(s, &status, 1); comedi_handle_events(dev, s); -- GitLab From b2e78630f733a76508b53ba680528ca39c890e82 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:44 +0000 Subject: [PATCH 435/839] staging: comedi: adv_pci1710: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the calls to `comedi_buf_write_samples()` are passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variables holding the sample value to `unsigned short`. The type of the `val` parameter of `pci1710_ai_read_sample()` is changed to `unsigned short *` accordingly. The type of the `val` variable in `pci1710_ai_insn_read()` is also changed to `unsigned short` since its address is passed to `pci1710_ai_read_sample()`. Fixes: a9c3a015c12f ("staging: comedi: adv_pci1710: use comedi_buf_write_samples()") Cc: # 4.0+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adv_pci1710.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index 692893c7e5c3d..090607760be6b 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -300,11 +300,11 @@ static int pci1710_ai_eoc(struct comedi_device *dev, static int pci1710_ai_read_sample(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int cur_chan, - unsigned int *val) + unsigned short *val) { const struct boardtype *board = dev->board_ptr; struct pci1710_private *devpriv = dev->private; - unsigned int sample; + unsigned short sample; unsigned int chan; sample = inw(dev->iobase + PCI171X_AD_DATA_REG); @@ -345,7 +345,7 @@ static int pci1710_ai_insn_read(struct comedi_device *dev, pci1710_ai_setup_chanlist(dev, s, &insn->chanspec, 1, 1); for (i = 0; i < insn->n; i++) { - unsigned int val; + unsigned short val; /* start conversion */ outw(0, dev->iobase + PCI171X_SOFTTRG_REG); @@ -395,7 +395,7 @@ static void pci1710_handle_every_sample(struct comedi_device *dev, { struct comedi_cmd *cmd = &s->async->cmd; unsigned int status; - unsigned int val; + unsigned short val; int ret; status = inw(dev->iobase + PCI171X_STATUS_REG); @@ -455,7 +455,7 @@ static void pci1710_handle_fifo(struct comedi_device *dev, } for (i = 0; i < devpriv->max_samples; i++) { - unsigned int val; + unsigned short val; int ret; ret = pci1710_ai_read_sample(dev, s, s->async->cur_chan, &val); -- GitLab From 1c0f20b78781b9ca50dc3ecfd396d0db5b141890 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:45 +0000 Subject: [PATCH 436/839] staging: comedi: das6402: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the call to `comedi_buf_write_samples()` is passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variable holding the sample value to `unsigned short`. Fixes: d1d24cb65ee3 ("staging: comedi: das6402: read analog input samples in interrupt handler") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-5-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/das6402.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/das6402.c b/drivers/staging/comedi/drivers/das6402.c index 04e224f8b7793..96f4107b8054d 100644 --- a/drivers/staging/comedi/drivers/das6402.c +++ b/drivers/staging/comedi/drivers/das6402.c @@ -186,7 +186,7 @@ static irqreturn_t das6402_interrupt(int irq, void *d) if (status & DAS6402_STATUS_FFULL) { async->events |= COMEDI_CB_OVERFLOW; } else if (status & DAS6402_STATUS_FFNE) { - unsigned int val; + unsigned short val; val = das6402_ai_read_sample(dev, s); comedi_buf_write_samples(s, &val, 1); -- GitLab From 459b1e8c8fe97fcba0bd1b623471713dce2c5eaf Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:46 +0000 Subject: [PATCH 437/839] staging: comedi: das800: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the call to `comedi_buf_write_samples()` is passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variable holding the sample value to `unsigned short`. Fixes: ad9eb43c93d8 ("staging: comedi: das800: use comedi_buf_write_samples()") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-6-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/das800.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/das800.c b/drivers/staging/comedi/drivers/das800.c index 4ea100ff6930f..2881808d6606c 100644 --- a/drivers/staging/comedi/drivers/das800.c +++ b/drivers/staging/comedi/drivers/das800.c @@ -427,7 +427,7 @@ static irqreturn_t das800_interrupt(int irq, void *d) struct comedi_cmd *cmd; unsigned long irq_flags; unsigned int status; - unsigned int val; + unsigned short val; bool fifo_empty; bool fifo_overflow; int i; -- GitLab From 54999c0d94b3c26625f896f8e3460bc029821578 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:47 +0000 Subject: [PATCH 438/839] staging: comedi: dmm32at: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the call to `comedi_buf_write_samples()` is passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variable holding the sample value to `unsigned short`. [Note: the bug was introduced in commit 1700529b24cc ("staging: comedi: dmm32at: use comedi_buf_write_samples()") but the patch applies better to the later (but in the same kernel release) commit 0c0eadadcbe6e ("staging: comedi: dmm32at: introduce dmm32_ai_get_sample()").] Fixes: 0c0eadadcbe6e ("staging: comedi: dmm32at: introduce dmm32_ai_get_sample()") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-7-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dmm32at.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/dmm32at.c b/drivers/staging/comedi/drivers/dmm32at.c index 17e6018918bbf..56682f01242fd 100644 --- a/drivers/staging/comedi/drivers/dmm32at.c +++ b/drivers/staging/comedi/drivers/dmm32at.c @@ -404,7 +404,7 @@ static irqreturn_t dmm32at_isr(int irq, void *d) { struct comedi_device *dev = d; unsigned char intstat; - unsigned int val; + unsigned short val; int i; if (!dev->attached) { -- GitLab From b39dfcced399d31e7c4b7341693b18e01c8f655e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:48 +0000 Subject: [PATCH 439/839] staging: comedi: me4000: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the calls to `comedi_buf_write_samples()` are passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variable holding the sample value to `unsigned short`. Fixes: de88924f67d1 ("staging: comedi: me4000: use comedi_buf_write_samples()") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-8-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/me4000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/me4000.c b/drivers/staging/comedi/drivers/me4000.c index 726e40dc17b62..0d3d4cafce2e8 100644 --- a/drivers/staging/comedi/drivers/me4000.c +++ b/drivers/staging/comedi/drivers/me4000.c @@ -924,7 +924,7 @@ static irqreturn_t me4000_ai_isr(int irq, void *dev_id) struct comedi_subdevice *s = dev->read_subdev; int i; int c = 0; - unsigned int lval; + unsigned short lval; if (!dev->attached) return IRQ_NONE; -- GitLab From a084303a645896e834883f2c5170d044410dfdb3 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:49 +0000 Subject: [PATCH 440/839] staging: comedi: pcl711: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the call to `comedi_buf_write_samples()` is passing the address of a 32-bit integer variable. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the variable holding the sample value to `unsigned short`. Fixes: 1f44c034de2e ("staging: comedi: pcl711: use comedi_buf_write_samples()") Cc: # 3.19+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-9-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/pcl711.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/pcl711.c b/drivers/staging/comedi/drivers/pcl711.c index 2dbf69e309650..bd6f42fe9e3ca 100644 --- a/drivers/staging/comedi/drivers/pcl711.c +++ b/drivers/staging/comedi/drivers/pcl711.c @@ -184,7 +184,7 @@ static irqreturn_t pcl711_interrupt(int irq, void *d) struct comedi_device *dev = d; struct comedi_subdevice *s = dev->read_subdev; struct comedi_cmd *cmd = &s->async->cmd; - unsigned int data; + unsigned short data; if (!dev->attached) { dev_err(dev->class_dev, "spurious interrupt\n"); -- GitLab From 148e34fd33d53740642db523724226de14ee5281 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:50 +0000 Subject: [PATCH 441/839] staging: comedi: pcl818: Fix endian problem for AI command data The analog input subdevice supports Comedi asynchronous commands that use Comedi's 16-bit sample format. However, the call to `comedi_buf_write_samples()` is passing the address of a 32-bit integer parameter. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit value. Fix it by changing the type of the parameter holding the sample value to `unsigned short`. [Note: the bug was introduced in commit edf4537bcbf5 ("staging: comedi: pcl818: use comedi_buf_write_samples()") but the patch applies better to commit d615416de615 ("staging: comedi: pcl818: introduce pcl818_ai_write_sample()").] Fixes: d615416de615 ("staging: comedi: pcl818: introduce pcl818_ai_write_sample()") Cc: # 4.0+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-10-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/pcl818.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/pcl818.c b/drivers/staging/comedi/drivers/pcl818.c index 63e3011158f23..f4b4a686c710f 100644 --- a/drivers/staging/comedi/drivers/pcl818.c +++ b/drivers/staging/comedi/drivers/pcl818.c @@ -423,7 +423,7 @@ static int pcl818_ai_eoc(struct comedi_device *dev, static bool pcl818_ai_write_sample(struct comedi_device *dev, struct comedi_subdevice *s, - unsigned int chan, unsigned int val) + unsigned int chan, unsigned short val) { struct pcl818_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; -- GitLab From 8536749d4952649ada4a88396079e6ec69c1fc9f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:51 +0000 Subject: [PATCH 442/839] staging: comedi: amplc_pc236_common: Use 16-bit 0 for interrupt data The Amplicon PC36AT/PCI236 common driver has an "interrupt" subdevice that supports Comedi asynchronous commands, placing a value in the Comedi buffer for each interrupt. The subdevice uses Comedi's 16-bit sample format but the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit integer. This isn't really a problem since `s->state` will always be 0 for this subdevice, but clean it up by using a 16-bit variable initialized to 0 to pass the value. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-11-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/amplc_pc236_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/amplc_pc236_common.c b/drivers/staging/comedi/drivers/amplc_pc236_common.c index 043752663188f..981d281e87a13 100644 --- a/drivers/staging/comedi/drivers/amplc_pc236_common.c +++ b/drivers/staging/comedi/drivers/amplc_pc236_common.c @@ -126,7 +126,9 @@ static irqreturn_t pc236_interrupt(int irq, void *d) handled = pc236_intr_check(dev); if (dev->attached && handled) { - comedi_buf_write_samples(s, &s->state, 1); + unsigned short val = 0; + + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); } return IRQ_RETVAL(handled); -- GitLab From a1acdbc55403d94ac8dddcb0311360bc6dae659f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:52 +0000 Subject: [PATCH 443/839] staging: comedi: comedi_parport: Use 16-bit 0 for interrupt data The comedi_parport driver has an "interrupt" subdevice that supports Comedi asynchronous commands, placing a value in the Comedi buffer for each interrupt. The subdevice uses Comedi's 16-bit sample format but the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit integer. This isn't really a problem since `s->state` will always be 0 for this subdevice, but clean it up by using a 16-bit variable initialized to 0 to pass the value. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-12-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/comedi_parport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/comedi_parport.c b/drivers/staging/comedi/drivers/comedi_parport.c index 9361b2dcf949d..5338b5eea4402 100644 --- a/drivers/staging/comedi/drivers/comedi_parport.c +++ b/drivers/staging/comedi/drivers/comedi_parport.c @@ -210,12 +210,13 @@ static irqreturn_t parport_interrupt(int irq, void *d) struct comedi_device *dev = d; struct comedi_subdevice *s = dev->read_subdev; unsigned int ctrl; + unsigned short val = 0; ctrl = inb(dev->iobase + PARPORT_CTRL_REG); if (!(ctrl & PARPORT_CTRL_IRQ_ENA)) return IRQ_NONE; - comedi_buf_write_samples(s, &s->state, 1); + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); return IRQ_HANDLED; -- GitLab From 33444638ae54fa2eb8e9269d70ce2d5bc8b52751 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:53 +0000 Subject: [PATCH 444/839] staging: comedi: ni_6527: Use 16-bit 0 for interrupt data The ni_6527 driver has an "interrupt" subdevice that supports Comedi asynchronous commands, placing a value in the Comedi buffer for each interrupt. The subdevice uses Comedi's 16-bit sample format but the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit integer. This isn't really a problem since `s->state` will always be 0 for this subdevice, but clean it up by using a 16-bit variable initialized to 0 to pass the value. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-13-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_6527.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_6527.c b/drivers/staging/comedi/drivers/ni_6527.c index 99e744172f4d4..f1a45cf7342ae 100644 --- a/drivers/staging/comedi/drivers/ni_6527.c +++ b/drivers/staging/comedi/drivers/ni_6527.c @@ -195,7 +195,9 @@ static irqreturn_t ni6527_interrupt(int irq, void *d) return IRQ_NONE; if (status & NI6527_STATUS_EDGE) { - comedi_buf_write_samples(s, &s->state, 1); + unsigned short val = 0; + + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); } -- GitLab From 5ff1c08c28c331eee1ac6a02e8e3eabb329fa953 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:54 +0000 Subject: [PATCH 445/839] staging: comedi: ni_65xx: Use 16-bit 0 for interrupt data The ni_65xx driver has an "interrupt" subdevice that supports Comedi asynchronous commands, placing a value in the Comedi buffer for each interrupt. The subdevice uses Comedi's 16-bit sample format but the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit integer. This isn't really a problem since `s->state` will always be 0 for this subdevice, but clean it up by using a 16-bit variable initialized to 0 to pass the value. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-14-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_65xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_65xx.c b/drivers/staging/comedi/drivers/ni_65xx.c index eb3f9f7109da9..7cd8497420f22 100644 --- a/drivers/staging/comedi/drivers/ni_65xx.c +++ b/drivers/staging/comedi/drivers/ni_65xx.c @@ -472,6 +472,7 @@ static irqreturn_t ni_65xx_interrupt(int irq, void *d) struct comedi_device *dev = d; struct comedi_subdevice *s = dev->read_subdev; unsigned int status; + unsigned short val = 0; status = readb(dev->mmio + NI_65XX_STATUS_REG); if ((status & NI_65XX_STATUS_INT) == 0) @@ -482,7 +483,7 @@ static irqreturn_t ni_65xx_interrupt(int irq, void *d) writeb(NI_65XX_CLR_EDGE_INT | NI_65XX_CLR_OVERFLOW_INT, dev->mmio + NI_65XX_CLR_REG); - comedi_buf_write_samples(s, &s->state, 1); + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); return IRQ_HANDLED; -- GitLab From fd3ce6557da0e7557a451ee56fc8f5700d3bd08f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 23 Feb 2021 14:30:55 +0000 Subject: [PATCH 446/839] staging: comedi: pcl726: Use 16-bit 0 for interrupt data The pcl726 driver has an "interrupt" subdevice that supports Comedi asynchronous commands, placing a value in the Comedi buffer for each interrupt. The subdevice uses Comedi's 16-bit sample format but the interrupt handler is calling `comedi_buf_write_samples()` with the address of a 32-bit integer `&s->state`. On bigendian machines, this will copy 2 bytes from the wrong end of the 32-bit integer. This isn't really a problem since `s->state` will always be 0 for this subdevice, but clean it up by using a 16-bit variable initialized to 0 to pass the value. Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20210223143055.257402-15-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/pcl726.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/pcl726.c b/drivers/staging/comedi/drivers/pcl726.c index 64eb649c98135..88f25d7e76f7e 100644 --- a/drivers/staging/comedi/drivers/pcl726.c +++ b/drivers/staging/comedi/drivers/pcl726.c @@ -220,9 +220,11 @@ static irqreturn_t pcl726_interrupt(int irq, void *d) struct pcl726_private *devpriv = dev->private; if (devpriv->cmd_running) { + unsigned short val = 0; + pcl726_intr_cancel(dev, s); - comedi_buf_write_samples(s, &s->state, 1); + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); } -- GitLab From 87107518d7a93fec6cdb2559588862afeee800fb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Mar 2021 11:12:49 +0300 Subject: [PATCH 447/839] staging: rtl8192u: fix ->ssid overflow in r8192_wx_set_scan() We need to cap len at IW_ESSID_MAX_SIZE (32) to avoid memory corruption. This can be controlled by the user via the ioctl. Fixes: 5f53d8ca3d5d ("Staging: add rtl8192SU wireless usb driver") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/YEHoAWMOSZBUw91F@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/r8192U_wx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8192u/r8192U_wx.c b/drivers/staging/rtl8192u/r8192U_wx.c index d853586705fc9..77bf88696a844 100644 --- a/drivers/staging/rtl8192u/r8192U_wx.c +++ b/drivers/staging/rtl8192u/r8192U_wx.c @@ -331,8 +331,10 @@ static int r8192_wx_set_scan(struct net_device *dev, struct iw_request_info *a, struct iw_scan_req *req = (struct iw_scan_req *)b; if (req->essid_len) { - ieee->current_network.ssid_len = req->essid_len; - memcpy(ieee->current_network.ssid, req->essid, req->essid_len); + int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + + ieee->current_network.ssid_len = len; + memcpy(ieee->current_network.ssid, req->essid, len); } } -- GitLab From d4ac640322b06095128a5c45ba4a1e80929fe7f3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Mar 2021 11:56:32 +0300 Subject: [PATCH 448/839] staging: rtl8188eu: fix potential memory corruption in rtw_check_beacon_data() The "ie_len" is a value in the 1-255 range that comes from the user. We have to cap it to ensure that it's not too large or it could lead to memory corruption. Fixes: 9a7fe54ddc3a ("staging: r8188eu: Add source files for new driver - part 1") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/YEHyQCrFZKTXyT7J@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_ap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/rtl8188eu/core/rtw_ap.c b/drivers/staging/rtl8188eu/core/rtw_ap.c index fa1e34a0d4561..182bb944c9b3b 100644 --- a/drivers/staging/rtl8188eu/core/rtw_ap.c +++ b/drivers/staging/rtl8188eu/core/rtw_ap.c @@ -791,6 +791,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SSID, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p && ie_len > 0) { + ie_len = min_t(int, ie_len, sizeof(pbss_network->ssid.ssid)); memset(&pbss_network->ssid, 0, sizeof(struct ndis_802_11_ssid)); memcpy(pbss_network->ssid.ssid, p + 2, ie_len); pbss_network->ssid.ssid_length = ie_len; @@ -811,6 +812,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SUPP_RATES, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p) { + ie_len = min_t(int, ie_len, NDIS_802_11_LENGTH_RATES_EX); memcpy(supportRate, p + 2, ie_len); supportRateNum = ie_len; } @@ -819,6 +821,8 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_EXT_SUPP_RATES, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p) { + ie_len = min_t(int, ie_len, + NDIS_802_11_LENGTH_RATES_EX - supportRateNum); memcpy(supportRate + supportRateNum, p + 2, ie_len); supportRateNum += ie_len; } @@ -934,6 +938,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) pht_cap->mcs.rx_mask[0] = 0xff; pht_cap->mcs.rx_mask[1] = 0x0; + ie_len = min_t(int, ie_len, sizeof(pmlmepriv->htpriv.ht_cap)); memcpy(&pmlmepriv->htpriv.ht_cap, p + 2, ie_len); } -- GitLab From 74b6b20df8cfe90ada777d621b54c32e69e27cd7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Mar 2021 11:58:03 +0300 Subject: [PATCH 449/839] staging: rtl8188eu: prevent ->ssid overflow in rtw_wx_set_scan() This code has a check to prevent read overflow but it needs another check to prevent writing beyond the end of the ->ssid[] array. Fixes: a2c60d42d97c ("staging: r8188eu: Add files for new driver - part 16") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/YEHymwsnHewzoam7@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index bf22f130d3e16..58954b88a817d 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1133,9 +1133,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a, break; } sec_len = *(pos++); len -= 1; - if (sec_len > 0 && sec_len <= len) { + if (sec_len > 0 && + sec_len <= len && + sec_len <= 32) { ssid[ssid_index].ssid_length = sec_len; - memcpy(ssid[ssid_index].ssid, pos, ssid[ssid_index].ssid_length); + memcpy(ssid[ssid_index].ssid, pos, sec_len); ssid_index++; } pos += sec_len; -- GitLab From 16d7586dccf83785819f5b66f4d20fac9bfcd644 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Mar 2021 15:51:56 +0100 Subject: [PATCH 450/839] Revert "staging: wfx: remove unused included header files" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 314fd52f01ead9528a5cda5a868425bb736d93a2. It turns .h files into non-stand-alone when building, which might cause problems in the long-run. Reported-by: Jérôme Pouiller Cc: Muhammad Usama Anjum Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/bh.c | 1 + drivers/staging/wfx/bh.h | 4 ++++ drivers/staging/wfx/bus.h | 3 +++ drivers/staging/wfx/bus_sdio.c | 6 ++++++ drivers/staging/wfx/bus_spi.c | 7 +++++++ drivers/staging/wfx/data_rx.c | 5 +++++ drivers/staging/wfx/data_tx.c | 5 +++++ drivers/staging/wfx/data_tx.h | 3 +++ drivers/staging/wfx/debug.c | 6 ++++++ drivers/staging/wfx/fwio.c | 2 ++ drivers/staging/wfx/hif_api_cmd.h | 4 ++++ drivers/staging/wfx/hif_api_general.h | 9 +++++++++ drivers/staging/wfx/hif_tx.c | 4 ++++ drivers/staging/wfx/hif_tx_mib.c | 5 +++++ drivers/staging/wfx/hwio.c | 3 +++ drivers/staging/wfx/hwio.h | 2 ++ drivers/staging/wfx/key.c | 2 ++ drivers/staging/wfx/key.h | 2 ++ drivers/staging/wfx/main.c | 7 +++++++ drivers/staging/wfx/main.h | 3 +++ drivers/staging/wfx/queue.c | 4 ++++ drivers/staging/wfx/queue.h | 3 +++ drivers/staging/wfx/scan.h | 2 ++ drivers/staging/wfx/sta.c | 6 ++++++ drivers/staging/wfx/sta.h | 2 ++ drivers/staging/wfx/traces.h | 3 +++ drivers/staging/wfx/wfx.h | 3 +++ 27 files changed, 106 insertions(+) diff --git a/drivers/staging/wfx/bh.c b/drivers/staging/wfx/bh.c index cd6bcfdfbe9a6..ed53d0b455927 100644 --- a/drivers/staging/wfx/bh.c +++ b/drivers/staging/wfx/bh.c @@ -5,6 +5,7 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include #include #include "bh.h" diff --git a/drivers/staging/wfx/bh.h b/drivers/staging/wfx/bh.h index 92ef3298d4ac0..78c49329e22a6 100644 --- a/drivers/staging/wfx/bh.h +++ b/drivers/staging/wfx/bh.h @@ -8,6 +8,10 @@ #ifndef WFX_BH_H #define WFX_BH_H +#include +#include +#include + struct wfx_dev; struct wfx_hif { diff --git a/drivers/staging/wfx/bus.h b/drivers/staging/wfx/bus.h index ea39114853074..ca04b3da6204e 100644 --- a/drivers/staging/wfx/bus.h +++ b/drivers/staging/wfx/bus.h @@ -8,6 +8,9 @@ #ifndef WFX_BUS_H #define WFX_BUS_H +#include +#include + #define WFX_REG_CONFIG 0x0 #define WFX_REG_CONTROL 0x1 #define WFX_REG_IN_OUT_QUEUE 0x2 diff --git a/drivers/staging/wfx/bus_sdio.c b/drivers/staging/wfx/bus_sdio.c index 588edce448546..e06d7e1ebe9c3 100644 --- a/drivers/staging/wfx/bus_sdio.c +++ b/drivers/staging/wfx/bus_sdio.c @@ -5,13 +5,19 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include #include #include #include +#include #include +#include #include "bus.h" #include "wfx.h" +#include "hwio.h" +#include "main.h" +#include "bh.h" static const struct wfx_platform_data wfx_sdio_pdata = { .file_fw = "wfm_wf200", diff --git a/drivers/staging/wfx/bus_spi.c b/drivers/staging/wfx/bus_spi.c index f89855abe9f8d..a99125d1a30d5 100644 --- a/drivers/staging/wfx/bus_spi.c +++ b/drivers/staging/wfx/bus_spi.c @@ -6,12 +6,19 @@ * Copyright (c) 2011, Sagrad Inc. * Copyright (c) 2010, ST-Ericsson */ +#include +#include +#include #include +#include #include #include #include "bus.h" #include "wfx.h" +#include "hwio.h" +#include "main.h" +#include "bh.h" #define SET_WRITE 0x7FFF /* usage: and operation */ #define SET_READ 0x8000 /* usage: or operation */ diff --git a/drivers/staging/wfx/data_rx.c b/drivers/staging/wfx/data_rx.c index 2cfa162792208..385f2d42a0e2d 100644 --- a/drivers/staging/wfx/data_rx.c +++ b/drivers/staging/wfx/data_rx.c @@ -5,8 +5,13 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include +#include + #include "data_rx.h" #include "wfx.h" +#include "bh.h" +#include "sta.h" static void wfx_rx_handle_ba(struct wfx_vif *wvif, struct ieee80211_mgmt *mgmt) { diff --git a/drivers/staging/wfx/data_tx.c b/drivers/staging/wfx/data_tx.c index 76f26e3c4381c..77fb104efdec1 100644 --- a/drivers/staging/wfx/data_tx.c +++ b/drivers/staging/wfx/data_tx.c @@ -6,9 +6,14 @@ * Copyright (c) 2010, ST-Ericsson */ #include +#include +#include "data_tx.h" #include "wfx.h" +#include "bh.h" #include "sta.h" +#include "queue.h" +#include "debug.h" #include "traces.h" #include "hif_tx_mib.h" diff --git a/drivers/staging/wfx/data_tx.h b/drivers/staging/wfx/data_tx.h index 6b3020097efaa..401363d6b563a 100644 --- a/drivers/staging/wfx/data_tx.h +++ b/drivers/staging/wfx/data_tx.h @@ -8,6 +8,9 @@ #ifndef WFX_DATA_TX_H #define WFX_DATA_TX_H +#include +#include + #include "hif_api_cmd.h" #include "hif_api_mib.h" diff --git a/drivers/staging/wfx/debug.c b/drivers/staging/wfx/debug.c index 3e87d13eb3588..eedada78c25f9 100644 --- a/drivers/staging/wfx/debug.c +++ b/drivers/staging/wfx/debug.c @@ -5,9 +5,15 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include +#include +#include + #include "debug.h" #include "wfx.h" #include "sta.h" +#include "main.h" +#include "hif_tx.h" #include "hif_tx_mib.h" #define CREATE_TRACE_POINTS diff --git a/drivers/staging/wfx/fwio.c b/drivers/staging/wfx/fwio.c index 1bb9054871c41..1b8aec02d1692 100644 --- a/drivers/staging/wfx/fwio.c +++ b/drivers/staging/wfx/fwio.c @@ -6,6 +6,8 @@ * Copyright (c) 2010, ST-Ericsson */ #include +#include +#include #include #include "fwio.h" diff --git a/drivers/staging/wfx/hif_api_cmd.h b/drivers/staging/wfx/hif_api_cmd.h index 8b671c9ab97cb..58c9bb036011a 100644 --- a/drivers/staging/wfx/hif_api_cmd.h +++ b/drivers/staging/wfx/hif_api_cmd.h @@ -8,6 +8,10 @@ #ifndef WFX_HIF_API_CMD_H #define WFX_HIF_API_CMD_H +#include + +#include "hif_api_general.h" + enum hif_requests_ids { HIF_REQ_ID_RESET = 0x0a, HIF_REQ_ID_READ_MIB = 0x05, diff --git a/drivers/staging/wfx/hif_api_general.h b/drivers/staging/wfx/hif_api_general.h index 70b253d0265d2..24188945718d6 100644 --- a/drivers/staging/wfx/hif_api_general.h +++ b/drivers/staging/wfx/hif_api_general.h @@ -8,6 +8,15 @@ #ifndef WFX_HIF_API_GENERAL_H #define WFX_HIF_API_GENERAL_H +#ifdef __KERNEL__ +#include +#include +#else +#include +#include +#define __packed __attribute__((__packed__)) +#endif + #define HIF_ID_IS_INDICATION 0x80 #define HIF_COUNTER_MAX 7 diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c index 17dc133219787..63b437261eb72 100644 --- a/drivers/staging/wfx/hif_tx.c +++ b/drivers/staging/wfx/hif_tx.c @@ -6,7 +6,11 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include + +#include "hif_tx.h" #include "wfx.h" +#include "bh.h" #include "hwio.h" #include "debug.h" #include "sta.h" diff --git a/drivers/staging/wfx/hif_tx_mib.c b/drivers/staging/wfx/hif_tx_mib.c index 6432ed86505c5..1926cf1b62be9 100644 --- a/drivers/staging/wfx/hif_tx_mib.c +++ b/drivers/staging/wfx/hif_tx_mib.c @@ -6,8 +6,13 @@ * Copyright (c) 2010, ST-Ericsson * Copyright (C) 2010, ST-Ericsson SA */ + +#include + #include "wfx.h" +#include "hif_tx.h" #include "hif_tx_mib.h" +#include "hif_api_mib.h" int hif_set_output_power(struct wfx_vif *wvif, int val) { diff --git a/drivers/staging/wfx/hwio.c b/drivers/staging/wfx/hwio.c index 089bb41be1490..36fbc5b5d64ce 100644 --- a/drivers/staging/wfx/hwio.c +++ b/drivers/staging/wfx/hwio.c @@ -5,10 +5,13 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include +#include #include #include "hwio.h" #include "wfx.h" +#include "bus.h" #include "traces.h" /* diff --git a/drivers/staging/wfx/hwio.h b/drivers/staging/wfx/hwio.h index 8bb9bcfc31820..0b8e4f7157dfc 100644 --- a/drivers/staging/wfx/hwio.h +++ b/drivers/staging/wfx/hwio.h @@ -8,6 +8,8 @@ #ifndef WFX_HWIO_H #define WFX_HWIO_H +#include + struct wfx_dev; int wfx_data_read(struct wfx_dev *wdev, void *buf, size_t buf_len); diff --git a/drivers/staging/wfx/key.c b/drivers/staging/wfx/key.c index c93d07dcdc10c..2ab82bed4c1bc 100644 --- a/drivers/staging/wfx/key.c +++ b/drivers/staging/wfx/key.c @@ -5,10 +5,12 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include #include #include "key.h" #include "wfx.h" +#include "hif_tx_mib.h" static int wfx_alloc_key(struct wfx_dev *wdev) { diff --git a/drivers/staging/wfx/key.h b/drivers/staging/wfx/key.h index 4dc9feadaba2c..70a44d0ca35e8 100644 --- a/drivers/staging/wfx/key.h +++ b/drivers/staging/wfx/key.h @@ -8,6 +8,8 @@ #ifndef WFX_KEY_H #define WFX_KEY_H +#include + struct wfx_dev; struct wfx_vif; diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c index b9ea9a93fe1aa..e7bc1988124a8 100644 --- a/drivers/staging/wfx/main.c +++ b/drivers/staging/wfx/main.c @@ -10,21 +10,28 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2004-2006 Jean-Baptiste Note , et al. */ +#include #include #include +#include #include #include +#include #include +#include "main.h" #include "wfx.h" #include "fwio.h" #include "hwio.h" #include "bus.h" +#include "bh.h" #include "sta.h" #include "key.h" #include "scan.h" #include "debug.h" +#include "data_tx.h" #include "hif_tx_mib.h" +#include "hif_api_cmd.h" #define WFX_PDS_MAX_SIZE 1500 diff --git a/drivers/staging/wfx/main.h b/drivers/staging/wfx/main.h index 086bcc041b906..a0db322383a3a 100644 --- a/drivers/staging/wfx/main.h +++ b/drivers/staging/wfx/main.h @@ -10,8 +10,11 @@ #ifndef WFX_MAIN_H #define WFX_MAIN_H +#include #include +#include "hif_api_general.h" + struct wfx_dev; struct hwbus_ops; diff --git a/drivers/staging/wfx/queue.c b/drivers/staging/wfx/queue.c index 3bddf282a4cec..31c37f69c295a 100644 --- a/drivers/staging/wfx/queue.c +++ b/drivers/staging/wfx/queue.c @@ -5,9 +5,13 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include #include +#include "queue.h" #include "wfx.h" +#include "sta.h" +#include "data_tx.h" #include "traces.h" void wfx_tx_lock(struct wfx_dev *wdev) diff --git a/drivers/staging/wfx/queue.h b/drivers/staging/wfx/queue.h index e43aa9dfbc452..80ba19455ef35 100644 --- a/drivers/staging/wfx/queue.h +++ b/drivers/staging/wfx/queue.h @@ -8,6 +8,9 @@ #ifndef WFX_QUEUE_H #define WFX_QUEUE_H +#include +#include + struct wfx_dev; struct wfx_vif; diff --git a/drivers/staging/wfx/scan.h b/drivers/staging/wfx/scan.h index e5b7eef788586..c7496a7664785 100644 --- a/drivers/staging/wfx/scan.h +++ b/drivers/staging/wfx/scan.h @@ -8,6 +8,8 @@ #ifndef WFX_SCAN_H #define WFX_SCAN_H +#include + struct wfx_dev; struct wfx_vif; diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c index 5585f9e876e1c..196779a1b89ac 100644 --- a/drivers/staging/wfx/sta.c +++ b/drivers/staging/wfx/sta.c @@ -5,11 +5,17 @@ * Copyright (c) 2017-2020, Silicon Laboratories, Inc. * Copyright (c) 2010, ST-Ericsson */ +#include #include #include "sta.h" #include "wfx.h" +#include "fwio.h" +#include "bh.h" +#include "key.h" #include "scan.h" +#include "debug.h" +#include "hif_tx.h" #include "hif_tx_mib.h" #define HIF_MAX_ARP_IP_ADDRTABLE_ENTRIES 2 diff --git a/drivers/staging/wfx/sta.h b/drivers/staging/wfx/sta.h index a3fb9fc93fa44..d7b5df5ea4e6f 100644 --- a/drivers/staging/wfx/sta.h +++ b/drivers/staging/wfx/sta.h @@ -8,6 +8,8 @@ #ifndef WFX_STA_H #define WFX_STA_H +#include + struct wfx_dev; struct wfx_vif; diff --git a/drivers/staging/wfx/traces.h b/drivers/staging/wfx/traces.h index afe1074e09b33..e34c7a538c65a 100644 --- a/drivers/staging/wfx/traces.h +++ b/drivers/staging/wfx/traces.h @@ -12,8 +12,11 @@ #define _WFX_TRACE_H #include +#include #include "bus.h" +#include "hif_api_cmd.h" +#include "hif_api_mib.h" /* The hell below need some explanations. For each symbolic number, we need to * define it with TRACE_DEFINE_ENUM() and in a list for __print_symbolic. diff --git a/drivers/staging/wfx/wfx.h b/drivers/staging/wfx/wfx.h index a185b82795c4a..94898680ccdee 100644 --- a/drivers/staging/wfx/wfx.h +++ b/drivers/staging/wfx/wfx.h @@ -10,6 +10,9 @@ #ifndef WFX_H #define WFX_H +#include +#include +#include #include #include -- GitLab From 1487e7bae809d73461940a6ef8c1ffc7c4faa0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 19 Feb 2021 14:33:06 +0100 Subject: [PATCH 451/839] leds: trigger: Fix error path to not unlock the unlocked mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttyname is allocated before the mutex is taken, so it must not be unlocked in the error path. Fixes: fd4a641ac88f ("leds: trigger: implement a tty trigger") Reported-by: Pavel Machek Acked-by: Pavel Machek Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210219133307.4840-2-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-tty.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-tty.c b/drivers/leds/trigger/ledtrig-tty.c index d2ab6ab080ac3..af61281dc6a10 100644 --- a/drivers/leds/trigger/ledtrig-tty.c +++ b/drivers/leds/trigger/ledtrig-tty.c @@ -51,10 +51,8 @@ static ssize_t ttyname_store(struct device *dev, if (size) { ttyname = kmemdup_nul(buf, size, GFP_KERNEL); - if (!ttyname) { - ret = -ENOMEM; - goto out_unlock; - } + if (!ttyname) + return -ENOMEM; } else { ttyname = NULL; } @@ -69,7 +67,6 @@ static ssize_t ttyname_store(struct device *dev, trigger_data->ttyname = ttyname; -out_unlock: mutex_unlock(&trigger_data->mutex); if (ttyname && !running) -- GitLab From ba8a86e4dadb332c41454f02e27d28321e0f03d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 19 Feb 2021 14:33:07 +0100 Subject: [PATCH 452/839] leds: trigger/tty: Use led_set_brightness_sync() from workqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit led_set_brightness() involves scheduling a workqueue. As here the led's brightness setting is done in context of the trigger's workqueue this is unjustified overhead and it's more sensible to use led_set_brightness_sync(). Fixes: fd4a641ac88f ("leds: trigger: implement a tty trigger") Reported-by: Pavel Machek Acked-by: Pavel Machek Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210219133307.4840-3-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-tty.c b/drivers/leds/trigger/ledtrig-tty.c index af61281dc6a10..f62db7e520b52 100644 --- a/drivers/leds/trigger/ledtrig-tty.c +++ b/drivers/leds/trigger/ledtrig-tty.c @@ -122,12 +122,12 @@ static void ledtrig_tty_work(struct work_struct *work) if (icount.rx != trigger_data->rx || icount.tx != trigger_data->tx) { - led_set_brightness(trigger_data->led_cdev, LED_ON); + led_set_brightness_sync(trigger_data->led_cdev, LED_ON); trigger_data->rx = icount.rx; trigger_data->tx = icount.tx; } else { - led_set_brightness(trigger_data->led_cdev, LED_OFF); + led_set_brightness_sync(trigger_data->led_cdev, LED_OFF); } out: -- GitLab From 2334de198fed3da72e9785ecdd691d101aa96e77 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 17 Feb 2021 11:06:08 +0300 Subject: [PATCH 453/839] Revert "serial: max310x: rework RX interrupt handling" This reverts commit fce3c5c1a2d9cd888f2987662ce17c0c651916b2. FIFO is triggered 4 intervals after receiving a byte, it's good when we don't care about the time of reception, but are only interested in the presence of any activity on the line. Unfortunately, this method is not suitable for all tasks, for example, the RS-485 protocol will not work properly, since the state machine must track the request-response time and after the timeout expires, a decision is made that the device on the line is not responding. Signed-off-by: Alexander Shiyan Link: https://lore.kernel.org/r/20210217080608.31192-1-shc_work@mail.ru Fixes: fce3c5c1a2d9 ("serial: max310x: rework RX interrupt handling") Cc: Thomas Petazzoni Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 9795b2e8b0b2c..1b61d26bb7afe 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1056,9 +1056,9 @@ static int max310x_startup(struct uart_port *port) max310x_port_update(port, MAX310X_MODE1_REG, MAX310X_MODE1_TRNSCVCTRL_BIT, 0); - /* Reset FIFOs */ - max310x_port_write(port, MAX310X_MODE2_REG, - MAX310X_MODE2_FIFORST_BIT); + /* Configure MODE2 register & Reset FIFOs*/ + val = MAX310X_MODE2_RXEMPTINV_BIT | MAX310X_MODE2_FIFORST_BIT; + max310x_port_write(port, MAX310X_MODE2_REG, val); max310x_port_update(port, MAX310X_MODE2_REG, MAX310X_MODE2_FIFORST_BIT, 0); @@ -1086,27 +1086,8 @@ static int max310x_startup(struct uart_port *port) /* Clear IRQ status register */ max310x_port_read(port, MAX310X_IRQSTS_REG); - /* - * Let's ask for an interrupt after a timeout equivalent to - * the receiving time of 4 characters after the last character - * has been received. - */ - max310x_port_write(port, MAX310X_RXTO_REG, 4); - - /* - * Make sure we also get RX interrupts when the RX FIFO is - * filling up quickly, so get an interrupt when half of the RX - * FIFO has been filled in. - */ - max310x_port_write(port, MAX310X_FIFOTRIGLVL_REG, - MAX310X_FIFOTRIGLVL_RX(MAX310X_FIFO_SIZE / 2)); - - /* Enable RX timeout interrupt in LSR */ - max310x_port_write(port, MAX310X_LSR_IRQEN_REG, - MAX310X_LSR_RXTO_BIT); - - /* Enable LSR, RX FIFO trigger, CTS change interrupts */ - val = MAX310X_IRQ_LSR_BIT | MAX310X_IRQ_RXFIFO_BIT | MAX310X_IRQ_TXEMPTY_BIT; + /* Enable RX, TX, CTS change interrupts */ + val = MAX310X_IRQ_RXEMPTY_BIT | MAX310X_IRQ_TXEMPTY_BIT; max310x_port_write(port, MAX310X_IRQEN_REG, val | MAX310X_IRQ_CTS_BIT); return 0; -- GitLab From c776b77a279c327fe9e7710e71a3400766554255 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Mar 2021 17:18:02 +0100 Subject: [PATCH 454/839] Revert "drivers:tty:pty: Fix a race causing data loss on close" This reverts commit 33d4ae98859873ddd49e22e4ca724387548b3d89. Pierre-Louis writes: Our SOF/audio CI shows an across-the-board regression when we try v5.12-rc1, specifically on pause/resume tests with an interactive terminal running 'aplay -i' commands managed by expect to simulate the user pressing the space bar to pause/unpause. It turns out the processes are not longer killed and the audio devices remain busy (see publicly available test results listed below). git bisect points to commit 33d4ae9885987 ("drivers:tty:pty: Fix a race causing data loss on close"). Reverting the patch fixes the issue on all test devices. Further analysis with Corey Minyard points to a problem where a slave tty will not get a SIGHUP when the master is closed. So revert this for now: Reported-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/00154592-c5ee-aaba-956e-b265473b53bc@linux.intel.com Cc: Corey Minyard Cc: Jiri Slaby Cc: Mark Brown , Fixes: 33d4ae988598 ("drivers:tty:pty: Fix a race causing data loss on close") Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 15 ++------------- drivers/tty/tty_io.c | 5 ++--- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 8b2797b6ee441..5e2374580e271 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -66,8 +66,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp) wake_up_interruptible(&tty->link->read_wait); wake_up_interruptible(&tty->link->write_wait); if (tty->driver->subtype == PTY_TYPE_MASTER) { - struct file *f; - + set_bit(TTY_OTHER_CLOSED, &tty->flags); #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { mutex_lock(&devpts_mutex); @@ -76,17 +75,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp) mutex_unlock(&devpts_mutex); } #endif - - /* - * This hack is required because a program can open a - * pty and redirect a console to it, but if the pty is - * closed and the console is not released, then the - * slave side will never close. So release the - * redirect when the master closes. - */ - f = tty_release_redirect(tty->link); - if (f) - fput(f); + tty_vhangup(tty->link); } } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 74733ec8f5653..391bada4cedb6 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -544,9 +544,7 @@ EXPORT_SYMBOL_GPL(tty_wakeup); * @tty: tty device * * This is available to the pty code so if the master closes, if the - * slave is a redirect it can release the redirect. It returns the - * filp for the redirect, which must be fput when the operations on - * the tty are completed. + * slave is a redirect it can release the redirect. */ struct file *tty_release_redirect(struct tty_struct *tty) { @@ -561,6 +559,7 @@ struct file *tty_release_redirect(struct tty_struct *tty) return f; } +EXPORT_SYMBOL_GPL(tty_release_redirect); /** * __tty_hangup - actual handler for hangup events -- GitLab From 4d8654e81db7346f915eca9f1aff18f385cab621 Mon Sep 17 00:00:00 2001 From: Yorick de Wid Date: Sat, 13 Feb 2021 15:49:02 +0100 Subject: [PATCH 455/839] Goodix Fingerprint device is not a modem The CDC ACM driver is false matching the Goodix Fingerprint device against the USB_CDC_ACM_PROTO_AT_V25TER. The Goodix Fingerprint device is a biometrics sensor that should be handled in user-space. libfprint has some support for Goodix fingerprint sensors, although not for this particular one. It is possible that the vendor allocates a PID per OEM (Lenovo, Dell etc). If this happens to be the case then more devices from the same vendor could potentially match the ACM modem module table. Signed-off-by: Yorick de Wid Cc: stable Link: https://lore.kernel.org/r/20210213144901.53199-1-ydewid@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 37f824b59daae..39ddb5585ded5 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1935,6 +1935,11 @@ static const struct usb_device_id acm_ids[] = { .driver_info = SEND_ZERO_PACKET, }, + /* Exclude Goodix Fingerprint Reader */ + { USB_DEVICE(0x27c6, 0x5395), + .driver_info = IGNORE_DEVICE, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, -- GitLab From 1edbff9c80ed32071fffa7dbaaea507fdb21ff2d Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 1 Mar 2021 15:57:45 +0800 Subject: [PATCH 456/839] usb: dwc3: qcom: add ACPI device id for sc8180x It enables USB Host support for sc8180x ACPI boot, both the standalone one and the one behind URS (USB Role Switch). And they share the the same dwc3_acpi_pdata with sdm845. Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210301075745.20544-1-shawn.guo@linaro.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 846a47be6df7f..d8850f9ccb626 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -938,6 +938,8 @@ static const struct dwc3_acpi_pdata sdm845_acpi_urs_pdata = { static const struct acpi_device_id dwc3_qcom_acpi_match[] = { { "QCOM2430", (unsigned long)&sdm845_acpi_pdata }, { "QCOM0304", (unsigned long)&sdm845_acpi_urs_pdata }, + { "QCOM0497", (unsigned long)&sdm845_acpi_urs_pdata }, + { "QCOM04A6", (unsigned long)&sdm845_acpi_pdata }, { }, }; MODULE_DEVICE_TABLE(acpi, dwc3_qcom_acpi_match); -- GitLab From 650bf52208d804ad5ee449c58102f8dc43175573 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 15 Feb 2021 15:57:16 +0000 Subject: [PATCH 457/839] USB: gadget: u_ether: Fix a configfs return code If the string is invalid, this should return -EINVAL instead of 0. Fixes: 73517cf49bd4 ("usb: gadget: add RNDIS configfs options for class/subclass/protocol") Cc: stable Acked-by: Lorenzo Colitti Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YCqZ3P53yyIg5cn7@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether_configfs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether_configfs.h b/drivers/usb/gadget/function/u_ether_configfs.h index 3dfb460908fae..f558c3139ebe5 100644 --- a/drivers/usb/gadget/function/u_ether_configfs.h +++ b/drivers/usb/gadget/function/u_ether_configfs.h @@ -182,12 +182,11 @@ out: \ size_t len) \ { \ struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - int ret; \ + int ret = -EINVAL; \ u8 val; \ \ mutex_lock(&opts->lock); \ - ret = sscanf(page, "%02hhx", &val); \ - if (ret > 0) { \ + if (sscanf(page, "%02hhx", &val) > 0) { \ opts->_n_ = val; \ ret = len; \ } \ -- GitLab From 789ea77310f0200c84002884ffd628e2baf3ad8a Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Mon, 1 Mar 2021 13:49:31 +0200 Subject: [PATCH 458/839] usb: gadget: f_uac2: always increase endpoint max_packet_size by one audio slot As per UAC2 Audio Data Formats spec (2.3.1.1 USB Packets), if the sampling rate is a constant, the allowable variation of number of audio slots per virtual frame is +/- 1 audio slot. It means that endpoint should be able to accept/send +1 audio slot. Previous endpoint max_packet_size calculation code was adding sometimes +1 audio slot due to DIV_ROUND_UP behaviour which was rounding up to closest integer. However this doesn't work if the numbers are divisible. It had no any impact with Linux hosts which ignore this issue, but in case of more strict Windows it caused rejected enumeration Thus always add +1 audio slot to endpoint's max packet size Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") Cc: Peter Chen Cc: #v4.3+ Signed-off-by: Ruslan Bilovol Link: https://lore.kernel.org/r/1614599375-8803-2-git-send-email-ruslan.bilovol@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 5d960b6603b6f..6f03e944e0e31 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -478,7 +478,7 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, } max_size_bw = num_channels(chmask) * ssize * - DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + ((srate / (factor / (1 << (ep_desc->bInterval - 1)))) + 1); ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, max_size_ep)); -- GitLab From cc2ac63d4cf72104e0e7f58bb846121f0f51bb19 Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Mon, 1 Mar 2021 13:49:32 +0200 Subject: [PATCH 459/839] usb: gadget: f_uac1: stop playback on function disable There is missing playback stop/cleanup in case of gadget's ->disable callback that happens on events like USB host resetting or gadget disconnection Fixes: 0591bc236015 ("usb: gadget: add f_uac1 variant based on a new u_audio api") Cc: # 4.13+ Signed-off-by: Ruslan Bilovol Link: https://lore.kernel.org/r/1614599375-8803-3-git-send-email-ruslan.bilovol@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_uac1.c b/drivers/usb/gadget/function/f_uac1.c index 00d346965f7a5..560382e0a8f38 100644 --- a/drivers/usb/gadget/function/f_uac1.c +++ b/drivers/usb/gadget/function/f_uac1.c @@ -499,6 +499,7 @@ static void f_audio_disable(struct usb_function *f) uac1->as_out_alt = 0; uac1->as_in_alt = 0; + u_audio_stop_playback(&uac1->g_audio); u_audio_stop_capture(&uac1->g_audio); } -- GitLab From 2664deb0930643149d61cddbb66ada527ae180bd Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 2 Mar 2021 10:37:03 -0800 Subject: [PATCH 460/839] usb: dwc3: qcom: Honor wakeup enabled/disabled state The dwc3-qcom currently enables wakeup interrupts unconditionally when suspending, however this should not be done when wakeup is disabled (e.g. through the sysfs attribute power/wakeup). Only enable wakeup interrupts when device_may_wakeup() returns true. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Reviewed-by: Bjorn Andersson Signed-off-by: Matthias Kaehlcke Cc: stable Link: https://lore.kernel.org/r/20210302103659.v2.1.I44954d9e1169f2cf5c44e6454d357c75ddfa99a2@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index d8850f9ccb626..730e8d6a2aa6b 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -358,8 +358,10 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) if (ret) dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret); + if (device_may_wakeup(qcom->dev)) + dwc3_qcom_enable_interrupts(qcom); + qcom->is_suspended = true; - dwc3_qcom_enable_interrupts(qcom); return 0; } @@ -372,7 +374,8 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom) if (!qcom->is_suspended) return 0; - dwc3_qcom_disable_interrupts(qcom); + if (device_may_wakeup(qcom->dev)) + dwc3_qcom_disable_interrupts(qcom); for (i = 0; i < qcom->num_clocks; i++) { ret = clk_prepare_enable(qcom->clks[i]); -- GitLab From b1d25e6ee57c2605845595b6c61340d734253eb3 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 8 Mar 2021 10:55:38 +0900 Subject: [PATCH 461/839] usb: renesas_usbhs: Clear PIPECFG for re-enabling pipe with other EPNUM According to the datasheet, this controller has a restriction which "set an endpoint number so that combinations of the DIR bit and the EPNUM bits do not overlap.". However, since the udc core driver is possible to assign a bulk pipe as an interrupt endpoint, an endpoint number may not match the pipe number. After that, when user rebinds another gadget driver, this driver broke the restriction because the driver didn't clear any configuration in usb_ep_disable(). Example: # modprobe g_ncm Then, EP3 = pipe 3, EP4 = pipe 4, EP5 = pipe 6 # rmmod g_ncm # modprobe g_hid Then, EP3 = pipe 6, EP4 = pipe 7. So, pipe 3 and pipe 6 are set as EP3. So, clear PIPECFG register in usbhs_pipe_free(). Fixes: dfb87b8bfe09 ("usb: renesas_usbhs: gadget: fix re-enabling pipe without re-connecting") Cc: stable Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1615168538-26101-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/pipe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c index e7334b7fb3a62..75fff2e4cbc65 100644 --- a/drivers/usb/renesas_usbhs/pipe.c +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -746,6 +746,8 @@ struct usbhs_pipe *usbhs_pipe_malloc(struct usbhs_priv *priv, void usbhs_pipe_free(struct usbhs_pipe *pipe) { + usbhsp_pipe_select(pipe); + usbhsp_pipe_cfg_set(pipe, 0xFFFF, 0); usbhsp_put_pipe(pipe); } -- GitLab From 414c20df7d401bcf1cb6c13d2dd944fb53ae4acf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Mar 2021 03:49:27 +0000 Subject: [PATCH 462/839] USB: gadget: udc: s3c2410_udc: fix return value check in s3c2410_udc_probe() In case of error, the function devm_platform_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 188db4435ac6 ("usb: gadget: s3c: use platform resources") Cc: stable Reported-by: Hulk Robot Reviewed-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210305034927.3232386-1-weiyongjun1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/s3c2410_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/s3c2410_udc.c b/drivers/usb/gadget/udc/s3c2410_udc.c index f1ea51476add0..1d3ebb07ccd4d 100644 --- a/drivers/usb/gadget/udc/s3c2410_udc.c +++ b/drivers/usb/gadget/udc/s3c2410_udc.c @@ -1773,8 +1773,8 @@ static int s3c2410_udc_probe(struct platform_device *pdev) udc_info = dev_get_platdata(&pdev->dev); base_addr = devm_platform_ioremap_resource(pdev, 0); - if (!base_addr) { - retval = -ENOMEM; + if (IS_ERR(base_addr)) { + retval = PTR_ERR(base_addr); goto err_mem; } -- GitLab From 9de2c43acf37a17dc4c69ff78bb099b80fb74325 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Wed, 3 Mar 2021 22:10:53 -0600 Subject: [PATCH 463/839] USB: usblp: fix a hang in poll() if disconnected Apparently an application that opens a device and calls select() on it, will hang if the decice is disconnected. It's a little surprising that we had this bug for 15 years, but apparently nobody ever uses select() with a printer: only write() and read(), and those work fine. Well, you can also select() with a timeout. The fix is modeled after devio.c. A few other drivers check the condition first, then do not add the wait queue in case the device is disconnected. We doubt that's completely race-free. So, this patch adds the process first, then locks properly and checks for the disconnect. Reviewed-by: Zqiang Signed-off-by: Pete Zaitcev Cc: stable Link: https://lore.kernel.org/r/20210303221053.1cf3313e@suzdal.zaitcev.lan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index c9f6e97582885..f27b4aecff3d4 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -494,16 +494,24 @@ static int usblp_release(struct inode *inode, struct file *file) /* No kernel lock - fine */ static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait) { - __poll_t ret; + struct usblp *usblp = file->private_data; + __poll_t ret = 0; unsigned long flags; - struct usblp *usblp = file->private_data; /* Should we check file->f_mode & FMODE_WRITE before poll_wait()? */ poll_wait(file, &usblp->rwait, wait); poll_wait(file, &usblp->wwait, wait); + + mutex_lock(&usblp->mut); + if (!usblp->present) + ret |= EPOLLHUP; + mutex_unlock(&usblp->mut); + spin_lock_irqsave(&usblp->lock, flags); - ret = ((usblp->bidir && usblp->rcomplete) ? EPOLLIN | EPOLLRDNORM : 0) | - ((usblp->no_paper || usblp->wcomplete) ? EPOLLOUT | EPOLLWRNORM : 0); + if (usblp->bidir && usblp->rcomplete) + ret |= EPOLLIN | EPOLLRDNORM; + if (usblp->no_paper || usblp->wcomplete) + ret |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&usblp->lock, flags); return ret; } -- GitLab From 1cffb1c66499a9db9a735473778abf8427d16287 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 12 Feb 2021 23:55:19 +0300 Subject: [PATCH 464/839] usb: dwc3: qcom: Add missing DWC3 OF node refcount decrement of_get_child_by_name() increments the reference counter of the OF node it managed to find. So after the code is done using the device node, the refcount must be decremented. Add missing of_node_put() invocation then to the dwc3_qcom_of_register_core() method, since DWC3 OF node is being used only there. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20210212205521.14280-1-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 730e8d6a2aa6b..fcaf04483ad02 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -653,16 +653,19 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) ret = of_platform_populate(np, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to register dwc3 core - %d\n", ret); - return ret; + goto node_put; } qcom->dwc3 = of_find_device_by_node(dwc3_np); if (!qcom->dwc3) { + ret = -ENODEV; dev_err(dev, "failed to get dwc3 platform device\n"); - return -ENODEV; } - return 0; +node_put: + of_node_put(dwc3_np); + + return ret; } static struct platform_device * -- GitLab From a758b7c4c6f21f8e117fc8097c56fd9967363c15 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Wed, 10 Mar 2021 15:49:01 +0800 Subject: [PATCH 465/839] virt: acrn: Use EPOLLIN instead of POLLIN This fixes the following sparse warning: "sparse warnings: (new ones prefixed by >>)" >> drivers/virt/acrn/irqfd.c:163:13: sparse: sparse: restricted __poll_t degrades to integer Fixes: dcf9625f2adf ("virt: acrn: Use vfs_poll() instead of f_op->poll()") Reported-by: kernel test robot Acked-by: Shuo Liu Signed-off-by: Yejune Deng Link: https://lore.kernel.org/r/20210310074901.7486-1-yejune.deng@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/irqfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c index 98d6e9b18f9ef..df5184979b282 100644 --- a/drivers/virt/acrn/irqfd.c +++ b/drivers/virt/acrn/irqfd.c @@ -160,7 +160,7 @@ static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args) /* Check the pending event in this stage */ events = vfs_poll(f.file, &irqfd->pt); - if (events & POLLIN) + if (events & EPOLLIN) acrn_irqfd_inject(irqfd); fdput(f); -- GitLab From d5b0e0677bfd5efd17c5bbb00156931f0d41cb85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2021 09:38:12 +0100 Subject: [PATCH 466/839] u64_stats,lockdep: Fix u64_stats_init() vs lockdep Jakub reported that: static struct net_device *rtl8139_init_board(struct pci_dev *pdev) { ... u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); ... } results in lockdep getting confused between the RX and TX stats lock. This is because u64_stats_init() is an inline calling seqcount_init(), which is a macro using a static variable to generate a lockdep class. By wrapping that in an inline, we negate the effect of the macro and fold the static key variable, hence the confusion. Fix by also making u64_stats_init() a macro for the case where it matters, leaving the other case an inline for argument validation etc. Reported-by: Jakub Kicinski Debugged-by: "Ahmed S. Darwish" Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Erhard F." Link: https://lkml.kernel.org/r/YEXicy6+9MksdLZh@hirez.programming.kicks-ass.net --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79501b33..e81856c0ba134 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- GitLab From 4817a52b306136c8b2b2271d8770401441e4cf79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 15:21:18 +0100 Subject: [PATCH 467/839] seqlock,lockdep: Fix seqcount_latch_init() seqcount_init() must be a macro in order to preserve the static variable that is used for the lockdep key. Don't then wrap it in an inline function, which destroys that. Luckily there aren't many users of this function, but fix it before it becomes a problem. Fixes: 80793c3471d9 ("seqlock: Introduce seqcount_latch_t") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YEeFEbNUVkZaXDp4@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2f7bb92b4c9ee..f61e34fbaaea4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -664,10 +664,7 @@ typedef struct { * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ -static inline void seqcount_latch_init(seqcount_latch_t *s) -{ - seqcount_init(&s->seqcount); -} +#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy -- GitLab From d15dfd31384ba3cb93150e5f87661a76fa419f74 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 9 Mar 2021 12:26:01 +0000 Subject: [PATCH 468/839] arm64: mte: Map hotplugged memory as Normal Tagged In a system supporting MTE, the linear map must allow reading/writing allocation tags by setting the memory type as Normal Tagged. Currently, this is only handled for memory present at boot. Hotplugged memory uses Normal non-Tagged memory. Introduce pgprot_mhp() for hotplugged memory and use it in add_memory_resource(). The arm64 code maps pgprot_mhp() to pgprot_tagged(). Note that ZONE_DEVICE memory should not be mapped as Tagged and therefore setting the memory type in arch_add_memory() is not feasible. Signed-off-by: Catalin Marinas Fixes: 0178dc761368 ("arm64: mte: Use Normal Tagged attributes for the linear map") Reported-by: Patrick Daly Tested-by: Patrick Daly Link: https://lore.kernel.org/r/1614745263-27827-1-git-send-email-pdaly@codeaurora.org Cc: # 5.10.x Cc: Will Deacon Cc: Andrew Morton Cc: Vincenzo Frascino Cc: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Vincenzo Frascino Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210309122601.5543-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 1 - arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/mmu.c | 3 ++- include/linux/pgtable.h | 4 ++++ mm/memory_hotplug.c | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 046be789fbb47..9a65fb5281100 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings; #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e17b96d0e4b59..47027796c2f93 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3802cfbdd20d1..9c8aa1b44cd53 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ - __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + flags); } /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdfc4e9f253e4..5e772392a3795 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -904,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_mhp +#define pgprot_mhp(prot) (prot) +#endif + #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5ba51a8bdaeb2..0cdbbfbc57572 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1072,7 +1072,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { - struct mhp_params params = { .pgprot = PAGE_KERNEL }; + struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; u64 start, size; bool new_node = false; int ret; -- GitLab From 07e644885bf6727a48db109fad053cb43f3c9859 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Mar 2021 19:03:04 +0000 Subject: [PATCH 469/839] kselftest: arm64: Fix exit code of sve-ptrace We track if sve-ptrace encountered a failure in a variable but don't actually use that value when we exit the program, do so. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210309190304.39169-1-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b2282be6f9384..612d3899614ac 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -332,5 +332,5 @@ int main(void) ksft_print_cnts(); - return 0; + return ret; } -- GitLab From 26f55386f964cefa92ab7ccbed68f1a313074215 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 10 Mar 2021 11:23:10 +0530 Subject: [PATCH 470/839] arm64/mm: Fix __enable_mmu() for new TGRAN range values As per ARM ARM DDI 0487G.a, when FEAT_LPA2 is implemented, ID_AA64MMFR0_EL1 might contain a range of values to describe supported translation granules (4K and 16K pages sizes in particular) instead of just enabled or disabled values. This changes __enable_mmu() function to handle complete acceptable range of values (depending on whether the field is signed or unsigned) now represented with ID_AA64MMFR0_TGRAN_SUPPORTED_[MIN..MAX] pair. While here, also fix similar situations in EFI stub and KVM as well. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Suzuki K Poulose Cc: Ard Biesheuvel Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1615355590-21102-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++------ arch/arm64/kernel/head.S | 6 ++++-- arch/arm64/kvm/reset.c | 10 ++++++---- drivers/firmware/efi/libstub/arm64-stub.c | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index dfd4edbfe3608..d4a5fca984c3e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -796,6 +796,11 @@ #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + #ifdef CONFIG_ARM64_PA_BITS_52 #define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 #else @@ -961,14 +966,17 @@ #define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) -#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT -#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif #define MVFR2_FPMISC_SHIFT 4 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 66b0e0b66e312..8b469f1640919 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -655,8 +655,10 @@ SYM_FUNC_END(__secondary_too_slow) SYM_FUNC_START(__enable_mmu) mrs x2, ID_AA64MMFR0_EL1 ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN + b.lt __no_granule_support + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX + b.gt __no_granule_support update_early_cpu_boot_status 0, x2, x3 adrp x2, idmap_pg_dir phys_to_ttbr x1, x1 diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3eac..e81c7ec9e1020 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -311,16 +311,18 @@ int kvm_set_ipa_limit(void) } switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { - default: - case 1: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; - case 0: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT: kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); break; - case 2: + case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX: kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); break; + default: + kvm_err("Unsupported value for TGRAN_2, giving up\n"); + return -EINVAL; } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b69d63143e0d8..7bf0a7acae5e6 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -24,7 +24,7 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; - if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { + if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) { if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) efi_err("This 64 KB granular kernel is not supported by your CPU\n"); else -- GitLab From 7bb8bc6eb550116c504fb25af8678b9d7ca2abc5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 9 Mar 2021 17:44:12 -0700 Subject: [PATCH 471/839] arm64: perf: Fix 64-bit event counter read truncation Commit 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection") changed armv8pmu_read_evcntr() to return a u32 instead of u64. The result is silent truncation of the event counter when using 64-bit counters. Given the offending commit appears to have passed thru several folks, it seems likely this was a bad rebase after v8.5 PMU 64-bit counters landed. Cc: Alexandru Elisei Cc: Julien Thierry Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Fixes: 0fdf1bb75953 ("arm64: perf: Avoid PMXEV* indirection") Signed-off-by: Rob Herring Acked-by: Mark Rutland Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210310004412.1450128-1-robh@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7d2318f80955c..4658fcf88c2b4 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline u32 armv8pmu_read_evcntr(int idx) +static inline u64 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); -- GitLab From 13661fc48461282e43fe8f76bf5bf449b3d40687 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Mar 2021 12:28:07 +0100 Subject: [PATCH 472/839] ALSA: hda: Flush pending unsolicited events before suspend The HD-audio controller driver processes the unsolicited events via its work asynchronously, and this might be pending when the system goes to suspend. When a lengthy event handling like ELD byte reads is running, this might trigger unexpected accesses among suspend/resume procedure, typically seen with Nvidia driver that still requires the handling via unsolicited event verbs for ELD updates. This patch adds the flush of unsol_work to assure that pending events are processed before going into suspend. Buglink: https://bugzilla.suse.com/show_bug.cgi?id=1182377 Reported-and-tested-by: Abhishek Sahu Cc: Link: https://lore.kernel.org/r/20210310112809.9215-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5b492c3f816c1..5eea130dcf0a5 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1026,6 +1026,8 @@ static int azx_prepare(struct device *dev) chip = card->private_data; chip->pm_prepared = 1; + flush_work(&azx_bus(chip)->unsol_work); + /* HDA controller always requires different WAKEEN for runtime suspend * and system suspend, so don't use direct-complete here. */ -- GitLab From 5ff9dde42e8c72ed8102eb8cb62e03f9dc2103ab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Mar 2021 12:28:08 +0100 Subject: [PATCH 473/839] ALSA: hda: Avoid spurious unsol event handling during S3/S4 When HD-audio bus receives unsolicited events during its system suspend/resume (S3 and S4) phase, the controller driver may still try to process events although the codec chips are already (or yet) powered down. This might screw up the codec communication, resulting in CORB/RIRB errors. Such events should be rather skipped, as the codec chip status such as the jack status will be fully refreshed at the system resume time. Since we're tracking the system suspend/resume state in codec power.power_state field, let's add the check in the common unsol event handler entry point to filter out such events. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1182377 Tested-by: Abhishek Sahu Cc: # 183ab39eb0ea: ALSA: hda: Initialize power_state Link: https://lore.kernel.org/r/20210310112809.9215-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_bind.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 6a85645663759..17a25e453f60c 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -47,6 +47,10 @@ static void hda_codec_unsol_event(struct hdac_device *dev, unsigned int ev) if (codec->bus->shutdown) return; + /* ignore unsol events during system suspend/resume */ + if (codec->core.dev.power.power_state.event != PM_EVENT_ON) + return; + if (codec->patch_ops.unsol_event) codec->patch_ops.unsol_event(codec, ev); } -- GitLab From eea46a0879bcca23e15071f9968c0f6e6596e470 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Mar 2021 12:28:09 +0100 Subject: [PATCH 474/839] ALSA: hda/hdmi: Cancel pending works before suspend The per_pin->work might be still floating at the suspend, and this may hit the access to the hardware at an unexpected timing. Cancel the work properly at the suspend callback for avoiding the buggy access. Note that the bug doesn't trigger easily in the recent kernels since the work is queued only when the repoll count is set, and usually it's only at the resume callback, but it's still possible to hit in theory. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1182377 Reported-and-tested-by: Abhishek Sahu Cc: Link: https://lore.kernel.org/r/20210310112809.9215-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index e6d0843ee9df2..45ae845e82df6 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2480,6 +2480,18 @@ static void generic_hdmi_free(struct hda_codec *codec) } #ifdef CONFIG_PM +static int generic_hdmi_suspend(struct hda_codec *codec) +{ + struct hdmi_spec *spec = codec->spec; + int pin_idx; + + for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { + struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); + cancel_delayed_work_sync(&per_pin->work); + } + return 0; +} + static int generic_hdmi_resume(struct hda_codec *codec) { struct hdmi_spec *spec = codec->spec; @@ -2503,6 +2515,7 @@ static const struct hda_codec_ops generic_hdmi_patch_ops = { .build_controls = generic_hdmi_build_controls, .unsol_event = hdmi_unsol_event, #ifdef CONFIG_PM + .suspend = generic_hdmi_suspend, .resume = generic_hdmi_resume, #endif }; -- GitLab From 9e0bdaa9fcb8c64efc1487a7fba07722e7bc515e Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 22 Feb 2021 17:00:56 +0800 Subject: [PATCH 475/839] ASoC: rt1015: fix i2c communication error Remove 0x100 cache re-sync to solve i2c communication error. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/20210222090057.29532-1-jack.yu@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1015.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 37b5795b00d13..90767490af820 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -209,6 +209,7 @@ static bool rt1015_volatile_register(struct device *dev, unsigned int reg) case RT1015_VENDOR_ID: case RT1015_DEVICE_ID: case RT1015_PRO_ALT: + case RT1015_MAN_I2C: case RT1015_DAC3: case RT1015_VBAT_TEST_OUT1: case RT1015_VBAT_TEST_OUT2: -- GitLab From 0d2b6e398975bcc6a29f1d466229a312dde71b53 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 22 Feb 2021 17:00:57 +0800 Subject: [PATCH 476/839] ASoC: rt1015: enable BCLK detection after calibration Enable BCLK detection after calibration. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/20210222090057.29532-2-jack.yu@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1015.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt1015.c b/sound/soc/codecs/rt1015.c index 90767490af820..844e4079d1760 100644 --- a/sound/soc/codecs/rt1015.c +++ b/sound/soc/codecs/rt1015.c @@ -514,6 +514,7 @@ static void rt1015_calibrate(struct rt1015_priv *rt1015) msleep(300); regmap_write(regmap, RT1015_PWR_STATE_CTRL, 0x0008); regmap_write(regmap, RT1015_SYS_RST1, 0x05F5); + regmap_write(regmap, RT1015_CLK_DET, 0x8000); regcache_cache_bypass(regmap, false); regcache_mark_dirty(regmap); -- GitLab From 0c0a5883783540a56e6a5dbf5868f045dbeaa888 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Feb 2021 16:33:13 +0000 Subject: [PATCH 477/839] ASoC: codecs: lpass-rx-macro: Fix uninitialized variable ec_tx There is potential read of the uninitialized variable ec_tx if the call to snd_soc_component_read fails or returns an unrecognized w->name. To avoid this corner case, initialize ec_tx to -1 so that it is caught later when ec_tx is bounds checked. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: 4f692926f562 ("ASoC: codecs: lpass-rx-macro: add dapm widgets and route") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210215163313.84026-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-rx-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index c9c21d22c2c45..8c04b3b2c9075 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -2895,7 +2895,7 @@ static int rx_macro_enable_echo(struct snd_soc_dapm_widget *w, { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); u16 val, ec_hq_reg; - int ec_tx; + int ec_tx = -1; val = snd_soc_component_read(component, CDC_RX_INP_MUX_RX_MIX_CFG4); -- GitLab From 87263968516fb9507d6215d53f44052627fae8d8 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Tue, 16 Feb 2021 14:42:21 +0300 Subject: [PATCH 478/839] ASoC: fsl_ssi: Fix TDM slot setup for I2S mode When using the driver in I2S TDM mode, the _fsl_ssi_set_dai_fmt() function rewrites the number of slots previously set by the fsl_ssi_set_dai_tdm_slot() function to 2 by default. To fix this, let's use the saved slot count value or, if TDM is not used and the slot count is not set, proceed as before. Fixes: 4f14f5c11db1 ("ASoC: fsl_ssi: Fix number of words per frame for I2S-slave mode") Signed-off-by: Alexander Shiyan Acked-by: Nicolin Chen Link: https://lore.kernel.org/r/20210216114221.26635-1-shc_work@mail.ru Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 57811743c2948..ad8af3f450e29 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -878,6 +878,7 @@ static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) { u32 strcr = 0, scr = 0, stcr, srcr, mask; + unsigned int slots; ssi->dai_fmt = fmt; @@ -909,10 +910,11 @@ static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) return -EINVAL; } + slots = ssi->slots ? : 2; regmap_update_bits(ssi->regs, REG_SSI_STCCR, - SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); regmap_update_bits(ssi->regs, REG_SSI_SRCCR, - SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); /* Data on rising edge of bclk, frame low, 1clk before data */ strcr |= SSI_STCR_TFSI | SSI_STCR_TSCKP | SSI_STCR_TEFS; -- GitLab From d917b5dde660b11abd757bf99a29353c36880b2c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 Feb 2021 15:21:15 +0100 Subject: [PATCH 479/839] ASoC: rt5670: Remove 'OUT Channel Switch' control The "OUT Channel Switch" control is a left over from code copied from thr rt5640 codec driver. With the rt5640 codec driver the output volume controls have 2 pairs of mute bits: bit 7, 15: Mute Control for Spk/Headphone/Line Output Port bit 6, 14: Mute Control for Spk/Headphone/Line Volume Channel Bits 7 and 15 are normal mute bits on the rt5670/5672 which are controlled by 2 dapm widgets: SND_SOC_DAPM_SWITCH("LOUT L Playback", SND_SOC_NOPM, 0, 0, &lout_l_enable_control), SND_SOC_DAPM_SWITCH("LOUT R Playback", SND_SOC_NOPM, 0, 0, &lout_r_enable_control), But on the 5670/5672 bit 6 is always reserved, where as bit 14 is "LOUT Differential Mode" on the 5670 and also reserved on the 5672. So the "OUT Channel Switch" control which is controlling bits 6+14 of the "LINE Output Control" register is bogus -> remove it. This should not cause any issues for userspace. AFAICT the rt567x codecs are only used on x86/ACPI devices and the UCM profiles used there do not use the "OUT Channel Switch" control. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210215142118.308516-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 2 -- sound/soc/codecs/rt5670.h | 4 ---- 2 files changed, 6 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index c29317ea5df2f..2e799e21dbdac 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -637,8 +637,6 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = { RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 39, 1, out_vol_tlv), /* OUTPUT Control */ - SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1, - RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1), SOC_DOUBLE_TLV("OUT Playback Volume", RT5670_LOUT1, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 39, 1, out_vol_tlv), /* DAC Digital Volume */ diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h index 56b13fe6bd3cb..f9c4db156c803 100644 --- a/sound/soc/codecs/rt5670.h +++ b/sound/soc/codecs/rt5670.h @@ -212,12 +212,8 @@ /* global definition */ #define RT5670_L_MUTE (0x1 << 15) #define RT5670_L_MUTE_SFT 15 -#define RT5670_VOL_L_MUTE (0x1 << 14) -#define RT5670_VOL_L_SFT 14 #define RT5670_R_MUTE (0x1 << 7) #define RT5670_R_MUTE_SFT 7 -#define RT5670_VOL_R_MUTE (0x1 << 6) -#define RT5670_VOL_R_SFT 6 #define RT5670_L_VOL_MASK (0x3f << 8) #define RT5670_L_VOL_SFT 8 #define RT5670_R_VOL_MASK (0x3f) -- GitLab From caba8d764770b6824391c5bf3c3eba6e51b69330 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 Feb 2021 15:21:16 +0100 Subject: [PATCH 480/839] ASoC: rt5670: Remove 'HP Playback Switch' control The RT5670_L_MUTE_SFT and RT5670_R_MUTE_SFT bits (bits 15 and 7) of the RT5670_HP_VOL register are set / unset by the headphones deplop code run by rt5670_hp_event() on SND_SOC_DAPM_POST_PMU / SND_SOC_DAPM_PRE_PMD. So we should not also export a control to userspace which toggles these same bits. This should not cause any issues for userspace. AFAICT the rt567x codecs are only used on x86/ACPI devices and the UCM profiles used there do not use the "HP Playback Switch" control. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210215142118.308516-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 2e799e21dbdac..932e4cd1e9a64 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -631,8 +631,6 @@ static SOC_ENUM_SINGLE_DECL(rt5670_if2_adc_enum, RT5670_DIG_INF1_DATA, static const struct snd_kcontrol_new rt5670_snd_controls[] = { /* Headphone Output Volume */ - SOC_DOUBLE("HP Playback Switch", RT5670_HP_VOL, - RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1), SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 39, 1, out_vol_tlv), -- GitLab From 02aa946ef3762aa456d87cc55606667942b3f354 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 Feb 2021 15:21:17 +0100 Subject: [PATCH 481/839] ASoC: rt5670: Remove ADC vol-ctrl mute bits poking from Sto1 ADC mixer settings The SND_SOC_DAPM_MIXER declaration for "Sto1 ADC MIXL" and "Sto1 ADC MIXR" was using the mute bits from the RT5670_STO1_ADC_DIG_VOL control as mixer master mute bits. But these bits are already exposed to userspace as controls as part of the "ADC Capture Volume" / "ADC Capture Switch" control pair: SOC_DOUBLE("ADC Capture Switch", RT5670_STO1_ADC_DIG_VOL, RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1), SOC_DOUBLE_TLV("ADC Capture Volume", RT5670_STO1_ADC_DIG_VOL, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 127, 0, adc_vol_tlv), Both the fact that the mute bits belong to the same reg as the vol-ctrl and the "Digital Mixer Path" diagram in the datasheet clearly shows that these mute bits are not part of the mixer and having 2 separate controls poking at the same bits is a bad idea. Remove the master-mute bits settings from the "Sto1 ADC MIXL" and "Sto1 ADC MIXR" DAPM widget declarations, avoiding these bits getting poked from 2 different places. This should not cause any issues for userspace. AFAICT the rt567x codecs are only used on x86/ACPI devices and the UCM profiles used there already set the "ADC Capture Switch" as needed. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210215142118.308516-4-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 932e4cd1e9a64..2f015c24c6377 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -1652,12 +1652,10 @@ static const struct snd_soc_dapm_widget rt5670_dapm_widgets[] = { RT5670_PWR_ADC_S1F_BIT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("ADC Stereo2 Filter", RT5670_PWR_DIG2, RT5670_PWR_ADC_S2F_BIT, 0, NULL, 0), - SND_SOC_DAPM_MIXER("Sto1 ADC MIXL", RT5670_STO1_ADC_DIG_VOL, - RT5670_L_MUTE_SFT, 1, rt5670_sto1_adc_l_mix, - ARRAY_SIZE(rt5670_sto1_adc_l_mix)), - SND_SOC_DAPM_MIXER("Sto1 ADC MIXR", RT5670_STO1_ADC_DIG_VOL, - RT5670_R_MUTE_SFT, 1, rt5670_sto1_adc_r_mix, - ARRAY_SIZE(rt5670_sto1_adc_r_mix)), + SND_SOC_DAPM_MIXER("Sto1 ADC MIXL", SND_SOC_NOPM, 0, 0, + rt5670_sto1_adc_l_mix, ARRAY_SIZE(rt5670_sto1_adc_l_mix)), + SND_SOC_DAPM_MIXER("Sto1 ADC MIXR", SND_SOC_NOPM, 0, 0, + rt5670_sto1_adc_r_mix, ARRAY_SIZE(rt5670_sto1_adc_r_mix)), SND_SOC_DAPM_MIXER("Sto2 ADC MIXL", SND_SOC_NOPM, 0, 0, rt5670_sto2_adc_l_mix, ARRAY_SIZE(rt5670_sto2_adc_l_mix)), -- GitLab From 42121c2645d229d348399ad278b6c3fd224bd6a2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 Feb 2021 15:21:18 +0100 Subject: [PATCH 482/839] ASoC: rt5670: Add emulated 'DAC1 Playback Switch' control For reliable output-mute LED control we need a "DAC1 Playback Switch" control. The "DAC Playback volume" control is the only control in the path from the DAC1 data input to the speaker output, so the UCM profile for the speaker output will have its PlaybackMixerElem set to "DAC1". But userspace (pulseaudio) will set the "DAC1 Playback Volume" control to its softest setting (which is not fully muted) while still showing the speaker as being enabled at a low volume in the UI. If we were to set the SNDRV_CTL_ELEM_ACCESS_SPK_LED on the "DAC1 Playback Volume" control, this would mean then what pressing KEY_VOLUMEDOWN the speaker-mute LED (embedded in the volume-mute toggle key) would light while the UI is still showing the speaker as being enabled at a low volume, meaning that the UI and the LED are out of sync. Only after an _extra_ KEY_VOLUMEDOWN press would the UI show the speaker as being muted. The path from DAC1 data input to the speaker output does have a digital mixer with DAC1's data as one of its inputs direclty after the "DAC1 Playback Volume" control. This commit adds an emulated "DAC1 Playback Switch" control by: 1. Declaring the enable flag for that mixers DAC1 input as well as the "DAC1 Playback Switch" control both as SND_SOC_NOPM controls. 2. Storing the settings of both controls as driver-private data 3. Only clearing the mute flag for the DAC1 input of that mixer if the stored values indicate both controls are enabled. This is a preparation patch for adding "audio-mute" LED trigger support. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210215142118.308516-5-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 96 +++++++++++++++++++++++++++++++++++++-- sound/soc/codecs/rt5670.h | 5 ++ 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 2f015c24c6377..4063aac2a4431 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -629,6 +629,56 @@ static SOC_ENUM_SINGLE_DECL(rt5670_if2_dac_enum, RT5670_DIG_INF1_DATA, static SOC_ENUM_SINGLE_DECL(rt5670_if2_adc_enum, RT5670_DIG_INF1_DATA, RT5670_IF2_ADC_SEL_SFT, rt5670_data_select); +/* + * For reliable output-mute LED control we need a "DAC1 Playback Switch" control. + * We emulate this by only clearing the RT5670_M_DAC1_L/_R AD_DA_MIXER register + * bits when both our emulated DAC1 Playback Switch control and the DAC1 MIXL/R + * DAPM-mixer DAC1 input are enabled. + */ +static void rt5670_update_ad_da_mixer_dac1_m_bits(struct rt5670_priv *rt5670) +{ + int val = RT5670_M_DAC1_L | RT5670_M_DAC1_R; + + if (rt5670->dac1_mixl_dac1_switch && rt5670->dac1_playback_switch_l) + val &= ~RT5670_M_DAC1_L; + + if (rt5670->dac1_mixr_dac1_switch && rt5670->dac1_playback_switch_r) + val &= ~RT5670_M_DAC1_R; + + regmap_update_bits(rt5670->regmap, RT5670_AD_DA_MIXER, + RT5670_M_DAC1_L | RT5670_M_DAC1_R, val); +} + +static int rt5670_dac1_playback_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); + struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component); + + ucontrol->value.integer.value[0] = rt5670->dac1_playback_switch_l; + ucontrol->value.integer.value[1] = rt5670->dac1_playback_switch_r; + + return 0; +} + +static int rt5670_dac1_playback_switch_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); + struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component); + + if (rt5670->dac1_playback_switch_l == ucontrol->value.integer.value[0] && + rt5670->dac1_playback_switch_r == ucontrol->value.integer.value[1]) + return 0; + + rt5670->dac1_playback_switch_l = ucontrol->value.integer.value[0]; + rt5670->dac1_playback_switch_r = ucontrol->value.integer.value[1]; + + rt5670_update_ad_da_mixer_dac1_m_bits(rt5670); + + return 1; +} + static const struct snd_kcontrol_new rt5670_snd_controls[] = { /* Headphone Output Volume */ SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL, @@ -640,6 +690,8 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = { /* DAC Digital Volume */ SOC_DOUBLE("DAC2 Playback Switch", RT5670_DAC_CTRL, RT5670_M_DAC_L2_VOL_SFT, RT5670_M_DAC_R2_VOL_SFT, 1, 1), + SOC_DOUBLE_EXT("DAC1 Playback Switch", SND_SOC_NOPM, 0, 1, 1, 0, + rt5670_dac1_playback_switch_get, rt5670_dac1_playback_switch_put), SOC_DOUBLE_TLV("DAC1 Playback Volume", RT5670_DAC1_DIG_VOL, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 175, 0, dac_vol_tlv), @@ -909,18 +961,44 @@ static const struct snd_kcontrol_new rt5670_mono_adc_r_mix[] = { RT5670_M_MONO_ADC_R2_SFT, 1, 1), }; +/* See comment above rt5670_update_ad_da_mixer_dac1_m_bits() */ +static int rt5670_put_dac1_mix_dac1_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component); + int ret; + + if (mc->shift == 0) + rt5670->dac1_mixl_dac1_switch = ucontrol->value.integer.value[0]; + else + rt5670->dac1_mixr_dac1_switch = ucontrol->value.integer.value[0]; + + /* Apply the update (if any) */ + ret = snd_soc_dapm_put_volsw(kcontrol, ucontrol); + if (ret == 0) + return 0; + + rt5670_update_ad_da_mixer_dac1_m_bits(rt5670); + + return 1; +} + +#define SOC_DAPM_SINGLE_RT5670_DAC1_SW(name, shift) \ + SOC_SINGLE_EXT(name, SND_SOC_NOPM, shift, 1, 0, \ + snd_soc_dapm_get_volsw, rt5670_put_dac1_mix_dac1_switch) + static const struct snd_kcontrol_new rt5670_dac_l_mix[] = { SOC_DAPM_SINGLE("Stereo ADC Switch", RT5670_AD_DA_MIXER, RT5670_M_ADCMIX_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 Switch", RT5670_AD_DA_MIXER, - RT5670_M_DAC1_L_SFT, 1, 1), + SOC_DAPM_SINGLE_RT5670_DAC1_SW("DAC1 Switch", 0), }; static const struct snd_kcontrol_new rt5670_dac_r_mix[] = { SOC_DAPM_SINGLE("Stereo ADC Switch", RT5670_AD_DA_MIXER, RT5670_M_ADCMIX_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 Switch", RT5670_AD_DA_MIXER, - RT5670_M_DAC1_R_SFT, 1, 1), + SOC_DAPM_SINGLE_RT5670_DAC1_SW("DAC1 Switch", 1), }; static const struct snd_kcontrol_new rt5670_sto_dac_l_mix[] = { @@ -2993,6 +3071,16 @@ static int rt5670_i2c_probe(struct i2c_client *i2c, dev_info(&i2c->dev, "quirk JD mode 3\n"); } + /* + * Enable the emulated "DAC1 Playback Switch" by default to avoid + * muting the output with older UCM profiles. + */ + rt5670->dac1_playback_switch_l = true; + rt5670->dac1_playback_switch_r = true; + /* The Power-On-Reset values for the DAC1 mixer have the DAC1 input enabled. */ + rt5670->dac1_mixl_dac1_switch = true; + rt5670->dac1_mixr_dac1_switch = true; + rt5670->regmap = devm_regmap_init_i2c(i2c, &rt5670_regmap); if (IS_ERR(rt5670->regmap)) { ret = PTR_ERR(rt5670->regmap); diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h index f9c4db156c803..6fb3c369ee989 100644 --- a/sound/soc/codecs/rt5670.h +++ b/sound/soc/codecs/rt5670.h @@ -2013,6 +2013,11 @@ struct rt5670_priv { int dsp_rate; int jack_type; int jack_type_saved; + + bool dac1_mixl_dac1_switch; + bool dac1_mixr_dac1_switch; + bool dac1_playback_switch_l; + bool dac1_playback_switch_r; }; void rt5670_jack_suspend(struct snd_soc_component *component); -- GitLab From 4ec5b96775a88dd9b1c3ba1d23c43c478cab95a2 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 24 Feb 2021 14:57:51 +0800 Subject: [PATCH 483/839] ASoC: ak4458: Add MODULE_DEVICE_TABLE Add missed MODULE_DEVICE_TABLE for the driver can be loaded automatically at boot. Fixes: 08660086eff9 ("ASoC: ak4458: Add support for AK4458 DAC driver") Cc: Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1614149872-25510-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/ak4458.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c index 472caad17012e..85a1d00894a9c 100644 --- a/sound/soc/codecs/ak4458.c +++ b/sound/soc/codecs/ak4458.c @@ -812,6 +812,7 @@ static const struct of_device_id ak4458_of_match[] = { { .compatible = "asahi-kasei,ak4497", .data = &ak4497_drvdata}, { }, }; +MODULE_DEVICE_TABLE(of, ak4458_of_match); static struct i2c_driver ak4458_i2c_driver = { .driver = { -- GitLab From 80cffd2468ddb850e678f17841fc356930b2304a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 24 Feb 2021 14:57:52 +0800 Subject: [PATCH 484/839] ASoC: ak5558: Add MODULE_DEVICE_TABLE Add missed MODULE_DEVICE_TABLE for the driver can be loaded automatically at boot. Fixes: 920884777480 ("ASoC: ak5558: Add support for AK5558 ADC driver") Cc: Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1614149872-25510-2-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/ak5558.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index 8a32b0139cb0c..85bdd05341803 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -419,6 +419,7 @@ static const struct of_device_id ak5558_i2c_dt_ids[] __maybe_unused = { { .compatible = "asahi-kasei,ak5558"}, { } }; +MODULE_DEVICE_TABLE(of, ak5558_i2c_dt_ids); static struct i2c_driver ak5558_i2c_driver = { .driver = { -- GitLab From ca08ddfd961d2a17208d9182e0ee5791b39bd8bf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 24 Feb 2021 11:50:52 +0100 Subject: [PATCH 485/839] ASoC: Intel: bytcr_rt5640: Fix HP Pavilion x2 10-p0XX OVCD current threshold When I added the quirk for the "HP Pavilion x2 10-p0XX" I copied the byt_rt5640_quirk_table[] entry for the HP Pavilion x2 10-k0XX / 10-n0XX models since these use almost the same settings. While doing this I accidentally also copied and kept the non-standard OVCD_TH_1500UA setting used on those models. This too low threshold is causing headsets to often be seen as headphones (without a headset-mic) and when correctly identified it is causing ghost play/pause button-presses to get detected. Correct the HP Pavilion x2 10-p0XX quirk to use the default OVCD_TH_2000UA setting, fixing these problems. Fixes: fbdae7d6d04d ("ASoC: Intel: bytcr_rt5640: Fix HP Pavilion x2 Detachable quirks") Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210224105052.42116-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 782f2b4d72ad9..5d48cc359c3da 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -581,7 +581,7 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { }, .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | BYT_RT5640_JD_SRC_JD1_IN4P | - BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_MCLK_EN), }, -- GitLab From cfa26ed1f9f885c2fd8f53ca492989d1e16d0199 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Feb 2021 15:38:13 +0100 Subject: [PATCH 486/839] ASoC: rt5640: Fix dac- and adc- vol-tlv values being off by a factor of 10 The adc_vol_tlv volume-control has a range from -17.625 dB to +30 dB, not -176.25 dB to + 300 dB. This wrong scale is esp. a problem in userspace apps which translate the dB scale to a linear scale. With the logarithmic dB scale being of by a factor of 10 we loose all precision in the lower area of the range when apps translate things to a linear scale. E.g. the 0 dB default, which corresponds with a value of 47 of the 0 - 127 range for the control, would be shown as 0/100 in alsa-mixer. Since the centi-dB values used in the TLV struct cannot represent the 0.375 dB step size used by these controls, change the TLV definition for them to specify a min and max value instead of min + stepsize. Note this mirrors commit 3f31f7d9b540 ("ASoC: rt5670: Fix dac- and adc- vol-tlv values being off by a factor of 10") which made the exact same change to the rt5670 codec driver. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210226143817.84287-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 1414ad15d01cf..a5674c227b3a6 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -339,9 +339,9 @@ static bool rt5640_readable_register(struct device *dev, unsigned int reg) } static const DECLARE_TLV_DB_SCALE(out_vol_tlv, -4650, 150, 0); -static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0); +static const DECLARE_TLV_DB_MINMAX(dac_vol_tlv, -6562, 0); static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -3450, 150, 0); -static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_MINMAX(adc_vol_tlv, -1762, 3000); static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0); /* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */ -- GitLab From eee51df776bd6cac10a76b2779a9fdee3f622b2b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Feb 2021 15:38:14 +0100 Subject: [PATCH 487/839] ASoC: rt5651: Fix dac- and adc- vol-tlv values being off by a factor of 10 The adc_vol_tlv volume-control has a range from -17.625 dB to +30 dB, not -176.25 dB to + 300 dB. This wrong scale is esp. a problem in userspace apps which translate the dB scale to a linear scale. With the logarithmic dB scale being of by a factor of 10 we loose all precision in the lower area of the range when apps translate things to a linear scale. E.g. the 0 dB default, which corresponds with a value of 47 of the 0 - 127 range for the control, would be shown as 0/100 in alsa-mixer. Since the centi-dB values used in the TLV struct cannot represent the 0.375 dB step size used by these controls, change the TLV definition for them to specify a min and max value instead of min + stepsize. Note this mirrors commit 3f31f7d9b540 ("ASoC: rt5670: Fix dac- and adc- vol-tlv values being off by a factor of 10") which made the exact same change to the rt5670 codec driver. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210226143817.84287-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5651.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index d198e191fb0c9..e59fdc81dbd45 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -285,9 +285,9 @@ static bool rt5651_readable_register(struct device *dev, unsigned int reg) } static const DECLARE_TLV_DB_SCALE(out_vol_tlv, -4650, 150, 0); -static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0); +static const DECLARE_TLV_DB_MINMAX(dac_vol_tlv, -6562, 0); static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -3450, 150, 0); -static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_MINMAX(adc_vol_tlv, -1762, 3000); static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0); /* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */ -- GitLab From f86f58e3594fb0ab1993d833d3b9a2496f3c928c Mon Sep 17 00:00:00 2001 From: Benjamin Rood Date: Fri, 19 Feb 2021 13:33:08 -0500 Subject: [PATCH 488/839] ASoC: sgtl5000: set DAP_AVC_CTRL register to correct default value on probe According to the SGTL5000 datasheet [1], the DAP_AVC_CTRL register has the following bit field definitions: | BITS | FIELD | RW | RESET | DEFINITION | | 15 | RSVD | RO | 0x0 | Reserved | | 14 | RSVD | RW | 0x1 | Reserved | | 13:12 | MAX_GAIN | RW | 0x1 | Max Gain of AVC in expander mode | | 11:10 | RSVD | RO | 0x0 | Reserved | | 9:8 | LBI_RESP | RW | 0x1 | Integrator Response | | 7:6 | RSVD | RO | 0x0 | Reserved | | 5 | HARD_LMT_EN | RW | 0x0 | Enable hard limiter mode | | 4:1 | RSVD | RO | 0x0 | Reserved | | 0 | EN | RW | 0x0 | Enable/Disable AVC | The original default value written to the DAP_AVC_CTRL register during sgtl5000_i2c_probe() was 0x0510. This would incorrectly write values to bits 4 and 10, which are defined as RESERVED. It would also not set bits 12 and 14 to their correct RESET values of 0x1, and instead set them to 0x0. While the DAP_AVC module is effectively disabled because the EN bit is 0, this default value is still writing invalid values to registers that are marked as read-only and RESERVED as well as not setting bits 12 and 14 to their correct default values as defined by the datasheet. The correct value that should be written to the DAP_AVC_CTRL register is 0x5100, which configures the register bits to the default values defined by the datasheet, and prevents any writes to bits defined as 'read-only'. Generally speaking, it is best practice to NOT attempt to write values to registers/bits defined as RESERVED, as it generally produces unwanted/undefined behavior, or errors. Also, all credit for this patch should go to my colleague Dan MacDonald for finding this error in the first place. [1] https://www.nxp.com/docs/en/data-sheet/SGTL5000.pdf Signed-off-by: Benjamin Rood Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20210219183308.GA2117@ubuntu-dev Signed-off-by: Mark Brown --- sound/soc/codecs/sgtl5000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 73551e36695e9..6d9bb256a2cf5 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -71,7 +71,7 @@ static const struct reg_default sgtl5000_reg_defaults[] = { { SGTL5000_DAP_EQ_BASS_BAND4, 0x002f }, { SGTL5000_DAP_MAIN_CHAN, 0x8000 }, { SGTL5000_DAP_MIX_CHAN, 0x0000 }, - { SGTL5000_DAP_AVC_CTRL, 0x0510 }, + { SGTL5000_DAP_AVC_CTRL, 0x5100 }, { SGTL5000_DAP_AVC_THRESHOLD, 0x1473 }, { SGTL5000_DAP_AVC_ATTACK, 0x0028 }, { SGTL5000_DAP_AVC_DECAY, 0x0050 }, -- GitLab From bb18c678754ce1514100fb4c0bf6113b5af36c48 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Feb 2021 17:04:41 +0100 Subject: [PATCH 489/839] ASoC: es8316: Simplify adc_pga_gain_tlv table Most steps in this table are steps of 3dB (300 centi-dB), so we can simplify the table. This not only reduces the amount of space it takes inside the kernel, this also makes alsa-lib's mixer code actually accept the table, where as before this change alsa-lib saw the "ADC PGA Gain" control as a control without a dB scale. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210228160441.241110-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8316.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index d632055370e09..067757d1d70a3 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -63,13 +63,8 @@ static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(adc_pga_gain_tlv, 1, 1, TLV_DB_SCALE_ITEM(0, 0, 0), 2, 2, TLV_DB_SCALE_ITEM(250, 0, 0), 3, 3, TLV_DB_SCALE_ITEM(450, 0, 0), - 4, 4, TLV_DB_SCALE_ITEM(700, 0, 0), - 5, 5, TLV_DB_SCALE_ITEM(1000, 0, 0), - 6, 6, TLV_DB_SCALE_ITEM(1300, 0, 0), - 7, 7, TLV_DB_SCALE_ITEM(1600, 0, 0), - 8, 8, TLV_DB_SCALE_ITEM(1800, 0, 0), - 9, 9, TLV_DB_SCALE_ITEM(2100, 0, 0), - 10, 10, TLV_DB_SCALE_ITEM(2400, 0, 0), + 4, 7, TLV_DB_SCALE_ITEM(700, 300, 0), + 8, 10, TLV_DB_SCALE_ITEM(1800, 300, 0), ); static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(hpout_vol_tlv, -- GitLab From 5bb0ecddb2a7f638d65e457f3da9fa334c967b14 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:34:10 -0600 Subject: [PATCH 490/839] ASoC: SOF: Intel: unregister DMIC device on probe error We only unregister the platform device during the .remove operation, but if the probe fails we will never reach this sequence. Suggested-by: Bard Liao Fixes: dd96daca6c83e ("ASoC: SOF: Intel: Add APL/CNL HW DSP support") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20210302003410.1178535-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 1d29b1fd6a949..0c096db07322d 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -897,6 +897,7 @@ free_streams: /* dsp_unmap: not currently used */ iounmap(sdev->bar[HDA_DSP_BAR]); hdac_bus_unmap: + platform_device_unregister(hdev->dmic_dev); iounmap(bus->remap_addr); hda_codec_i915_exit(sdev); err: -- GitLab From 7de14d581dbed57c2b3c6afffa2c3fdc6955a3cd Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 3 Mar 2021 11:55:26 +0000 Subject: [PATCH 491/839] ASoC: soc-core: Prevent warning if no DMI table is present Many systems do not use ACPI and hence do not provide a DMI table. On non-ACPI systems a warning, such as the following, is printed on boot. WARNING KERN tegra-audio-graph-card sound: ASoC: no DMI vendor name! The variable 'dmi_available' is not exported and so currently cannot be used by kernel modules without adding an accessor. However, it is possible to use the function is_acpi_device_node() to determine if the sound card is an ACPI device and hence indicate if we expect a DMI table to be present. Therefore, call is_acpi_device_node() to see if we are using ACPI and only parse the DMI table if we are booting with ACPI. Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20210303115526.419458-1-jonathanh@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f6d4e99b590c7..0cffc9527e289 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,9 @@ int snd_soc_set_dmi_name(struct snd_soc_card *card, const char *flavour) if (card->long_name) return 0; /* long name already set by driver or from DMI */ + if (!is_acpi_device_node(card->dev->fwnode)) + return 0; + /* make up dmi long name as: vendor-product-version-board */ vendor = dmi_get_system_info(DMI_BOARD_VENDOR); if (!vendor || !is_dmi_valid(vendor)) { -- GitLab From e793c965519b8b7f2fea51a48398405e2a501729 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 5 Mar 2021 17:34:28 +0000 Subject: [PATCH 492/839] ASoC: cs42l42: Fix Bitclock polarity inversion The driver was setting bit clock polarity opposite to intended polarity. Also simplify the code by grouping ADC and DAC clock configurations into a single field. Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210305173442.195740-2-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 20 ++++++++------------ sound/soc/codecs/cs42l42.h | 11 ++++++----- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 210fcbedf2413..df0d5fec0287a 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -797,27 +797,23 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) /* Bitclock/frame inversion */ switch (fmt & SND_SOC_DAIFMT_INV_MASK) { case SND_SOC_DAIFMT_NB_NF: + asp_cfg_val |= CS42L42_ASP_SCPOL_NOR << CS42L42_ASP_SCPOL_SHIFT; break; case SND_SOC_DAIFMT_NB_IF: - asp_cfg_val |= CS42L42_ASP_POL_INV << - CS42L42_ASP_LCPOL_IN_SHIFT; + asp_cfg_val |= CS42L42_ASP_SCPOL_NOR << CS42L42_ASP_SCPOL_SHIFT; + asp_cfg_val |= CS42L42_ASP_LCPOL_INV << CS42L42_ASP_LCPOL_SHIFT; break; case SND_SOC_DAIFMT_IB_NF: - asp_cfg_val |= CS42L42_ASP_POL_INV << - CS42L42_ASP_SCPOL_IN_DAC_SHIFT; break; case SND_SOC_DAIFMT_IB_IF: - asp_cfg_val |= CS42L42_ASP_POL_INV << - CS42L42_ASP_LCPOL_IN_SHIFT; - asp_cfg_val |= CS42L42_ASP_POL_INV << - CS42L42_ASP_SCPOL_IN_DAC_SHIFT; + asp_cfg_val |= CS42L42_ASP_LCPOL_INV << CS42L42_ASP_LCPOL_SHIFT; break; } - snd_soc_component_update_bits(component, CS42L42_ASP_CLK_CFG, - CS42L42_ASP_MODE_MASK | - CS42L42_ASP_SCPOL_IN_DAC_MASK | - CS42L42_ASP_LCPOL_IN_MASK, asp_cfg_val); + snd_soc_component_update_bits(component, CS42L42_ASP_CLK_CFG, CS42L42_ASP_MODE_MASK | + CS42L42_ASP_SCPOL_MASK | + CS42L42_ASP_LCPOL_MASK, + asp_cfg_val); return 0; } diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 9e3cc528dcff0..1f0d67c95a9ad 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -258,11 +258,12 @@ #define CS42L42_ASP_SLAVE_MODE 0x00 #define CS42L42_ASP_MODE_SHIFT 4 #define CS42L42_ASP_MODE_MASK (1 << CS42L42_ASP_MODE_SHIFT) -#define CS42L42_ASP_SCPOL_IN_DAC_SHIFT 2 -#define CS42L42_ASP_SCPOL_IN_DAC_MASK (1 << CS42L42_ASP_SCPOL_IN_DAC_SHIFT) -#define CS42L42_ASP_LCPOL_IN_SHIFT 0 -#define CS42L42_ASP_LCPOL_IN_MASK (1 << CS42L42_ASP_LCPOL_IN_SHIFT) -#define CS42L42_ASP_POL_INV 1 +#define CS42L42_ASP_SCPOL_SHIFT 2 +#define CS42L42_ASP_SCPOL_MASK (3 << CS42L42_ASP_SCPOL_SHIFT) +#define CS42L42_ASP_SCPOL_NOR 3 +#define CS42L42_ASP_LCPOL_SHIFT 0 +#define CS42L42_ASP_LCPOL_MASK (3 << CS42L42_ASP_LCPOL_SHIFT) +#define CS42L42_ASP_LCPOL_INV 3 #define CS42L42_ASP_FRM_CFG (CS42L42_PAGE_12 + 0x08) #define CS42L42_ASP_STP_SHIFT 4 -- GitLab From 2bdc4f5c6838f7c3feb4fe68e4edbeea158ec0a2 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 5 Mar 2021 17:34:29 +0000 Subject: [PATCH 493/839] ASoC: cs42l42: Fix channel width support Remove the hard coded 32 bits width and replace with the correct width calculated by params_width. Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210305173442.195740-3-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 47 ++++++++++++++++++-------------------- sound/soc/codecs/cs42l42.h | 1 - 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index df0d5fec0287a..4f9ad95479292 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -691,24 +691,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component) CS42L42_CLK_OASRC_SEL_MASK, CS42L42_CLK_OASRC_SEL_12 << CS42L42_CLK_OASRC_SEL_SHIFT); - /* channel 1 on low LRCLK, 32 bit */ - snd_soc_component_update_bits(component, - CS42L42_ASP_RX_DAI0_CH1_AP_RES, - CS42L42_ASP_RX_CH_AP_MASK | - CS42L42_ASP_RX_CH_RES_MASK, - (CS42L42_ASP_RX_CH_AP_LOW << - CS42L42_ASP_RX_CH_AP_SHIFT) | - (CS42L42_ASP_RX_CH_RES_32 << - CS42L42_ASP_RX_CH_RES_SHIFT)); - /* Channel 2 on high LRCLK, 32 bit */ - snd_soc_component_update_bits(component, - CS42L42_ASP_RX_DAI0_CH2_AP_RES, - CS42L42_ASP_RX_CH_AP_MASK | - CS42L42_ASP_RX_CH_RES_MASK, - (CS42L42_ASP_RX_CH_AP_HI << - CS42L42_ASP_RX_CH_AP_SHIFT) | - (CS42L42_ASP_RX_CH_RES_32 << - CS42L42_ASP_RX_CH_RES_SHIFT)); if (pll_ratio_table[i].mclk_src_sel == 0) { /* Pass the clock straight through */ snd_soc_component_update_bits(component, @@ -824,14 +806,29 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, { struct snd_soc_component *component = dai->component; struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); - int retval; + unsigned int width = (params_width(params) / 8) - 1; + unsigned int val = 0; cs42l42->srate = params_rate(params); - cs42l42->swidth = params_width(params); - retval = cs42l42_pll_config(component); + switch(substream->stream) { + case SNDRV_PCM_STREAM_PLAYBACK: + val |= width << CS42L42_ASP_RX_CH_RES_SHIFT; + /* channel 1 on low LRCLK */ + snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH1_AP_RES, + CS42L42_ASP_RX_CH_AP_MASK | + CS42L42_ASP_RX_CH_RES_MASK, val); + /* Channel 2 on high LRCLK */ + val |= CS42L42_ASP_RX_CH_AP_HI << CS42L42_ASP_RX_CH_AP_SHIFT; + snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH2_AP_RES, + CS42L42_ASP_RX_CH_AP_MASK | + CS42L42_ASP_RX_CH_RES_MASK, val); + break; + default: + break; + } - return retval; + return cs42l42_pll_config(component); } static int cs42l42_set_sysclk(struct snd_soc_dai *dai, @@ -896,9 +893,9 @@ static int cs42l42_mute(struct snd_soc_dai *dai, int mute, int direction) return 0; } -#define CS42L42_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S18_3LE | \ - SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S24_LE | \ - SNDRV_PCM_FMTBIT_S32_LE) +#define CS42L42_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S24_LE |\ + SNDRV_PCM_FMTBIT_S32_LE ) static const struct snd_soc_dai_ops cs42l42_ops = { diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 1f0d67c95a9ad..9b017b76828a4 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -757,7 +757,6 @@ struct cs42l42_private { struct completion pdn_done; u32 sclk; u32 srate; - u32 swidth; u8 plug_state; u8 hs_type; u8 ts_inv; -- GitLab From 72d904763ae6a8576e7ad034f9da4f0e3c44bf24 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 5 Mar 2021 17:34:30 +0000 Subject: [PATCH 494/839] ASoC: cs42l42: Fix mixer volume control The minimum value is 0x3f (-63dB), which also is mute Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210305173442.195740-4-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 4f9ad95479292..d5078ce79fadd 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -401,7 +401,7 @@ static const struct regmap_config cs42l42_regmap = { }; static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false); -static DECLARE_TLV_DB_SCALE(mixer_tlv, -6200, 100, false); +static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true); static const char * const cs42l42_hpf_freq_text[] = { "1.86Hz", "120Hz", "235Hz", "466Hz" @@ -458,7 +458,7 @@ static const struct snd_kcontrol_new cs42l42_snd_controls[] = { CS42L42_DAC_HPF_EN_SHIFT, true, false), SOC_DOUBLE_R_TLV("Mixer Volume", CS42L42_MIXER_CHA_VOL, CS42L42_MIXER_CHB_VOL, CS42L42_MIXER_CH_VOL_SHIFT, - 0x3e, 1, mixer_tlv) + 0x3f, 1, mixer_tlv) }; static int cs42l42_hpdrv_evt(struct snd_soc_dapm_widget *w, -- GitLab From 15013240fcf791691f99c884802099db34c099b9 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 5 Mar 2021 17:34:31 +0000 Subject: [PATCH 495/839] ASoC: cs42l42: Don't enable/disable regulator at Bias Level dev_pm_ops already enable/disable the codec if not in use Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210305173442.195740-5-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index d5078ce79fadd..eee3fc3200308 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -511,43 +511,6 @@ static const struct snd_soc_dapm_route cs42l42_audio_map[] = { {"HP", NULL, "HPDRV"} }; -static int cs42l42_set_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) -{ - struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); - int ret; - - switch (level) { - case SND_SOC_BIAS_ON: - break; - case SND_SOC_BIAS_PREPARE: - break; - case SND_SOC_BIAS_STANDBY: - if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) { - regcache_cache_only(cs42l42->regmap, false); - regcache_sync(cs42l42->regmap); - ret = regulator_bulk_enable( - ARRAY_SIZE(cs42l42->supplies), - cs42l42->supplies); - if (ret != 0) { - dev_err(component->dev, - "Failed to enable regulators: %d\n", - ret); - return ret; - } - } - break; - case SND_SOC_BIAS_OFF: - - regcache_cache_only(cs42l42->regmap, true); - regulator_bulk_disable(ARRAY_SIZE(cs42l42->supplies), - cs42l42->supplies); - break; - } - - return 0; -} - static int cs42l42_component_probe(struct snd_soc_component *component) { struct cs42l42_private *cs42l42 = @@ -560,7 +523,6 @@ static int cs42l42_component_probe(struct snd_soc_component *component) static const struct snd_soc_component_driver soc_component_dev_cs42l42 = { .probe = cs42l42_component_probe, - .set_bias_level = cs42l42_set_bias_level, .dapm_widgets = cs42l42_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(cs42l42_dapm_widgets), .dapm_routes = cs42l42_audio_map, -- GitLab From 19325cfea04446bc79b36bffd4978af15f46a00e Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Fri, 5 Mar 2021 17:34:32 +0000 Subject: [PATCH 496/839] ASoC: cs42l42: Always wait at least 3ms after reset This delay is part of the power-up sequence defined in the datasheet. A runtime_resume is a power-up so must also include the delay. Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210305173442.195740-6-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 3 ++- sound/soc/codecs/cs42l42.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index eee3fc3200308..811b7b1c9732e 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -1756,7 +1756,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, dev_dbg(&i2c_client->dev, "Found reset GPIO\n"); gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); } - mdelay(3); + usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); /* Request IRQ */ ret = devm_request_threaded_irq(&i2c_client->dev, @@ -1881,6 +1881,7 @@ static int cs42l42_runtime_resume(struct device *dev) } gpiod_set_value_cansleep(cs42l42->reset_gpio, 1); + usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2); regcache_cache_only(cs42l42->regmap, false); regcache_sync(cs42l42->regmap); diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 9b017b76828a4..866d7c873e3c9 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -740,6 +740,7 @@ #define CS42L42_FRAC2_VAL(val) (((val) & 0xff0000) >> 16) #define CS42L42_NUM_SUPPLIES 5 +#define CS42L42_BOOT_TIME_US 3000 static const char *const cs42l42_supply_names[CS42L42_NUM_SUPPLIES] = { "VA", -- GitLab From 5346f0e80b7160c91fb599d4545fd12560c286ed Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 4 Mar 2021 16:56:46 -0500 Subject: [PATCH 497/839] ASoC: codecs: lpass-va-macro: mute/unmute all active decimators An interface can have multiple decimators enabled, so loop over all active decimators. Otherwise only one channel will be unmuted, and other channels will be zero. This fixes recording from dual DMIC as a single two channel stream. Also remove the now unused "active_decimator" field. Fixes: 908e6b1df26e ("ASoC: codecs: lpass-va-macro: Add support to VA Macro") Signed-off-by: Jonathan Marek Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210304215646.17956-1-jonathan@marek.ca Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-va-macro.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 91e6890d6efcb..3d6976a3d9e42 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -189,7 +189,6 @@ struct va_macro { struct device *dev; unsigned long active_ch_mask[VA_MACRO_MAX_DAIS]; unsigned long active_ch_cnt[VA_MACRO_MAX_DAIS]; - unsigned long active_decimator[VA_MACRO_MAX_DAIS]; u16 dmic_clk_div; int dec_mode[VA_MACRO_NUM_DECIMATORS]; @@ -549,11 +548,9 @@ static int va_macro_tx_mixer_put(struct snd_kcontrol *kcontrol, if (enable) { set_bit(dec_id, &va->active_ch_mask[dai_id]); va->active_ch_cnt[dai_id]++; - va->active_decimator[dai_id] = dec_id; } else { clear_bit(dec_id, &va->active_ch_mask[dai_id]); va->active_ch_cnt[dai_id]--; - va->active_decimator[dai_id] = -1; } snd_soc_dapm_mixer_update_power(widget->dapm, kcontrol, enable, update); @@ -880,18 +877,19 @@ static int va_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream) struct va_macro *va = snd_soc_component_get_drvdata(component); u16 tx_vol_ctl_reg, decimator; - decimator = va->active_decimator[dai->id]; - - tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL + - VA_MACRO_TX_PATH_OFFSET * decimator; - if (mute) - snd_soc_component_update_bits(component, tx_vol_ctl_reg, - CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, - CDC_VA_TX_PATH_PGA_MUTE_EN); - else - snd_soc_component_update_bits(component, tx_vol_ctl_reg, - CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, - CDC_VA_TX_PATH_PGA_MUTE_DISABLE); + for_each_set_bit(decimator, &va->active_ch_mask[dai->id], + VA_MACRO_DEC_MAX) { + tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL + + VA_MACRO_TX_PATH_OFFSET * decimator; + if (mute) + snd_soc_component_update_bits(component, tx_vol_ctl_reg, + CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, + CDC_VA_TX_PATH_PGA_MUTE_EN); + else + snd_soc_component_update_bits(component, tx_vol_ctl_reg, + CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, + CDC_VA_TX_PATH_PGA_MUTE_DISABLE); + } return 0; } -- GitLab From e4b8b7c916038c1ffcba2c4ce92d5523c4cc2f46 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 4 Mar 2021 19:50:48 -0500 Subject: [PATCH 498/839] ASoC: codecs: lpass-wsa-macro: fix RX MIX input controls Attempting to use the RX MIX path at 48kHz plays at 96kHz, because these controls are incorrectly toggling the first bit of the register, which is part of the FS_RATE field. Fix the problem by using the same method used by the "WSA RX_MIX EC0_MUX" control, which is to use SND_SOC_NOPM as the register and use an enum in the shift field instead. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Signed-off-by: Jonathan Marek Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210305005049.24726-1-jonathan@marek.ca Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 5ebcd935ba898..9ca49a165f693 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1211,14 +1211,16 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - u16 gain_reg; + u16 path_reg, gain_reg; int val; - switch (w->reg) { - case CDC_WSA_RX0_RX_PATH_MIX_CTL: + switch (w->shift) { + case WSA_MACRO_RX_MIX0: + path_reg = CDC_WSA_RX0_RX_PATH_MIX_CTL; gain_reg = CDC_WSA_RX0_RX_VOL_MIX_CTL; break; - case CDC_WSA_RX1_RX_PATH_MIX_CTL: + case WSA_MACRO_RX_MIX1: + path_reg = CDC_WSA_RX1_RX_PATH_MIX_CTL; gain_reg = CDC_WSA_RX1_RX_VOL_MIX_CTL; break; default: @@ -1231,7 +1233,7 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w, snd_soc_component_write(component, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: - snd_soc_component_update_bits(component, w->reg, + snd_soc_component_update_bits(component, path_reg, CDC_WSA_RX_PATH_MIX_CLK_EN_MASK, CDC_WSA_RX_PATH_MIX_CLK_DISABLE); break; @@ -2068,14 +2070,14 @@ static const struct snd_soc_dapm_widget wsa_macro_dapm_widgets[] = { SND_SOC_DAPM_MUX("WSA_RX0 INP0", SND_SOC_NOPM, 0, 0, &rx0_prim_inp0_mux), SND_SOC_DAPM_MUX("WSA_RX0 INP1", SND_SOC_NOPM, 0, 0, &rx0_prim_inp1_mux), SND_SOC_DAPM_MUX("WSA_RX0 INP2", SND_SOC_NOPM, 0, 0, &rx0_prim_inp2_mux), - SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", CDC_WSA_RX0_RX_PATH_MIX_CTL, - 0, 0, &rx0_mix_mux, wsa_macro_enable_mix_path, + SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX0, + 0, &rx0_mix_mux, wsa_macro_enable_mix_path, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_MUX("WSA_RX1 INP0", SND_SOC_NOPM, 0, 0, &rx1_prim_inp0_mux), SND_SOC_DAPM_MUX("WSA_RX1 INP1", SND_SOC_NOPM, 0, 0, &rx1_prim_inp1_mux), SND_SOC_DAPM_MUX("WSA_RX1 INP2", SND_SOC_NOPM, 0, 0, &rx1_prim_inp2_mux), - SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", CDC_WSA_RX1_RX_PATH_MIX_CTL, - 0, 0, &rx1_mix_mux, wsa_macro_enable_mix_path, + SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX1, + 0, &rx1_mix_mux, wsa_macro_enable_mix_path, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_MIXER_E("WSA_RX INT0 MIX", SND_SOC_NOPM, 0, 0, NULL, 0, -- GitLab From ac101985cad3912e484295bd0ec22d117fee9f17 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 7 Mar 2021 16:23:37 +0000 Subject: [PATCH 499/839] ASoC: remove remnants of sirf prima/atlas audio codec In 61fbeb5 the sirf prima/atlas drivers were removed. This cleans up a stray header and some Kconfig entries for the codec that were missed in the process. Fixes: 61fbeb5dcb3d (ASoC: remove sirf prima/atlas drivers) Signed-off-by: Peter Robinson Cc: Arnd Bergmann Cc: Mark Brown Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210307162338.1160604-1-pbrobinson@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 -- sound/soc/codecs/sirf-audio-codec.h | 124 ---------------------------- 2 files changed, 129 deletions(-) delete mode 100644 sound/soc/codecs/sirf-audio-codec.h diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index e4cf14e66a51b..1c87b42606c9f 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -186,7 +186,6 @@ config SND_SOC_ALL_CODECS imply SND_SOC_SI476X imply SND_SOC_SIMPLE_AMPLIFIER imply SND_SOC_SIMPLE_MUX - imply SND_SOC_SIRF_AUDIO_CODEC imply SND_SOC_SPDIF imply SND_SOC_SSM2305 imply SND_SOC_SSM2518 @@ -1279,10 +1278,6 @@ config SND_SOC_SIMPLE_MUX tristate "Simple Audio Mux" select GPIOLIB -config SND_SOC_SIRF_AUDIO_CODEC - tristate "SiRF SoC internal audio codec" - select REGMAP_MMIO - config SND_SOC_SPDIF tristate "S/PDIF CODEC" diff --git a/sound/soc/codecs/sirf-audio-codec.h b/sound/soc/codecs/sirf-audio-codec.h deleted file mode 100644 index a7fe2680f4c7e..0000000000000 --- a/sound/soc/codecs/sirf-audio-codec.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * SiRF inner codec controllers define - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#ifndef _SIRF_AUDIO_CODEC_H -#define _SIRF_AUDIO_CODEC_H - - -#define AUDIO_IC_CODEC_PWR (0x00E0) -#define AUDIO_IC_CODEC_CTRL0 (0x00E4) -#define AUDIO_IC_CODEC_CTRL1 (0x00E8) -#define AUDIO_IC_CODEC_CTRL2 (0x00EC) -#define AUDIO_IC_CODEC_CTRL3 (0x00F0) - -#define MICBIASEN (1 << 3) - -#define IC_RDACEN (1 << 0) -#define IC_LDACEN (1 << 1) -#define IC_HSREN (1 << 2) -#define IC_HSLEN (1 << 3) -#define IC_SPEN (1 << 4) -#define IC_CPEN (1 << 5) - -#define IC_HPRSELR (1 << 6) -#define IC_HPLSELR (1 << 7) -#define IC_HPRSELL (1 << 8) -#define IC_HPLSELL (1 << 9) -#define IC_SPSELR (1 << 10) -#define IC_SPSELL (1 << 11) - -#define IC_MONOR (1 << 12) -#define IC_MONOL (1 << 13) - -#define IC_RXOSRSEL (1 << 28) -#define IC_CPFREQ (1 << 29) -#define IC_HSINVEN (1 << 30) - -#define IC_MICINREN (1 << 0) -#define IC_MICINLEN (1 << 1) -#define IC_MICIN1SEL (1 << 2) -#define IC_MICIN2SEL (1 << 3) -#define IC_MICDIFSEL (1 << 4) -#define IC_LINEIN1SEL (1 << 5) -#define IC_LINEIN2SEL (1 << 6) -#define IC_RADCEN (1 << 7) -#define IC_LADCEN (1 << 8) -#define IC_ALM (1 << 9) - -#define IC_DIGMICEN (1 << 22) -#define IC_DIGMICFREQ (1 << 23) -#define IC_ADC14B_12 (1 << 24) -#define IC_FIRDAC_HSL_EN (1 << 25) -#define IC_FIRDAC_HSR_EN (1 << 26) -#define IC_FIRDAC_LOUT_EN (1 << 27) -#define IC_POR (1 << 28) -#define IC_CODEC_CLK_EN (1 << 29) -#define IC_HP_3DB_BOOST (1 << 30) - -#define IC_ADC_LEFT_GAIN_SHIFT 16 -#define IC_ADC_RIGHT_GAIN_SHIFT 10 -#define IC_ADC_GAIN_MASK 0x3F -#define IC_MIC_MAX_GAIN 0x39 - -#define IC_RXPGAR_MASK 0x3F -#define IC_RXPGAR_SHIFT 14 -#define IC_RXPGAL_MASK 0x3F -#define IC_RXPGAL_SHIFT 21 -#define IC_RXPGAR 0x7B -#define IC_RXPGAL 0x7B - -#define AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK 0x3F -#define AUDIO_PORT_TX_FIFO_SC_OFFSET 0 -#define AUDIO_PORT_TX_FIFO_LC_OFFSET 10 -#define AUDIO_PORT_TX_FIFO_HC_OFFSET 20 - -#define TX_FIFO_SC(x) (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_TX_FIFO_SC_OFFSET) -#define TX_FIFO_LC(x) (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_TX_FIFO_LC_OFFSET) -#define TX_FIFO_HC(x) (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_TX_FIFO_HC_OFFSET) - -#define AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK 0x0F -#define AUDIO_PORT_RX_FIFO_SC_OFFSET 0 -#define AUDIO_PORT_RX_FIFO_LC_OFFSET 10 -#define AUDIO_PORT_RX_FIFO_HC_OFFSET 20 - -#define RX_FIFO_SC(x) (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_RX_FIFO_SC_OFFSET) -#define RX_FIFO_LC(x) (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_RX_FIFO_LC_OFFSET) -#define RX_FIFO_HC(x) (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \ - << AUDIO_PORT_RX_FIFO_HC_OFFSET) -#define AUDIO_PORT_IC_CODEC_TX_CTRL (0x00F4) -#define AUDIO_PORT_IC_CODEC_RX_CTRL (0x00F8) - -#define AUDIO_PORT_IC_TXFIFO_OP (0x00FC) -#define AUDIO_PORT_IC_TXFIFO_LEV_CHK (0x0100) -#define AUDIO_PORT_IC_TXFIFO_STS (0x0104) -#define AUDIO_PORT_IC_TXFIFO_INT (0x0108) -#define AUDIO_PORT_IC_TXFIFO_INT_MSK (0x010C) - -#define AUDIO_PORT_IC_RXFIFO_OP (0x0110) -#define AUDIO_PORT_IC_RXFIFO_LEV_CHK (0x0114) -#define AUDIO_PORT_IC_RXFIFO_STS (0x0118) -#define AUDIO_PORT_IC_RXFIFO_INT (0x011C) -#define AUDIO_PORT_IC_RXFIFO_INT_MSK (0x0120) - -#define AUDIO_FIFO_START (1 << 0) -#define AUDIO_FIFO_RESET (1 << 1) - -#define AUDIO_FIFO_FULL (1 << 0) -#define AUDIO_FIFO_EMPTY (1 << 1) -#define AUDIO_FIFO_OFLOW (1 << 2) -#define AUDIO_FIFO_UFLOW (1 << 3) - -#define IC_TX_ENABLE (0x03) -#define IC_RX_ENABLE_MONO (0x01) -#define IC_RX_ENABLE_STEREO (0x03) - -#endif /*__SIRF_AUDIO_CODEC_H*/ -- GitLab From 1c668e1c0a0f74472469cd514f40c9012b324c31 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Mar 2021 14:21:27 +0000 Subject: [PATCH 500/839] ASoC: qcom: sdm845: Fix array out of bounds access Static analysis Coverity had detected a potential array out-of-bounds write issue due to the fact that MAX AFE port Id was set to 16 instead of using AFE_PORT_MAX macro. Fix this by properly using AFE_PORT_MAX macro. Fixes: 1b93a8843147 ("ASoC: qcom: sdm845: handle soundwire stream") Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210309142129.14182-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/sdm845.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 6c2760e27ea6f..1e2c2d0902ea9 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -33,12 +33,12 @@ struct sdm845_snd_data { struct snd_soc_jack jack; bool jack_setup; - bool stream_prepared[SLIM_MAX_RX_PORTS]; + bool stream_prepared[AFE_PORT_MAX]; struct snd_soc_card *card; uint32_t pri_mi2s_clk_count; uint32_t sec_mi2s_clk_count; uint32_t quat_tdm_clk_count; - struct sdw_stream_runtime *sruntime[SLIM_MAX_RX_PORTS]; + struct sdw_stream_runtime *sruntime[AFE_PORT_MAX]; }; static unsigned int tdm_slot_offset[8] = {0, 4, 8, 12, 16, 20, 24, 28}; -- GitLab From 4800fe6ea1022eb240215b1743d2541adad8efc7 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Mar 2021 14:21:28 +0000 Subject: [PATCH 501/839] ASoC: qcom: sdm845: Fix array out of range on rx slim channels WCD934x has only 13 RX SLIM ports however we are setting it as 16 in set_channel_map, this will lead to array out of bounds error! Orignally caught by enabling USBAN array out of bounds check: Fixes: 5caf64c633a3 ("ASoC: qcom: sdm845: add support to DB845c and Lenovo Yoga") Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210309142129.14182-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/sdm845.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 1e2c2d0902ea9..153e9b2de0b53 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -27,7 +27,7 @@ #define SPK_TDM_RX_MASK 0x03 #define NUM_TDM_SLOTS 8 #define SLIM_MAX_TX_PORTS 16 -#define SLIM_MAX_RX_PORTS 16 +#define SLIM_MAX_RX_PORTS 13 #define WCD934X_DEFAULT_MCLK_RATE 9600000 struct sdm845_snd_data { -- GitLab From 3bb4852d598f0275ed5996a059df55be7318ac2f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Mar 2021 14:21:29 +0000 Subject: [PATCH 502/839] ASoC: codecs: wcd934x: add a sanity check in set channel map set channel map can be passed with a channel maps, however if the number of channels that are passed are more than the actual supported channels then we would be accessing array out of bounds. So add a sanity check to validate these numbers! Fixes: a61f3b4f476e ("ASoC: wcd934x: add support to wcd9340/wcd9341 codec") Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210309142129.14182-4-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 40f682f5dab8b..d18ae5e3ee809 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1873,6 +1873,12 @@ static int wcd934x_set_channel_map(struct snd_soc_dai *dai, wcd = snd_soc_component_get_drvdata(dai->component); + if (tx_num > WCD934X_TX_MAX || rx_num > WCD934X_RX_MAX) { + dev_err(wcd->dev, "Invalid tx %d or rx %d channel count\n", + tx_num, rx_num); + return -EINVAL; + } + if (!tx_slot || !rx_slot) { dev_err(wcd->dev, "Invalid tx_slot=%p, rx_slot=%p\n", tx_slot, rx_slot); -- GitLab From fd8299181995093948ec6ca75432e797b4a39143 Mon Sep 17 00:00:00 2001 From: Pan Xiuli Date: Mon, 8 Mar 2021 18:41:27 -0600 Subject: [PATCH 503/839] ASoC: SOF: intel: fix wrong poll bits in dsp power down The ADSPCS_SPA is Set Power Active bit. To check if DSP is powered down, we need to check ADSPCS_CPA, the Current Power Active bit. Fixes: 747503b1813a3 ("ASoC: SOF: Intel: Add Intel specific HDA DSP HW operations") Reviewed-by: Rander Wang Reviewed-by: Ranjani Sridharan Signed-off-by: Pan Xiuli Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210309004127.4940-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 5788fe3569605..c3b757cf01a04 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -207,7 +207,7 @@ int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask) ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS, adspcs, - !(adspcs & HDA_DSP_ADSPCS_SPA_MASK(core_mask)), + !(adspcs & HDA_DSP_ADSPCS_CPA_MASK(core_mask)), HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_PD_TIMEOUT * USEC_PER_MSEC); if (ret < 0) -- GitLab From 9e77d96b8e2724ed00380189f7b0ded61113b39f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 6 Mar 2021 17:18:31 +0100 Subject: [PATCH 504/839] xen/events: reset affinity of 2-level event when tearing it down When creating a new event channel with 2-level events the affinity needs to be reset initially in order to avoid using an old affinity from earlier usage of the event channel port. So when tearing an event channel down reset all affinity bits. The same applies to the affinity when onlining a vcpu: all old affinity settings for this vcpu must be reset. As percpu events get initialized before the percpu event channel hook is called, resetting of the affinities happens after offlining a vcpu (this is working, as initial percpu memory is zeroed out). Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Link: https://lore.kernel.org/r/20210306161833.4552-2-jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_2l.c | 15 +++++++++++++++ drivers/xen/events/events_base.c | 1 + drivers/xen/events/events_internal.h | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index da87f3a1e351b..a7f413c5c1908 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void) return EVTCHN_2L_NR_CHANNELS; } +static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu) +{ + clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); +} + static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu) { @@ -355,9 +360,18 @@ static void evtchn_2l_resume(void) EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); } +static int evtchn_2l_percpu_deinit(unsigned int cpu) +{ + memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) * + EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); + + return 0; +} + static const struct evtchn_ops evtchn_ops_2l = { .max_channels = evtchn_2l_max_channels, .nr_channels = evtchn_2l_max_channels, + .remove = evtchn_2l_remove, .bind_to_cpu = evtchn_2l_bind_to_cpu, .clear_pending = evtchn_2l_clear_pending, .set_pending = evtchn_2l_set_pending, @@ -367,6 +381,7 @@ static const struct evtchn_ops evtchn_ops_2l = { .unmask = evtchn_2l_unmask, .handle_events = evtchn_2l_handle_events, .resume = evtchn_2l_resume, + .percpu_deinit = evtchn_2l_percpu_deinit, }; void __init xen_evtchn_2l_init(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index adb7260e94b23..7e23808892a72 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -377,6 +377,7 @@ static int xen_irq_info_pirq_setup(unsigned irq, static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); + xen_evtchn_port_remove(info->evtchn, info->cpu); info->evtchn = 0; channels_on_cpu_dec(info); } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 0a97c0549db76..18a4090d07099 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -14,6 +14,7 @@ struct evtchn_ops { unsigned (*nr_channels)(void); int (*setup)(evtchn_port_t port); + void (*remove)(evtchn_port_t port, unsigned int cpu); void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu); @@ -54,6 +55,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn) return 0; } +static inline void xen_evtchn_port_remove(evtchn_port_t evtchn, + unsigned int cpu) +{ + if (evtchn_ops->remove) + evtchn_ops->remove(evtchn, cpu); +} + static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu) -- GitLab From 8891123f9cbb9c1ee531e5a87fa116f0af685c48 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 17:30:11 +0300 Subject: [PATCH 505/839] software node: Fix node registration Software node can not be registered before its parent. Fixes: 80488a6b1d3c ("software node: Add support for static node descriptors") Cc: 5.10+ # 5.10+ Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 37179a8b1ceba..74db8c971db74 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -938,6 +938,9 @@ int software_node_register(const struct software_node *node) if (software_node_to_swnode(node)) return -EEXIST; + if (node->parent && !parent) + return -EINVAL; + return PTR_ERR_OR_ZERO(swnode_register(node, parent, 0)); } EXPORT_SYMBOL_GPL(software_node_register); -- GitLab From 2a92c90f2ecca4475d6050f2f938a1755a8954cc Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 17:30:12 +0300 Subject: [PATCH 506/839] software node: Fix device_add_software_node() The function device_add_software_node() was meant to register the node supplied to it, but only if that node wasn't already registered. Right now the function attempts to always register the node. That will cause a failure with nodes that are already registered. Fixing that by incrementing the reference count of the nodes that have already been registered, and only registering the new nodes. Also, clarifying the behaviour in the function documentation. Fixes: e68d0119e328 ("software node: Introduce device_add_software_node()") Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 26 +++++++++++++++++--------- include/linux/property.h | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 74db8c971db74..fa3719ef80e4d 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1005,25 +1005,33 @@ EXPORT_SYMBOL_GPL(fwnode_remove_software_node); /** * device_add_software_node - Assign software node to a device * @dev: The device the software node is meant for. - * @swnode: The software node. + * @node: The software node. * - * This function will register @swnode and make it the secondary firmware node - * pointer of @dev. If @dev has no primary node, then @swnode will become the primary - * node. + * This function will make @node the secondary firmware node pointer of @dev. If + * @dev has no primary node, then @node will become the primary node. The + * function will register @node automatically if it wasn't already registered. */ -int device_add_software_node(struct device *dev, const struct software_node *swnode) +int device_add_software_node(struct device *dev, const struct software_node *node) { + struct swnode *swnode; int ret; /* Only one software node per device. */ if (dev_to_swnode(dev)) return -EBUSY; - ret = software_node_register(swnode); - if (ret) - return ret; + swnode = software_node_to_swnode(node); + if (swnode) { + kobject_get(&swnode->kobj); + } else { + ret = software_node_register(node); + if (ret) + return ret; + + swnode = software_node_to_swnode(node); + } - set_secondary_fwnode(dev, software_node_fwnode(swnode)); + set_secondary_fwnode(dev, &swnode->fwnode); return 0; } diff --git a/include/linux/property.h b/include/linux/property.h index dafccfce02624..dd4687b562393 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -488,7 +488,7 @@ fwnode_create_software_node(const struct property_entry *properties, const struct fwnode_handle *parent); void fwnode_remove_software_node(struct fwnode_handle *fwnode); -int device_add_software_node(struct device *dev, const struct software_node *swnode); +int device_add_software_node(struct device *dev, const struct software_node *node); void device_remove_software_node(struct device *dev); int device_create_managed_software_node(struct device *dev, -- GitLab From 05962f95f9ac7af25fea037ef51b37c0eccb5590 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 6 Mar 2021 13:58:48 -0700 Subject: [PATCH 507/839] io_uring: SQPOLL parking fixes We keep running into weird dependency issues between the sqd lock and the parking state. Disentangle the SQPOLL thread from the last bits of the kthread parking inheritance, and just replace the parking state, and two associated locks, with a single rw mutex. The SQPOLL thread keeps the mutex for read all the time, except if someone has marked us needing to park. Then we drop/re-acquire and try again. This greatly simplifies the parking state machine (by just getting rid of it), and makes it a lot more obvious how it works - if you need to modify the ctx list, then you simply park the thread which will grab the lock for writing. Fold in fix from Hillf Danton on not setting STOP on a fatal signal. Fixes: e54945ae947f ("io_uring: SQPOLL stop error handling fixes") Signed-off-by: Jens Axboe --- fs/io_uring.c | 133 +++++++++++++------------------------------------- 1 file changed, 34 insertions(+), 99 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7cf96be691d89..2a3542b487ff0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -258,12 +258,11 @@ enum { struct io_sq_data { refcount_t refs; - struct mutex lock; + struct rw_semaphore rw_lock; /* ctx's that are using this sqd */ struct list_head ctx_list; struct list_head ctx_new_list; - struct mutex ctx_lock; struct task_struct *thread; struct wait_queue_head wait; @@ -274,7 +273,6 @@ struct io_sq_data { unsigned long state; struct completion startup; - struct completion parked; struct completion exited; }; @@ -6638,45 +6636,6 @@ static void io_sqd_init_new(struct io_sq_data *sqd) io_sqd_update_thread_idle(sqd); } -static bool io_sq_thread_should_stop(struct io_sq_data *sqd) -{ - return test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); -} - -static bool io_sq_thread_should_park(struct io_sq_data *sqd) -{ - return test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); -} - -static void io_sq_thread_parkme(struct io_sq_data *sqd) -{ - for (;;) { - /* - * TASK_PARKED is a special state; we must serialize against - * possible pending wakeups to avoid store-store collisions on - * task->state. - * - * Such a collision might possibly result in the task state - * changin from TASK_PARKED and us failing the - * wait_task_inactive() in kthread_park(). - */ - set_special_state(TASK_PARKED); - if (!test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) - break; - - /* - * Thread is going to call schedule(), do not preempt it, - * or the caller of kthread_park() may spend more time in - * wait_task_inactive(). - */ - preempt_disable(); - complete(&sqd->parked); - schedule_preempt_disabled(); - preempt_enable(); - } - __set_current_state(TASK_RUNNING); -} - static int io_sq_thread(void *data) { struct io_sq_data *sqd = data; @@ -6697,17 +6656,16 @@ static int io_sq_thread(void *data) wait_for_completion(&sqd->startup); - while (!io_sq_thread_should_stop(sqd)) { + down_read(&sqd->rw_lock); + + while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { int ret; bool cap_entries, sqt_spin, needs_sched; - /* - * Any changes to the sqd lists are synchronized through the - * thread parking. This synchronizes the thread vs users, - * the users are synchronized on the sqd->ctx_lock. - */ - if (io_sq_thread_should_park(sqd)) { - io_sq_thread_parkme(sqd); + if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) { + up_read(&sqd->rw_lock); + cond_resched(); + down_read(&sqd->rw_lock); continue; } if (unlikely(!list_empty(&sqd->ctx_new_list))) { @@ -6752,12 +6710,14 @@ static int io_sq_thread(void *data) } } - if (needs_sched && !io_sq_thread_should_park(sqd)) { + if (needs_sched && !test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) { list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); + up_read(&sqd->rw_lock); schedule(); try_to_freeze(); + down_read(&sqd->rw_lock); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); } @@ -6768,28 +6728,16 @@ static int io_sq_thread(void *data) list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_uring_cancel_sqpoll(ctx); + up_read(&sqd->rw_lock); io_run_task_work(); - /* - * Ensure that we park properly if racing with someone trying to park - * while we're exiting. If we fail to grab the lock, check park and - * park if necessary. The ordering with the park bit and the lock - * ensures that we catch this reliably. - */ - if (!mutex_trylock(&sqd->lock)) { - if (io_sq_thread_should_park(sqd)) - io_sq_thread_parkme(sqd); - mutex_lock(&sqd->lock); - } - + down_write(&sqd->rw_lock); sqd->thread = NULL; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); - } - + up_write(&sqd->rw_lock); complete(&sqd->exited); - mutex_unlock(&sqd->lock); do_exit(0); } @@ -7088,44 +7036,40 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) } static void io_sq_thread_unpark(struct io_sq_data *sqd) - __releases(&sqd->lock) + __releases(&sqd->rw_lock) { if (sqd->thread == current) return; clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - if (sqd->thread) - wake_up_state(sqd->thread, TASK_PARKED); - mutex_unlock(&sqd->lock); + up_write(&sqd->rw_lock); } static void io_sq_thread_park(struct io_sq_data *sqd) - __acquires(&sqd->lock) + __acquires(&sqd->rw_lock) { if (sqd->thread == current) return; set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - mutex_lock(&sqd->lock); - if (sqd->thread) { + down_write(&sqd->rw_lock); + /* set again for consistency, in case concurrent parks are happening */ + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + if (sqd->thread) wake_up_process(sqd->thread); - wait_for_completion(&sqd->parked); - } } static void io_sq_thread_stop(struct io_sq_data *sqd) { if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) return; - mutex_lock(&sqd->lock); - if (sqd->thread) { - set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)); - wake_up_process(sqd->thread); - mutex_unlock(&sqd->lock); - wait_for_completion(&sqd->exited); - WARN_ON_ONCE(sqd->thread); - } else { - mutex_unlock(&sqd->lock); + down_write(&sqd->rw_lock); + if (!sqd->thread) { + up_write(&sqd->rw_lock); + return; } + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + wake_up_process(sqd->thread); + up_write(&sqd->rw_lock); + wait_for_completion(&sqd->exited); } static void io_put_sq_data(struct io_sq_data *sqd) @@ -7142,18 +7086,13 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx) if (sqd) { complete(&sqd->startup); - if (sqd->thread) { + if (sqd->thread) wait_for_completion(&ctx->sq_thread_comp); - io_sq_thread_park(sqd); - } - mutex_lock(&sqd->ctx_lock); + io_sq_thread_park(sqd); list_del(&ctx->sqd_list); io_sqd_update_thread_idle(sqd); - mutex_unlock(&sqd->ctx_lock); - - if (sqd->thread) - io_sq_thread_unpark(sqd); + io_sq_thread_unpark(sqd); io_put_sq_data(sqd); ctx->sq_data = NULL; @@ -7202,11 +7141,9 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) refcount_set(&sqd->refs, 1); INIT_LIST_HEAD(&sqd->ctx_list); INIT_LIST_HEAD(&sqd->ctx_new_list); - mutex_init(&sqd->ctx_lock); - mutex_init(&sqd->lock); + init_rwsem(&sqd->rw_lock); init_waitqueue_head(&sqd->wait); init_completion(&sqd->startup); - init_completion(&sqd->parked); init_completion(&sqd->exited); return sqd; } @@ -7880,9 +7817,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ctx->sq_creds = get_current_cred(); ctx->sq_data = sqd; io_sq_thread_park(sqd); - mutex_lock(&sqd->ctx_lock); list_add(&ctx->sqd_list, &sqd->ctx_new_list); - mutex_unlock(&sqd->ctx_lock); io_sq_thread_unpark(sqd); ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); -- GitLab From f458dd8441e56d122ddf1d8e2af0b6ee62f52af9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Mar 2021 12:14:14 +0000 Subject: [PATCH 508/839] io_uring: fix unrelated ctx reqs cancellation io-wq now is per-task, so cancellations now should match against request's ctx. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2a3542b487ff0..d4f018f5838da 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5573,22 +5573,30 @@ add: return 0; } +struct io_cancel_data { + struct io_ring_ctx *ctx; + u64 user_data; +}; + static bool io_cancel_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct io_cancel_data *cd = data; - return req->user_data == (unsigned long) data; + return req->ctx == cd->ctx && req->user_data == cd->user_data; } -static int io_async_cancel_one(struct io_uring_task *tctx, void *sqe_addr) +static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data, + struct io_ring_ctx *ctx) { + struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, }; enum io_wq_cancel cancel_ret; int ret = 0; - if (!tctx->io_wq) + if (!tctx || !tctx->io_wq) return -ENOENT; - cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, sqe_addr, false); + cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false); switch (cancel_ret) { case IO_WQ_CANCEL_OK: ret = 0; @@ -5611,8 +5619,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx, unsigned long flags; int ret; - ret = io_async_cancel_one(req->task->io_uring, - (void *) (unsigned long) sqe_addr); + ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); if (ret != -ENOENT) { spin_lock_irqsave(&ctx->completion_lock, flags); goto done; -- GitLab From 0298ef969a110ca03654f0cea9b50e3f3b331acc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Mar 2021 13:20:57 +0000 Subject: [PATCH 509/839] io_uring: clean R_DISABLED startup mess There are enough of problems with IORING_SETUP_R_DISABLED, including the burden of checking and kicking off the SQO task all over the codebase -- for exit/cancel/etc. Rework it, always start the thread but don't do submit unless the flag is gone, that's much easier. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d4f018f5838da..3f6db813d670d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6606,7 +6606,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && + !(ctx->flags & IORING_SETUP_R_DISABLED)) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -7861,6 +7862,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, wake_up_new_task(tsk); if (ret) goto err; + complete(&sqd->startup); } else if (p->flags & IORING_SETUP_SQ_AFF) { /* Can't have SQ_AFF without SQPOLL */ ret = -EINVAL; @@ -7873,15 +7875,6 @@ err: return ret; } -static void io_sq_offload_start(struct io_ring_ctx *ctx) -{ - struct io_sq_data *sqd = ctx->sq_data; - - ctx->flags &= ~IORING_SETUP_R_DISABLED; - if (ctx->flags & IORING_SETUP_SQPOLL) - complete(&sqd->startup); -} - static inline void __io_unaccount_mem(struct user_struct *user, unsigned long nr_pages) { @@ -8742,11 +8735,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - /* never started, nothing to cancel */ - if (ctx->flags & IORING_SETUP_R_DISABLED) { - io_sq_offload_start(ctx); - return; - } io_sq_thread_park(ctx->sq_data); task = ctx->sq_data->thread; if (task) @@ -9449,9 +9437,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, if (ret) goto err; - if (!(p->flags & IORING_SETUP_R_DISABLED)) - io_sq_offload_start(ctx); - memset(&p->sq_off, 0, sizeof(p->sq_off)); p->sq_off.head = offsetof(struct io_rings, sq.head); p->sq_off.tail = offsetof(struct io_rings, sq.tail); @@ -9668,7 +9653,9 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (ctx->restrictions.registered) ctx->restricted = 1; - io_sq_offload_start(ctx); + ctx->flags &= ~IORING_SETUP_R_DISABLED; + if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) + wake_up(&ctx->sq_data->wait); return 0; } -- GitLab From 61cf93700fe6359552848ed5e3becba6cd760efa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Mar 2021 14:16:16 +0000 Subject: [PATCH 510/839] io_uring: Convert personality_idr to XArray You can't call idr_remove() from within a idr_for_each() callback, but you can call xa_erase() from an xa_for_each() loop, so switch the entire personality_idr from the IDR to the XArray. This manifests as a use-after-free as idr_for_each() attempts to walk the rest of the node after removing the last entry from it. Fixes: 071698e13ac6 ("io_uring: allow registering credentials") Cc: stable@vger.kernel.org # 5.6+ Reported-by: yangerkun Signed-off-by: Matthew Wilcox (Oracle) [Pavel: rebased (creds load was moved into io_init_req())] Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7ccff36e1375f2b0ebf73d957f037b43becc0dde.1615212806.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3f6db813d670d..84eb499368a4b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -406,7 +406,8 @@ struct io_ring_ctx { struct idr io_buffer_idr; - struct idr personality_idr; + struct xarray personalities; + u32 pers_next; struct { unsigned cached_cq_tail; @@ -1137,7 +1138,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) init_completion(&ctx->ref_comp); init_completion(&ctx->sq_thread_comp); idr_init(&ctx->io_buffer_idr); - idr_init(&ctx->personality_idr); + xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); spin_lock_init(&ctx->completion_lock); @@ -6337,7 +6338,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->work.list.next = NULL; personality = READ_ONCE(sqe->personality); if (personality) { - req->work.creds = idr_find(&ctx->personality_idr, personality); + req->work.creds = xa_load(&ctx->personalities, personality); if (!req->work.creds) return -EINVAL; get_cred(req->work.creds); @@ -8355,7 +8356,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); io_eventfd_unregister(ctx); io_destroy_buffers(ctx); - idr_destroy(&ctx->personality_idr); #if defined(CONFIG_UNIX) if (ctx->ring_sock) { @@ -8420,7 +8420,7 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) { const struct cred *creds; - creds = idr_remove(&ctx->personality_idr, id); + creds = xa_erase(&ctx->personalities, id); if (creds) { put_cred(creds); return 0; @@ -8429,14 +8429,6 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) return -EINVAL; } -static int io_remove_personalities(int id, void *p, void *data) -{ - struct io_ring_ctx *ctx = data; - - io_unregister_personality(ctx, id); - return 0; -} - static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) { struct callback_head *work, *next; @@ -8526,13 +8518,17 @@ static void io_ring_exit_work(struct work_struct *work) static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { + unsigned long index; + struct creds *creds; + mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) __io_cqring_overflow_flush(ctx, true, NULL, NULL); - idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); + xa_for_each(&ctx->personalities, index, creds) + io_unregister_personality(ctx, index); mutex_unlock(&ctx->uring_lock); io_kill_timeouts(ctx, NULL, NULL); @@ -9162,10 +9158,9 @@ out_fput: } #ifdef CONFIG_PROC_FS -static int io_uring_show_cred(int id, void *p, void *data) +static int io_uring_show_cred(struct seq_file *m, unsigned int id, + const struct cred *cred) { - const struct cred *cred = p; - struct seq_file *m = data; struct user_namespace *uns = seq_user_ns(m); struct group_info *gi; kernel_cap_t cap; @@ -9233,9 +9228,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, (unsigned int) buf->len); } - if (has_lock && !idr_is_empty(&ctx->personality_idr)) { + if (has_lock && !xa_empty(&ctx->personalities)) { + unsigned long index; + const struct cred *cred; + seq_printf(m, "Personalities:\n"); - idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); + xa_for_each(&ctx->personalities, index, cred) + io_uring_show_cred(m, index, cred); } seq_printf(m, "PollList:\n"); spin_lock_irq(&ctx->completion_lock); @@ -9564,14 +9563,16 @@ out: static int io_register_personality(struct io_ring_ctx *ctx) { const struct cred *creds; + u32 id; int ret; creds = get_current_cred(); - ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1, - USHRT_MAX, GFP_KERNEL); - if (ret < 0) - put_cred(creds); + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, + XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); + if (!ret) + return id; + put_cred(creds); return ret; } -- GitLab From cc20e3fec682700b673fcd286e6bef8e9da947e2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Mar 2021 09:34:43 -0700 Subject: [PATCH 511/839] io-wq: remove unused 'user' member of io_wq Previous patches killed the last user of this, now it's just a dead member in the struct. Get rid of it. Signed-off-by: Jens Axboe --- fs/io-wq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 1ab9324e602f5..c2e7031f6d096 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -110,7 +110,6 @@ struct io_wq { io_wq_work_fn *do_work; struct task_struct *manager; - struct user_struct *user; struct io_wq_hash *hash; -- GitLab From 97a73a0f9fbfb2be682fd037814576dbfa0e0da8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Mar 2021 17:30:54 +0000 Subject: [PATCH 512/839] io_uring: fix io_sq_offload_create error handling Don't set IO_SQ_THREAD_SHOULD_STOP when io_sq_offload_create() has failed on io_uring_alloc_task_context() but leave everything to io_sq_thread_finish(), because currently io_sq_thread_finish() hangs on trying to park it. That's great it stalls there, because otherwise the following io_sq_thread_stop() would be skipped on IO_SQ_THREAD_SHOULD_STOP check and the sqo would race for sqd with freeing ctx. A simple error injection gives something like this. [ 245.463955] INFO: task sqpoll-test-hang:523 blocked for more than 122 seconds. [ 245.463983] Call Trace: [ 245.463990] __schedule+0x36b/0x950 [ 245.464005] schedule+0x68/0xe0 [ 245.464013] schedule_timeout+0x209/0x2a0 [ 245.464032] wait_for_completion+0x8b/0xf0 [ 245.464043] io_sq_thread_finish+0x44/0x1a0 [ 245.464049] io_uring_setup+0x9ea/0xc80 [ 245.464058] __x64_sys_io_uring_setup+0x16/0x20 [ 245.464064] do_syscall_64+0x38/0x50 [ 245.464073] entry_SYSCALL_64_after_hwframe+0x44/0xae Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 84eb499368a4b..3299807894ec0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7856,10 +7856,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ret = PTR_ERR(tsk); goto err; } - ret = io_uring_alloc_task_context(tsk, ctx); - if (ret) - set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + sqd->thread = tsk; + ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); if (ret) goto err; -- GitLab From 33cc89a9fc248a486857381584cc6b67d9405fab Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 9 Mar 2021 00:37:58 +0000 Subject: [PATCH 513/839] io_uring: add io_disarm_next() helper A preparation patch placing all preparations before extracting a next request into a separate helper io_disarm_next(). Also, don't spuriously do ev_posted in a rare case where REQ_F_FAIL_LINK is set but there are no requests linked (i.e. after cancelling a linked timeout or setting IOSQE_IO_LINK on a last request of a submission batch). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/44ecff68d6b47e1c4e6b891bdde1ddc08cfc3590.1615250156.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 68 ++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3299807894ec0..cc9a2cc95608f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1705,15 +1705,11 @@ static inline void io_remove_next_linked(struct io_kiocb *req) nxt->link = NULL; } -static void io_kill_linked_timeout(struct io_kiocb *req) +static bool io_kill_linked_timeout(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) { - struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *link; + struct io_kiocb *link = req->link; bool cancelled = false; - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - link = req->link; /* * Can happen if a linked timeout fired and link had been like @@ -1728,50 +1724,48 @@ static void io_kill_linked_timeout(struct io_kiocb *req) ret = hrtimer_try_to_cancel(&io->timer); if (ret != -1) { io_cqring_fill_event(link, -ECANCELED); - io_commit_cqring(ctx); + io_put_req_deferred(link, 1); cancelled = true; } } req->flags &= ~REQ_F_LINK_TIMEOUT; - spin_unlock_irqrestore(&ctx->completion_lock, flags); - - if (cancelled) { - io_cqring_ev_posted(ctx); - io_put_req(link); - } + return cancelled; } - static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) { - struct io_kiocb *link, *nxt; - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; + struct io_kiocb *nxt, *link = req->link; - spin_lock_irqsave(&ctx->completion_lock, flags); - link = req->link; req->link = NULL; - while (link) { nxt = link->link; link->link = NULL; trace_io_uring_fail_link(req, link); io_cqring_fill_event(link, -ECANCELED); - io_put_req_deferred(link, 2); link = nxt; } - io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); +} - io_cqring_ev_posted(ctx); +static bool io_disarm_next(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + bool posted = false; + + if (likely(req->flags & REQ_F_LINK_TIMEOUT)) + posted = io_kill_linked_timeout(req); + if (unlikely(req->flags & REQ_F_FAIL_LINK)) { + posted |= (req->link != NULL); + io_fail_links(req); + } + return posted; } static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) { - if (req->flags & REQ_F_LINK_TIMEOUT) - io_kill_linked_timeout(req); + struct io_kiocb *nxt; /* * If LINK is set, we have dependent requests in this chain. If we @@ -1779,14 +1773,22 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) * dependencies to the next request. In case of failure, fail the rest * of the chain. */ - if (likely(!(req->flags & REQ_F_FAIL_LINK))) { - struct io_kiocb *nxt = req->link; + if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK)) { + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + bool posted; - req->link = NULL; - return nxt; + spin_lock_irqsave(&ctx->completion_lock, flags); + posted = io_disarm_next(req); + if (posted) + io_commit_cqring(req->ctx); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + if (posted) + io_cqring_ev_posted(ctx); } - io_fail_links(req); - return NULL; + nxt = req->link; + req->link = NULL; + return nxt; } static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) -- GitLab From 7a612350a989866510dc5c874fd8ffe1f37555d2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 9 Mar 2021 00:37:59 +0000 Subject: [PATCH 514/839] io_uring: fix complete_post races for linked req Calling io_queue_next() after spin_unlock in io_req_complete_post() races with the other side extracting and reusing this request. Hand coded parts of io_req_find_next() considering that io_disarm_next() and io_req_task_queue() have (and safe) to be called with completion_lock held. It already does io_commit_cqring() and io_cqring_ev_posted(), so just reuse it for post io_disarm_next(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/5672a62f3150ee7c55849f40c0037655c4f2840f.1615250156.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index cc9a2cc95608f..f7153483a3ac9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -985,6 +985,7 @@ static const struct io_op_def io_op_defs[] = { [IORING_OP_UNLINKAT] = {}, }; +static bool io_disarm_next(struct io_kiocb *req); static void io_uring_del_task_file(unsigned long index); static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, @@ -1525,15 +1526,14 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res) __io_cqring_fill_event(req, res, 0); } -static inline void io_req_complete_post(struct io_kiocb *req, long res, - unsigned int cflags) +static void io_req_complete_post(struct io_kiocb *req, long res, + unsigned int cflags) { struct io_ring_ctx *ctx = req->ctx; unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); __io_cqring_fill_event(req, res, cflags); - io_commit_cqring(ctx); /* * If we're the last reference to this request, add to our locked * free_list cache. @@ -1541,19 +1541,26 @@ static inline void io_req_complete_post(struct io_kiocb *req, long res, if (refcount_dec_and_test(&req->refs)) { struct io_comp_state *cs = &ctx->submit_state.comp; + if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { + if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK)) + io_disarm_next(req); + if (req->link) { + io_req_task_queue(req->link); + req->link = NULL; + } + } io_dismantle_req(req); io_put_task(req->task, 1); list_add(&req->compl.list, &cs->locked_free_list); cs->locked_free_nr++; } else req = NULL; + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); - if (req) { - io_queue_next(req); + + if (req) percpu_ref_put(&ctx->refs); - } } static void io_req_complete_state(struct io_kiocb *req, long res, -- GitLab From 70e35125093b05b0e607ba1f5358ddf76946756c Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 9 Mar 2021 11:04:10 +0800 Subject: [PATCH 515/839] io-wq: fix ref leak for req in case of exit cancelations do_work such as io_wq_submit_work that cancel the work may leave a ref of req as 1 if we have links. Fix it by call io_run_cancel. Fixes: 4fb6ac326204 ("io-wq: improve manager/worker handling over exec") Signed-off-by: yangerkun Link: https://lore.kernel.org/r/20210309030410.3294078-1-yangerkun@huawei.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index c2e7031f6d096..3d7060ba547a0 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -799,8 +799,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) /* Can only happen if manager creation fails after exec */ if (io_wq_fork_manager(wqe->wq) || test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { - work->flags |= IO_WQ_WORK_CANCEL; - wqe->wq->do_work(work); + io_run_cancel(work, wqe); return; } -- GitLab From 93e68e036c2fc1ce18e784418e4e19975a5882b4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Mar 2021 07:02:21 -0700 Subject: [PATCH 516/839] io_uring: move all io_kiocb init early in io_init_req() If we hit an error path in the function, make sure that the io_kiocb is fully initialized at that point so that freeing the request always sees a valid state. Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f7153483a3ac9..0f18e4a7bd087 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6327,6 +6327,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, refcount_set(&req->refs, 2); req->task = current; req->result = 0; + req->work.list.next = NULL; + req->work.creds = NULL; + req->work.flags = 0; /* enforce forwards compatibility on users */ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) { @@ -6344,17 +6347,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, !io_op_defs[req->opcode].buffer_select) return -EOPNOTSUPP; - req->work.list.next = NULL; personality = READ_ONCE(sqe->personality); if (personality) { req->work.creds = xa_load(&ctx->personalities, personality); if (!req->work.creds) return -EINVAL; get_cred(req->work.creds); - } else { - req->work.creds = NULL; } - req->work.flags = 0; state = &ctx->submit_state; /* -- GitLab From 5199328a0d415b3e372633096b1b92f36b8ac9e5 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 9 Mar 2021 14:30:41 +0800 Subject: [PATCH 517/839] io_uring: remove unneeded variable 'ret' Fix the following coccicheck warning: ./fs/io_uring.c:8984:5-8: Unneeded variable: "ret". Return "0" on line 8998 Reported-by: Abaci Robot Signed-off-by: Yang Li Link: https://lore.kernel.org/r/1615271441-33649-1-git-send-email-yang.lee@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0f18e4a7bd087..6325f32ef6a33 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9022,7 +9022,6 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { - int ret = 0; DEFINE_WAIT(wait); do { @@ -9036,7 +9035,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); - return ret; + return 0; } static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, -- GitLab From e8f98f24549d62cc54bf608c815904a56d4437bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Mar 2021 16:32:13 -0700 Subject: [PATCH 518/839] io_uring: always wait for sqd exited when stopping SQPOLL thread We have a tiny race where io_put_sq_data() calls io_sq_thead_stop() and finds the thread gone, but the thread has indeed not fully exited or called complete() yet. Close it up by always having io_sq_thread_stop() wait on completion of the exit event. Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6325f32ef6a33..62f998bf2ce84 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7079,12 +7079,9 @@ static void io_sq_thread_stop(struct io_sq_data *sqd) if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) return; down_write(&sqd->rw_lock); - if (!sqd->thread) { - up_write(&sqd->rw_lock); - return; - } set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - wake_up_process(sqd->thread); + if (sqd->thread) + wake_up_process(sqd->thread); up_write(&sqd->rw_lock); wait_for_completion(&sqd->exited); } @@ -7849,9 +7846,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; if (cpu >= nr_cpu_ids) - goto err; + goto err_sqpoll; if (!cpu_online(cpu)) - goto err; + goto err_sqpoll; sqd->sq_cpu = cpu; } else { @@ -7862,7 +7859,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); - goto err; + goto err_sqpoll; } sqd->thread = tsk; @@ -7881,6 +7878,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, err: io_sq_thread_finish(ctx); return ret; +err_sqpoll: + complete(&ctx->sq_data->exited); + goto err; } static inline void __io_unaccount_mem(struct user_struct *user, -- GitLab From e22bc9b481a90d7898984ea17621f04a653e2cd1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Mar 2021 19:49:02 -0700 Subject: [PATCH 519/839] kernel: make IO threads unfreezable by default The io-wq threads were already marked as no-freeze, but the manager was not. On resume, we perpetually have signal_pending() being true, and hence the manager will loop and spin 100% of the time. Just mark the tasks created by create_io_thread() as PF_NOFREEZE by default, and remove any knowledge of it in io-wq and io_uring. Reported-by: Kevin Locke Tested-by: Kevin Locke Signed-off-by: Jens Axboe --- fs/io-wq.c | 3 +-- fs/io_uring.c | 1 - kernel/fork.c | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 3d7060ba547a0..0ae9ecadf295c 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -591,7 +591,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) tsk->pf_io_worker = worker; worker->task = tsk; set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node)); - tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY; + tsk->flags |= PF_NO_SETAFFINITY; raw_spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); @@ -709,7 +709,6 @@ static int io_wq_manager(void *data) set_current_state(TASK_INTERRUPTIBLE); io_wq_check_workers(wq); schedule_timeout(HZ); - try_to_freeze(); if (fatal_signal_pending(current)) set_bit(IO_WQ_BIT_EXIT, &wq->state); } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); diff --git a/fs/io_uring.c b/fs/io_uring.c index 62f998bf2ce84..14165e18020c1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6733,7 +6733,6 @@ static int io_sq_thread(void *data) up_read(&sqd->rw_lock); schedule(); - try_to_freeze(); down_read(&sqd->rw_lock); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); diff --git a/kernel/fork.c b/kernel/fork.c index d3171e8e88e5d..72e444cd0ffed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2436,6 +2436,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) if (!IS_ERR(tsk)) { sigfillset(&tsk->blocked); sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); + tsk->flags |= PF_NOFREEZE; } return tsk; } -- GitLab From 78d7f6ba82edb7f8763390982be29051c4216772 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 10 Mar 2021 13:13:53 +0000 Subject: [PATCH 520/839] io_uring: fix invalid ctx->sq_thread_idle We have to set ctx->sq_thread_idle before adding a ring to an SQ task, otherwise sqd races for seeing zero and accounting it as such. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 14165e18020c1..7072c0eb22c15 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7829,14 +7829,14 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ctx->sq_creds = get_current_cred(); ctx->sq_data = sqd; - io_sq_thread_park(sqd); - list_add(&ctx->sqd_list, &sqd->ctx_new_list); - io_sq_thread_unpark(sqd); - ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); if (!ctx->sq_thread_idle) ctx->sq_thread_idle = HZ; + io_sq_thread_park(sqd); + list_add(&ctx->sqd_list, &sqd->ctx_new_list); + io_sq_thread_unpark(sqd); + if (sqd->thread) return 0; -- GitLab From 7d41e8543d809c3c900d1212d6ea887eb284b69a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 10 Mar 2021 13:13:54 +0000 Subject: [PATCH 521/839] io_uring: remove indirect ctx into sqo injection We use ->ctx_new_list to notify sqo about new ctx pending, then sqo should stop and splice it to its sqd->ctx_list, paired with ->sq_thread_comp. The last one is broken because nobody reinitialises it, and trying to fix it would only add more complexity and bugs. And the first isn't really needed as is done under park(), that protects from races well. Add ctx into sqd->ctx_list directly (under park()), it's much simpler and allows to kill both, ctx_new_list and sq_thread_comp. note: apparently there is no real problem at the moment, because sq_thread_comp is used only by io_sq_thread_finish() followed by parking, where list_del(&ctx->sqd_list) removes it well regardless whether it's in the new or the active list. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7072c0eb22c15..5c045a9f7ffe0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -262,7 +262,6 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct list_head ctx_new_list; struct task_struct *thread; struct wait_queue_head wait; @@ -398,7 +397,6 @@ struct io_ring_ctx { struct user_struct *user; struct completion ref_comp; - struct completion sq_thread_comp; #if defined(CONFIG_UNIX) struct socket *ring_sock; @@ -1137,7 +1135,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) init_waitqueue_head(&ctx->cq_wait); INIT_LIST_HEAD(&ctx->cq_overflow_list); init_completion(&ctx->ref_comp); - init_completion(&ctx->sq_thread_comp); idr_init(&ctx->io_buffer_idr); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); @@ -6640,19 +6637,6 @@ static void io_sqd_update_thread_idle(struct io_sq_data *sqd) sqd->sq_thread_idle = sq_thread_idle; } -static void io_sqd_init_new(struct io_sq_data *sqd) -{ - struct io_ring_ctx *ctx; - - while (!list_empty(&sqd->ctx_new_list)) { - ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list); - list_move_tail(&ctx->sqd_list, &sqd->ctx_list); - complete(&ctx->sq_thread_comp); - } - - io_sqd_update_thread_idle(sqd); -} - static int io_sq_thread(void *data) { struct io_sq_data *sqd = data; @@ -6683,11 +6667,8 @@ static int io_sq_thread(void *data) up_read(&sqd->rw_lock); cond_resched(); down_read(&sqd->rw_lock); - continue; - } - if (unlikely(!list_empty(&sqd->ctx_new_list))) { - io_sqd_init_new(sqd); timeout = jiffies + sqd->sq_thread_idle; + continue; } if (fatal_signal_pending(current)) break; @@ -7099,9 +7080,6 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx) if (sqd) { complete(&sqd->startup); - if (sqd->thread) - wait_for_completion(&ctx->sq_thread_comp); - io_sq_thread_park(sqd); list_del(&ctx->sqd_list); io_sqd_update_thread_idle(sqd); @@ -7153,7 +7131,6 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) refcount_set(&sqd->refs, 1); INIT_LIST_HEAD(&sqd->ctx_list); - INIT_LIST_HEAD(&sqd->ctx_new_list); init_rwsem(&sqd->rw_lock); init_waitqueue_head(&sqd->wait); init_completion(&sqd->startup); @@ -7834,7 +7811,8 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; io_sq_thread_park(sqd); - list_add(&ctx->sqd_list, &sqd->ctx_new_list); + list_add(&ctx->sqd_list, &sqd->ctx_list); + io_sqd_update_thread_idle(sqd); io_sq_thread_unpark(sqd); if (sqd->thread) -- GitLab From faa44c69daf9ccbd5b8a1aee13e0e0d037c0be17 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 10 Mar 2021 18:09:19 +0900 Subject: [PATCH 522/839] block: Fix REQ_OP_ZONE_RESET_ALL handling Similarly to a single zone reset operation (REQ_OP_ZONE_RESET), execute REQ_OP_ZONE_RESET_ALL operations with REQ_SYNC set. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 833978c02e608..8b9f3fc5a690a 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -240,7 +240,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, */ if (op == REQ_OP_ZONE_RESET && blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { - bio->bi_opf = REQ_OP_ZONE_RESET_ALL; + bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; break; } -- GitLab From df66617bfe87487190a60783d26175b65d2502ce Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 9 Mar 2021 19:30:17 -0800 Subject: [PATCH 523/839] block: rsxx: fix error return code of rsxx_pci_probe() When create_singlethread_workqueue returns NULL to card->event_wq, no error return code of rsxx_pci_probe() is assigned. To fix this bug, st is assigned with -ENOMEM in this case. Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver") Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20210310033017.4023-1-baijiaju1990@gmail.com Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 5ac1881396afb..227e1be4c6f99 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -871,6 +871,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event"); if (!card->event_wq) { dev_err(CARD_TO_DEV(card), "Failed card event setup.\n"); + st = -ENOMEM; goto failed_event_handler; } -- GitLab From c8e2fe13d1d1f3a02842b7b909d4e4846a4b6a2c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Mar 2021 09:10:19 -0800 Subject: [PATCH 524/839] x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case Initialize x86_pmu.guest_get_msrs to return 0/NULL to handle the "nop" case. Patching in perf_guest_get_msrs_nop() during setup does not work if there is no PMU, as setup bails before updating the static calls, leaving x86_pmu.guest_get_msrs NULL and thus a complete nop. Ultimately, this causes VMX abort on VM-Exit due to KVM putting random garbage from the stack into the MSR load list. Add a comment in KVM to note that nr_msrs is valid if and only if the return value is non-NULL. Fixes: abd562df94d1 ("x86/perf: Use static_call for x86_pmu.guest_get_msrs") Reported-by: Dmitry Vyukov Reported-by: syzbot+cce9ef2dd25246f815ee@syzkaller.appspotmail.com Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210309171019.1125243-1-seanjc@google.com --- arch/x86/events/core.c | 15 ++++++--------- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6ddeed3cd2acb..18df171296955 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); -DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); +/* + * This one is magic, it will get called even when PMU init fails (because + * there is no PMU), in which case it should simply return NULL. + */ +DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event) x86_perf_event_update(event); } -static inline struct perf_guest_switch_msr * -perf_guest_get_msrs_nop(int *nr) -{ - *nr = 0; - return NULL; -} - static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void) x86_pmu.read = _x86_pmu_read; if (!x86_pmu.guest_get_msrs) - x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop; + x86_pmu.guest_get_msrs = (void *)&__static_call_return0; x86_pmu_static_call_update(); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50810d4714628..32cf8287d4a77 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) int i, nr_msrs; struct perf_guest_switch_msr *msrs; + /* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */ msrs = perf_guest_get_msrs(&nr_msrs); - if (!msrs) return; -- GitLab From 1201d68f4781141411e734315f22457e6ea2cfcb Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Wed, 10 Mar 2021 23:37:08 +0800 Subject: [PATCH 525/839] virt: acrn: Correct type casting of argument of copy_from_user() hsm.c:336:50: warning: incorrect type in argument 2 (different address spaces) hsm.c:336:50: expected void const [noderef] __user *from hsm.c:336:50: got void * This patch fixes above sparse warning. Fixes: 3d679d5aec64 ("virt: acrn: Introduce interfaces to query C-states and P-states allowed by hypervisor") Reported-by: kernel test robot Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210310153708.17451-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/hsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index 6996ea6219e53..130e12b8652a6 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -333,7 +333,7 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, acrn_ioreq_request_clear(vm); break; case ACRN_IOCTL_PM_GET_CPU_STATE: - if (copy_from_user(&cstate_cmd, (void *)ioctl_param, + if (copy_from_user(&cstate_cmd, (void __user *)ioctl_param, sizeof(cstate_cmd))) return -EFAULT; -- GitLab From 20c40794eb85ea29852d7bc37c55713802a543d6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 12 Feb 2021 22:26:58 +0300 Subject: [PATCH 526/839] misc: fastrpc: restrict user apps from sending kernel RPC messages Verify that user applications are not using the kernel RPC message handle to restrict them from directly attaching to guest OS on the remote subsystem. This is a port of CVE-2019-2308 fix. Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Cc: Srinivas Kandagatla Cc: Jonathan Marek Cc: stable@vger.kernel.org Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20210212192658.3476137-1-dmitry.baryshkov@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index f12e909034ac0..beda610e6b30d 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -950,6 +950,11 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (!fl->cctx->rpdev) return -EPIPE; + if (handle == FASTRPC_INIT_HANDLE && !kernel) { + dev_warn_ratelimited(fl->sctx->dev, "user app trying to send a kernel RPC message (%d)\n", handle); + return -EPERM; + } + ctx = fastrpc_context_alloc(fl, kernel, sc, args); if (IS_ERR(ctx)) return PTR_ERR(ctx); -- GitLab From 65527a51c66f4edfa28602643d7dd4fa366eb826 Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Thu, 18 Feb 2021 20:31:16 +0800 Subject: [PATCH 527/839] misc/pvpanic: Export module FDT device table Export the module FDT device table to ensure the FDT compatible strings are listed in the module alias. This help the pvpanic driver can be loaded on boot automatically not only the ACPI device, but also the FDT device. Fixes: 46f934c9a12fc ("misc/pvpanic: add support to get pvpanic device info FDT") Signed-off-by: Shile Zhang Link: https://lore.kernel.org/r/20210218123116.207751-1-shile.zhang@linux.alibaba.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 9f350e05ef68a..f1655f5ca016d 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -140,6 +140,7 @@ static const struct of_device_id pvpanic_mmio_match[] = { { .compatible = "qemu,pvpanic-mmio", }, {} }; +MODULE_DEVICE_TABLE(of, pvpanic_mmio_match); static const struct acpi_device_id pvpanic_device_ids[] = { { "QEMU0001", 0 }, -- GitLab From 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 10 Mar 2021 10:18:04 -0800 Subject: [PATCH 528/839] Revert "mm, slub: consider rest of partial list if acquire_slab() fails" This reverts commit 8ff60eb052eeba95cfb3efe16b08c9199f8121cf. The kernel test robot reports a huge performance regression due to the commit, and the reason seems fairly straightforward: when there is contention on the page list (which is what causes acquire_slab() to fail), we do _not_ want to just loop and try again, because that will transfer the contention to the 'n->list_lock' spinlock we hold, and just make things even worse. This is admittedly likely a problem only on big machines - the kernel test robot report comes from a 96-thread dual socket Intel Xeon Gold 6252 setup, but the regression there really is quite noticeable: -47.9% regression of stress-ng.rawpkt.ops_per_sec and the commit that was marked as being fixed (7ced37197196: "slub: Acquire_slab() avoid loop") actually did the loop exit early very intentionally (the hint being that "avoid loop" part of that commit message), exactly to avoid this issue. The correct thing to do may be to pick some kind of reasonable middle ground: instead of breaking out of the loop on the very first sign of contention, or trying over and over and over again, the right thing may be to re-try _once_, and then give up on the second failure (or pick your favorite value for "once"..). Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/20210301080404.GF12822@xsang-OptiPlex-9020/ Cc: Jann Horn Cc: David Rientjes Cc: Joonsoo Kim Acked-by: Christoph Lameter Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index e26c274b4657f..3021ce9bf1b3d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1993,7 +1993,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - continue; /* cmpxchg raced */ + break; available += objects; if (!object) { -- GitLab From 47ccc8fc2c9c94558b27b6f9e2582df32d29e6e8 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:26 -0700 Subject: [PATCH 529/839] usbip: fix stub_dev to check for stream socket Fix usbip_sockfd_store() to validate the passed in file descriptor is a stream socket. If the file descriptor passed was a SOCK_DGRAM socket, sock_recvmsg() can't detect end of stream. Cc: stable@vger.kernel.org Suggested-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/e942d2bd03afb8e8552bd2a5d84e18d17670d521.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 2305d425e6c9a..90c105469a074 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -69,8 +69,16 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a } socket = sockfd_lookup(sockfd, &err); - if (!socket) + if (!socket) { + dev_err(dev, "failed to lookup sock"); goto err; + } + + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + goto sock_err; + } sdev->ud.tcp_socket = socket; sdev->ud.sockfd = sockfd; @@ -100,6 +108,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a return count; +sock_err: + sockfd_put(socket); err: spin_unlock_irq(&sdev->ud.lock); return -EINVAL; -- GitLab From f55a0571690c4aae03180e001522538c0927432f Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:27 -0700 Subject: [PATCH 530/839] usbip: fix vhci_hcd to check for stream socket Fix attach_store() to validate the passed in file descriptor is a stream socket. If the file descriptor passed was a SOCK_DGRAM socket, sock_recvmsg() can't detect end of stream. Cc: stable@vger.kernel.org Suggested-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/52712aa308915bda02cece1589e04ee8b401d1f3.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_sysfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index 96e5371dc335a..1e1ae9bd06ab2 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -349,8 +349,16 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, /* Extract socket from fd. */ socket = sockfd_lookup(sockfd, &err); - if (!socket) + if (!socket) { + dev_err(dev, "failed to lookup sock"); return -EINVAL; + } + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + sockfd_put(socket); + return -EINVAL; + } /* now need lock until setting vdev status as used */ -- GitLab From 6801854be94fe8819b3894979875ea31482f5658 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:28 -0700 Subject: [PATCH 531/839] usbip: fix vudc to check for stream socket Fix usbip_sockfd_store() to validate the passed in file descriptor is a stream socket. If the file descriptor passed was a SOCK_DGRAM socket, sock_recvmsg() can't detect end of stream. Cc: stable@vger.kernel.org Suggested-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/387a670316002324113ac7ea1e8b53f4085d0c95.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index 100f680c572ae..83a0c59a3de8a 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -138,6 +138,13 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto unlock_ud; } + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + ret = -EINVAL; + goto sock_err; + } + udc->ud.tcp_socket = socket; spin_unlock_irq(&udc->ud.lock); @@ -177,6 +184,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a return count; +sock_err: + sockfd_put(socket); unlock_ud: spin_unlock_irq(&udc->ud.lock); unlock: -- GitLab From 9380afd6df70e24eacbdbde33afc6a3950965d22 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:29 -0700 Subject: [PATCH 532/839] usbip: fix stub_dev usbip_sockfd_store() races leading to gpf usbip_sockfd_store() is invoked when user requests attach (import) detach (unimport) usb device from usbip host. vhci_hcd sends import request and usbip_sockfd_store() exports the device if it is free for export. Export and unexport are governed by local state and shared state - Shared state (usbip device status, sockfd) - sockfd and Device status are used to determine if stub should be brought up or shut down. - Local state (tcp_socket, rx and tx thread task_struct ptrs) A valid tcp_socket controls rx and tx thread operations while the device is in exported state. - While the device is exported, device status is marked used and socket, sockfd, and thread pointers are valid. Export sequence (stub-up) includes validating the socket and creating receive (rx) and transmit (tx) threads to talk to the client to provide access to the exported device. rx and tx threads depends on local and shared state to be correct and in sync. Unexport (stub-down) sequence shuts the socket down and stops the rx and tx threads. Stub-down sequence relies on local and shared states to be in sync. There are races in updating the local and shared status in the current stub-up sequence resulting in crashes. These stem from starting rx and tx threads before local and global state is updated correctly to be in sync. 1. Doesn't handle kthread_create() error and saves invalid ptr in local state that drives rx and tx threads. 2. Updates tcp_socket and sockfd, starts stub_rx and stub_tx threads before updating usbip_device status to SDEV_ST_USED. This opens up a race condition between the threads and usbip_sockfd_store() stub up and down handling. Fix the above problems: - Stop using kthread_get_run() macro to create/start threads. - Create threads and get task struct reference. - Add kthread_create() failure handling and bail out. - Hold usbip_device lock to update local and shared states after creating rx and tx threads. - Update usbip_device status to SDEV_ST_USED. - Update usbip_device tcp_socket, sockfd, tcp_rx, and tcp_tx - Start threads after usbip_device (tcp_socket, sockfd, tcp_rx, tcp_tx, and status) is complete. Credit goes to syzbot and Tetsuo Handa for finding and root-causing the kthread_get_run() improper error handling problem and others. This is a hard problem to find and debug since the races aren't seen in a normal case. Fuzzing forces the race window to be small enough for the kthread_get_run() error path bug and starting threads before updating the local and shared state bug in the stub-up sequence. Tested with syzbot reproducer: - https://syzkaller.appspot.com/text?tag=ReproC&x=14801034d00000 Fixes: 9720b4bc76a83807 ("staging/usbip: convert to kthread") Cc: stable@vger.kernel.org Reported-by: syzbot Reported-by: syzbot Reported-by: syzbot Reported-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/268a0668144d5ff36ec7d87fdfa90faf583b7ccc.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 90c105469a074..8f1de1fbbeedf 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -46,6 +46,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a int sockfd = 0; struct socket *socket; int rv; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; if (!sdev) { dev_err(dev, "sdev is null\n"); @@ -80,20 +82,36 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto sock_err; } - sdev->ud.tcp_socket = socket; - sdev->ud.sockfd = sockfd; - + /* unlock and create threads and get tasks */ spin_unlock_irq(&sdev->ud.lock); + tcp_rx = kthread_create(stub_rx_loop, &sdev->ud, "stub_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(stub_tx_loop, &sdev->ud, "stub_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } - sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, &sdev->ud, - "stub_rx"); - sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, &sdev->ud, - "stub_tx"); + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); + /* lock and update sdev->ud state */ spin_lock_irq(&sdev->ud.lock); + sdev->ud.tcp_socket = socket; + sdev->ud.sockfd = sockfd; + sdev->ud.tcp_rx = tcp_rx; + sdev->ud.tcp_tx = tcp_tx; sdev->ud.status = SDEV_ST_USED; spin_unlock_irq(&sdev->ud.lock); + wake_up_process(sdev->ud.tcp_rx); + wake_up_process(sdev->ud.tcp_tx); + } else { dev_info(dev, "stub down\n"); -- GitLab From 718ad9693e3656120064b715fe931f43a6201e67 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:30 -0700 Subject: [PATCH 533/839] usbip: fix vhci_hcd attach_store() races leading to gpf attach_store() is invoked when user requests import (attach) a device from usbip host. Attach and detach are governed by local state and shared state - Shared state (usbip device status) - Device status is used to manage the attach and detach operations on import-able devices. - Local state (tcp_socket, rx and tx thread task_struct ptrs) A valid tcp_socket controls rx and tx thread operations while the device is in exported state. - Device has to be in the right state to be attached and detached. Attach sequence includes validating the socket and creating receive (rx) and transmit (tx) threads to talk to the host to get access to the imported device. rx and tx threads depends on local and shared state to be correct and in sync. Detach sequence shuts the socket down and stops the rx and tx threads. Detach sequence relies on local and shared states to be in sync. There are races in updating the local and shared status in the current attach sequence resulting in crashes. These stem from starting rx and tx threads before local and global state is updated correctly to be in sync. 1. Doesn't handle kthread_create() error and saves invalid ptr in local state that drives rx and tx threads. 2. Updates tcp_socket and sockfd, starts stub_rx and stub_tx threads before updating usbip_device status to VDEV_ST_NOTASSIGNED. This opens up a race condition between the threads, port connect, and detach handling. Fix the above problems: - Stop using kthread_get_run() macro to create/start threads. - Create threads and get task struct reference. - Add kthread_create() failure handling and bail out. - Hold vhci and usbip_device locks to update local and shared states after creating rx and tx threads. - Update usbip_device status to VDEV_ST_NOTASSIGNED. - Update usbip_device tcp_socket, sockfd, tcp_rx, and tcp_tx - Start threads after usbip_device (tcp_socket, sockfd, tcp_rx, tcp_tx, and status) is complete. Credit goes to syzbot and Tetsuo Handa for finding and root-causing the kthread_get_run() improper error handling problem and others. This is hard problem to find and debug since the races aren't seen in a normal case. Fuzzing forces the race window to be small enough for the kthread_get_run() error path bug and starting threads before updating the local and shared state bug in the attach sequence. - Update usbip_device tcp_rx and tcp_tx pointers holding vhci and usbip_device locks. Tested with syzbot reproducer: - https://syzkaller.appspot.com/text?tag=ReproC&x=14801034d00000 Fixes: 9720b4bc76a83807 ("staging/usbip: convert to kthread") Cc: stable@vger.kernel.org Reported-by: syzbot Reported-by: syzbot Reported-by: syzbot Reported-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/bb434bd5d7a64fbec38b5ecfb838a6baef6eb12b.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_sysfs.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index 1e1ae9bd06ab2..c4b4256e5dad3 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -312,6 +312,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, struct vhci *vhci; int err; unsigned long flags; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; /* * @rhport: port number of vhci_hcd @@ -360,9 +362,24 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, return -EINVAL; } - /* now need lock until setting vdev status as used */ + /* create threads before locking */ + tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } + + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); - /* begin a lock */ + /* now begin lock until setting vdev status set */ spin_lock_irqsave(&vhci->lock, flags); spin_lock(&vdev->ud.lock); @@ -372,6 +389,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, spin_unlock_irqrestore(&vhci->lock, flags); sockfd_put(socket); + kthread_stop_put(tcp_rx); + kthread_stop_put(tcp_tx); dev_err(dev, "port %d already used\n", rhport); /* @@ -390,6 +409,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, vdev->speed = speed; vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; + vdev->ud.tcp_rx = tcp_rx; + vdev->ud.tcp_tx = tcp_tx; vdev->ud.status = VDEV_ST_NOTASSIGNED; usbip_kcov_handle_init(&vdev->ud); @@ -397,8 +418,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, spin_unlock_irqrestore(&vhci->lock, flags); /* end the lock */ - vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx"); - vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx"); + wake_up_process(vdev->ud.tcp_rx); + wake_up_process(vdev->ud.tcp_tx); rh_port_connect(vdev, speed); -- GitLab From 46613c9dfa964c0c60b5385dbdf5aaa18be52a9c Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 7 Mar 2021 20:53:31 -0700 Subject: [PATCH 534/839] usbip: fix vudc usbip_sockfd_store races leading to gpf usbip_sockfd_store() is invoked when user requests attach (import) detach (unimport) usb gadget device from usbip host. vhci_hcd sends import request and usbip_sockfd_store() exports the device if it is free for export. Export and unexport are governed by local state and shared state - Shared state (usbip device status, sockfd) - sockfd and Device status are used to determine if stub should be brought up or shut down. Device status is shared between host and client. - Local state (tcp_socket, rx and tx thread task_struct ptrs) A valid tcp_socket controls rx and tx thread operations while the device is in exported state. - While the device is exported, device status is marked used and socket, sockfd, and thread pointers are valid. Export sequence (stub-up) includes validating the socket and creating receive (rx) and transmit (tx) threads to talk to the client to provide access to the exported device. rx and tx threads depends on local and shared state to be correct and in sync. Unexport (stub-down) sequence shuts the socket down and stops the rx and tx threads. Stub-down sequence relies on local and shared states to be in sync. There are races in updating the local and shared status in the current stub-up sequence resulting in crashes. These stem from starting rx and tx threads before local and global state is updated correctly to be in sync. 1. Doesn't handle kthread_create() error and saves invalid ptr in local state that drives rx and tx threads. 2. Updates tcp_socket and sockfd, starts stub_rx and stub_tx threads before updating usbip_device status to SDEV_ST_USED. This opens up a race condition between the threads and usbip_sockfd_store() stub up and down handling. Fix the above problems: - Stop using kthread_get_run() macro to create/start threads. - Create threads and get task struct reference. - Add kthread_create() failure handling and bail out. - Hold usbip_device lock to update local and shared states after creating rx and tx threads. - Update usbip_device status to SDEV_ST_USED. - Update usbip_device tcp_socket, sockfd, tcp_rx, and tcp_tx - Start threads after usbip_device (tcp_socket, sockfd, tcp_rx, tcp_tx, and status) is complete. Credit goes to syzbot and Tetsuo Handa for finding and root-causing the kthread_get_run() improper error handling problem and others. This is a hard problem to find and debug since the races aren't seen in a normal case. Fuzzing forces the race window to be small enough for the kthread_get_run() error path bug and starting threads before updating the local and shared state bug in the stub-up sequence. Fixes: 9720b4bc76a83807 ("staging/usbip: convert to kthread") Cc: stable@vger.kernel.org Reported-by: syzbot Reported-by: syzbot Reported-by: syzbot Reported-by: Tetsuo Handa Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/b1c08b983ffa185449c9f0f7d1021dc8c8454b60.1615171203.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 42 +++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index 83a0c59a3de8a..a3ec39fc61778 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -90,8 +90,9 @@ unlock: } static BIN_ATTR_RO(dev_desc, sizeof(struct usb_device_descriptor)); -static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *attr, - const char *in, size_t count) +static ssize_t usbip_sockfd_store(struct device *dev, + struct device_attribute *attr, + const char *in, size_t count) { struct vudc *udc = (struct vudc *) dev_get_drvdata(dev); int rv; @@ -100,6 +101,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a struct socket *socket; unsigned long flags; int ret; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; rv = kstrtoint(in, 0, &sockfd); if (rv != 0) @@ -145,24 +148,47 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto sock_err; } - udc->ud.tcp_socket = socket; - + /* unlock and create threads and get tasks */ spin_unlock_irq(&udc->ud.lock); spin_unlock_irqrestore(&udc->lock, flags); - udc->ud.tcp_rx = kthread_get_run(&v_rx_loop, - &udc->ud, "vudc_rx"); - udc->ud.tcp_tx = kthread_get_run(&v_tx_loop, - &udc->ud, "vudc_tx"); + tcp_rx = kthread_create(&v_rx_loop, &udc->ud, "vudc_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(&v_tx_loop, &udc->ud, "vudc_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } + + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); + /* lock and update udc->ud state */ spin_lock_irqsave(&udc->lock, flags); spin_lock_irq(&udc->ud.lock); + + udc->ud.tcp_socket = socket; + udc->ud.tcp_rx = tcp_rx; + udc->ud.tcp_rx = tcp_tx; udc->ud.status = SDEV_ST_USED; + spin_unlock_irq(&udc->ud.lock); ktime_get_ts64(&udc->start_time); v_start_timer(udc); udc->connected = 1; + + spin_unlock_irqrestore(&udc->lock, flags); + + wake_up_process(udc->ud.tcp_rx); + wake_up_process(udc->ud.tcp_tx); + return count; + } else { if (!udc->connected) { dev_err(dev, "Device not connected"); -- GitLab From a2f8d988698d7d3645b045f4940415b045140b81 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Dec 2020 01:18:40 -0500 Subject: [PATCH 535/839] drm/amdgpu/display: simplify backlight setting Avoid the extra wrapper function. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e1fd1e7d09f1..78720a25c13f6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3185,19 +3185,6 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm) #endif } -static int set_backlight_via_aux(struct dc_link *link, uint32_t brightness) -{ - bool rc; - - if (!link) - return 1; - - rc = dc_link_set_backlight_level_nits(link, true, brightness, - AUX_BL_DEFAULT_TRANSITION_TIME_MS); - - return rc ? 0 : 1; -} - static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, unsigned *min, unsigned *max) { @@ -3260,9 +3247,10 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) brightness = convert_brightness_from_user(&caps, bd->props.brightness); // Change brightness based on AUX property if (caps.aux_support) - return set_backlight_via_aux(link, brightness); - - rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0); + rc = dc_link_set_backlight_level_nits(link, true, brightness, + AUX_BL_DEFAULT_TRANSITION_TIME_MS); + else + rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0); return rc ? 0 : 1; } -- GitLab From dfd8b7fbd985ec1cf76fe10f2875a50b10833740 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Dec 2020 01:20:08 -0500 Subject: [PATCH 536/839] drm/amdgpu/display: don't assert in set backlight function It just spams the logs. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index fa5059f71727a..bd0101013ec89 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2602,7 +2602,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link, if (pipe_ctx->plane_state == NULL) frame_ramp = 0; } else { - ASSERT(false); return false; } -- GitLab From 0ad3e64eb46d8c47de3af552e282894e3893e973 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 10 Dec 2020 01:45:12 -0500 Subject: [PATCH 537/839] drm/amdgpu/display: handle aux backlight in backlight_get_brightness Need to fetch it via aux. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 78720a25c13f6..0bc9a06b3f150 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3258,11 +3258,27 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); - int ret = dc_link_get_backlight_level(dm->backlight_link); + struct amdgpu_dm_backlight_caps caps; + + amdgpu_dm_update_backlight_caps(dm); + caps = dm->backlight_caps; - if (ret == DC_ERROR_UNEXPECTED) - return bd->props.brightness; - return convert_brightness_to_user(&dm->backlight_caps, ret); + if (caps.aux_support) { + struct dc_link *link = (struct dc_link *)dm->backlight_link; + u32 avg, peak; + bool rc; + + rc = dc_link_get_backlight_level_nits(link, &avg, &peak); + if (!rc) + return bd->props.brightness; + return convert_brightness_to_user(&caps, avg); + } else { + int ret = dc_link_get_backlight_level(dm->backlight_link); + + if (ret == DC_ERROR_UNEXPECTED) + return bd->props.brightness; + return convert_brightness_to_user(&caps, ret); + } } static const struct backlight_ops amdgpu_dm_backlight_ops = { -- GitLab From 7a46f05e5e163c00e41892e671294286e53fe15c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Feb 2021 13:42:41 +0100 Subject: [PATCH 538/839] drm/amd/display: Add a backlight module option There seem devices that don't work with the aux channel backlight control. For allowing such users to test with the other backlight control method, provide a new module option, aux_backlight, to specify enabling or disabling the aux backport support explicitly. As default, the aux support is detected by the hardware capability. v2: make the backlight option generic in case we add future backlight types (Alex) BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1180749 BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1438 Reviewed-by: Nicholas Kazlauskas Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b6879d97c9c9c..49267eb64302d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -180,6 +180,7 @@ extern uint amdgpu_smu_memory_pool_size; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dm_abm_level; +extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4575192d9b087..b26e2fd1c5384 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -781,6 +781,10 @@ uint amdgpu_dm_abm_level; MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_backlight = -1; +MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); +module_param_named(backlight, amdgpu_backlight, bint, 0444); + /** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0bc9a06b3f150..0523be612a48b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2267,6 +2267,11 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->ext_caps->bits.hdr_aux_backlight_control == 1) caps->aux_support = true; + if (amdgpu_backlight == 0) + caps->aux_support = false; + else if (amdgpu_backlight == 1) + caps->aux_support = true; + /* From the specification (CTA-861-G), for calculating the maximum * luminance we need to use: * Luminance = 50*2**(CV/32) -- GitLab From 15e8b95d5f7509e0b09289be8c422c459c9f0412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Holger=20Hoffst=C3=A4tte?= Date: Fri, 5 Mar 2021 12:39:21 +0100 Subject: [PATCH 539/839] drm/amd/display: Fix nested FPU context in dcn21_validate_bandwidth() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 41401ac67791 added FPU wrappers to dcn21_validate_bandwidth(), which was correct. Unfortunately a nested function alredy contained DC_FP_START()/DC_FP_END() calls, which results in nested FPU context enter/exit and complaints by kernel_fpu_begin_mask(). This can be observed e.g. with 5.10.20, which backported 41401ac67791 and now emits the following warning on boot: WARNING: CPU: 6 PID: 858 at arch/x86/kernel/fpu/core.c:129 kernel_fpu_begin_mask+0xa5/0xc0 Call Trace: dcn21_calculate_wm+0x47/0xa90 [amdgpu] dcn21_validate_bandwidth_fp+0x15d/0x2b0 [amdgpu] dcn21_validate_bandwidth+0x29/0x40 [amdgpu] dc_validate_global_state+0x3c7/0x4c0 [amdgpu] The warning is emitted due to the additional DC_FP_START/END calls in patch_bounding_box(), which is inlined into dcn21_calculate_wm(), its only caller. Removing the calls brings the code in line with dcn20 and makes the warning disappear. Fixes: 41401ac67791 ("drm/amd/display: Add FPU wrappers to dcn21_validate_bandwidth()") Signed-off-by: Holger Hoffstätte Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 072f8c8809243..68be73fe2e239 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1062,8 +1062,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s { int i; - DC_FP_START(); - if (dc->bb_overrides.sr_exit_time_ns) { for (i = 0; i < WM_SET_COUNT; i++) { dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us = @@ -1088,8 +1086,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } } - - DC_FP_END(); } void dcn21_calculate_wm( -- GitLab From 680174cfd1e1cea70a8f30ccb44d8fbdf996018e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Holger=20Hoffst=C3=A4tte?= Date: Fri, 5 Mar 2021 15:23:18 +0100 Subject: [PATCH 540/839] drm/amdgpu/display: use GFP_ATOMIC in dcn21_validate_bandwidth_fp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After fixing nested FPU contexts caused by 41401ac67791 we're still seeing complaints about spurious kernel_fpu_end(). As it turns out this was already fixed for dcn20 in commit f41ed88cbd ("drm/amdgpu/display: use GFP_ATOMIC in dcn20_validate_bandwidth_internal") but never moved forward to dcn21. Signed-off-by: Holger Hoffstätte Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 68be73fe2e239..94ee2cab26b7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1335,7 +1335,7 @@ static noinline bool dcn21_validate_bandwidth_fp(struct dc *dc, int vlevel = 0; int pipe_split_from[MAX_PIPES]; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); -- GitLab From 7afa0033d6f7fb8a84798ef99d1117661c4e696c Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Fri, 19 Feb 2021 17:17:50 -0500 Subject: [PATCH 541/839] drm/amd/display: Enable pflip interrupt upon pipe enable [Why] pflip interrupt would not be enabled promptly if a pipe is disabled and re-enabled, causing flip_done timeout error during DP compliance tests [How] Enable pflip interrupt upon pipe enablement Tested-by: Daniel Wheeler Signed-off-by: Qingqing Zhuo Reviewed-by: Nicholas Kazlauskas Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 11 +++++++++++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h | 6 ++++++ .../gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 1 + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 2 ++ 10 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0523be612a48b..573cf17262da4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4725,6 +4725,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 + dc_plane_state->flip_int_enabled = true; /* * Always set input transfer function, since plane state is refreshed diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4eee3a55fa301..18ed0d3f247ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -887,6 +887,7 @@ struct dc_plane_state { int layer_index; union surface_update_flags update_flags; + bool flip_int_enabled; /* private to DC core */ struct dc_plane_status status; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 9e796dfeac204..714c71a5fbde3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1257,6 +1257,16 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0); } +void hubp1_set_flip_int(struct hubp *hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCSURF_SURFACE_FLIP_INTERRUPT, + SURFACE_FLIP_INT_MASK, 1); + + return; +} + void hubp1_init(struct hubp *hubp) { //do nothing @@ -1290,6 +1300,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .dmdata_load = NULL, .hubp_soft_reset = hubp1_soft_reset, .hubp_in_blank = hubp1_in_blank, + .hubp_set_flip_int = hubp1_set_flip_int, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index a9a6ed7f4f993..e2f2f6995935f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -74,6 +74,7 @@ SRI(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id),\ SRI(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id),\ SRI(DCSURF_SURFACE_CONTROL, HUBPREQ, id),\ + SRI(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id),\ SRI(HUBPRET_CONTROL, HUBPRET, id),\ SRI(DCN_EXPANSION_MODE, HUBPREQ, id),\ SRI(DCHUBP_REQ_SIZE_CONFIG, HUBP, id),\ @@ -183,6 +184,7 @@ uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; \ uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; \ uint32_t DCSURF_SURFACE_CONTROL; \ + uint32_t DCSURF_SURFACE_FLIP_INTERRUPT; \ uint32_t HUBPRET_CONTROL; \ uint32_t DCN_EXPANSION_MODE; \ uint32_t DCHUBP_REQ_SIZE_CONFIG; \ @@ -332,6 +334,7 @@ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_META_SURFACE_TMZ_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\ @@ -531,6 +534,7 @@ type PRIMARY_SURFACE_DCC_IND_64B_BLK;\ type SECONDARY_SURFACE_DCC_EN;\ type SECONDARY_SURFACE_DCC_IND_64B_BLK;\ + type SURFACE_FLIP_INT_MASK;\ type DET_BUF_PLANE1_BASE_ADDRESS;\ type CROSSBAR_SRC_CB_B;\ type CROSSBAR_SRC_CR_R;\ @@ -777,4 +781,6 @@ void hubp1_read_state_common(struct hubp *hubp); bool hubp1_in_blank(struct hubp *hubp); void hubp1_soft_reset(struct hubp *hubp, bool reset); +void hubp1_set_flip_int(struct hubp *hubp); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 89912bb5014f8..9ba5c624770de 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2196,6 +2196,13 @@ static void dcn10_enable_plane( if (dc->debug.sanity_checks) { hws->funcs.verify_allow_pstate_change_high(dc); } + + if (!pipe_ctx->top_pipe + && pipe_ctx->plane_state + && pipe_ctx->plane_state->flip_int_enabled + && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int) + pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp); + } void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 0df0da2e6a4d0..bec7059f6d5d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1597,6 +1597,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .validate_dml_output = hubp2_validate_dml_output, .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 0726fb435e2a3..5342c309b78c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1146,6 +1146,12 @@ void dcn20_enable_plane( pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt); } + if (!pipe_ctx->top_pipe + && pipe_ctx->plane_state + && pipe_ctx->plane_state->flip_int_enabled + && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int) + pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp); + // if (dc->debug.sanity_checks) { // dcn10_verify_allow_pstate_change_high(dc); // } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index f9045852728fe..b0c9180b808f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -838,6 +838,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp21_init, .validate_dml_output = hubp21_validate_dml_output, + .hubp_set_flip_int = hubp1_set_flip_int, }; bool hubp21_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 88ffa9ff1ed15..f246125232482 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -511,6 +511,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_init = hubp3_init, .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 22f3f643ed1b8..346dcd87dc106 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -191,6 +191,8 @@ struct hubp_funcs { bool (*hubp_in_blank)(struct hubp *hubp); void (*hubp_soft_reset)(struct hubp *hubp, bool reset); + void (*hubp_set_flip_int)(struct hubp *hubp); + }; #endif -- GitLab From b0075d114c33580f5c9fa9cee8e13d06db41471b Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Fri, 26 Feb 2021 13:20:43 -0500 Subject: [PATCH 542/839] drm/amd/display: Revert dram_clock_change_latency for DCN2.1 [WHY & HOW] Using values provided by DF for latency may cause hangs in multi display configurations. Revert change to previous value. Tested-by: Daniel Wheeler Signed-off-by: Sung Lee Reviewed-by: Haonan Wang Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 94ee2cab26b7c..173488ab787ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -296,7 +296,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_banks = 8, .num_chans = 4, .vmm_page_size_bytes = 4096, - .dram_clock_change_latency_us = 11.72, + .dram_clock_change_latency_us = 23.84, .return_bus_width_bytes = 64, .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, -- GitLab From d2c91285958a3e77db99c352c136af4243f8f529 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 19 Feb 2021 18:15:30 -0500 Subject: [PATCH 543/839] drm/amd/display: Enabled pipe harvesting in dcn30 [Why & How] Ported logic from dcn21 for reading in pipe fusing to dcn30. Supported configurations are 1 and 6 pipes. Invalid fusing will revert to 1 pipe being enabled. Tested-by: Daniel Wheeler Signed-off-by: Dillon Varone Reviewed-by: Jun Lei Acked-by: Eryk Brol Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 8d0f663489ac5..fb7f1dea3c467 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2508,6 +2508,19 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, }; +#define CTX ctx + +#define REG(reg_name) \ + (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* Support for max 6 pipes */ + value = value & 0x3f; + return value; +} + static bool dcn30_resource_construct( uint8_t num_virtual_links, struct dc *dc, @@ -2517,6 +2530,15 @@ static bool dcn30_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; struct ddc_service_init_data ddc_init_data; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + uint32_t num_pipes = 0; + + if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: Unexpected fuse recipe for navi2x !\n"); + /* fault to single pipe */ + pipe_fuses = 0x3e; + } DC_FP_START(); @@ -2650,6 +2672,15 @@ static bool dcn30_resource_construct( /* PP Lib and SMU interfaces */ init_soc_bounding_box(dc, pool); + num_pipes = dcn3_0_ip.max_num_dpp; + + for (i = 0; i < dcn3_0_ip.max_num_dpp; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + dcn3_0_ip.max_num_dpp = num_pipes; + dcn3_0_ip.max_num_otg = num_pipes; + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); /* IRQ */ -- GitLab From eda29602f1a8b2b32d8c8c354232d9d1ee1c064d Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Mon, 8 Mar 2021 20:28:22 -0500 Subject: [PATCH 544/839] drm/amdgpu/display: Use wm_table.entries for dcn301 calculate_wm [Why] For DGPU Navi, the wm_table.nv_entries are used. These entires are not populated for DCN301 Vangogh APU, but instead wm_table.entries are. [How] Use DCN21 Renoir style wm calculations. Signed-off-by: Leo Li Signed-off-by: Zhan Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Zhan Liu Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn301/dcn301_resource.c | 96 ++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 5d4b2c60192ee..c494235016e09 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1619,12 +1619,106 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30); } +static void calculate_wm_set_for_vlevel( + int vlevel, + struct wm_range_table_entry *table_entry, + struct dcn_watermarks *wm_set, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us; + + ASSERT(vlevel < dml->soc.num_states); + /* only pipe 0 is read for voltage and dcf/soc clocks */ + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz; + + dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us; + dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us; + dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us; + + wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000; + wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000; + wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000; + dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; + +} + +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel_req) +{ + int i, pipe_idx; + int vlevel, vlevel_max; + struct wm_range_table_entry *table_entry; + struct clk_bw_params *bw_params = dc->clk_mgr->bw_params; + + ASSERT(bw_params); + + vlevel_max = bw_params->clk_table.num_entries - 1; + + /* WM Set D */ + table_entry = &bw_params->wm_table.entries[WM_D]; + if (table_entry->wm_type == WM_TYPE_RETRAINING) + vlevel = 0; + else + vlevel = vlevel_max; + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set C */ + table_entry = &bw_params->wm_table.entries[WM_C]; + vlevel = min(max(vlevel_req, 2), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set B */ + table_entry = &bw_params->wm_table.entries[WM_B]; + vlevel = min(max(vlevel_req, 1), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, + &context->bw_ctx.dml, pipes, pipe_cnt); + + /* WM Set A */ + table_entry = &bw_params->wm_table.entries[WM_A]; + vlevel = min(vlevel_req, vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a, + &context->bw_ctx.dml, pipes, pipe_cnt); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); +} + static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, .panel_cntl_create = dcn301_panel_cntl_create, .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, + .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg, .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, -- GitLab From 521f04f9e3ffc73ef96c776035f8a0a31b4cdd81 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 8 Mar 2021 15:22:22 +0100 Subject: [PATCH 545/839] drm/amdgpu: fb BO should be ttm_bo_type_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FB BO should not be ttm_bo_type_kernel type and amdgpufb_create_pinned_object() pins the FB BO anyway. Signed-off-by: Nirmoy Das Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 51cd49c6f38fd..24010cacf7d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, - ttm_bo_type_kernel, NULL, &gobj); + ttm_bo_type_device, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; -- GitLab From 50ceb1fe7acd50831180f4b5597bf7b39e8059c8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 9 Mar 2021 21:10:16 +0800 Subject: [PATCH 546/839] drm/amd/pm: bug fix for pcie dpm Currently the pcie dpm has two problems. 1. Only the high dpm level speed/width can be overrided if the requested values are out of the pcie capability. 2. The high dpm level is always overrided though sometimes it's not necesarry. Signed-off-by: Kenneth Feng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 48 ++++++++++++++ .../drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c | 66 +++++++++++++++++++ .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c | 48 ++++++++------ 3 files changed, 141 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 29c99642d22d4..22b636e2b89be 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -1505,6 +1505,48 @@ static int vega10_populate_single_lclk_level(struct pp_hwmgr *hwmgr, return 0; } +static int vega10_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + struct vega10_hwmgr *data = + (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t pcie_gen = 0, pcie_width = 0; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + for (i = 0; i < NUM_LINK_LEVELS; i++) { + if (pp_table->PcieGenSpeed[i] > pcie_gen) + pp_table->PcieGenSpeed[i] = pcie_gen; + + if (pp_table->PcieLaneCount[i] > pcie_width) + pp_table->PcieLaneCount[i] = pcie_width; + } + + return 0; +} + static int vega10_populate_smc_link_levels(struct pp_hwmgr *hwmgr) { int result = -1; @@ -2556,6 +2598,11 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to initialize Link Level!", return result); + result = vega10_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to override pcie parameters!", + return result); + result = vega10_populate_all_graphic_levels(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to initialize Graphics Level!", @@ -2922,6 +2969,7 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) return 0; } + static int vega10_enable_disable_PCC_limit_feature(struct pp_hwmgr *hwmgr, bool enable) { struct vega10_hwmgr *data = hwmgr->backend; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index c0753029a8e2a..43e01d880f7ce 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -481,6 +481,67 @@ static void vega12_init_dpm_state(struct vega12_dpm_state *dpm_state) dpm_state->hard_max_level = 0xffff; } +static int vega12_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; + int ret; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 + */ + for (i = 0; i < NUM_LINK_LEVELS; i++) { + pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen : + pp_table->PcieGenSpeed[i]; + pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width : + pp_table->PcieLaneCount[i]; + + if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg != + pp_table->PcieLaneCount[i]) { + smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + } + + /* update the pptable */ + pp_table->PcieGenSpeed[i] = pcie_gen_arg; + pp_table->PcieLaneCount[i] = pcie_width_arg; + } + + return 0; +} + static int vega12_get_number_of_dpm_level(struct pp_hwmgr *hwmgr, PPCLK_e clk_id, uint32_t *num_of_levels) { @@ -968,6 +1029,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "Failed to enable all smu features!", return result); + result = vega12_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + tmp_result = vega12_power_control_set_level(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to power control set level!", diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 87811b005b85f..f19964c69a002 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -831,7 +831,9 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; int ret; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) @@ -860,17 +862,27 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */ - smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; - ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, - NULL); - PP_ASSERT_WITH_CODE(!ret, - "[OverridePcieParameters] Attempt to override pcie params failed!", - return ret); + for (i = 0; i < NUM_LINK_LEVELS; i++) { + pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen : + pp_table->PcieGenSpeed[i]; + pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width : + pp_table->PcieLaneCount[i]; + + if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg != + pp_table->PcieLaneCount[i]) { + smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + } - data->pcie_parameters_override = true; - data->pcie_gen_level1 = pcie_gen; - data->pcie_width_level1 = pcie_width; + /* update the pptable */ + pp_table->PcieGenSpeed[i] = pcie_gen_arg; + pp_table->PcieLaneCount[i] = pcie_width_arg; + } return 0; } @@ -3319,9 +3331,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, data->od8_settings.od8_settings_array; OverDriveTable_t *od_table = &(data->smc_state_table.overdrive_table); - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; - PPTable_t *pptable = (PPTable_t *)pptable_information->smc_pptable; + PPTable_t *pptable = &(data->smc_state_table.pp_table); struct pp_clock_levels_with_latency clocks; struct vega20_single_dpm_table *fclk_dpm_table = &(data->dpm_table.fclk_table); @@ -3420,13 +3430,9 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, current_lane_width = vega20_get_current_pcie_link_width_level(hwmgr); for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (i == 1 && data->pcie_parameters_override) { - gen_speed = data->pcie_gen_level1; - lane_width = data->pcie_width_level1; - } else { - gen_speed = pptable->PcieGenSpeed[i]; - lane_width = pptable->PcieLaneCount[i]; - } + gen_speed = pptable->PcieGenSpeed[i]; + lane_width = pptable->PcieLaneCount[i]; + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : -- GitLab From 48123d068fcb584838ce29912660c5e9490bad0e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 5 Mar 2021 14:21:26 +0800 Subject: [PATCH 547/839] drm/amd/pm: correct the watermark settings for Polaris The "/ 10" should be applied to the right-hand operand instead of the left-hand one. Signed-off-by: Evan Quan Noticed-by: Georgios Toptsidis Reviewed-by: Feifei Xu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index c57dc9ae81f2f..a2681fe875ed6 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5216,10 +5216,10 @@ static int smu7_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, for (j = 0; j < dep_sclk_table->count; j++) { valid_entry = false; for (k = 0; k < watermarks->num_wm_sets; k++) { - if (dep_sclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz && - dep_sclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz && - dep_mclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz && - dep_mclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz) { + if (dep_sclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz / 10 && + dep_sclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz / 10 && + dep_mclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz / 10 && + dep_mclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz / 10) { valid_entry = true; table->DisplayWatermark[i][j] = watermarks->wm_clk_ranges[k].wm_set_id; break; -- GitLab From a25955ba123499d7db520175c6be59c29f9215e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 8 Mar 2021 19:35:14 +0100 Subject: [PATCH 548/839] drm/radeon: also init GEM funcs in radeon_gem_prime_import_sg_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we will run into a NULL ptr deref. Signed-off-by: Christian König Bug: https://bugzilla.kernel.org/show_bug.cgi?id=212137 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.11.x --- drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_gem.c | 4 ++-- drivers/gpu/drm/radeon/radeon_prime.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f09989bdce984..3effc8c71494c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -574,6 +574,8 @@ struct radeon_gem { struct list_head objects; }; +extern const struct drm_gem_object_funcs radeon_gem_object_funcs; + int radeon_gem_init(struct radeon_device *rdev); void radeon_gem_fini(struct radeon_device *rdev); int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 9418269232473..db14a82a2e4b2 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -43,7 +43,7 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); -static const struct drm_gem_object_funcs radeon_gem_object_funcs; +const struct drm_gem_object_funcs radeon_gem_object_funcs; static void radeon_gem_object_free(struct drm_gem_object *gobj) { @@ -227,7 +227,7 @@ static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) return r; } -static const struct drm_gem_object_funcs radeon_gem_object_funcs = { +const struct drm_gem_object_funcs radeon_gem_object_funcs = { .free = radeon_gem_object_free, .open = radeon_gem_object_open, .close = radeon_gem_object_close, diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index ab29eb9e86675..42a87948e28c5 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -56,6 +56,8 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); + bo->tbo.base.funcs = &radeon_gem_object_funcs; + mutex_lock(&rdev->gem.mutex); list_add_tail(&bo->list, &rdev->gem.objects); mutex_unlock(&rdev->gem.mutex); -- GitLab From cba2afb65cb05c3d197d17323fee4e3c9edef9cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 8 Mar 2021 19:22:13 +0100 Subject: [PATCH 549/839] drm/radeon: fix AGP dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When AGP is compiled as module radeon must be compiled as module as well. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 8bf103de1594c..19fc4ce62a94e 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -225,6 +225,7 @@ source "drivers/gpu/drm/arm/Kconfig" config DRM_RADEON tristate "ATI Radeon" depends on DRM && PCI && MMU + depends on AGP || !AGP select FW_LOADER select DRM_KMS_HELPER select DRM_TTM -- GitLab From a5cb3c1a36376c25cd25fd3e99918dc48ac420bb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Mar 2021 22:58:47 -0500 Subject: [PATCH 550/839] drm/amdgpu: fix S0ix handling when the CONFIG_AMD_PMC=m Need to check the module variant as well. Acked-by: Prike Liang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 36a741d63ddcf..2e9b16fb3fcd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -903,7 +903,7 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { -#if defined(CONFIG_AMD_PMC) +#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return true; -- GitLab From 25da4618af240fbec6112401498301a6f2bc9702 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 6 Mar 2021 17:18:32 +0100 Subject: [PATCH 551/839] xen/events: don't unmask an event channel when an eoi is pending An event channel should be kept masked when an eoi is pending for it. When being migrated to another cpu it might be unmasked, though. In order to avoid this keep three different flags for each event channel to be able to distinguish "normal" masking/unmasking from eoi related masking/unmasking and temporary masking. The event channel should only be able to generate an interrupt if all flags are cleared. Cc: stable@vger.kernel.org Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework") Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Reviewed-by: Boris Ostrovsky Tested-by: Ross Lagerwall Link: https://lore.kernel.org/r/20210306161833.4552-3-jgross@suse.com [boris -- corrected Fixed tag format] Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_2l.c | 7 -- drivers/xen/events/events_base.c | 101 +++++++++++++++++++++------ drivers/xen/events/events_fifo.c | 7 -- drivers/xen/events/events_internal.h | 6 -- 4 files changed, 80 insertions(+), 41 deletions(-) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index a7f413c5c1908..b8f2f971c2f0f 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -77,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_port_t port) return sync_test_bit(port, BM(&s->evtchn_pending[0])); } -static bool evtchn_2l_test_and_set_mask(evtchn_port_t port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0])); -} - static void evtchn_2l_mask(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -376,7 +370,6 @@ static const struct evtchn_ops evtchn_ops_2l = { .clear_pending = evtchn_2l_clear_pending, .set_pending = evtchn_2l_set_pending, .is_pending = evtchn_2l_is_pending, - .test_and_set_mask = evtchn_2l_test_and_set_mask, .mask = evtchn_2l_mask, .unmask = evtchn_2l_unmask, .handle_events = evtchn_2l_handle_events, diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 7e23808892a72..b27c012c86b58 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -98,13 +98,18 @@ struct irq_info { short refcnt; u8 spurious_cnt; u8 is_accounted; - enum xen_irq_type type; /* type */ + short type; /* type: IRQT_* */ + u8 mask_reason; /* Why is event channel masked */ +#define EVT_MASK_REASON_EXPLICIT 0x01 +#define EVT_MASK_REASON_TEMPORARY 0x02 +#define EVT_MASK_REASON_EOI_PENDING 0x04 unsigned irq; evtchn_port_t evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ u64 eoi_time; /* Time in jiffies when to EOI. */ + spinlock_t lock; union { unsigned short virq; @@ -154,6 +159,7 @@ static DEFINE_RWLOCK(evtchn_rwlock); * evtchn_rwlock * IRQ-desc lock * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -304,6 +310,8 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; + info->mask_reason = EVT_MASK_REASON_EXPLICIT; + spin_lock_init(&info->lock); ret = set_evtchn_to_irq(evtchn, irq); if (ret < 0) @@ -459,6 +467,34 @@ unsigned int cpu_from_evtchn(evtchn_port_t evtchn) return ret; } +static void do_mask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock, flags); + + if (!info->mask_reason) + mask_evtchn(info->evtchn); + + info->mask_reason |= reason; + + spin_unlock_irqrestore(&info->lock, flags); +} + +static void do_unmask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock, flags); + + info->mask_reason &= ~reason; + + if (!info->mask_reason) + unmask_evtchn(info->evtchn); + + spin_unlock_irqrestore(&info->lock, flags); +} + #ifdef CONFIG_X86 static bool pirq_check_eoi_map(unsigned irq) { @@ -605,7 +641,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) } info->eoi_time = 0; - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); } static void xen_irq_lateeoi_worker(struct work_struct *work) @@ -850,7 +886,8 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; out: - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); + eoi_pirq(irq_get_irq_data(irq)); return 0; @@ -877,7 +914,7 @@ static void shutdown_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); } @@ -1721,10 +1758,10 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) +static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) { struct evtchn_bind_vcpu bind_vcpu; - int masked; + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1740,7 +1777,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) * Mask the event while changing the VCPU binding to prevent * it being delivered on an unexpected VCPU. */ - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); /* * If this fails, it usually just indicates that we're dealing with a @@ -1750,8 +1787,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu, false); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 0; } @@ -1790,7 +1826,7 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, unsigned int tcpu = select_target_cpu(dest); int ret; - ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); + ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu); if (!ret) irq_data_update_effective_affinity(data, cpumask_of(tcpu)); @@ -1799,18 +1835,20 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, static void enable_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); } static void disable_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); } static void ack_dynirq(struct irq_data *data) @@ -1829,18 +1867,39 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } +static void lateeoi_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); + clear_evtchn(evtchn); + } +} + +static void lateeoi_mask_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EXPLICIT); + clear_evtchn(evtchn); + } +} + static int retrigger_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); - int masked; + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return 0; - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); set_evtchn(evtchn); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 1; } @@ -2054,8 +2113,8 @@ static struct irq_chip xen_lateeoi_chip __read_mostly = { .irq_mask = disable_dynirq, .irq_unmask = enable_dynirq, - .irq_ack = mask_ack_dynirq, - .irq_mask_ack = mask_ack_dynirq, + .irq_ack = lateeoi_ack_dynirq, + .irq_mask_ack = lateeoi_mask_ack_dynirq, .irq_set_affinity = set_affinity_irq, .irq_retrigger = retrigger_dynirq, diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index b234f1766810c..ad9fe51d3fb33 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -209,12 +209,6 @@ static bool evtchn_fifo_is_pending(evtchn_port_t port) return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port) -{ - event_word_t *word = event_word_from_port(port); - return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); -} - static void evtchn_fifo_mask(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); @@ -423,7 +417,6 @@ static const struct evtchn_ops evtchn_ops_fifo = { .clear_pending = evtchn_fifo_clear_pending, .set_pending = evtchn_fifo_set_pending, .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, .mask = evtchn_fifo_mask, .unmask = evtchn_fifo_unmask, .handle_events = evtchn_fifo_handle_events, diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 18a4090d07099..4d3398eff9cdf 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -21,7 +21,6 @@ struct evtchn_ops { void (*clear_pending)(evtchn_port_t port); void (*set_pending)(evtchn_port_t port); bool (*is_pending)(evtchn_port_t port); - bool (*test_and_set_mask)(evtchn_port_t port); void (*mask)(evtchn_port_t port); void (*unmask)(evtchn_port_t port); @@ -84,11 +83,6 @@ static inline bool test_evtchn(evtchn_port_t port) return evtchn_ops->is_pending(port); } -static inline bool test_and_set_mask(evtchn_port_t port) -{ - return evtchn_ops->test_and_set_mask(port); -} - static inline void mask_evtchn(evtchn_port_t port) { return evtchn_ops->mask(port); -- GitLab From b6622798bc50b625a1e62f82c7190df40c1f5b21 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sat, 6 Mar 2021 17:18:33 +0100 Subject: [PATCH 552/839] xen/events: avoid handling the same event on two cpus at the same time When changing the cpu affinity of an event it can happen today that (with some unlucky timing) the same event will be handled on the old and the new cpu at the same time. Avoid that by adding an "event active" flag to the per-event data and call the handler only if this flag isn't set. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Link: https://lore.kernel.org/r/20210306161833.4552-4-jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_base.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b27c012c86b58..8236e2364eeb4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -103,6 +103,7 @@ struct irq_info { #define EVT_MASK_REASON_EXPLICIT 0x01 #define EVT_MASK_REASON_TEMPORARY 0x02 #define EVT_MASK_REASON_EOI_PENDING 0x04 + u8 is_active; /* Is event just being handled? */ unsigned irq; evtchn_port_t evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ @@ -810,6 +811,12 @@ static void xen_evtchn_close(evtchn_port_t port) BUG(); } +static void event_handler_exit(struct irq_info *info) +{ + smp_store_release(&info->is_active, 0); + clear_evtchn(info->evtchn); +} + static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@ -828,14 +835,15 @@ static void pirq_query_unmask(int irq) static void eoi_pirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; if (!VALID_EVTCHN(evtchn)) return; - clear_evtchn(evtchn); + event_handler_exit(info); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -1666,6 +1674,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) } info = info_for_irq(irq); + if (xchg_acquire(&info->is_active, 1)) + return; dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL; if (dev) @@ -1853,12 +1863,11 @@ static void disable_dynirq(struct irq_data *data) static void ack_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); - - if (!VALID_EVTCHN(evtchn)) - return; + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; - clear_evtchn(evtchn); + if (VALID_EVTCHN(evtchn)) + event_handler_exit(info); } static void mask_ack_dynirq(struct irq_data *data) @@ -1874,7 +1883,7 @@ static void lateeoi_ack_dynirq(struct irq_data *data) if (VALID_EVTCHN(evtchn)) { do_mask(info, EVT_MASK_REASON_EOI_PENDING); - clear_evtchn(evtchn); + event_handler_exit(info); } } @@ -1885,7 +1894,7 @@ static void lateeoi_mask_ack_dynirq(struct irq_data *data) if (VALID_EVTCHN(evtchn)) { do_mask(info, EVT_MASK_REASON_EXPLICIT); - clear_evtchn(evtchn); + event_handler_exit(info); } } @@ -1998,10 +2007,11 @@ static void restore_cpu_ipis(unsigned int cpu) /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq) { - evtchn_port_t evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); + event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) -- GitLab From 0f9b05b9a01a4cf6b6bdf904faacf4796e2aa232 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 9 Mar 2021 18:00:44 +0100 Subject: [PATCH 553/839] Xen: drop exports of {set,clear}_foreign_p2m_mapping() They're only used internally, and the layering violation they contain (x86) or imply (Arm) of calling HYPERVISOR_grant_table_op() strongly advise against any (uncontrolled) use from a module. The functions also never had users except the ones from drivers/xen/grant-table.c forever since their introduction in 3.15. Signed-off-by: Jan Beulich Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/746a5cd6-1446-eda4-8b23-03c1cac30b8d@suse.com Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 2 -- arch/x86/xen/p2m.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index acb464547a54f..5316cf36cb57e 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -130,7 +130,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, return 0; } -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, @@ -145,7 +144,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return 0; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); bool __set_phys_to_machine_multi(unsigned long pfn, unsigned long mfn, unsigned long nr_pages) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index a3cc33091f46c..e912992e0b77f 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -776,7 +776,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, out: return ret; } -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, @@ -802,7 +801,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); #ifdef CONFIG_XEN_DEBUG_FS #include -- GitLab From 36caa3fedf06d377bd08bc91b50c93fb9022f5ec Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 10 Mar 2021 11:45:00 +0100 Subject: [PATCH 554/839] Xen/gntdev: don't needlessly allocate k{,un}map_ops[] They're needed only in the not-auto-translate (i.e. PV) case; there's no point in allocating memory that's never going to get accessed. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/180d50cb-5531-8952-4bf0-d65c554638ed@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 5447c5156b2e6..b60ebd8bdb261 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -136,17 +136,20 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); - add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); - add->kunmap_ops = kvcalloc(count, - sizeof(add->kunmap_ops[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || - NULL == add->kmap_ops || - NULL == add->kunmap_ops || NULL == add->pages) goto err; + if (use_ptemod) { + add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), + GFP_KERNEL); + add->kunmap_ops = kvcalloc(count, sizeof(add->kunmap_ops[0]), + GFP_KERNEL); + if (NULL == add->kmap_ops || NULL == add->kunmap_ops) + goto err; + } #ifdef CONFIG_XEN_GRANT_DMA_ALLOC add->dma_flags = dma_flags; @@ -185,8 +188,10 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, for (i = 0; i < count; i++) { add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; - add->kmap_ops[i].handle = -1; - add->kunmap_ops[i].handle = -1; + if (use_ptemod) { + add->kmap_ops[i].handle = -1; + add->kunmap_ops[i].handle = -1; + } } add->index = 0; @@ -332,8 +337,8 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, - map->pages, map->count); + err = gnttab_map_refs(map->map_ops, map->kmap_ops, map->pages, + map->count); for (i = 0; i < map->count; i++) { if (map->map_ops[i].status == GNTST_okay) -- GitLab From bce21a2b48ede7cbcab92db18bc956daf1d5c246 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 10 Mar 2021 11:45:26 +0100 Subject: [PATCH 555/839] Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF} It's not helpful if every driver has to cook its own. Generalize xenbus'es INVALID_GRANT_HANDLE and pcifront's INVALID_GRANT_REF (which shouldn't have expanded to zero to begin with). Use the constants in p2m.c and gntdev.c right away, and update field types where necessary so they would match with the constants' types (albeit without touching struct ioctl_gntdev_grant_ref's ref field, as that's part of the public interface of the kernel and would require introducing a dependency on Xen's grant_table.h public header). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/db7c38a5-0d75-d5d1-19de-e5fe9f0b9c48@suse.com Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 3 ++- arch/x86/xen/p2m.c | 4 ++-- drivers/pci/xen-pcifront.c | 4 ++-- drivers/xen/gntdev.c | 24 +++++++++++++----------- include/xen/grant_table.h | 7 +++++++ include/xen/xenbus.h | 1 - 6 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 5316cf36cb57e..84a1cea1f43b9 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -109,7 +110,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, map_ops[i].status = GNTST_general_error; unmap.host_addr = map_ops[i].host_addr, unmap.handle = map_ops[i].handle; - map_ops[i].handle = ~0; + map_ops[i].handle = INVALID_GRANT_HANDLE; if (map_ops[i].flags & GNTMAP_device_map) unmap.dev_bus_addr = map_ops[i].dev_bus_addr; else diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index e912992e0b77f..17d80f751fcb2 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -741,7 +741,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, map_ops[i].status = GNTST_general_error; unmap[0].host_addr = map_ops[i].host_addr, unmap[0].handle = map_ops[i].handle; - map_ops[i].handle = ~0; + map_ops[i].handle = INVALID_GRANT_HANDLE; if (map_ops[i].flags & GNTMAP_device_map) unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; else @@ -751,7 +751,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, kmap_ops[i].status = GNTST_general_error; unmap[1].host_addr = kmap_ops[i].host_addr, unmap[1].handle = kmap_ops[i].handle; - kmap_ops[i].handle = ~0; + kmap_ops[i].handle = INVALID_GRANT_HANDLE; if (kmap_ops[i].flags & GNTMAP_device_map) unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; else diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index c6fe0cfec0f68..2d75026482197 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -26,7 +26,7 @@ #include #include -#define INVALID_GRANT_REF (0) + #define INVALID_EVTCHN (-1) struct pci_bus_entry { @@ -42,7 +42,7 @@ struct pcifront_device { struct list_head root_buses; int evtchn; - int gnt_ref; + grant_ref_t gnt_ref; int irq; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b60ebd8bdb261..ebfd3e7b20d47 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -186,11 +186,11 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, goto err; for (i = 0; i < count; i++) { - add->map_ops[i].handle = -1; - add->unmap_ops[i].handle = -1; + add->map_ops[i].handle = INVALID_GRANT_HANDLE; + add->unmap_ops[i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - add->kmap_ops[i].handle = -1; - add->kunmap_ops[i].handle = -1; + add->kmap_ops[i].handle = INVALID_GRANT_HANDLE; + add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE; } } @@ -279,7 +279,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) map->grants[pgnr].ref, map->grants[pgnr].domid); gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, - -1 /* handle */); + INVALID_GRANT_HANDLE); return 0; } @@ -297,7 +297,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) if (!use_ptemod) { /* Note: it could already be mapped */ - if (map->map_ops[0].handle != -1) + if (map->map_ops[0].handle != INVALID_GRANT_HANDLE) return 0; for (i = 0; i < map->count; i++) { unsigned long addr = (unsigned long) @@ -306,7 +306,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->unmap_ops[i], addr, - map->flags, -1 /* handle */); + map->flags, INVALID_GRANT_HANDLE); } } else { /* @@ -332,7 +332,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->kunmap_ops[i], address, - flags, -1); + flags, INVALID_GRANT_HANDLE); } } @@ -390,7 +390,7 @@ static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); - map->unmap_ops[offset+i].handle = -1; + map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } return err; } @@ -406,13 +406,15 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, * already unmapped some of the grants. Only unmap valid ranges. */ while (pages && !err) { - while (pages && map->unmap_ops[offset].handle == -1) { + while (pages && + map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset+range].handle == -1) + if (map->unmap_ops[offset + range].handle == + INVALID_GRANT_HANDLE) break; range++; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 0b1182a3cf412..cb854df031ce0 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -50,6 +50,13 @@ #include #include +/* + * Technically there's no reliably invalid grant reference or grant handle, + * so pick the value that is the most unlikely one to be observed valid. + */ +#define INVALID_GRANT_REF ((grant_ref_t)-1) +#define INVALID_GRANT_HANDLE ((grant_handle_t)-1) + #define GNTTAB_RESERVED_XENSTORE 1 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0b1386073d49b..b94074c827721 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -51,7 +51,6 @@ #define XENBUS_MAX_RING_GRANT_ORDER 4 #define XENBUS_MAX_RING_GRANTS (1U << XENBUS_MAX_RING_GRANT_ORDER) -#define INVALID_GRANT_HANDLE (~0U) /* Register callback to watch this node. */ struct xenbus_watch -- GitLab From f1d20d8643e54dcde242fd2c8748063ed75702a8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 10 Mar 2021 11:46:13 +0100 Subject: [PATCH 556/839] Xen/gntdev: don't needlessly use kvcalloc() Requesting zeroed memory when all of it will be overwritten subsequently by all ones is a waste of processing bandwidth. In fact, rather than recording zeroed ->grants[], fill that array too with more appropriate "invalid" indicators. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/9a726be2-4893-8ffe-0ef1-b70dd1c229b1@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index ebfd3e7b20d47..f01d58c7a042e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -133,9 +133,12 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, if (NULL == add) return NULL; - add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); - add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); - add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); + add->grants = kvmalloc_array(count, sizeof(add->grants[0]), + GFP_KERNEL); + add->map_ops = kvmalloc_array(count, sizeof(add->map_ops[0]), + GFP_KERNEL); + add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), + GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || @@ -143,10 +146,10 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, NULL == add->pages) goto err; if (use_ptemod) { - add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), - GFP_KERNEL); - add->kunmap_ops = kvcalloc(count, sizeof(add->kunmap_ops[0]), - GFP_KERNEL); + add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), + GFP_KERNEL); + add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]), + GFP_KERNEL); if (NULL == add->kmap_ops || NULL == add->kunmap_ops) goto err; } @@ -186,6 +189,8 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, goto err; for (i = 0; i < count; i++) { + add->grants[i].domid = DOMID_INVALID; + add->grants[i].ref = INVALID_GRANT_REF; add->map_ops[i].handle = INVALID_GRANT_HANDLE; add->unmap_ops[i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { -- GitLab From ce6ed1c4c9876c2880f52f18c41ef2a30d070bc5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 4 Mar 2021 20:37:08 +0900 Subject: [PATCH 557/839] kbuild: rebuild GCC plugins when the compiler is upgraded Linus reported a build error due to the GCC plugin incompatibility when the compiler is upgraded. [1] GCC plugins are tied to a particular GCC version. So, they must be rebuilt when the compiler is upgraded. This seems to be a long-standing flaw since the initial support of GCC plugins. Extend commit 8b59cd81dc5e ("kbuild: ensure full rebuild when the compiler is updated"), so that GCC plugins are covered by the compiler upgrade detection. [1]: https://lore.kernel.org/lkml/CAHk-=wieoN5ttOy7SnsGwZv+Fni3R6m-Ut=oxih6bbZ28G+4dw@mail.gmail.com/ Reported-by: Linus Torvalds Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook --- Makefile | 1 + include/linux/compiler-version.h | 14 ++++++++++++++ include/linux/kconfig.h | 2 -- init/Kconfig | 8 ++++---- scripts/gcc-plugins/Makefile | 1 + 5 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 include/linux/compiler-version.h diff --git a/Makefile b/Makefile index 3f9f44eac27f5..10bb0a62cc7dd 100644 --- a/Makefile +++ b/Makefile @@ -479,6 +479,7 @@ USERINCLUDE := \ -I$(objtree)/arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/include/uapi \ -I$(objtree)/include/generated/uapi \ + -include $(srctree)/include/linux/compiler-version.h \ -include $(srctree)/include/linux/kconfig.h # Use LINUXINCLUDE when you must reference the include/ directory. diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h new file mode 100644 index 0000000000000..2b2972c77c622 --- /dev/null +++ b/include/linux/compiler-version.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifdef __LINUX_COMPILER_VERSION_H +#error "Please do not include . This is done by the build system." +#endif +#define __LINUX_COMPILER_VERSION_H + +/* + * This header exists to force full rebuild when the compiler is upgraded. + * + * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT" + * and add dependency on include/config/cc/version/text.h, which is touched + * by Kconfig when the version string from the compiler changes. + */ diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index e78e17a76dc95..24a59cb069635 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -2,8 +2,6 @@ #ifndef __LINUX_KCONFIG_H #define __LINUX_KCONFIG_H -/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ - #include #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/init/Kconfig b/init/Kconfig index 22946fe5ded98..30c849094c280 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -20,10 +20,10 @@ config CC_VERSION_TEXT When the compiler is updated, Kconfig will be invoked. - Ensure full rebuild when the compiler is updated - include/linux/kconfig.h contains this option in the comment line so - fixdep adds include/config/cc/version/text.h into the auto-generated - dependency. When the compiler is updated, syncconfig will touch it - and then every file will be rebuilt. + include/linux/compiler-version.h contains this option in the comment + line so fixdep adds include/config/cc/version/text.h into the + auto-generated dependency. When the compiler is updated, syncconfig + will touch it and then every file will be rebuilt. config CC_IS_GCC def_bool $(success,test "$(cc-name)" = GCC) diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index b5487cce69e8e..1952d3bb80c69 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -22,6 +22,7 @@ always-y += $(GCC_PLUGIN) GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin) plugin_cxxflags = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \ + -include $(srctree)/include/linux/compiler-version.h \ -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ -ggdb -Wno-narrowing -Wno-unused-variable \ -- GitLab From 2eab791f940b98d0bdd4d1e8c4857f3dec3c7d04 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 8 Mar 2021 07:28:20 +0100 Subject: [PATCH 558/839] kbuild: dummy-tools: support MPROFILE_KERNEL checks for ppc ppc64le checks for -mprofile-kernel to define MPROFILE_KERNEL Kconfig. Kconfig calls arch/powerpc/tools/gcc-check-mprofile-kernel.sh for that purpose. This script performs two checks: 1) build with -mprofile-kernel should contain "_mcount" 2) build with -mprofile-kernel with a function marked as "notrace" should not produce "_mcount" So support this in dummy-tools' gcc, so that we have MPROFILE_KERNEL always true. Signed-off-by: Jiri Slaby Signed-off-by: Masahiro Yamada --- scripts/dummy-tools/gcc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 0d0589cf8184e..7b10332b23ba2 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -73,6 +73,15 @@ if arg_contain -S "$@"; then echo "%gs" exit 0 fi + + # For arch/powerpc/tools/gcc-check-mprofile-kernel.sh + if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" && + arg_contain -mprofile-kernel "$@"; then + if ! test -t 0 && ! grep -q notrace; then + echo "_mcount" + fi + exit 0 + fi fi # To set GCC_PLUGINS -- GitLab From bf3c255150619b71badb328c4dab48401a7ed62d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 8 Mar 2021 10:46:56 -0800 Subject: [PATCH 559/839] kbuild: Allow LTO to be selected with KASAN_HW_TAGS While LTO with KASAN is normally not useful, hardware tag-based KASAN can be used also in production kernels with ARM64_MTE. Therefore, allow KASAN_HW_TAGS to be selected together with HAS_LTO_CLANG. Reported-by: Alistair Delva Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 2bb30673d8e6f..2e7139b39e8f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -638,7 +638,7 @@ config HAS_LTO_CLANG depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) depends on ARCH_SUPPORTS_LTO_CLANG depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT - depends on !KASAN + depends on !KASAN || KASAN_HW_TAGS depends on !GCOV_KERNEL help The compiler and Kconfig options support building with Clang's -- GitLab From f9bc754be475582e2cc44296f7de0aaedbdbefeb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 10 Mar 2021 01:25:45 +0900 Subject: [PATCH 560/839] kbuild: dummy-tools: adjust to scripts/cc-version.sh Commit aec6c60a01d3 ("kbuild: check the minimum compiler version in Kconfig") changed how the script detects the compiler version. Get 'make CROSS_COMPILE=scripts/dummy-tools/' back working again. Fixes: aec6c60a01d3 ("kbuild: check the minimum compiler version in Kconfig") Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Acked-by: Miguel Ojeda --- scripts/dummy-tools/gcc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 7b10332b23ba2..39e65fee59bd3 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -57,9 +57,9 @@ if arg_contain --version "$@"; then fi if arg_contain -E "$@"; then - # For scripts/gcc-version.sh; This emulates GCC 20.0.0 + # For scripts/cc-version.sh; This emulates GCC 20.0.0 if arg_contain - "$@"; then - sed 's/^__GNUC__$/20/; s/^__GNUC_MINOR__$/0/; s/^__GNUC_PATCHLEVEL__$/0/' + sed -n '/^GCC/{s/__GNUC__/20/; s/__GNUC_MINOR__/0/; s/__GNUC_PATCHLEVEL__/0/; p;}' exit 0 else echo "no input files" >&2 -- GitLab From 64bfc99429a5c9613fffb0e54f2f8c2ddc8c1d04 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 10 Mar 2021 20:08:24 +0900 Subject: [PATCH 561/839] kbuild: remove unneeded -O option to dtc This piece of code converts the target suffix to the dtc -O option: *.dtb -> -O dtb *.dt.yaml -> -O yaml Commit ce88c9c79455 ("kbuild: Add support to build overlays (%.dtbo)") added the third case: *.dtbo -> -O dtbo This works thanks to commit 163f0469bf2e ("dtc: Allow overlays to have .dtbo extension") in the upstream DTC, which has already been pulled in the kernel. However, I think it is a bit odd because "dtbo" is not a format name. At least, it does not show up in the help message of dtc. $ scripts/dtc/dtc --help [ snip ] -O, --out-format Output formats are: dts - device tree source text dtb - device tree blob yaml - device tree encoded as YAML asm - assembler source So, I am not a big fan of the second hunk of that change: } else if (streq(outform, "dtbo")) { dt_to_blob(outf, dti, outversion); Anyway, we did not need to do this in Makefile in the first place. guess_type_by_name() had already understood ".yaml" before commit 4f0e3a57d6eb ("kbuild: Add support for DT binding schema checks"), and now does ".dtbo" as well. Makefile does not need to duplicate the same logic. Let's leave it to dtc. Signed-off-by: Masahiro Yamada Reviewed-by: Viresh Kumar Acked-by: Rob Herring --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index eee59184de640..90a4e04cd8f58 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -327,7 +327,7 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE quiet_cmd_dtc = DTC $@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ - $(DTC) -O $(patsubst .%,%,$(suffix $@)) -o $@ -b 0 \ + $(DTC) -o $@ -b 0 \ $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \ -d $(depfile).dtc.tmp $(dtc-tmp) ; \ cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) -- GitLab From 4c273d23c44ad49c73353737b303e78585a4503f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 10 Mar 2021 22:54:22 +0900 Subject: [PATCH 562/839] kbuild: remove LLVM=1 test from HAS_LTO_CLANG As Documentation/kbuild/llvm.rst notes, LLVM=1 switches the default of tools, but you can still override CC, LD, etc. individually. This LLVM=1 check is unneeded because each tool is already checked separately. "make CC=clang LD=ld.lld NM=llvm-nm AR=llvm-ar LLVM_IAS=1 menuconfig" should be able to enable Clang LTO. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor --- arch/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 2e7139b39e8f6..ecfd3520b676b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -632,7 +632,6 @@ config HAS_LTO_CLANG def_bool y # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD - depends on $(success,test $(LLVM) -eq 1) depends on $(success,test $(LLVM_IAS) -eq 1) depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) -- GitLab From 285a65f1a10f87088cefd6c7ea6ff26b143339b3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 11 Mar 2021 15:30:54 +0900 Subject: [PATCH 563/839] kbuild: remove meaningless parameter to $(call if_changed_rule,dtc) This is a remnant of commit 78046fabe6e7 ("kbuild: determine the output format of DTC by the target suffix"). The parameter "yaml" is meaningless because cmd_dtc no loner takes $(2). Reported-by: Rob Herring Signed-off-by: Masahiro Yamada --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 90a4e04cd8f58..8cd67b1b6d15a 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -352,7 +352,7 @@ define rule_dtc endef $(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_rule,dtc,yaml) + $(call if_changed_rule,dtc) dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) -- GitLab From 874a52f9b693ed8bf7a92b3592a547ce8a684e6f Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sat, 27 Feb 2021 23:46:25 -0500 Subject: [PATCH 564/839] drm/fb-helper: only unmap if buffer not null drm_fbdev_cleanup() can be called when fb_helper->buffer is null, hence fb_helper->buffer should be checked before calling drm_client_buffer_vunmap(). This buffer is also checked in drm_client_framebuffer_delete(), so we should also do the same thing for drm_client_buffer_vunmap(). [ 199.128742] RIP: 0010:drm_client_buffer_vunmap+0xd/0x20 [ 199.129031] Code: 43 18 48 8b 53 20 49 89 45 00 49 89 55 08 5b 44 89 e0 41 5c 41 5d 41 5e 5d c3 0f 1f 00 53 48 89 fb 48 8d 7f 10 e8 73 7d a1 ff <48> 8b 7b 10 48 8d 73 18 5b e9 75 53 fc ff 0 f 1f 44 00 00 48 b8 00 [ 199.130041] RSP: 0018:ffff888103f3fc88 EFLAGS: 00010282 [ 199.130329] RAX: 0000000000000001 RBX: 0000000000000000 RCX: ffffffff8214d46d [ 199.130733] RDX: 1ffffffff079c6b9 RSI: 0000000000000246 RDI: ffffffff83ce35c8 [ 199.131119] RBP: ffff888103d25458 R08: 0000000000000001 R09: fffffbfff0791761 [ 199.131505] R10: ffffffff83c8bb07 R11: fffffbfff0791760 R12: 0000000000000000 [ 199.131891] R13: ffff888103d25468 R14: ffff888103d25418 R15: ffff888103f18120 [ 199.132277] FS: 00007f36fdcbb6a0(0000) GS:ffff88815b400000(0000) knlGS:0000000000000000 [ 199.132721] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 199.133033] CR2: 0000000000000010 CR3: 0000000103d26000 CR4: 00000000000006f0 [ 199.133420] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 199.133807] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 199.134195] Call Trace: [ 199.134333] drm_fbdev_cleanup+0x179/0x1a0 [ 199.134562] drm_fbdev_client_unregister+0x2b/0x40 [ 199.134828] drm_client_dev_unregister+0xa8/0x180 [ 199.135088] drm_dev_unregister+0x61/0x110 [ 199.135315] mgag200_pci_remove+0x38/0x52 [mgag200] [ 199.135586] pci_device_remove+0x62/0xe0 [ 199.135806] device_release_driver_internal+0x148/0x270 [ 199.136094] driver_detach+0x76/0xe0 [ 199.136294] bus_remove_driver+0x7e/0x100 [ 199.136521] pci_unregister_driver+0x28/0xf0 [ 199.136759] __x64_sys_delete_module+0x268/0x300 [ 199.137016] ? __ia32_sys_delete_module+0x300/0x300 [ 199.137285] ? call_rcu+0x3e4/0x580 [ 199.137481] ? fpregs_assert_state_consistent+0x4d/0x60 [ 199.137767] ? exit_to_user_mode_prepare+0x2f/0x130 [ 199.138037] do_syscall_64+0x33/0x40 [ 199.138237] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 199.138517] RIP: 0033:0x7f36fdc3dcf7 Signed-off-by: Tong Zhang Fixes: 763aea17bf57 ("drm/fb-helper: Unmap client buffer during shutdown") Cc: Thomas Zimmermann Cc: Sam Ravnborg Cc: Maxime Ripard Cc: Maarten Lankhorst Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.11+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210228044625.171151-1-ztong0001@gmail.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index b9a616737c0ee..f6baa2046124d 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2048,7 +2048,7 @@ static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper) if (shadow) vfree(shadow); - else + else if (fb_helper->buffer) drm_client_buffer_vunmap(fb_helper->buffer); drm_client_framebuffer_delete(fb_helper->buffer); -- GitLab From e8dd3506dcf380d4cbe983422dfed1909011b02a Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 4 Feb 2021 15:57:05 +0100 Subject: [PATCH 565/839] drm/qxl: unpin release objects Balances the qxl_create_bo(..., pinned=true, ...); call in qxl_release_bo_alloc(). Signed-off-by: Gerd Hoffmann Acked-by: Thomas Zimmermann Link: http://patchwork.freedesktop.org/patch/msgid/20210204145712.1531203-5-kraxel@redhat.com (cherry picked from commit 65ffea3c6e738f37bb15ff3ee480415c793df893) Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 0fcfc952d5e9e..93b5f7ab055de 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -347,6 +347,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, mutex_lock(&qdev->release_mutex); if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) { + qxl_bo_unpin(qdev->current_release_bo[cur_idx]); qxl_bo_unref(&qdev->current_release_bo[cur_idx]); qdev->current_release_bo_offset[cur_idx] = 0; qdev->current_release_bo[cur_idx] = NULL; -- GitLab From e998d3c8cba93ab4075d6bcc56b1d4451d85efe5 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 17 Feb 2021 13:32:06 +0100 Subject: [PATCH 566/839] drm/qxl: fix lockdep issue in qxl_alloc_release_reserved Call qxl_bo_unpin (which does a reservation) without holding the release_mutex lock. Fixes lockdep (correctly) warning on a possible deadlock. Fixes: e8dd3506dcf3 ("drm/qxl: unpin release objects") Signed-off-by: Gerd Hoffmann Acked-by: Thomas Zimmermann Link: http://patchwork.freedesktop.org/patch/msgid/20210217123213.2199186-5-kraxel@redhat.com (cherry picked from commit 19089b760e56c97458c272e90e43da761b05cf12) Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 93b5f7ab055de..b372455e27298 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -321,7 +321,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, int type, struct qxl_release **release, struct qxl_bo **rbo) { - struct qxl_bo *bo; + struct qxl_bo *bo, *free_bo = NULL; int idr_ret; int ret = 0; union qxl_release_info *info; @@ -347,8 +347,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, mutex_lock(&qdev->release_mutex); if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) { - qxl_bo_unpin(qdev->current_release_bo[cur_idx]); - qxl_bo_unref(&qdev->current_release_bo[cur_idx]); + free_bo = qdev->current_release_bo[cur_idx]; qdev->current_release_bo_offset[cur_idx] = 0; qdev->current_release_bo[cur_idx] = NULL; } @@ -356,6 +355,10 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, ret = qxl_release_bo_alloc(qdev, &qdev->current_release_bo[cur_idx]); if (ret) { mutex_unlock(&qdev->release_mutex); + if (free_bo) { + qxl_bo_unpin(free_bo); + qxl_bo_unref(&free_bo); + } qxl_release_free(qdev, *release); return ret; } @@ -371,6 +374,10 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, *rbo = bo; mutex_unlock(&qdev->release_mutex); + if (free_bo) { + qxl_bo_unpin(free_bo); + qxl_bo_unref(&free_bo); + } ret = qxl_release_list_add(*release, bo); qxl_bo_unref(&bo); -- GitLab From 39a3898abf4dfb8702929832836b1f785b1c2bc4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 24 Feb 2021 13:55:28 -0800 Subject: [PATCH 567/839] fbdev: atyfb: always declare aty_{ld,st}_lcd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previously added stubs for aty_{ld,}st_lcd() make it so that these functions are used regardless of the config options that were guarding them, so remove the #ifdef/#endif lines and make their declarations always visible. This fixes build warnings that were reported by clang: drivers/video/fbdev/aty/atyfb_base.c:180:6: warning: no previous prototype for function 'aty_st_lcd' [-Wmissing-prototypes] void aty_st_lcd(int index, u32 val, const struct atyfb_par *par) ^ drivers/video/fbdev/aty/atyfb_base.c:180:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void aty_st_lcd(int index, u32 val, const struct atyfb_par *par) drivers/video/fbdev/aty/atyfb_base.c:183:5: warning: no previous prototype for function 'aty_ld_lcd' [-Wmissing-prototypes] u32 aty_ld_lcd(int index, const struct atyfb_par *par) ^ drivers/video/fbdev/aty/atyfb_base.c:183:1: note: declare 'static' if the function is not intended to be used outside of this translation unit u32 aty_ld_lcd(int index, const struct atyfb_par *par) They should not be marked as static since they are used in mach64_ct.c. Fixes: bfa5782b9caa ("fbdev: atyfb: add stubs for aty_{ld,st}_lcd()") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Daniel Vetter Cc: Nick Desaulniers Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Bartlomiej Zolnierkiewicz Cc: Sam Ravnborg Cc: David Airlie Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210224215528.822-1-rdunlap@infradead.org Acked-by: Nick Desaulniers Signed-off-by: Maarten Lankhorst --- drivers/video/fbdev/aty/atyfb.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h index 551372f9b9aa2..465f55beb97f9 100644 --- a/drivers/video/fbdev/aty/atyfb.h +++ b/drivers/video/fbdev/aty/atyfb.h @@ -287,11 +287,8 @@ static inline void aty_st_8(int regindex, u8 val, const struct atyfb_par *par) #endif } -#if defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD) || \ -defined (CONFIG_FB_ATY_BACKLIGHT) extern void aty_st_lcd(int index, u32 val, const struct atyfb_par *par); extern u32 aty_ld_lcd(int index, const struct atyfb_par *par); -#endif /* * DAC operations -- GitLab From b266409310c6b3c523d824616bc3328026b4ee63 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 26 Feb 2021 09:30:08 -0800 Subject: [PATCH 568/839] fbdev: atyfb: use LCD management functions for PPC_PMAC also MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include PPC_PMAC in the configs that use aty_ld_lcd() and aty_st_lcd() implementations so that the PM code may work correctly for PPC_PMAC. Suggested-by: Ville Syrjälä Signed-off-by: Randy Dunlap Cc: Daniel Vetter Cc: Nick Desaulniers Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Bartlomiej Zolnierkiewicz Cc: Sam Ravnborg Cc: David Airlie Cc: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210226173008.18236-1-rdunlap@infradead.org Signed-off-by: Maarten Lankhorst --- drivers/video/fbdev/aty/atyfb_base.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index e946903a86c2d..1aef3d6ebd880 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -133,7 +133,7 @@ #define PRINTKE(fmt, args...) printk(KERN_ERR "atyfb: " fmt, ## args) #if defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_GENERIC_LCD) || \ -defined(CONFIG_FB_ATY_BACKLIGHT) +defined(CONFIG_FB_ATY_BACKLIGHT) || defined (CONFIG_PPC_PMAC) static const u32 lt_lcd_regs[] = { CNFG_PANEL_LG, LCD_GEN_CNTL_LG, @@ -175,8 +175,8 @@ u32 aty_ld_lcd(int index, const struct atyfb_par *par) return aty_ld_le32(LCD_DATA, par); } } -#else /* defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_BACKLIGHT) \ - defined(CONFIG_FB_ATY_GENERIC_LCD) */ +#else /* defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_BACKLIGHT) || + defined(CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_PPC_PMAC) */ void aty_st_lcd(int index, u32 val, const struct atyfb_par *par) { } @@ -184,7 +184,8 @@ u32 aty_ld_lcd(int index, const struct atyfb_par *par) { return 0; } -#endif /* defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD) */ +#endif /* defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_BACKLIGHT) || + defined (CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_PPC_PMAC) */ #ifdef CONFIG_FB_ATY_GENERIC_LCD /* -- GitLab From 301469c121bfe4a243460ed0b6abf391139c6bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Turinsk=C3=BD?= Date: Sun, 28 Feb 2021 17:36:58 +0100 Subject: [PATCH 569/839] MAINTAINERS: update drm bug reporting URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original bugzilla seems to be read-only now, linking to the gitlab for new bugs. Signed-off-by: Pavel Turinský Cc: trivial@kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210228163658.54962-1-ledoian@kam.mff.cuni.cz Signed-off-by: Maarten Lankhorst --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d92f85ca831d3..f5f8e8966668e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5835,7 +5835,7 @@ M: David Airlie M: Daniel Vetter L: dri-devel@lists.freedesktop.org S: Maintained -B: https://bugs.freedesktop.org/ +B: https://gitlab.freedesktop.org/drm C: irc://chat.freenode.net/dri-devel T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ -- GitLab From 659ab7a49cbebe0deffcbe1f9560e82006b21817 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Mar 2021 14:32:29 +0100 Subject: [PATCH 570/839] drm: Use USB controller's DMA mask when importing dmabufs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USB devices cannot perform DMA and hence have no dma_mask set in their device structure. Therefore importing dmabuf into a USB-based driver fails, which breaks joining and mirroring of display in X11. For USB devices, pick the associated USB controller as attachment device. This allows the DRM import helpers to perform the DMA setup. If the DMA controller does not support DMA transfers, we're out of luck and cannot import. Our current USB-based DRM drivers don't use DMA, so the actual DMA device is not important. Tested by joining/mirroring displays of udl and radeon under Gnome/X11. v8: * release dmadev if device initialization fails (Noralf) * fix commit description (Noralf) v7: * fix use-before-init bug in gm12u320 (Dan) v6: * implement workaround in DRM drivers and hold reference to DMA device while USB device is in use * remove dev_is_usb() (Greg) * collapse USB helper into usb_intf_get_dma_device() (Alan) * integrate Daniel's TODO statement (Daniel) * fix typos (Greg) v5: * provide a helper for USB interfaces (Alan) * add FIXME item to documentation and TODO list (Daniel) v4: * implement workaround with USB helper functions (Greg) * use struct usb_device->bus->sysdev as DMA device (Takashi) v3: * drop gem_create_object * use DMA mask of USB controller, if any (Daniel, Christian, Noralf) v2: * move fix to importer side (Christian, Daniel) * update SHMEM and CMA helpers for new PRIME callbacks Signed-off-by: Thomas Zimmermann Fixes: 6eb0233ec2d0 ("usb: don't inherity DMA properties for USB devices") Tested-by: Pavel Machek Reviewed-by: Greg Kroah-Hartman Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Noralf Trønnes Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: # v5.10+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210303133229.3288-1-tzimmermann@suse.de Signed-off-by: Maarten Lankhorst --- Documentation/gpu/todo.rst | 21 ++++++++++++++++ drivers/gpu/drm/tiny/gm12u320.c | 44 +++++++++++++++++++++++++++------ drivers/gpu/drm/udl/udl_drv.c | 17 +++++++++++++ drivers/gpu/drm/udl/udl_drv.h | 1 + drivers/gpu/drm/udl/udl_main.c | 10 ++++++++ drivers/usb/core/usb.c | 32 ++++++++++++++++++++++++ include/linux/usb.h | 2 ++ 7 files changed, 119 insertions(+), 8 deletions(-) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 40ccac61137e2..22ce801e3a8d9 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -613,6 +613,27 @@ Some of these date from the very introduction of KMS in 2008 ... Level: Intermediate +Remove automatic page mapping from dma-buf importing +---------------------------------------------------- + +When importing dma-bufs, the dma-buf and PRIME frameworks automatically map +imported pages into the importer's DMA area. drm_gem_prime_fd_to_handle() and +drm_gem_prime_handle_to_fd() require that importers call dma_buf_attach() +even if they never do actual device DMA, but only CPU access through +dma_buf_vmap(). This is a problem for USB devices, which do not support DMA +operations. + +To fix the issue, automatic page mappings should be removed from the +buffer-sharing code. Fixing this is a bit more involved, since the import/export +cache is also tied to &drm_gem_object.import_attach. Meanwhile we paper over +this problem for USB devices by fishing out the USB host controller device, as +long as that supports DMA. Otherwise importing can still needlessly fail. + +Contact: Thomas Zimmermann , Daniel Vetter + +Level: Advanced + + Better Testing ============== diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index 33f65f4626e5a..23866a54e3f91 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -83,6 +83,7 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); struct gm12u320_device { struct drm_device dev; + struct device *dmadev; struct drm_simple_display_pipe pipe; struct drm_connector conn; unsigned char *cmd_buf; @@ -601,6 +602,22 @@ static const uint64_t gm12u320_pipe_modifiers[] = { DRM_FORMAT_MOD_INVALID }; +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *gm12u320_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + + if (!gm12u320->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, gm12u320->dmadev); +} + DEFINE_DRM_GEM_FOPS(gm12u320_fops); static const struct drm_driver gm12u320_drm_driver = { @@ -614,6 +631,7 @@ static const struct drm_driver gm12u320_drm_driver = { .fops = &gm12u320_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = gm12u320_gem_prime_import, }; static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = { @@ -640,15 +658,18 @@ static int gm12u320_usb_probe(struct usb_interface *interface, struct gm12u320_device, dev); if (IS_ERR(gm12u320)) return PTR_ERR(gm12u320); + dev = &gm12u320->dev; + + gm12u320->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!gm12u320->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work); mutex_init(&gm12u320->fb_update.lock); - dev = &gm12u320->dev; - ret = drmm_mode_config_init(dev); if (ret) - return ret; + goto err_put_device; dev->mode_config.min_width = GM12U320_USER_WIDTH; dev->mode_config.max_width = GM12U320_USER_WIDTH; @@ -658,15 +679,15 @@ static int gm12u320_usb_probe(struct usb_interface *interface, ret = gm12u320_usb_alloc(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_set_ecomode(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_conn_init(gm12u320); if (ret) - return ret; + goto err_put_device; ret = drm_simple_display_pipe_init(&gm12u320->dev, &gm12u320->pipe, @@ -676,24 +697,31 @@ static int gm12u320_usb_probe(struct usb_interface *interface, gm12u320_pipe_modifiers, &gm12u320->conn); if (ret) - return ret; + goto err_put_device; drm_mode_config_reset(dev); usb_set_intfdata(interface, dev); ret = drm_dev_register(dev, 0); if (ret) - return ret; + goto err_put_device; drm_fbdev_generic_setup(dev, 0); return 0; + +err_put_device: + put_device(gm12u320->dmadev); + return ret; } static void gm12u320_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + put_device(gm12u320->dmadev); + gm12u320->dmadev = NULL; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 9269092697d8c..5703277c6f527 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -32,6 +32,22 @@ static int udl_usb_resume(struct usb_interface *interface) return drm_mode_config_helper_resume(dev); } +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct udl_device *udl = to_udl(dev); + + if (!udl->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev); +} + DEFINE_DRM_GEM_FOPS(udl_driver_fops); static const struct drm_driver driver = { @@ -40,6 +56,7 @@ static const struct drm_driver driver = { /* GEM hooks */ .fops = &udl_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = udl_driver_gem_prime_import, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 875e73551ae98..cc16a13316e4e 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -50,6 +50,7 @@ struct urb_list { struct udl_device { struct drm_device drm; struct device *dev; + struct device *dmadev; struct drm_simple_display_pipe display_pipe; diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 0e2a376cb0752..853f147036f6b 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -315,6 +315,10 @@ int udl_init(struct udl_device *udl) DRM_DEBUG("\n"); + udl->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!udl->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ + mutex_init(&udl->gem_lock); if (!udl_parse_vendor_descriptor(udl)) { @@ -343,12 +347,18 @@ int udl_init(struct udl_device *udl) err: if (udl->urbs.count) udl_free_urb_list(dev); + put_device(udl->dmadev); DRM_ERROR("%d\n", ret); return ret; } int udl_drop_usb(struct drm_device *dev) { + struct udl_device *udl = to_udl(dev); + udl_free_urb_list(dev); + put_device(udl->dmadev); + udl->dmadev = NULL; + return 0; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 8f07b05161009..a566bb494e246 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -748,6 +748,38 @@ void usb_put_intf(struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usb_put_intf); +/** + * usb_intf_get_dma_device - acquire a reference on the usb interface's DMA endpoint + * @intf: the usb interface + * + * While a USB device cannot perform DMA operations by itself, many USB + * controllers can. A call to usb_intf_get_dma_device() returns the DMA endpoint + * for the given USB interface, if any. The returned device structure must be + * released with put_device(). + * + * See also usb_get_dma_device(). + * + * Returns: A reference to the usb interface's DMA endpoint; or NULL if none + * exists. + */ +struct device *usb_intf_get_dma_device(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct device *dmadev; + + if (!udev->bus) + return NULL; + + dmadev = get_device(udev->bus->sysdev); + if (!dmadev || !dmadev->dma_mask) { + put_device(dmadev); + return NULL; + } + + return dmadev; +} +EXPORT_SYMBOL_GPL(usb_intf_get_dma_device); + /* USB device locking * * USB devices and interfaces are locked using the semaphore in their diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d72c4e0713c1..d6a41841b93e4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -746,6 +746,8 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, extern int usb_reset_device(struct usb_device *dev); extern void usb_queue_reset_device(struct usb_interface *dev); +extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); + #ifdef CONFIG_ACPI extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); -- GitLab From d228f8d8749994eb5c52636090709109120ed339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 3 Mar 2021 15:47:14 +0100 Subject: [PATCH 571/839] drm/ttm: soften TTM warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QXL indeed unrefs pinned BOs and the warnings are spamming peoples log files. Make sure we warn only once until the QXL driver is fixed. Signed-off-by: Christian König References: https://lore.kernel.org/lkml/YD+eYcMMcdlXB8PY@alley/ Link: https://patchwork.freedesktop.org/patch/422834/ Reviewed-by: Daniel Vetter Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/ttm/ttm_bo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 20a25660b35b8..101a68dc615b6 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -136,7 +136,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, struct ttm_bo_device *bdev = bo->bdev; struct ttm_resource_manager *man; - dma_resv_assert_held(bo->base.resv); + if (!bo->deleted) + dma_resv_assert_held(bo->base.resv); if (bo->pin_count) { ttm_bo_del_from_lru(bo); @@ -508,8 +509,11 @@ static void ttm_bo_release(struct kref *kref) * Make pinned bos immediately available to * shrinkers, now that they are queued for * destruction. + * + * FIXME: QXL is triggering this. Can be removed when the + * driver is fixed. */ - if (WARN_ON(bo->pin_count)) { + if (WARN_ON_ONCE(bo->pin_count)) { bo->pin_count = 0; ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); } -- GitLab From ca63d76fd2319db984f2875992643f900caf2c72 Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Tue, 2 Mar 2021 17:17:25 -0800 Subject: [PATCH 572/839] drm/ttm: Fix TTM page pool accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freed pages are not subtracted from the allocated_pages counter in ttm_pool_type_fini(), causing a leak in the count on device removal. The next shrinker invocation loops forever trying to free pages that are no longer in the pool: rcu: INFO: rcu_sched self-detected stall on CPU rcu: 3-....: (9998 ticks this GP) idle=54e/1/0x4000000000000000 softirq=434857/434857 fqs=2237 (t=10001 jiffies g=2194533 q=49211) NMI backtrace for cpu 3 CPU: 3 PID: 1034 Comm: kswapd0 Tainted: P O 5.11.0-com #1 Hardware name: System manufacturer System Product Name/PRIME X570-PRO, BIOS 1405 11/19/2019 Call Trace: ... sysvec_apic_timer_interrupt+0x77/0x80 asm_sysvec_apic_timer_interrupt+0x12/0x20 RIP: 0010:mutex_unlock+0x16/0x20 Code: e7 48 8b 70 10 e8 7a 53 77 ff eb aa e8 43 6c ff ff 0f 1f 00 65 48 8b 14 25 00 6d 01 00 31 c9 48 89 d0 f0 48 0f b1 0f 48 39 c2 <74> 05 e9 e3 fe ff ff c3 66 90 48 8b 47 20 48 85 c0 74 0f 8b 50 10 RSP: 0018:ffffbdb840797be8 EFLAGS: 00000246 RAX: ffff9ff445a41c00 RBX: ffffffffc02a9ef8 RCX: 0000000000000000 RDX: ffff9ff445a41c00 RSI: ffffbdb840797c78 RDI: ffffffffc02a9ac0 RBP: 0000000000000080 R08: 0000000000000000 R09: ffffbdb840797c80 R10: 0000000000000000 R11: fffffffffffffff5 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000084 R15: ffffffffc02a9a60 ttm_pool_shrink+0x7d/0x90 [ttm] ttm_pool_shrinker_scan+0x5/0x20 [ttm] do_shrink_slab+0x13a/0x1a0 ... debugfs shows the incorrect total: $ cat /sys/kernel/debug/dri/0/ttm_page_pool --- 0--- --- 1--- --- 2--- --- 3--- --- 4--- --- 5--- --- 6--- --- 7--- --- 8--- --- 9--- ---10--- wc : 0 0 0 0 0 0 0 0 0 0 0 uc : 0 0 0 0 0 0 0 0 0 0 0 wc 32 : 0 0 0 0 0 0 0 0 0 0 0 uc 32 : 0 0 0 0 0 0 0 0 0 0 0 DMA uc : 0 0 0 0 0 0 0 0 0 0 0 DMA wc : 0 0 0 0 0 0 0 0 0 0 0 DMA : 0 0 0 0 0 0 0 0 0 0 0 total : 3029 of 8244261 Using ttm_pool_type_take() to remove pages from the pool before freeing them correctly accounts for the freed pages. Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Signed-off-by: Anthony DeRossi Link: https://patchwork.freedesktop.org/patch/msgid/20210303011723.22512-1-ajderossi@gmail.com Reviewed-by: Christian König Signed-off-by: Christian König Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/ttm/ttm_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 6e27cb1bf48b2..4eb6efb8b8c02 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -268,13 +268,13 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, /* Remove a pool_type from the global shrinker list and free all pages */ static void ttm_pool_type_fini(struct ttm_pool_type *pt) { - struct page *p, *tmp; + struct page *p; mutex_lock(&shrinker_lock); list_del(&pt->shrinker_list); mutex_unlock(&shrinker_lock); - list_for_each_entry_safe(p, tmp, &pt->pages, lru) + while ((p = ttm_pool_type_take(pt))) ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); } -- GitLab From 738acd49eb018feb873e0fac8f9517493f6ce2c7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Mar 2021 09:49:28 +0000 Subject: [PATCH 573/839] qxl: Fix uninitialised struct field head.surface_id The surface_id struct field in head is not being initialized and static analysis warns that this is being passed through to dev->monitors_config->heads[i] on an assignment. Clear up this warning by initializing it to zero. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: a6d3c4d79822 ("qxl: hook monitors_config updates into crtc, not encoder.") Signed-off-by: Colin Ian King Link: http://patchwork.freedesktop.org/patch/msgid/20210304094928.2280722-1-colin.king@canonical.com Signed-off-by: Gerd Hoffmann Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 012bce0cdb65c..10738e04c09b8 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -328,6 +328,7 @@ static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc, head.id = i; head.flags = 0; + head.surface_id = 0; oldcount = qdev->monitors_config->count; if (crtc->state->active) { struct drm_display_mode *mode = &crtc->mode; -- GitLab From d611b4a0907cece060699f2fd347c492451cd2aa Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Tue, 23 Feb 2021 16:51:24 +0100 Subject: [PATCH 574/839] drm/shmem-helper: Check for purged buffers in fault handler When a buffer is madvised as not needed and then purged, any attempts to access the buffer from user-space should cause a bus fault. This patch adds a check for that. Cc: stable@vger.kernel.org Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers") Signed-off-by: Neil Roberts Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210223155125.199577-2-nroberts@igalia.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_gem_shmem_helper.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 9825c378dfa6d..b26139b1dc35c 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -525,14 +525,24 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); loff_t num_pages = obj->size >> PAGE_SHIFT; + vm_fault_t ret; struct page *page; - if (vmf->pgoff >= num_pages || WARN_ON_ONCE(!shmem->pages)) - return VM_FAULT_SIGBUS; + mutex_lock(&shmem->pages_lock); + + if (vmf->pgoff >= num_pages || + WARN_ON_ONCE(!shmem->pages) || + shmem->madv < 0) { + ret = VM_FAULT_SIGBUS; + } else { + page = shmem->pages[vmf->pgoff]; - page = shmem->pages[vmf->pgoff]; + ret = vmf_insert_page(vma, vmf->address, page); + } - return vmf_insert_page(vma, vmf->address, page); + mutex_unlock(&shmem->pages_lock); + + return ret; } static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) -- GitLab From 11d5a4745e00e73745774671dbf2fb07bd6e2363 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Tue, 23 Feb 2021 16:51:25 +0100 Subject: [PATCH 575/839] drm/shmem-helper: Don't remove the offset in vm_area_struct pgoff When mmapping the shmem, it would previously adjust the pgoff in the vm_area_struct to remove the fake offset that is added to be able to identify the buffer. This patch removes the adjustment and makes the fault handler use the vm_fault address to calculate the page offset instead. Although using this address is apparently discouraged, several DRM drivers seem to be doing it anyway. The problem with removing the pgoff is that it prevents drm_vma_node_unmap from working because that searches the mapping tree by address. That doesn't work because all of the mappings are at offset 0. drm_vma_node_unmap is being used by the shmem helpers when purging the buffer. This fixes a bug in Panfrost which is using drm_gem_shmem_purge. Without this the mapping for the purged buffer can still be accessed which might mean it would access random pages from other buffers v2: Don't check whether the unsigned page_offset is less than 0. Cc: stable@vger.kernel.org Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers") Signed-off-by: Neil Roberts Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210223155125.199577-3-nroberts@igalia.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_gem_shmem_helper.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index b26139b1dc35c..5b5c095e86a94 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -527,15 +527,19 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) loff_t num_pages = obj->size >> PAGE_SHIFT; vm_fault_t ret; struct page *page; + pgoff_t page_offset; + + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; mutex_lock(&shmem->pages_lock); - if (vmf->pgoff >= num_pages || + if (page_offset >= num_pages || WARN_ON_ONCE(!shmem->pages) || shmem->madv < 0) { ret = VM_FAULT_SIGBUS; } else { - page = shmem->pages[vmf->pgoff]; + page = shmem->pages[page_offset]; ret = vmf_insert_page(vma, vmf->address, page); } @@ -591,9 +595,6 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) struct drm_gem_shmem_object *shmem; int ret; - /* Remove the fake offset */ - vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); - if (obj->import_attach) { /* Drop the reference drm_gem_mmap_obj() acquired.*/ drm_gem_object_put(obj); -- GitLab From fa0c16caf3d73ab4d2e5d6fa2ef2394dbec91791 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Tue, 2 Mar 2021 12:22:02 +0800 Subject: [PATCH 576/839] drm: meson_drv add shutdown function Problem: random stucks on reboot stage about 1/20 stuck/reboots // debug kernel log [ 4.496660] reboot: kernel restart prepare CMD:(null) [ 4.498114] meson_ee_pwrc c883c000.system-controller:power-controller: shutdown begin [ 4.503949] meson_ee_pwrc c883c000.system-controller:power-controller: shutdown domain 0:VPU... ...STUCK... Solution: add shutdown function to meson_drm driver // debug kernel log [ 5.231896] reboot: kernel restart prepare CMD:(null) [ 5.246135] [drm:meson_drv_shutdown] ... [ 5.259271] meson_ee_pwrc c883c000.system-controller:power-controller: shutdown begin [ 5.274688] meson_ee_pwrc c883c000.system-controller:power-controller: shutdown domain 0:VPU... [ 5.338331] reboot: Restarting system [ 5.358293] psci: PSCI_0_2_FN_SYSTEM_RESET reboot_mode:0 cmd:(null) bl31 reboot reason: 0xd bl31 reboot reason: 0x0 system cmd 1. ...REBOOT... Tested: on VIM1 VIM2 VIM3 VIM3L khadas sbcs - 1000+ successful reboots and Odroid boards, WeTek Play2 (GXBB) Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Artem Lapkin Tested-by: Christian Hewitt Acked-by: Neil Armstrong Acked-by: Kevin Hilman Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20210302042202.3728113-1-art@khadas.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/meson/meson_drv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 42c5d3246cfcb..453d8b4c5763d 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -482,6 +482,16 @@ static int meson_probe_remote(struct platform_device *pdev, return count; } +static void meson_drv_shutdown(struct platform_device *pdev) +{ + struct meson_drm *priv = dev_get_drvdata(&pdev->dev); + struct drm_device *drm = priv->drm; + + DRM_DEBUG_DRIVER("\n"); + drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); +} + static int meson_drv_probe(struct platform_device *pdev) { struct component_match *match = NULL; @@ -553,6 +563,7 @@ static const struct dev_pm_ops meson_drv_pm_ops = { static struct platform_driver meson_drm_platform_driver = { .probe = meson_drv_probe, + .shutdown = meson_drv_shutdown, .driver = { .name = "meson-drm", .of_match_table = dt_match, -- GitLab From 64e194e278673bceb68fb2dde7dbc3d812bfceb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Fri, 19 Feb 2021 13:22:03 +0100 Subject: [PATCH 577/839] drm/shmem-helpers: vunmap: Don't put pages for dma-buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma-buf importing was reworked in commit 7d2cd72a9aa3 ("drm/shmem-helpers: Simplify dma-buf importing"). Before that commit drm_gem_shmem_prime_import_sg_table() did set ->pages_use_count=1 and drm_gem_shmem_vunmap_locked() could call drm_gem_shmem_put_pages() unconditionally. Now without the use count set, put pages is called also on dma-bufs. Fix this by only putting pages if it's not imported. Signed-off-by: Noralf Trønnes Fixes: 7d2cd72a9aa3 ("drm/shmem-helpers: Simplify dma-buf importing") Cc: Daniel Vetter Cc: Thomas Zimmermann Acked-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210219122203.51130-1-noralf@tronnes.org (cherry picked from commit cdea72518a2b38207146e92e1c9e2fac15975679) Signed-off-by: Thomas Zimmermann Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_gem_shmem_helper.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 5b5c095e86a94..6d625cee7a6af 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -357,13 +357,14 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, if (--shmem->vmap_use_count > 0) return; - if (obj->import_attach) + if (obj->import_attach) { dma_buf_vunmap(obj->import_attach->dmabuf, map); - else + } else { vunmap(shmem->vaddr); + drm_gem_shmem_put_pages(shmem); + } shmem->vaddr = NULL; - drm_gem_shmem_put_pages(shmem); } /* -- GitLab From de066e116306baf3a6a62691ac63cfc0b1dabddb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 22 Feb 2021 11:06:43 +0100 Subject: [PATCH 578/839] drm/compat: Clear bounce structures Some of them have gaps, or fields we don't clear. Native ioctl code does full copies plus zero-extends on size mismatch, so nothing can leak. But compat is more hand-rolled so need to be careful. None of these matter for performance, so just memset. Also I didn't fix up the CONFIG_DRM_LEGACY or CONFIG_DRM_AGP ioctl, those are security holes anyway. Acked-by: Maxime Ripard Reported-by: syzbot+620cf21140fc7e772a5d@syzkaller.appspotmail.com # vblank ioctl Cc: syzbot+620cf21140fc7e772a5d@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210222100643.400935-1-daniel.vetter@ffwll.ch (cherry picked from commit e926c474ebee404441c838d18224cd6f246a71b7) Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_ioc32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index f86448ab1fe04..dc734d4828a17 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -99,6 +99,8 @@ static int compat_drm_version(struct file *file, unsigned int cmd, if (copy_from_user(&v32, (void __user *)arg, sizeof(v32))) return -EFAULT; + memset(&v, 0, sizeof(v)); + v = (struct drm_version) { .name_len = v32.name_len, .name = compat_ptr(v32.name), @@ -137,6 +139,9 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd, if (copy_from_user(&uq32, (void __user *)arg, sizeof(uq32))) return -EFAULT; + + memset(&uq, 0, sizeof(uq)); + uq = (struct drm_unique){ .unique_len = uq32.unique_len, .unique = compat_ptr(uq32.unique), @@ -265,6 +270,8 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd, if (copy_from_user(&c32, argp, sizeof(c32))) return -EFAULT; + memset(&client, 0, sizeof(client)); + client.idx = c32.idx; err = drm_ioctl_kernel(file, drm_getclient, &client, 0); @@ -852,6 +859,8 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_from_user(&req32, argp, sizeof(req32))) return -EFAULT; + memset(&req, 0, sizeof(req)); + req.request.type = req32.request.type; req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; @@ -889,6 +898,8 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, struct drm_mode_fb_cmd2 req64; int err; + memset(&req64, 0, sizeof(req64)); + if (copy_from_user(&req64, argp, offsetof(drm_mode_fb_cmd232_t, modifier))) return -EFAULT; -- GitLab From f09f9f93afad770a04b35235a0aa465fcc8d6e3d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Feb 2021 11:37:47 +0100 Subject: [PATCH 579/839] media: rc: compile rc-cec.c into rc-core The rc-cec keymap is unusual in that it can't be built as a module, instead it is registered directly in rc-main.c if CONFIG_MEDIA_CEC_RC is set. This is because it can be called from drm_dp_cec_set_edid() via cec_register_adapter() in an asynchronous context, and it is not allowed to use request_module() to load rc-cec.ko in that case. Trying to do so results in a 'WARN_ON_ONCE(wait && current_is_async())'. Since this keymap is only used if CONFIG_MEDIA_CEC_RC is set, we just compile this keymap into the rc-core module and never as a separate module. Signed-off-by: Hans Verkuil Fixes: 2c6d1fffa1d9 (drm: add support for DisplayPort CEC-Tunneling-over-AUX) Reported-by: Hans de Goede Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/Makefile | 1 + drivers/media/rc/keymaps/Makefile | 1 - drivers/media/rc/keymaps/rc-cec.c | 28 +++++++++++----------------- drivers/media/rc/rc-main.c | 6 ++++++ include/media/rc-map.h | 7 +++++++ 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile index 5bb2932ab1195..ff6a8fc4c38e5 100644 --- a/drivers/media/rc/Makefile +++ b/drivers/media/rc/Makefile @@ -5,6 +5,7 @@ obj-y += keymaps/ obj-$(CONFIG_RC_CORE) += rc-core.o rc-core-y := rc-main.o rc-ir-raw.o rc-core-$(CONFIG_LIRC) += lirc_dev.o +rc-core-$(CONFIG_MEDIA_CEC_RC) += keymaps/rc-cec.o rc-core-$(CONFIG_BPF_LIRC_MODE2) += bpf-lirc.o obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile index b252a1d2ebd66..cc6662e1903f5 100644 --- a/drivers/media/rc/keymaps/Makefile +++ b/drivers/media/rc/keymaps/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \ rc-behold.o \ rc-behold-columbus.o \ rc-budget-ci-old.o \ - rc-cec.o \ rc-cinergy-1400.o \ rc-cinergy.o \ rc-d680-dmb.o \ diff --git a/drivers/media/rc/keymaps/rc-cec.c b/drivers/media/rc/keymaps/rc-cec.c index 3e3bd11092b45..068e22aeac8c3 100644 --- a/drivers/media/rc/keymaps/rc-cec.c +++ b/drivers/media/rc/keymaps/rc-cec.c @@ -1,5 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Keytable for the CEC remote control + * + * This keymap is unusual in that it can't be built as a module, + * instead it is registered directly in rc-main.c if CONFIG_MEDIA_CEC_RC + * is set. This is because it can be called from drm_dp_cec_set_edid() via + * cec_register_adapter() in an asynchronous context, and it is not + * allowed to use request_module() to load rc-cec.ko in that case. + * + * Since this keymap is only used if CONFIG_MEDIA_CEC_RC is set, we + * just compile this keymap into the rc-core module and never as a + * separate module. * * Copyright (c) 2015 by Kamil Debski */ @@ -152,7 +162,7 @@ static struct rc_map_table cec[] = { /* 0x77-0xff: Reserved */ }; -static struct rc_map_list cec_map = { +struct rc_map_list cec_map = { .map = { .scan = cec, .size = ARRAY_SIZE(cec), @@ -160,19 +170,3 @@ static struct rc_map_list cec_map = { .name = RC_MAP_CEC, } }; - -static int __init init_rc_map_cec(void) -{ - return rc_map_register(&cec_map); -} - -static void __exit exit_rc_map_cec(void) -{ - rc_map_unregister(&cec_map); -} - -module_init(init_rc_map_cec); -module_exit(exit_rc_map_cec); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kamil Debski"); diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 1fd62c1dac768..8e88dc8ea6c5e 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -2069,6 +2069,9 @@ static int __init rc_core_init(void) led_trigger_register_simple("rc-feedback", &led_feedback); rc_map_register(&empty_map); +#ifdef CONFIG_MEDIA_CEC_RC + rc_map_register(&cec_map); +#endif return 0; } @@ -2078,6 +2081,9 @@ static void __exit rc_core_exit(void) lirc_dev_exit(); class_unregister(&rc_class); led_trigger_unregister_simple(led_feedback); +#ifdef CONFIG_MEDIA_CEC_RC + rc_map_unregister(&cec_map); +#endif rc_map_unregister(&empty_map); } diff --git a/include/media/rc-map.h b/include/media/rc-map.h index 999b750bc6b88..30f138ebab6f0 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -175,6 +175,13 @@ struct rc_map_list { struct rc_map map; }; +#ifdef CONFIG_MEDIA_CEC_RC +/* + * rc_map_list from rc-cec.c + */ +extern struct rc_map_list cec_map; +#endif + /* Routines from rc-map.c */ /** -- GitLab From 8a7e27fd5cd696ba564a3f62cedef7269cfd0723 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 5 Feb 2021 23:51:39 +0100 Subject: [PATCH 580/839] media: usbtv: Fix deadlock on suspend usbtv doesn't support power management, so on system suspend the .disconnect callback of the driver is called. The teardown sequence includes a call to snd_card_free. Its implementation waits until the refcount of the sound card device drops to zero, however, if its file is open, snd_card_file_add takes a reference, which can't be dropped during the suspend, because the userspace processes are already frozen at this point. snd_card_free waits for completion forever, leading to a hang on suspend. This commit fixes this deadlock condition by replacing snd_card_free with snd_card_free_when_closed, that doesn't wait until all references are released, allowing suspend to progress. Fixes: 63ddf68de52e ("[media] usbtv: add audio support") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/usbtv/usbtv-audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/usbtv/usbtv-audio.c b/drivers/media/usb/usbtv/usbtv-audio.c index b57e94fb19770..333bd305a4f9f 100644 --- a/drivers/media/usb/usbtv/usbtv-audio.c +++ b/drivers/media/usb/usbtv/usbtv-audio.c @@ -371,7 +371,7 @@ void usbtv_audio_free(struct usbtv *usbtv) cancel_work_sync(&usbtv->snd_trigger); if (usbtv->snd && usbtv->udev) { - snd_card_free(usbtv->snd); + snd_card_free_when_closed(usbtv->snd); usbtv->snd = NULL; } } -- GitLab From ac8d82f586c8692b501cb974604a71ef0e22a04c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 1 Mar 2021 13:08:27 +0100 Subject: [PATCH 581/839] media: v4l: vsp1: Fix bru null pointer access RZ/G2L SoC has only BRS. This patch fixes null pointer access,when only BRS is enabled. Fixes: cbb7fa49c7466("media: v4l: vsp1: Rename BRU to BRx") Signed-off-by: Biju Das Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/vsp1/vsp1_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index 86d5e3f4b1ffc..f6d2f47a4058f 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -245,7 +245,7 @@ static int vsp1_du_pipeline_setup_brx(struct vsp1_device *vsp1, brx = &vsp1->bru->entity; else if (pipe->brx && !drm_pipe->force_brx_release) brx = pipe->brx; - else if (!vsp1->bru->entity.pipe) + else if (vsp1_feature(vsp1, VSP1_HAS_BRU) && !vsp1->bru->entity.pipe) brx = &vsp1->bru->entity; else brx = &vsp1->brs->entity; -- GitLab From 6732f313938027a910e1f7351951ff52c0329e70 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 1 Mar 2021 13:08:28 +0100 Subject: [PATCH 582/839] media: v4l: vsp1: Fix uif null pointer access RZ/G2L SoC has no UIF. This patch fixes null pointer access, when UIF module is not used. Fixes: 5e824f989e6e8("media: v4l: vsp1: Integrate DISCOM in display pipeline") Signed-off-by: Biju Das Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/vsp1/vsp1_drm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index f6d2f47a4058f..06f74d410973e 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -462,9 +462,9 @@ static int vsp1_du_pipeline_setup_inputs(struct vsp1_device *vsp1, * make sure it is present in the pipeline's list of entities if it * wasn't already. */ - if (!use_uif) { + if (drm_pipe->uif && !use_uif) { drm_pipe->uif->pipe = NULL; - } else if (!drm_pipe->uif->pipe) { + } else if (drm_pipe->uif && !drm_pipe->uif->pipe) { drm_pipe->uif->pipe = pipe; list_add_tail(&drm_pipe->uif->list_pipe, &pipe->entities); } -- GitLab From 2025a48cfd92d541c5ee47deee97f8a46d00c4ac Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 1 Mar 2021 18:18:35 +0100 Subject: [PATCH 583/839] media: rkisp1: params: fix wrong bits settings The histogram mode is set using 'rkisp1_params_set_bits'. Only the bits of the mode should be the value argument for that function. Otherwise bits outside the mode mask are turned on which is not what was intended. Fixes: bae1155cf579 ("media: staging: rkisp1: add output device for parameters") Signed-off-by: Dafna Hirschfeld Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rkisp1/rkisp1-params.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c index aa5f45749543b..a60c302ef2676 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c @@ -1288,7 +1288,6 @@ static void rkisp1_params_config_parameter(struct rkisp1_params *params) memset(hst.hist_weight, 0x01, sizeof(hst.hist_weight)); rkisp1_hst_config(params, &hst); rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP, - ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK | rkisp1_hst_params_default_config.mode); /* set the range */ -- GitLab From e6ad55988b968bd4c54fd182d0bd3d7fcb969779 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 3 Mar 2021 14:47:17 -0800 Subject: [PATCH 584/839] nvme: set max_zone_append_sectors nvme_revalidate_zones The chunk_sectors value affects max_zone_append_sectors. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Tested-by: Kanchan Joshi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/zns.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index c7e3ec561ba0c..bc2f344f0ae01 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -9,7 +9,13 @@ int nvme_revalidate_zones(struct nvme_ns *ns) { - return blk_revalidate_disk_zones(ns->disk, NULL); + struct request_queue *q = ns->queue; + int ret; + + ret = blk_revalidate_disk_zones(ns->disk, NULL); + if (!ret) + blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); + return ret; } static int nvme_set_max_append(struct nvme_ctrl *ctrl) @@ -107,7 +113,6 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1); blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1); - blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); free_data: kfree(id); return status; -- GitLab From d95c1f4179a7f3ea8aa728ed00252a8ed0f8158f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 26 Feb 2021 08:17:25 +0100 Subject: [PATCH 585/839] nvme: simplify error logic in nvme_validate_ns() We only should remove namespaces when we get fatal error back from the device or when the namespace IDs have changed. So instead of painfully masking out error numbers which might indicate that the error should be ignored we could use an NVME status code to indicated when the namespace should be removed. That simplifies the final logic and makes it less error-prone. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e68a8c4ac5a6e..e084120c3453c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1440,7 +1440,7 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_free_id; } - error = -ENODEV; + error = NVME_SC_INVALID_NS | NVME_SC_DNR; if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; @@ -4038,7 +4038,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { struct nvme_id_ns *id; - int ret = -ENODEV; + int ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (test_bit(NVME_NS_DEAD, &ns->flags)) goto out; @@ -4047,7 +4047,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) if (ret) goto out; - ret = -ENODEV; + ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { dev_err(ns->ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); @@ -4065,7 +4065,7 @@ out: * * TODO: we should probably schedule a delayed retry here. */ - if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) + if (ret > 0 && (ret & NVME_SC_DNR)) nvme_ns_remove(ns); } -- GitLab From d3589381987ec879b03f8ce3039df57e87f05901 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 26 Feb 2021 08:17:26 +0100 Subject: [PATCH 586/839] nvme: add NVME_REQ_CANCELLED flag in nvme_cancel_request() NVME_REQ_CANCELLED is translated into -EINTR in nvme_submit_sync_cmd(), so we should be setting this flags during nvme_cancel_request() to ensure that the callers to nvme_submit_sync_cmd() will get the correct error code when the controller is reset. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chao Leng Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e084120c3453c..63bb1da0861e9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -380,6 +380,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) return true; nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; blk_mq_complete_request(req); return true; } -- GitLab From 3c7aafbc8d3d4d90430dfa126847a796c3e4ecfc Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 26 Feb 2021 08:17:27 +0100 Subject: [PATCH 587/839] nvme-fc: set NVME_REQ_CANCELLED in nvme_fc_terminate_exchange() nvme_fc_terminate_exchange() is being called when exchanges are being deleted, and as such we should be setting the NVME_REQ_CANCELLED flag to have identical behaviour on all transports. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 20dadd86e9812..ef12a619daecb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2443,6 +2443,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + op->nreq.flags |= NVME_REQ_CANCELLED; __nvme_fc_abort_op(ctrl, op); return true; } -- GitLab From ae3afe6308b43bbf49953101d4ba2c1c481133a8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 26 Feb 2021 08:17:28 +0100 Subject: [PATCH 588/839] nvme-fc: return NVME_SC_HOST_ABORTED_CMD when a command has been aborted When a command has been aborted we should return NVME_SC_HOST_ABORTED_CMD to be consistent with the other transports. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ef12a619daecb..97e3424c7b032 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1956,7 +1956,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (opstate == FCPOP_STATE_ABORTED) - status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1); else if (freq->status) { status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); dev_info(ctrl->ctrl.device, -- GitLab From f20ef34d71abc1fc56b322aaa251f90f94320140 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 8 Mar 2021 16:51:26 -0800 Subject: [PATCH 589/839] nvme-fc: fix racing controller reset and create association Recent patch to prevent calling __nvme_fc_abort_outstanding_ios in interrupt context results in a possible race condition. A controller reset results in errored io completions, which schedules error work. The change of error work to a work element allows it to fire after the ctrl state transition to NVME_CTRL_CONNECTING, causing any outstanding io (used to initialize the controller) to fail and cause problems for connect_work. Add a state check to only schedule error work if not in the RESETTING state. Fixes: 19fce0470f05 ("nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt context") Signed-off-by: Nigel Kirkland Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 97e3424c7b032..73d0737483891 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2055,7 +2055,7 @@ done: nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc) + if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } -- GitLab From 0ec84df4953bd42c6583a555773f1d4996a061eb Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 8 Mar 2021 20:58:21 -0800 Subject: [PATCH 590/839] nvme-core: check ctrl css before setting up zns Ensure multiple Command Sets are supported before starting to setup a ZNS namespace. Signed-off-by: Chaitanya Kulkarni [hch: move the check around a bit] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 63bb1da0861e9..82ad5eef9d0c3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4096,6 +4096,12 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nsid); break; } + if (!nvme_multi_css(ctrl)) { + dev_warn(ctrl->device, + "command set not reported for nsid: %d\n", + ns->head->ns_id); + break; + } nvme_alloc_ns(ctrl, nsid, &ids); break; default: -- GitLab From abec6561fc4e0fbb19591a0b35676d8c783b5493 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 10 Mar 2021 21:44:13 -0800 Subject: [PATCH 591/839] nvme-rdma: Fix a use after free in nvmet_rdma_write_data_done In nvmet_rdma_write_data_done, rsp is recoverd by wc->wr_cqe and freed by nvmet_rdma_release_rsp(). But after that, pr_info() used the freed chunk's member object and could leak the freed chunk address with wc->wr_cqe by computing the offset. Signed-off-by: Lv Yunlong Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 06b6b742bb213..6c1f3ab7649c7 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -802,9 +802,8 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { - pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", - wc->wr_cqe, ib_wc_status_msg(wc->status), - wc->status); + pr_info("RDMA WRITE for CQE failed with status %s (%d).\n", + ib_wc_status_msg(wc->status), wc->status); nvmet_rdma_error_comp(queue); } return; -- GitLab From abbb5f5929ec6c52574c430c5475c158a65c2a8c Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 10 Mar 2021 12:06:41 +0000 Subject: [PATCH 592/839] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a Samsung PM1725a This adds a quirk for Samsung PM1725a drive which fixes timeouts and I/O errors due to the fact that the controller does not properly handle the Write Zeroes command, dmesg log: nvme nvme0: I/O 528 QID 10 timeout, aborting nvme nvme0: I/O 529 QID 10 timeout, aborting nvme nvme0: I/O 530 QID 10 timeout, aborting nvme nvme0: I/O 531 QID 10 timeout, aborting nvme nvme0: I/O 532 QID 10 timeout, aborting nvme nvme0: I/O 533 QID 10 timeout, aborting nvme nvme0: I/O 534 QID 10 timeout, aborting nvme nvme0: I/O 535 QID 10 timeout, aborting nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: Abort status: 0x0 nvme nvme0: I/O 528 QID 10 timeout, reset controller nvme nvme0: controller is down; will reset: CSTS=0x3, PCI_STATUS=0x10 nvme nvme0: Device not ready; aborting reset, CSTS=0x3 nvme nvme0: Device not ready; aborting reset, CSTS=0x3 nvme nvme0: Removing after probe failure status: -19 nvme0n1: detected capacity change from 6251233968 to 0 blk_update_request: I/O error, dev nvme0n1, sector 32776 op 0x1:(WRITE) flags 0x3000 phys_seg 6 prio class 0 blk_update_request: I/O error, dev nvme0n1, sector 113319936 op 0x9:(WRITE_ZEROES) flags 0x800 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 1, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113319680 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 2, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113319424 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 3, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113319168 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 4, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113318912 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 5, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113318656 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Buffer I/O error on dev nvme0n1p2, logical block 6, lost async page write blk_update_request: I/O error, dev nvme0n1, sector 113318400 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 blk_update_request: I/O error, dev nvme0n1, sector 113318144 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 blk_update_request: I/O error, dev nvme0n1, sector 113317888 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 17ab3320d28b4..7249ae74f71ff 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3246,6 +3246,7 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, -- GitLab From 14fbbc8297728e880070f7b077b3301a8c698ef9 Mon Sep 17 00:00:00 2001 From: Daiyue Zhang Date: Mon, 1 Mar 2021 14:10:53 +0800 Subject: [PATCH 593/839] configfs: fix a use-after-free in __configfs_open_file Commit b0841eefd969 ("configfs: provide exclusion between IO and removals") uses ->frag_dead to mark the fragment state, thus no bothering with extra refcount on config_item when opening a file. The configfs_get_config_item was removed in __configfs_open_file, but not with config_item_put. So the refcount on config_item will lost its balance, causing use-after-free issues in some occasions like this: Test: 1. Mount configfs on /config with read-only items: drwxrwx--- 289 root root 0 2021-04-01 11:55 /config drwxr-xr-x 2 root root 0 2021-04-01 11:54 /config/a --w--w--w- 1 root root 4096 2021-04-01 11:53 /config/a/1.txt ...... 2. Then run: for file in /config do echo $file grep -R 'key' $file done 3. __configfs_open_file will be called in parallel, the first one got called will do: if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO)) goto out_put_module; config_item_put(buffer->item); kref_put() package_details_release() kfree() the other one will run into use-after-free issues like this: BUG: KASAN: use-after-free in __configfs_open_file+0x1bc/0x3b0 Read of size 8 at addr fffffff155f02480 by task grep/13096 CPU: 0 PID: 13096 Comm: grep VIP: 00 Tainted: G W 4.14.116-kasan #1 TGID: 13096 Comm: grep Call trace: dump_stack+0x118/0x160 kasan_report+0x22c/0x294 __asan_load8+0x80/0x88 __configfs_open_file+0x1bc/0x3b0 configfs_open_file+0x28/0x34 do_dentry_open+0x2cc/0x5c0 vfs_open+0x80/0xe0 path_openat+0xd8c/0x2988 do_filp_open+0x1c4/0x2fc do_sys_open+0x23c/0x404 SyS_openat+0x38/0x48 Allocated by task 2138: kasan_kmalloc+0xe0/0x1ac kmem_cache_alloc_trace+0x334/0x394 packages_make_item+0x4c/0x180 configfs_mkdir+0x358/0x740 vfs_mkdir2+0x1bc/0x2e8 SyS_mkdirat+0x154/0x23c el0_svc_naked+0x34/0x38 Freed by task 13096: kasan_slab_free+0xb8/0x194 kfree+0x13c/0x910 package_details_release+0x524/0x56c kref_put+0xc4/0x104 config_item_put+0x24/0x34 __configfs_open_file+0x35c/0x3b0 configfs_open_file+0x28/0x34 do_dentry_open+0x2cc/0x5c0 vfs_open+0x80/0xe0 path_openat+0xd8c/0x2988 do_filp_open+0x1c4/0x2fc do_sys_open+0x23c/0x404 SyS_openat+0x38/0x48 el0_svc_naked+0x34/0x38 To fix this issue, remove the config_item_put in __configfs_open_file to balance the refcount of config_item. Fixes: b0841eefd969 ("configfs: provide exclusion between IO and removals") Signed-off-by: Daiyue Zhang Signed-off-by: Yi Chen Signed-off-by: Ge Qiu Reviewed-by: Chao Yu Acked-by: Al Viro Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 1f0270229d7b7..da8351d1e4552 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -378,7 +378,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type attr = to_attr(dentry); if (!attr) - goto out_put_item; + goto out_free_buffer; if (type & CONFIGFS_ITEM_BIN_ATTR) { buffer->bin_attr = to_bin_attr(dentry); @@ -391,7 +391,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type /* Grab the module reference for this attribute if we have one */ error = -ENODEV; if (!try_module_get(buffer->owner)) - goto out_put_item; + goto out_free_buffer; error = -EACCES; if (!buffer->item->ci_type) @@ -435,8 +435,6 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type out_put_module: module_put(buffer->owner); -out_put_item: - config_item_put(buffer->item); out_free_buffer: up_read(&frag->frag_sem); kfree(buffer); -- GitLab From a4a251f8c23518899d2078c320cf9ce2fa459c9f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 11 Mar 2021 13:53:50 +0200 Subject: [PATCH 594/839] usb: xhci: do not perform Soft Retry for some xHCI hosts On some systems rt2800usb and mt7601u devices are unable to operate since commit f8f80be501aa ("xhci: Use soft retry to recover faster from transaction errors") Seems that some xHCI controllers can not perform Soft Retry correctly, affecting those devices. To avoid the problem add xhci->quirks flag that restore pre soft retry xhci behaviour for affected xHCI controllers. Currently those are AMD_PROMONTORYA_4 and AMD_PROMONTORYA_2, since it was confirmed by the users: on those xHCI hosts issue happen and is gone after disabling Soft Retry. [minor commit message rewording for checkpatch -Mathias] Fixes: f8f80be501aa ("xhci: Use soft retry to recover faster from transaction errors") Cc: # 4.20+ Reported-by: Bernhard Tested-by: Bernhard Signed-off-by: Stanislaw Gruszka Signed-off-by: Mathias Nyman Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202541 Link: https://lore.kernel.org/r/20210311115353.2137560-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ drivers/usb/host/xhci-ring.c | 3 ++- drivers/usb/host/xhci.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 84da8406d5b42..1f989a49c8c6d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -295,6 +295,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x9026) xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2 || + pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) + xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5e548a1c93ab1..ce38076901e25 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2484,7 +2484,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, remaining = 0; break; case COMP_USB_TRANSACTION_ERROR: - if ((ep_ring->err_count++ > MAX_SOFT_RETRY) || + if (xhci->quirks & XHCI_NO_SOFT_RETRY || + (ep_ring->err_count++ > MAX_SOFT_RETRY) || le32_to_cpu(slot_ctx->tt_info) & TT_SLOT) break; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index d41de5dc0452e..ca822ad3b65b0 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1891,6 +1891,7 @@ struct xhci_hcd { #define XHCI_SKIP_PHY_INIT BIT_ULL(37) #define XHCI_DISABLE_SPARSE BIT_ULL(38) #define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39) +#define XHCI_NO_SOFT_RETRY BIT_ULL(40) unsigned int num_active_eps; unsigned int limit_active_eps; -- GitLab From 253f588c70f66184b1f3a9bbb428b49bbda73e80 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 11 Mar 2021 13:53:51 +0200 Subject: [PATCH 595/839] xhci: Improve detection of device initiated wake signal. A xHC USB 3 port might miss the first wake signal from a USB 3 device if the port LFPS reveiver isn't enabled fast enough after xHC resume. xHC host will anyway be resumed by a PME# signal, but will go back to suspend if no port activity is seen. The device resends the U3 LFPS wake signal after a 100ms delay, but by then host is already suspended, starting all over from the beginning of this issue. USB 3 specs say U3 wake LFPS signal is sent for max 10ms, then device needs to delay 100ms before resending the wake. Don't suspend immediately if port activity isn't detected in resume. Instead add a retry. If there is no port activity then delay for 120ms, and re-check for port activity. Cc: Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210311115353.2137560-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index bd27bd670104c..48a68fcf2b360 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1088,6 +1088,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) struct usb_hcd *secondary_hcd; int retval = 0; bool comp_timer_running = false; + bool pending_portevent = false; if (!hcd->state) return 0; @@ -1226,13 +1227,22 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { - /* Resume root hubs only when have pending events. */ - if (xhci_pending_portevent(xhci)) { + /* + * Resume roothubs only if there are pending events. + * USB 3 devices resend U3 LFPS wake after a 100ms delay if + * the first wake signalling failed, give it that chance. + */ + pending_portevent = xhci_pending_portevent(xhci); + if (!pending_portevent) { + msleep(120); + pending_portevent = xhci_pending_portevent(xhci); + } + + if (pending_portevent) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } } - /* * If system is subject to the Quirk, Compliance Mode Timer needs to * be re-initialized Always after a system resume. Ports are subject -- GitLab From b71c669ad8390dd1c866298319ff89fe68b45653 Mon Sep 17 00:00:00 2001 From: Forest Crossman Date: Thu, 11 Mar 2021 13:53:52 +0200 Subject: [PATCH 596/839] usb: xhci: Fix ASMedia ASM1042A and ASM3242 DMA addressing I've confirmed that both the ASMedia ASM1042A and ASM3242 have the same problem as the ASM1142 and ASM2142/ASM3142, where they lose some of the upper bits of 64-bit DMA addresses. As with the other chips, this can cause problems on systems where the upper bits matter, and adding the XHCI_NO_64BIT_SUPPORT quirk completely fixes the issue. Cc: stable@vger.kernel.org Signed-off-by: Forest Crossman Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210311115353.2137560-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1f989a49c8c6d..5bbccc9a0179f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -66,6 +66,7 @@ #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142 +#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242 static const char hcd_name[] = "xhci_hcd"; @@ -276,11 +277,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) xhci->quirks |= XHCI_BROKEN_STREAMS; if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && - pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) + pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; + xhci->quirks |= XHCI_NO_64BIT_SUPPORT; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && (pdev->device == PCI_DEVICE_ID_ASMEDIA_1142_XHCI || - pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI)) + pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI || + pdev->device == PCI_DEVICE_ID_ASMEDIA_3242_XHCI)) xhci->quirks |= XHCI_NO_64BIT_SUPPORT; if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && -- GitLab From d26c00e7276fc92b18c253d69e872f6b03832bad Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 11 Mar 2021 13:53:53 +0200 Subject: [PATCH 597/839] xhci: Fix repeated xhci wake after suspend due to uncleared internal wake state If port terminations are detected in suspend, but link never reaches U0 then xHCI may have an internal uncleared wake state that will cause an immediate wake after suspend. This wake state is normally cleared when driver clears the PORT_CSC bit, which is set after a device is enabled and in U0. Write 1 to clear PORT_CSC for ports that don't have anything connected when suspending. This makes sure any pending internal wake states in xHCI are cleared. Cc: stable@vger.kernel.org Tested-by: Mika Westerberg Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210311115353.2137560-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 62 ++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 48a68fcf2b360..1975016f46bff 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -883,44 +883,42 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) xhci_set_cmd_ring_deq(xhci); } -static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) +/* + * Disable port wake bits if do_wakeup is not set. + * + * Also clear a possible internal port wake state left hanging for ports that + * detected termination but never successfully enumerated (trained to 0U). + * Internal wake causes immediate xHCI wake after suspend. PORT_CSC write done + * at enumeration clears this wake, force one here as well for unconnected ports + */ + +static void xhci_disable_hub_port_wake(struct xhci_hcd *xhci, + struct xhci_hub *rhub, + bool do_wakeup) { - struct xhci_port **ports; - int port_index; unsigned long flags; u32 t1, t2, portsc; + int i; spin_lock_irqsave(&xhci->lock, flags); - /* disable usb3 ports Wake bits */ - port_index = xhci->usb3_rhub.num_ports; - ports = xhci->usb3_rhub.ports; - while (port_index--) { - t1 = readl(ports[port_index]->addr); - portsc = t1; - t1 = xhci_port_state_to_neutral(t1); - t2 = t1 & ~PORT_WAKE_BITS; - if (t1 != t2) { - writel(t2, ports[port_index]->addr); - xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n", - xhci->usb3_rhub.hcd->self.busnum, - port_index + 1, portsc, t2); - } - } + for (i = 0; i < rhub->num_ports; i++) { + portsc = readl(rhub->ports[i]->addr); + t1 = xhci_port_state_to_neutral(portsc); + t2 = t1; + + /* clear wake bits if do_wake is not set */ + if (!do_wakeup) + t2 &= ~PORT_WAKE_BITS; + + /* Don't touch csc bit if connected or connect change is set */ + if (!(portsc & (PORT_CSC | PORT_CONNECT))) + t2 |= PORT_CSC; - /* disable usb2 ports Wake bits */ - port_index = xhci->usb2_rhub.num_ports; - ports = xhci->usb2_rhub.ports; - while (port_index--) { - t1 = readl(ports[port_index]->addr); - portsc = t1; - t1 = xhci_port_state_to_neutral(t1); - t2 = t1 & ~PORT_WAKE_BITS; if (t1 != t2) { - writel(t2, ports[port_index]->addr); - xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n", - xhci->usb2_rhub.hcd->self.busnum, - port_index + 1, portsc, t2); + writel(t2, rhub->ports[i]->addr); + xhci_dbg(xhci, "config port %d-%d wake bits, portsc: 0x%x, write: 0x%x\n", + rhub->hcd->self.busnum, i + 1, portsc, t2); } } spin_unlock_irqrestore(&xhci->lock, flags); @@ -983,8 +981,8 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) return -EINVAL; /* Clear root port wake on bits if wakeup not allowed. */ - if (!do_wakeup) - xhci_disable_port_wake_on_bits(xhci); + xhci_disable_hub_port_wake(xhci, &xhci->usb3_rhub, do_wakeup); + xhci_disable_hub_port_wake(xhci, &xhci->usb2_rhub, do_wakeup); if (!HCD_HW_ACCESSIBLE(hcd)) return 0; -- GitLab From 7ba8f2b2d652cd8d8a2ab61f4be66973e70f9f88 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 18:15:11 +0100 Subject: [PATCH 598/839] arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds 52-bit VA kernels can run on hardware that is only 48-bit capable, but configure the ID map as 52-bit by default. This was not a problem until recently, because the special T0SZ value for a 52-bit VA space was never programmed into the TCR register anwyay, and because a 52-bit ID map happens to use the same number of translation levels as a 48-bit one. This behavior was changed by commit 1401bef703a4 ("arm64: mm: Always update TCR_EL1 from __cpu_set_tcr_t0sz()"), which causes the unsupported T0SZ value for a 52-bit VA to be programmed into TCR_EL1. While some hardware simply ignores this, Mark reports that Amberwing systems choke on this, resulting in a broken boot. But even before that commit, the unsupported idmap_t0sz value was exposed to KVM and used to program TCR_EL2 incorrectly as well. Given that we already have to deal with address spaces being either 48-bit or 52-bit in size, the cleanest approach seems to be to simply default to a 48-bit VA ID map, and only switch to a 52-bit one if the placement of the kernel in DRAM requires it. This is guaranteed not to happen unless the system is actually 52-bit VA capable. Fixes: 90ec95cda91a ("arm64: mm: Introduce VA_BITS_MIN") Reported-by: Mark Salter Link: http://lore.kernel.org/r/20210310003216.410037-1-msalter@redhat.com Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-2-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 5 +---- arch/arm64/kernel/head.S | 2 +- arch/arm64/mm/mmu.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 70ce8c1d2b078..0f467d550f27f 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) - return false; - - return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); + return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); } /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8b469f1640919..840bda1869e9c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x5, __idmap_text_end clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9c8aa1b44cd53..7484ea4f6ba07 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,7 +40,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 __section(".mmuoff.data.write") vabits_actual; -- GitLab From 30b2675761b8a1a2b6ef56b535ef51b789bb7150 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 18:15:12 +0100 Subject: [PATCH 599/839] arm64: mm: remove unused __cpu_uses_extended_idmap[_level()] These routines lost all existing users during the latest merge window so we can remove them. This avoids the need to fix them in the context of fixing a regression related to the ID map on 52-bit VA kernels. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310171515.416643-3-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0f467d550f27f..bd02e99b1a4c5 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,20 +63,6 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) extern u64 idmap_t0sz; extern u64 idmap_ptrs_per_pgd; -static inline bool __cpu_uses_extended_idmap(void) -{ - return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); -} - -/* - * True if the extended ID map requires an extra level of translation table - * to be configured. - */ -static inline bool __cpu_uses_extended_idmap_level(void) -{ - return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS; -} - /* * Ensure TCR.T0SZ is set to the provided value. */ -- GitLab From d450293c55005a3b0a25d209e981ac425483fead Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 11 Mar 2021 10:05:58 +0800 Subject: [PATCH 600/839] regulator: mt6315: Fix off-by-one for .n_voltages The valid selector is 0 ~ 0xbf, so the .n_voltages should be 0xc0. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210311020558.579597-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/mt6315-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c index fc7654624dd68..9edc34981ee0a 100644 --- a/drivers/regulator/mt6315-regulator.c +++ b/drivers/regulator/mt6315-regulator.c @@ -41,7 +41,7 @@ struct mt6315_chip { .type = REGULATOR_VOLTAGE, \ .id = _bid, \ .owner = THIS_MODULE, \ - .n_voltages = 0xbf, \ + .n_voltages = 0xc0, \ .linear_ranges = mt_volt_range1, \ .n_linear_ranges = ARRAY_SIZE(mt_volt_range1), \ .vsel_reg = _vsel, \ -- GitLab From ea94191e584b146878f0b7fd4b767500d7aae870 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Thu, 11 Mar 2021 17:12:20 +0800 Subject: [PATCH 601/839] spi: cadence: set cqspi to the driver_data field of struct device When initialize cadence qspi controller, it is need to set cqspi to the driver_data field of struct device, because it will be used in function cqspi_remove/suspend/resume(). Otherwise, there will be a crash trace as below when invoking these finctions. Fixes: 31fb632b5d43 ("spi: Move cadence-quadspi driver to drivers/spi/") Cc: stable@vger.kernel.org Signed-off-by: Meng Li Link: https://lore.kernel.org/r/20210311091220.3615-1-Meng.Li@windriver.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 442cc7c53a47b..52ddb3255d88c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1433,6 +1433,7 @@ static int cqspi_probe(struct platform_device *pdev) cqspi = spi_master_get_devdata(master); cqspi->pdev = pdev; + platform_set_drvdata(pdev, cqspi); /* Obtain configuration from OF. */ ret = cqspi_of_get_pdata(cqspi); -- GitLab From a8affc03a9b375e19bc81573de0c9108317d78c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Mar 2021 12:01:37 +0100 Subject: [PATCH 602/839] block: rename BIO_MAX_PAGES to BIO_MAX_VECS Ever since the addition of multipage bio_vecs BIO_MAX_PAGES has been horribly confusingly misnamed. Rename it to BIO_MAX_VECS to stop confusing users of the bio API. Signed-off-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20210311110137.1132391-2-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 14 +++++++------- block/blk-crypto-fallback.c | 2 +- block/blk-lib.c | 2 +- block/blk-map.c | 2 +- block/bounce.c | 6 +++--- drivers/block/drbd/drbd_int.h | 2 +- drivers/md/bcache/super.c | 2 +- drivers/md/dm-crypt.c | 8 ++++---- drivers/md/dm-writecache.c | 4 ++-- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5-ppl.c | 2 +- drivers/nvme/target/passthru.c | 6 +++--- fs/block_dev.c | 6 +++--- fs/btrfs/extent_io.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/crypto/bio.c | 6 +++--- fs/erofs/zdata.c | 2 +- fs/ext4/page-io.c | 2 +- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 4 ++-- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 4 ++-- fs/f2fs/super.c | 4 ++-- fs/gfs2/lops.c | 2 +- fs/iomap/buffered-io.c | 4 ++-- fs/iomap/direct-io.c | 4 ++-- fs/mpage.c | 2 +- fs/nilfs2/segbuf.c | 2 +- fs/squashfs/block.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/bio.h | 4 ++-- 31 files changed, 56 insertions(+), 56 deletions(-) diff --git a/block/bio.c b/block/bio.c index a1c4d2900c7a8..26b7f721cda88 100644 --- a/block/bio.c +++ b/block/bio.c @@ -33,7 +33,7 @@ static struct biovec_slab { { .nr_vecs = 16, .name = "biovec-16" }, { .nr_vecs = 64, .name = "biovec-64" }, { .nr_vecs = 128, .name = "biovec-128" }, - { .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" }, + { .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" }, }; static struct biovec_slab *biovec_slab(unsigned short nr_vecs) @@ -46,7 +46,7 @@ static struct biovec_slab *biovec_slab(unsigned short nr_vecs) return &bvec_slabs[1]; case 65 ... 128: return &bvec_slabs[2]; - case 129 ... BIO_MAX_PAGES: + case 129 ... BIO_MAX_VECS: return &bvec_slabs[3]; default: BUG(); @@ -151,9 +151,9 @@ out: void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs) { - BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES); + BIO_BUG_ON(nr_vecs > BIO_MAX_VECS); - if (nr_vecs == BIO_MAX_PAGES) + if (nr_vecs == BIO_MAX_VECS) mempool_free(bv, pool); else if (nr_vecs > BIO_INLINE_VECS) kmem_cache_free(biovec_slab(nr_vecs)->slab, bv); @@ -186,15 +186,15 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, /* * Try a slab allocation first for all smaller allocations. If that * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool. - * The mempool is sized to handle up to BIO_MAX_PAGES entries. + * The mempool is sized to handle up to BIO_MAX_VECS entries. */ - if (*nr_vecs < BIO_MAX_PAGES) { + if (*nr_vecs < BIO_MAX_VECS) { struct bio_vec *bvl; bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask)); if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM)) return bvl; - *nr_vecs = BIO_MAX_PAGES; + *nr_vecs = BIO_MAX_VECS; } return mempool_alloc(pool, gfp_mask); diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index c176b7af56a7a..c322176a1e099 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -219,7 +219,7 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr) bio_for_each_segment(bv, bio, iter) { num_sectors += bv.bv_len >> SECTOR_SHIFT; - if (++i == BIO_MAX_PAGES) + if (++i == BIO_MAX_VECS) break; } if (num_sectors < bio_sectors(bio)) { diff --git a/block/blk-lib.c b/block/blk-lib.c index 752f9c7220622..7b256131b20bb 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -296,7 +296,7 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) { sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512); - return min(pages, (sector_t)BIO_MAX_PAGES); + return min(pages, (sector_t)BIO_MAX_VECS); } static int __blkdev_issue_zero_pages(struct block_device *bdev, diff --git a/block/blk-map.c b/block/blk-map.c index 369e204d14d01..1ffef782fcf2d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -249,7 +249,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, if (!iov_iter_count(iter)) return -EINVAL; - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS)); if (!bio) return -ENOMEM; bio->bi_opf |= req_op(rq); diff --git a/block/bounce.c b/block/bounce.c index 87983a35079c2..6c441f4f1cd4a 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -229,10 +229,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * - The point of cloning the biovec is to produce a bio with a biovec * the caller can modify: bi_idx and bi_bvec_done should be 0. * - * - The original bio could've had more than BIO_MAX_PAGES biovecs; if + * - The original bio could've had more than BIO_MAX_VECS biovecs; if * we tried to clone the whole thing bio_alloc_bioset() would fail. * But the clone should succeed as long as the number of biovecs we - * actually need to allocate is fewer than BIO_MAX_PAGES. + * actually need to allocate is fewer than BIO_MAX_VECS. * * - Lastly, bi_vcnt should not be looked at or relied upon by code * that does not own the bio - reason being drivers don't use it for @@ -299,7 +299,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, int sectors = 0; bio_for_each_segment(from, *bio_orig, iter) { - if (i++ < BIO_MAX_PAGES) + if (i++ < BIO_MAX_VECS) sectors += from.bv_len >> 9; if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn) bounce = true; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7d9cc433b758a..5d9181382ce19 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1324,7 +1324,7 @@ struct bm_extent { * A followup commit may allow even bigger BIO sizes, * once we thought that through. */ #define DRBD_MAX_BIO_SIZE (1U << 20) -#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT) +#if DRBD_MAX_BIO_SIZE > (BIO_MAX_VECS << PAGE_SHIFT) #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE #endif #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 71691f32959b3..03e1fe4de53de 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -965,7 +965,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, q->limits.max_hw_sectors = UINT_MAX; q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; - q->limits.max_segments = BIO_MAX_PAGES; + q->limits.max_segments = BIO_MAX_VECS; blk_queue_max_discard_sectors(q, UINT_MAX); q->limits.discard_granularity = 512; q->limits.io_min = block_size; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 11c105ecd165a..b0ab080f25676 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -229,7 +229,7 @@ static DEFINE_SPINLOCK(dm_crypt_clients_lock); static unsigned dm_crypt_clients_n = 0; static volatile unsigned long dm_crypt_pages_per_client; #define DM_CRYPT_MEMORY_PERCENT 2 -#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16) +#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16) static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); @@ -3246,7 +3246,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size, ARCH_KMALLOC_MINALIGN); - ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc); + ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc); if (ret) { ti->error = "Cannot allocate page mempool"; goto bad; @@ -3373,9 +3373,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) /* * Check if bio is too large, split as needed. */ - if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) && (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) - dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT)); /* * Ensure that bio is a multiple of internal sector encryption size diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 844c4be11768d..4f72b6f66c3ae 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1892,10 +1892,10 @@ restart: list_add(&g->lru, &wbl.list); wbl.size++; g->write_in_progress = true; - g->wc_list_contiguous = BIO_MAX_PAGES; + g->wc_list_contiguous = BIO_MAX_VECS; f = g; e->wc_list_contiguous++; - if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) { + if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) { if (unlikely(wc->writeback_all)) { next_node = rb_next(&f->rb_node); if (likely(next_node)) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4337ae0e6af2e..0b5dcaabbc155 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -735,7 +735,7 @@ static void r5l_submit_current_io(struct r5l_log *log) static struct bio *r5l_bio_alloc(struct r5l_log *log) { - struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs); + struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio_set_dev(bio, log->rdev->bdev); @@ -1634,7 +1634,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, { struct page *page; - ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs); + ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs); if (!ctx->ra_bio) return -ENOMEM; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index e8c118e05dfd4..3ddc2aa0b5306 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -496,7 +496,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) { struct bio *prev = bio; - bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, + bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &ppl_conf->bs); bio->bi_opf = prev->bi_opf; bio->bi_write_hint = prev->bi_write_hint; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 26c587ccd152c..2798944899b73 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -50,9 +50,9 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) /* * nvmet_passthru_map_sg is limitted to using a single bio so limit - * the mdts based on BIO_MAX_PAGES as well + * the mdts based on BIO_MAX_VECS as well */ - max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9), + max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9), max_hw_sectors); page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; @@ -191,7 +191,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) struct bio *bio; int i; - if (req->sg_cnt > BIO_MAX_PAGES) + if (req->sg_cnt > BIO_MAX_VECS) return -EINVAL; if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 03166b3dea4db..92ed7d5df6774 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,7 +432,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); if (!nr_pages) { bool polled = false; @@ -500,8 +500,8 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (!iov_iter_count(iter)) return 0; - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1); - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); + if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dfb3ead11757..db8cb98c020c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3048,7 +3048,7 @@ struct bio *btrfs_bio_alloc(u64 first_byte) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 582df11d298af..6daa4309c974d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1428,7 +1428,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info, if (!first_page->dev->bdev) goto out; - bio = btrfs_io_bio_alloc(BIO_MAX_PAGES); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); bio_set_dev(bio, first_page->dev->bdev); for (page_num = 0; page_num < sblock->page_count; page_num++) { diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b048a0e385162..68a2de6b5a9b1 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -52,7 +52,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, int num_pages = 0; /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ - bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS); while (len) { unsigned int blocks_this_page = min(len, blocks_per_page); @@ -74,7 +74,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, len -= blocks_this_page; lblk += blocks_this_page; pblk += blocks_this_page; - if (num_pages == BIO_MAX_PAGES || !len || + if (num_pages == BIO_MAX_VECS || !len || !fscrypt_mergeable_bio(bio, inode, lblk)) { err = submit_bio_wait(bio); if (err) @@ -126,7 +126,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk, len); - BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); + BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS); nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), (len + blocks_per_page - 1) >> blocks_per_page_bits); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 6cb356c4217b2..3851e1a64f730 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1235,7 +1235,7 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); bio->bi_end_io = z_erofs_decompressqueue_endio; bio_set_dev(bio, sb->s_bdev); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 03a44a0de86ad..f038d578d8d8f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -398,7 +398,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). */ - bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 174a0819ad967..be5415a0dbbc6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -292,7 +292,7 @@ void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c95818639a63..4e5257c763d01 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -857,7 +857,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { - bio = __bio_alloc(fio, BIO_MAX_PAGES); + bio = __bio_alloc(fio, BIO_MAX_VECS); __attach_io_flag(fio); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, fio->page->index, fio, GFP_NOIO); @@ -932,7 +932,7 @@ alloc_new: fio->retry = true; goto skip; } - io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, bio_page->index, fio, GFP_NOIO); io->fio = *fio; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 993004f06a772..c2866561263e9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4381,7 +4381,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) block_t total_node_blocks = 0; do { - readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, META_SIT, true); start = start_blk * sit_i->sents_per_block; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 229814b4f4a6c..e9a7a637d6887 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -851,7 +851,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) else if (type == NODE) return 8 * sbi->blocks_per_seg; else if (type == META) - return 8 * BIO_MAX_PAGES; + return 8 * BIO_MAX_VECS; else return 0; } @@ -868,7 +868,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, return 0; nr_to_write = wbc->nr_to_write; - desired = BIO_MAX_PAGES; + desired = BIO_MAX_VECS; if (type == NODE) desired <<= 1; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7069793752f11..82592b19b4e02 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -753,9 +753,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { f2fs_warn(sbi, "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + 1 << arg, BIO_MAX_VECS); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index dc1b93a877c6d..a82f4747aa8d5 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -267,7 +267,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, bio_end_io_t *end_io) { struct super_block *sb = sdp->sd_vfs; - struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; bio_set_dev(bio, sb->s_bdev); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7ffcd7ef33d4f..414769a6ad113 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1221,7 +1221,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &iomap_ioend_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset); bio_set_dev(bio, wpc->iomap.bdev); bio->bi_iter.bi_sector = sector; bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); @@ -1252,7 +1252,7 @@ iomap_chain_bio(struct bio *prev) { struct bio *new; - new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); + new = bio_alloc(GFP_NOFS, BIO_MAX_VECS); bio_copy_dev(new, prev);/* also copies over blkcg information */ new->bi_iter.bi_sector = bio_end_sector(prev); new->bi_opf = prev->bi_opf; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index e2c4991833b8f..bdd0d89bbf0a3 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -296,7 +296,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, */ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua); - nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); do { size_t n; if (dio->error) { @@ -338,7 +338,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, copied += n; nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, - BIO_MAX_PAGES); + BIO_MAX_VECS); iomap_dio_submit_bio(dio, iomap, bio, pos); pos += n; } while (nr_pages); diff --git a/fs/mpage.c b/fs/mpage.c index 961234d687792..334e7d09aa652 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -616,7 +616,7 @@ alloc_new: goto out; } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); + BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH); if (bio == NULL) goto confused; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1e75417bfe6e5..56872e93823da 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -399,7 +399,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = BIO_MAX_PAGES; + wi->max_pages = BIO_MAX_VECS; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; wi->blocknr = segbuf->sb_pseg_start; diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 45f44425d8560..b9e87ebb1060e 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -87,7 +87,7 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, int error, i; struct bio *bio; - if (page_count <= BIO_MAX_PAGES) + if (page_count <= BIO_MAX_VECS) bio = bio_alloc(GFP_NOIO, page_count); else bio = bio_kmalloc(GFP_NOIO, page_count); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index b6ff4a21abacc..0fe76f376dee2 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -684,7 +684,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); iov_iter_truncate(from, max); - nr_pages = iov_iter_npages(from, BIO_MAX_PAGES); + nr_pages = iov_iter_npages(from, BIO_MAX_VECS); if (!nr_pages) return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index 983ed2fe7c850..d0246c92a6e86 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -20,11 +20,11 @@ #define BIO_BUG_ON #endif -#define BIO_MAX_PAGES 256U +#define BIO_MAX_VECS 256U static inline unsigned int bio_max_segs(unsigned int nr_segs) { - return min(nr_segs, BIO_MAX_PAGES); + return min(nr_segs, BIO_MAX_VECS); } #define bio_prio(bio) (bio)->bi_ioprio -- GitLab From f053cf7aa66cd9d592b0fc967f4d887c2abff1b7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 2 Mar 2021 12:04:19 -0800 Subject: [PATCH 603/839] ext4: fix error handling in ext4_end_enable_verity() ext4 didn't properly clean up if verity failed to be enabled on a file: - It left verity metadata (pages past EOF) in the page cache, which would be exposed to userspace if the file was later extended. - It didn't truncate the verity metadata at all (either from cache or from disk) if an error occurred while setting the verity bit. Fix these bugs by adding a call to truncate_inode_pages() and ensuring that we truncate the verity metadata (both from cache and from disk) in all error paths. Also rework the code to cleanly separate the success path from the error paths, which makes it much easier to understand. Reported-by: Yunlei He Fixes: c93d8f885809 ("ext4: add basic fs-verity support") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20210302200420.137977-2-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/verity.c | 89 ++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 5b7ba8f711538..00e3cbde472e4 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -201,55 +201,76 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc, struct inode *inode = file_inode(filp); const int credits = 2; /* superblock and inode for ext4_orphan_del() */ handle_t *handle; + struct ext4_iloc iloc; int err = 0; - int err2; - if (desc != NULL) { - /* Succeeded; write the verity descriptor. */ - err = ext4_write_verity_descriptor(inode, desc, desc_size, - merkle_tree_size); - - /* Write all pages before clearing VERITY_IN_PROGRESS. */ - if (!err) - err = filemap_write_and_wait(inode->i_mapping); - } + /* + * If an error already occurred (which fs/verity/ signals by passing + * desc == NULL), then only clean-up is needed. + */ + if (desc == NULL) + goto cleanup; - /* If we failed, truncate anything we wrote past i_size. */ - if (desc == NULL || err) - ext4_truncate(inode); + /* Append the verity descriptor. */ + err = ext4_write_verity_descriptor(inode, desc, desc_size, + merkle_tree_size); + if (err) + goto cleanup; /* - * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and - * deleting the inode from the orphan list, even if something failed. - * If everything succeeded, we'll also set the verity bit in the same - * transaction. + * Write all pages (both data and verity metadata). Note that this must + * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages + * beyond i_size won't be written properly. For crash consistency, this + * also must happen before the verity inode flag gets persisted. */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; - ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + /* + * Finally, set the verity inode flag and remove the inode from the + * orphan list (in a single transaction). + */ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); - return PTR_ERR(handle); + err = PTR_ERR(handle); + goto cleanup; } - err2 = ext4_orphan_del(handle, inode); - if (err2) - goto out_stop; + err = ext4_orphan_del(handle, inode); + if (err) + goto stop_and_cleanup; - if (desc != NULL && !err) { - struct ext4_iloc iloc; + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_stop; - ext4_set_inode_flag(inode, EXT4_INODE_VERITY); - ext4_set_inode_flags(inode, false); - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - } -out_stop: + ext4_set_inode_flag(inode, EXT4_INODE_VERITY); + ext4_set_inode_flags(inode, false); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; + + ext4_journal_stop(handle); + + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return 0; + +stop_and_cleanup: ext4_journal_stop(handle); - return err ?: err2; +cleanup: + /* + * Verity failed to be enabled, so clean up by truncating any verity + * metadata that was written beyond i_size (both from cache and from + * disk), removing the inode from the orphan list (if it wasn't done + * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS. + */ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); + ext4_orphan_del(NULL, inode); + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return err; } static int ext4_get_verity_descriptor_location(struct inode *inode, -- GitLab From b4250dd868d1b42c0a65de11ef3afbee67ba5d2f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 11 Mar 2021 10:55:00 -0500 Subject: [PATCH 604/839] NFSD: fix error handling in NFSv4.0 callbacks When the server tries to do a callback and a client fails it due to authentication problems, we need the server to set callback down flag in RENEW so that client can recover. Suggested-by: Bruce Fields Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever Tested-by: Benjamin Coddington Link: https://lore.kernel.org/linux-nfs/FB84E90A-1A03-48B3-8BF7-D9D10AC2C9FE@oracle.com/T/#t --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 052be5bf9ef50..7325592b456e5 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1189,6 +1189,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -EIO: case -ETIMEDOUT: + case -EACCES: nfsd4_mark_cb_down(clp, task->tk_status); } break; -- GitLab From 9922f50f7178496e709d3d064920b5031f0d9061 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 11 Mar 2021 21:15:57 +0530 Subject: [PATCH 605/839] ASoC: qcom: lpass-cpu: Fix lpass dai ids parse The max boundary check while parsing dai ids makes sound card registration fail after common up dai ids. Fixes: cd3484f7f138 ("ASoC: qcom: Fix broken support to MI2S TERTIARY and QUATERNARY") Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/20210311154557.24978-1-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index c642e5f8f28c4..be360a402b67f 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -739,7 +739,7 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, for_each_child_of_node(dev->of_node, node) { ret = of_property_read_u32(node, "reg", &id); - if (ret || id < 0 || id >= data->variant->num_dai) { + if (ret || id < 0) { dev_err(dev, "valid dai id not found: %d\n", ret); continue; } -- GitLab From 5c2469e0a22e035d52f3ba768151cc75e3d4a1cd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Mar 2021 10:17:56 -0700 Subject: [PATCH 606/839] io_uring: force creation of separate context for ATTACH_WQ and non-threads Earlier kernels had SQPOLL threads that could share across anything, as we grabbed the context we needed on a per-ring basis. This is no longer the case, so only allow attaching directly if we're in the same thread group. That is the common use case. For non-group tasks, just setup a new context and thread as we would've done if sharing wasn't set. This isn't 100% ideal in terms of CPU utilization for the forked and share case, but hopefully that isn't much of a concern. If it is, there are plans in motion for how to improve that. Most importantly, we want to avoid app side regressions where sharing worked before and now doesn't. With this patch, functionality is equivalent to previous kernels that supported IORING_SETUP_ATTACH_WQ with SQPOLL. Reported-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5c045a9f7ffe0..472eab7359f2a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -269,6 +269,7 @@ struct io_sq_data { unsigned sq_thread_idle; int sq_cpu; pid_t task_pid; + pid_t task_tgid; unsigned long state; struct completion startup; @@ -7112,6 +7113,10 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) fdput(f); return ERR_PTR(-EINVAL); } + if (sqd->task_tgid != current->tgid) { + fdput(f); + return ERR_PTR(-EPERM); + } refcount_inc(&sqd->refs); fdput(f); @@ -7122,8 +7127,14 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) { struct io_sq_data *sqd; - if (p->flags & IORING_SETUP_ATTACH_WQ) - return io_attach_sq_data(p); + if (p->flags & IORING_SETUP_ATTACH_WQ) { + sqd = io_attach_sq_data(p); + if (!IS_ERR(sqd)) + return sqd; + /* fall through for EPERM case, setup new sqd/task */ + if (PTR_ERR(sqd) != -EPERM) + return sqd; + } sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); if (!sqd) @@ -7833,6 +7844,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, } sqd->task_pid = current->pid; + sqd->task_tgid = current->tgid; tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); -- GitLab From d052d1d685f5125249ab4ff887562c88ba959638 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Mar 2021 10:49:20 -0700 Subject: [PATCH 607/839] io_uring: perform IOPOLL reaping if canceler is thread itself We bypass IOPOLL completion polling (and reaping) for the SQPOLL thread, but if it's the thread itself invoking cancelations, then we still need to perform it or no one will. Fixes: 9936c7c2bc76 ("io_uring: deduplicate core cancellations sequence") Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 472eab7359f2a..49f85f49e1c3c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8658,7 +8658,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, } /* SQPOLL thread does its own polling */ - if (!(ctx->flags & IORING_SETUP_SQPOLL) && !files) { + if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) || + (ctx->sq_data && ctx->sq_data->thread == current)) { while (!list_empty_careful(&ctx->iopoll_list)) { io_iopoll_try_reap_events(ctx); ret = true; -- GitLab From 4f8be1f53bf615102d103c0509ffa9596f65b718 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 28 Jan 2021 17:36:38 -0500 Subject: [PATCH 608/839] nfs: we don't support removing system.nfs4_acl The NFSv4 protocol doesn't have any notion of reomoving an attribute, so removexattr(path,"system.nfs4_acl") doesn't make sense. There's no documented return value. Arguably it could be EOPNOTSUPP but I'm a little worried an application might take that to mean that we don't support ACLs or xattrs. How about EINVAL? Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1002c4f66f3fa..c65c4b41e2c19 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5889,6 +5889,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; + /* You can't remove system.nfs4_acl: */ + if (buflen == 0) + return -EINVAL; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) -- GitLab From 9ec491447b90ad6a4056a9656b13f0b3a1e83043 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 11 Mar 2021 16:19:17 +0100 Subject: [PATCH 609/839] block: Suppress uevent for hidden device when removed register_disk() suppress uevents for devices with the GENHD_FL_HIDDEN but enables uevents at the end again in order to announce disk after possible partitions are created. When the device is removed the uevents are still on and user land sees 'remove' messages for devices which were never 'add'ed to the system. KERNEL[95481.571887] remove /devices/virtual/nvme-fabrics/ctl/nvme5/nvme0c5n1 (block) Let's suppress the uevents for GENHD_FL_HIDDEN by not enabling the uevents at all. Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Martin Wilck Link: https://lore.kernel.org/r/20210311151917.136091-1-dwagner@suse.de Signed-off-by: Jens Axboe --- block/genhd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index c55e8f0fced1d..8c8f543572e64 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -534,10 +534,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); - if (disk->flags & GENHD_FL_HIDDEN) { - dev_set_uevent_suppress(ddev, 0); + if (disk->flags & GENHD_FL_HIDDEN) return; - } disk_scan_partitions(disk); -- GitLab From e5113505904ea1c1c0e1f92c1cfa91fbf4da1694 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 11 Mar 2021 16:25:46 +0900 Subject: [PATCH 610/839] block: Discard page cache of zone reset target range When zone reset ioctl and data read race for a same zone on zoned block devices, the data read leaves stale page cache even though the zone reset ioctl zero clears all the zone data on the device. To avoid non-zero data read from the stale page cache after zone reset, discard page cache of reset target zones in blkdev_zone_mgmt_ioctl(). Introduce the helper function blkdev_truncate_zone_range() to discard the page cache. Ensure the page cache discarded by calling the helper function before and after zone reset in same manner as fallocate does. This patch can be applied back to the stable kernel version v5.10.y. Rework is needed for older stable kernels. Signed-off-by: Shin'ichiro Kawasaki Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls") Cc: # 5.10+ Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210311072546.678999-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- block/blk-zoned.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 8b9f3fc5a690a..c0276b42d9fb7 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -318,6 +318,22 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, return 0; } +static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode, + const struct blk_zone_range *zrange) +{ + loff_t start, end; + + if (zrange->sector + zrange->nr_sectors <= zrange->sector || + zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) + /* Out of range */ + return -EINVAL; + + start = zrange->sector << SECTOR_SHIFT; + end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; + + return truncate_bdev_range(bdev, mode, start, end); +} + /* * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. @@ -329,6 +345,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, struct request_queue *q; struct blk_zone_range zrange; enum req_opf op; + int ret; if (!argp) return -EINVAL; @@ -352,6 +369,11 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case BLKRESETZONE: op = REQ_OP_ZONE_RESET; + + /* Invalidate the page cache, including dirty pages. */ + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); + if (ret) + return ret; break; case BLKOPENZONE: op = REQ_OP_ZONE_OPEN; @@ -366,8 +388,20 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, return -ENOTTY; } - return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); + + /* + * Invalidate the page cache again for zone reset: writes can only be + * direct for zoned devices so concurrent writes would not add any page + * to the page cache after/during reset. The page cache may be filled + * again due to concurrent reads though and dropping the pages for + * these is fine. + */ + if (!ret && cmd == BLKRESETZONE) + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); + + return ret; } static inline unsigned long *blk_alloc_zone_bitmap(int node, -- GitLab From bade4be69a6ea6f38c5894468ede10ee60b6f7a0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 13:25:01 -0500 Subject: [PATCH 611/839] svcrdma: Revert "svcrdma: Reduce Receive doorbell rate" I tested commit 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") with mlx4 (IB) and software iWARP and didn't find any issues. However, I recently got my hardware iWARP setup back on line (FastLinQ) and it's crashing hard on this commit (confirmed via bisect). The failure mode is complex. - After a connection is established, the first Receive completes normally. - But the second and third Receives have garbage in their Receive buffers. The server responds with ERR_VERS as a result. - When the client tears down the connection to retry, a couple of posted Receives flush twice, and that corrupts the recv_ctxt free list. - __svc_rdma_free then faults or loops infinitely while destroying the xprt's recv_ctxts. Since 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") does not fix a bug but is a scalability enhancement, it's safe and appropriate to revert it while working on a replacement. Fixes: 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 82 ++++++++++++------------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7c693b31965e2..1e76ed6880447 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -104,7 +104,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - u32 sc_pending_recvs; struct list_head sc_read_complete_q; struct work_struct sc_work; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 6d28f23ceb352..7d34290e2ff8d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -266,46 +266,33 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma, ctxt); } -static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, - unsigned int wanted, bool temp) +static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, + struct svc_rdma_recv_ctxt *ctxt) { - const struct ib_recv_wr *bad_wr = NULL; - struct svc_rdma_recv_ctxt *ctxt; - struct ib_recv_wr *recv_chain; int ret; - recv_chain = NULL; - while (wanted--) { - ctxt = svc_rdma_recv_ctxt_get(rdma); - if (!ctxt) - break; - - trace_svcrdma_post_recv(ctxt); - ctxt->rc_temp = temp; - ctxt->rc_recv_wr.next = recv_chain; - recv_chain = &ctxt->rc_recv_wr; - rdma->sc_pending_recvs++; - } - if (!recv_chain) - return false; - - ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); + trace_svcrdma_post_recv(ctxt); + ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); if (ret) goto err_post; - return true; + return 0; err_post: - while (bad_wr) { - ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt, - rc_recv_wr); - bad_wr = bad_wr->next; - svc_rdma_recv_ctxt_put(rdma, ctxt); - } - trace_svcrdma_rq_post_err(rdma, ret); - /* Since we're destroying the xprt, no need to reset - * sc_pending_recvs. */ - return false; + svc_rdma_recv_ctxt_put(rdma, ctxt); + return ret; +} + +static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_recv_ctxt *ctxt; + + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + return 0; + ctxt = svc_rdma_recv_ctxt_get(rdma); + if (!ctxt) + return -ENOMEM; + return __svc_rdma_post_recv(rdma, ctxt); } /** @@ -316,7 +303,20 @@ err_post: */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); + struct svc_rdma_recv_ctxt *ctxt; + unsigned int i; + int ret; + + for (i = 0; i < rdma->sc_max_requests; i++) { + ctxt = svc_rdma_recv_ctxt_get(rdma); + if (!ctxt) + return false; + ctxt->rc_temp = true; + ret = __svc_rdma_post_recv(rdma, ctxt); + if (ret) + return false; + } + return true; } /** @@ -324,6 +324,8 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) * @cq: Completion Queue context * @wc: Work Completion object * + * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that + * the Receive completion handler could be running. */ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { @@ -331,8 +333,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_recv_ctxt *ctxt; - rdma->sc_pending_recvs--; - /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); @@ -340,6 +340,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) goto flushed; + if (svc_rdma_post_recv(rdma)) + goto post_err; + /* All wc fields are now known to be valid */ ctxt->rc_byte_len = wc->byte_len; @@ -350,18 +353,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) spin_unlock(&rdma->sc_rq_dto_lock); if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) svc_xprt_enqueue(&rdma->sc_xprt); - - if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) && - rdma->sc_pending_recvs < rdma->sc_max_requests) - if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH, - false)) - goto post_err; - return; flushed: - svc_rdma_recv_ctxt_put(rdma, ctxt); post_err: + svc_rdma_recv_ctxt_put(rdma, ctxt); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_xprt_enqueue(&rdma->sc_xprt); } -- GitLab From 0b736881c8f1a6cd912f7a9162b9e097b28c1c30 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 12:09:26 +0000 Subject: [PATCH 612/839] powerpc/traps: unrecoverable_exception() is not an interrupt handler unrecoverable_exception() is called from interrupt handlers or after an interrupt handler has failed. Make it a standard function to avoid doubling the actions performed on interrupt entry (e.g.: user time accounting). Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers") Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ae96c59fa2cb7f24a8929c58cfa2c909cb8ff1f1.1615291471.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 3 ++- arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/traps.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index aedfba29e43a5..e8d09a841373b 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); DECLARE_INTERRUPT_HANDLER(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); @@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +void unrecoverable_exception(struct pt_regs *regs); + void replay_system_reset(void); void replay_soft_interrupts(void); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 2ef3c4051bb93..c475a229a42ac 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -436,7 +436,6 @@ again: return ret; } -void unrecoverable_exception(struct pt_regs *regs); void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1583fd1c60103..a44a30b0688ca 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) +void unrecoverable_exception(struct pt_regs *regs) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); -- GitLab From a829f033e966d5e4aa27c3ef2b381f51734e4a7f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 2 Mar 2021 11:42:13 +0000 Subject: [PATCH 613/839] drm/i915: Wedge the GPU if command parser setup fails Commit 311a50e76a33 ("drm/i915: Add support for mandatory cmdparsing") introduced mandatory command parsing but setup failures were not translated into wedging the GPU which was probably the intent. Possible errors come in two categories. Either the sanity check on internal tables has failed, which should be caught in CI unless an affected platform would be missed in testing; or memory allocation failure happened during driver load, which should be extremely unlikely but for correctness should still be handled. v2: * Tidy coding style. (Chris) [airlied: cherry-picked to avoid rc1 base] Signed-off-by: Tvrtko Ursulin Fixes: 311a50e76a33 ("drm/i915: Add support for mandatory cmdparsing") Cc: Jon Bloomfield Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210302114213.1102223-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 5a1a659762d35a6dc51047c9127c011303c77b7f) Signed-off-by: Rodrigo Vivi Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++++++- drivers/gpu/drm/i915/i915_cmd_parser.c | 19 +++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 2 +- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index fb1b1d0969751..9cf555d6842b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -713,9 +713,12 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + err = intel_engine_init_cmd_parser(engine); + if (err) + goto err_cmd_parser; + intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); - intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -732,6 +735,8 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; +err_cmd_parser: + intel_breadcrumbs_free(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ced9a96d7c34b..5f86f5b2caf6f 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -940,7 +940,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) * struct intel_engine_cs based on whether the platform requires software * command parsing. */ -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; @@ -948,7 +948,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && engine->class == COPY_ENGINE_CLASS)) - return; + return 0; switch (engine->class) { case RENDER_CLASS: @@ -1013,19 +1013,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) break; default: MISSING_CASE(engine->class); - return; + goto out; } if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) { drm_err(&engine->i915->drm, "%s: command descriptions are not sorted\n", engine->name); - return; + goto out; } if (!validate_regs_sorted(engine)) { drm_err(&engine->i915->drm, "%s: registers are not sorted\n", engine->name); - return; + goto out; } ret = init_hash_table(engine, cmd_tables, cmd_table_count); @@ -1033,10 +1033,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) drm_err(&engine->i915->drm, "%s: initialised failed!\n", engine->name); fini_hash_table(engine); - return; + goto out; } engine->flags |= I915_ENGINE_USING_CMD_PARSER; + +out: + if (intel_engine_requires_cmd_parser(engine) && + !intel_engine_using_cmd_parser(engine)) + return -EINVAL; + + return 0; } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 26d69d06aa6d1..cb62ddba20352 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1952,7 +1952,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, -- GitLab From 4042160c2e5433e0759782c402292a90b5bf458d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 11 Mar 2021 14:35:27 +1000 Subject: [PATCH 614/839] drm/nouveau: fix dma syncing for loops (v2) The index variable should only be increased in one place. Noticed this while trying to track down another oops. v2: use while loop. Fixes: f295c8cfec83 ("drm/nouveau: fix dma syncing warning with debugging on.") Signed-off-by: Dave Airlie Reviewed-by: Michael J. Ruhl Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210311043527.5376-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2375711877cf3..fabb314a0b2f8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -556,7 +556,8 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; ++i) { + i = 0; + while (i < ttm_dma->num_pages) { struct page *p = ttm_dma->pages[i]; size_t num_pages = 1; @@ -587,7 +588,8 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; ++i) { + i = 0; + while (i < ttm_dma->num_pages) { struct page *p = ttm_dma->pages[i]; size_t num_pages = 1; -- GitLab From 606a5d4227e4610399c61086ac55c46068a90b03 Mon Sep 17 00:00:00 2001 From: Beata Michalska Date: Thu, 4 Mar 2021 15:07:34 +0000 Subject: [PATCH 615/839] opp: Don't drop extra references to OPPs accidentally We are required to call dev_pm_opp_put() from outside of the opp_table->lock as debugfs removal needs to happen lock-less to avoid circular dependency issues. commit cf1fac943c63 ("opp: Reduce the size of critical section in _opp_kref_release()") tried to fix that introducing a new routine _opp_get_next() which keeps returning OPPs that can be freed by the callers and this routine shall be called without holding the opp_table->lock. Though the commit overlooked the fact that the OPPs can be referenced by other users as well and this routine will end up dropping references which were taken by other users and hence freeing the OPPs prematurely. In effect, other users of the OPPs will end up having invalid pointers at hand. We didn't see any crash reports earlier as the exact situation never happened, though it is certainly possible. We need a way to mark which OPPs are no longer referenced by the OPP core, so we don't drop extra references to them accidentally. This commit adds another OPP flag, "removed", which is used to track this. And now we should never end up dropping extra references to the OPPs. Cc: v5.11+ # v5.11+ Fixes: cf1fac943c63 ("opp: Reduce the size of critical section in _opp_kref_release()") Signed-off-by: Beata Michalska [ Viresh: Almost rewrote entire patch, added new "removed" field, rewrote commit log and added the correct Fixes tag. ] Co-developed-by: Viresh Kumar Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 48 ++++++++++++++++++++++++---------------------- drivers/opp/opp.h | 2 ++ 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c2689386a9068..1556998425d5b 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1492,7 +1492,11 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table, mutex_lock(&opp_table->lock); list_for_each_entry(temp, &opp_table->opp_list, node) { - if (dynamic == temp->dynamic) { + /* + * Refcount must be dropped only once for each OPP by OPP core, + * do that with help of "removed" flag. + */ + if (!temp->removed && dynamic == temp->dynamic) { opp = temp; break; } @@ -1502,10 +1506,27 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table, return opp; } -bool _opp_remove_all_static(struct opp_table *opp_table) +/* + * Can't call dev_pm_opp_put() from under the lock as debugfs removal needs to + * happen lock less to avoid circular dependency issues. This routine must be + * called without the opp_table->lock held. + */ +static void _opp_remove_all(struct opp_table *opp_table, bool dynamic) { struct dev_pm_opp *opp; + while ((opp = _opp_get_next(opp_table, dynamic))) { + opp->removed = true; + dev_pm_opp_put(opp); + + /* Drop the references taken by dev_pm_opp_add() */ + if (dynamic) + dev_pm_opp_put_opp_table(opp_table); + } +} + +bool _opp_remove_all_static(struct opp_table *opp_table) +{ mutex_lock(&opp_table->lock); if (!opp_table->parsed_static_opps) { @@ -1520,13 +1541,7 @@ bool _opp_remove_all_static(struct opp_table *opp_table) mutex_unlock(&opp_table->lock); - /* - * Can't remove the OPP from under the lock, debugfs removal needs to - * happen lock less to avoid circular dependency issues. - */ - while ((opp = _opp_get_next(opp_table, false))) - dev_pm_opp_put(opp); - + _opp_remove_all(opp_table, false); return true; } @@ -1539,25 +1554,12 @@ bool _opp_remove_all_static(struct opp_table *opp_table) void dev_pm_opp_remove_all_dynamic(struct device *dev) { struct opp_table *opp_table; - struct dev_pm_opp *opp; - int count = 0; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) return; - /* - * Can't remove the OPP from under the lock, debugfs removal needs to - * happen lock less to avoid circular dependency issues. - */ - while ((opp = _opp_get_next(opp_table, true))) { - dev_pm_opp_put(opp); - count++; - } - - /* Drop the references taken by dev_pm_opp_add() */ - while (count--) - dev_pm_opp_put_opp_table(opp_table); + _opp_remove_all(opp_table, true); /* Drop the reference taken by _find_opp_table() */ dev_pm_opp_put_opp_table(opp_table); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 50fb9dced3c52..407c3bfe51d96 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -56,6 +56,7 @@ extern struct list_head opp_tables, lazy_opp_tables; * @dynamic: not-created from static DT entries. * @turbo: true if turbo (boost) OPP * @suspend: true if suspend OPP + * @removed: flag indicating that OPP's reference is dropped by OPP core. * @pstate: Device's power domain's performance state. * @rate: Frequency in hertz * @level: Performance level @@ -78,6 +79,7 @@ struct dev_pm_opp { bool dynamic; bool turbo; bool suspend; + bool removed; unsigned int pstate; unsigned long rate; unsigned int level; -- GitLab From ba08abca66d46381df60842f64f70099d5482b92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2021 15:46:04 +0100 Subject: [PATCH 616/839] objtool,x86: Fix uaccess PUSHF/POPF validation Commit ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") replaced "push %reg; popf" with something like: "test $0x200, %reg; jz 1f; sti; 1:", which breaks the pushf/popf symmetry that commit ea24213d8088 ("objtool: Add UACCESS validation") relies on. The result is: drivers/gpu/drm/amd/amdgpu/si.o: warning: objtool: si_common_hw_init()+0xf36: PUSHF stack exhausted Meanwhile, commit c9c324dc22aa ("objtool: Support stack layout changes in alternatives") makes that we can actually use stack-ops in alternatives, which means we can revert 1ff865e343c2 ("x86,smap: Fix smap_{save,restore}() alternatives"). That in turn means we can limit the PUSHF/POPF handling of ea24213d8088 to those instructions that are in alternatives. Fixes: ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YEY4rIbQYa5fnnEp@hirez.programming.kicks-ass.net --- arch/x86/include/asm/smap.h | 10 ++++------ tools/objtool/check.c | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8b58d6975d5d4..0bc9b0895f33e 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "pushf; pop %0; " __ASM_CLAC "\n\t" - "1:" + ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t", + X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); return flags; @@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "push %0; popf\n\t" - "1:" + ALTERNATIVE("", "push %0; popf\n\t", + X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb41f76fd..5e5388a38e2a7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) if (update_cfi_state(insn, &state->cfi, op)) return 1; + if (!insn->alt_group) + continue; + if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { state->uaccess_stack = 1; -- GitLab From 3875721e825cf3ab05fc1a52b6cbd76c8d16da51 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Mar 2021 08:04:23 +0000 Subject: [PATCH 617/839] gpiolib: Fix error return code in gpiolib_dev_init() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 4731210c09f5 ("gpiolib: Bind gpio_device to a driver to enable fw_devlink=on by default") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7ec0822c05055..6635e4ec60cef 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -4256,7 +4256,8 @@ static int __init gpiolib_dev_init(void) return ret; } - if (driver_register(&gpio_stub_drv) < 0) { + ret = driver_register(&gpio_stub_drv); + if (ret < 0) { pr_err("gpiolib: could not register GPIO stub driver\n"); bus_unregister(&gpio_bus_type); return ret; -- GitLab From c8e3866836528a4ba3b0535834f03768d74f7d8e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Mar 2021 08:04:21 +0000 Subject: [PATCH 618/839] perf/arm_dmc620_pmu: Fix error return code in dmc620_pmu_device_probe() Fix to return negative error code -ENOMEM from the error handling case instead of 0, as done elsewhere in this function. Fixes: 53c218da220c ("driver/perf: Add PMU driver for the ARM DMC-620 memory controller") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210312080421.277562-1-weiyongjun1@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 66ad5b3ece197..f2a85500258d0 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -681,6 +681,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) if (!name) { dev_err(&pdev->dev, "Create name failed, PMU @%pa\n", &res->start); + ret = -ENOMEM; goto out_teardown_dev; } -- GitLab From 2bf44e0ee95f39cc54ea1b942f0a027e0181ca4e Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 12 Mar 2021 12:14:08 +0800 Subject: [PATCH 619/839] ALSA: hda: generic: Fix the micmute led init state Recently we found the micmute led init state is not correct after freshly installing the ubuntu linux on a Lenovo AIO machine. The internal mic is not muted, but the micmute led is on and led mode is 'follow mute'. If we mute internal mic, the led is keeping on, then unmute the internal mic, the led is off. And from then on, the micmute led will work correctly. So the micmute led init state is not correct. The led is controlled by codec gpio (ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), in the patch_realtek, the gpio data is set to 0x4 initially and the led is on with this data. In the hda_generic, the led_value is set to 0 initially, suppose users set the 'capture switch' to on from user space and the micmute led should change to be off with this operation, but the check "if (val == spec->micmute_led.led_value)" in the call_micmute_led_update() will skip the led setting. To guarantee the led state will be set by the 1st time of changing "Capture Switch", set -1 to the init led_value. Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210312041408.3776-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 8b7c5508f3688..f5cba7afd1c66 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4065,7 +4065,7 @@ static int add_micmute_led_hook(struct hda_codec *codec) spec->micmute_led.led_mode = MICMUTE_LED_FOLLOW_MUTE; spec->micmute_led.capture = 0; - spec->micmute_led.led_value = 0; + spec->micmute_led.led_value = -1; spec->micmute_led.old_hook = spec->cap_sync_hook; spec->cap_sync_hook = update_micmute_led; if (!snd_hda_gen_add_kctl(spec, NULL, &micmute_led_mode_ctl)) -- GitLab From dd7b836d6bc935df95c826f69ff4d051f5561604 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 12 Mar 2021 18:34:07 +0900 Subject: [PATCH 620/839] ALSA: dice: fix null pointer dereference when node is disconnected When node is removed from IEEE 1394 bus, any transaction fails to the node. In the case, ALSA dice driver doesn't stop isochronous contexts even if they are running. As a result, null pointer dereference occurs in callback from the running context. This commit fixes the bug to release isochronous contexts always. Cc: # v5.4 or later Fixes: e9f21129b8d8 ("ALSA: dice: support AMDTP domain") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210312093407.23437-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-stream.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index 8e0c0380b4c4b..1a14c083e8cea 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -493,11 +493,10 @@ void snd_dice_stream_stop_duplex(struct snd_dice *dice) struct reg_params tx_params, rx_params; if (dice->substreams_counter == 0) { - if (get_register_params(dice, &tx_params, &rx_params) >= 0) { - amdtp_domain_stop(&dice->domain); + if (get_register_params(dice, &tx_params, &rx_params) >= 0) finish_session(dice, &tx_params, &rx_params); - } + amdtp_domain_stop(&dice->domain); release_resources(dice); } } -- GitLab From d5bf630f355d8c532bef2347cf90e8ae60a5f1bd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 12 Mar 2021 07:58:54 -0500 Subject: [PATCH 621/839] gfs2: bypass signal_our_withdraw if no journal Before this patch, function signal_our_withdraw referenced the journal inode immediately. But corrupt file systems may have some invalid journals, in which case our attempt to read it in will withdraw and the resulting signal_our_withdraw would dereference the NULL value. This patch adds a check to signal_our_withdraw so that if the journal has not yet been initialized, it simply returns and does the old-style withdraw. Thanks, Andy Price, for his analysis. Reported-by: syzbot+50a8a9cf8127f2c6f5df@syzkaller.appspotmail.com Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 58743315cda9f..4f034b87b4276 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -119,17 +119,22 @@ void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) static void signal_our_withdraw(struct gfs2_sbd *sdp) { struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; - struct inode *inode = sdp->sd_jdesc->jd_inode; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_glock *i_gl = ip->i_gl; - u64 no_formal_ino = ip->i_no_formal_ino; + struct inode *inode; + struct gfs2_inode *ip; + struct gfs2_glock *i_gl; + u64 no_formal_ino; int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); int ret = 0; int tries; - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) return; + inode = sdp->sd_jdesc->jd_inode; + ip = GFS2_I(inode); + i_gl = ip->i_gl; + no_formal_ino = ip->i_no_formal_ino; + /* Prevent any glock dq until withdraw recovery is complete */ set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* -- GitLab From e1915f76a8981f0a750cf56515df42582a37c4b0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 11 Mar 2021 23:29:35 +0000 Subject: [PATCH 622/839] io_uring: cancel deferred requests in try_cancel As io_uring_cancel_files() and others let SQO to run between io_uring_try_cancel_requests(), SQO may generate new deferred requests, so it's safer to try to cancel them in it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 49f85f49e1c3c..56f3d8f408c9a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8577,11 +8577,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data) return ret; } -static void io_cancel_defer_files(struct io_ring_ctx *ctx, +static bool io_cancel_defer_files(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) { - struct io_defer_entry *de = NULL; + struct io_defer_entry *de; LIST_HEAD(list); spin_lock_irq(&ctx->completion_lock); @@ -8592,6 +8592,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } spin_unlock_irq(&ctx->completion_lock); + if (list_empty(&list)) + return false; while (!list_empty(&list)) { de = list_first_entry(&list, struct io_defer_entry, list); @@ -8601,6 +8603,7 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, io_req_complete(de->req, -ECANCELED); kfree(de); } + return true; } static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) @@ -8666,6 +8669,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, } } + ret |= io_cancel_defer_files(ctx, task, files); ret |= io_poll_remove_all(ctx, task, files); ret |= io_kill_timeouts(ctx, task, files); ret |= io_run_task_work(); @@ -8734,8 +8738,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, atomic_inc(&task->io_uring->in_idle); } - io_cancel_defer_files(ctx, task, files); - io_uring_cancel_files(ctx, task, files); if (!files) io_uring_try_cancel_requests(ctx, task, NULL); -- GitLab From 0df8ea602b3fe80819a34361027ad40485e78909 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 11 Mar 2021 23:29:36 +0000 Subject: [PATCH 623/839] io_uring: remove useless ->startup completion We always do complete(&sqd->startup) almost right after sqd->thread creation, either in the success path or in io_sq_thread_finish(). It's specifically created not started for us to be able to set some stuff like sqd->thread and io_uring_alloc_task_context() before following right after wake_up_new_task(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 56f3d8f408c9a..6349374d715d9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -272,7 +272,6 @@ struct io_sq_data { pid_t task_tgid; unsigned long state; - struct completion startup; struct completion exited; }; @@ -6656,8 +6655,6 @@ static int io_sq_thread(void *data) set_cpus_allowed_ptr(current, cpu_online_mask); current->flags |= PF_NO_SETAFFINITY; - wait_for_completion(&sqd->startup); - down_read(&sqd->rw_lock); while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { @@ -7080,7 +7077,6 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx) struct io_sq_data *sqd = ctx->sq_data; if (sqd) { - complete(&sqd->startup); io_sq_thread_park(sqd); list_del(&ctx->sqd_list); io_sqd_update_thread_idle(sqd); @@ -7144,7 +7140,6 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) INIT_LIST_HEAD(&sqd->ctx_list); init_rwsem(&sqd->rw_lock); init_waitqueue_head(&sqd->wait); - init_completion(&sqd->startup); init_completion(&sqd->exited); return sqd; } @@ -7856,7 +7851,6 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, wake_up_new_task(tsk); if (ret) goto err; - complete(&sqd->startup); } else if (p->flags & IORING_SETUP_SQ_AFF) { /* Can't have SQ_AFF without SQPOLL */ ret = -EINVAL; -- GitLab From 8d06b9633a66f41fed520f6eebd163189518ba79 Mon Sep 17 00:00:00 2001 From: Jiaxin Yu Date: Fri, 12 Mar 2021 10:26:45 +0800 Subject: [PATCH 624/839] ASoC: mediatek: mt8192: fix tdm out data is valid on rising edge This patch correct tdm out bck inverse register to AUDIO_TOP_CON3[3]. Signed-off-by: Jiaxin Yu Link: https://lore.kernel.org/r/1615516005-781-1-git-send-email-jiaxin.yu@mediatek.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8192/mt8192-dai-tdm.c | 4 +++- sound/soc/mediatek/mt8192/mt8192-reg.h | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c b/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c index f5de1d7696792..f3bebed2428a7 100644 --- a/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c +++ b/sound/soc/mediatek/mt8192/mt8192-dai-tdm.c @@ -555,7 +555,9 @@ static int mtk_dai_tdm_hw_params(struct snd_pcm_substream *substream, /* set tdm */ if (tdm_priv->bck_invert) - tdm_con |= 1 << BCK_INVERSE_SFT; + regmap_update_bits(afe->regmap, AUDIO_TOP_CON3, + BCK_INVERSE_MASK_SFT, + 0x1 << BCK_INVERSE_SFT); if (tdm_priv->lck_invert) tdm_con |= 1 << LRCK_INVERSE_SFT; diff --git a/sound/soc/mediatek/mt8192/mt8192-reg.h b/sound/soc/mediatek/mt8192/mt8192-reg.h index 562f25c79c349..b9fb80d4afecd 100644 --- a/sound/soc/mediatek/mt8192/mt8192-reg.h +++ b/sound/soc/mediatek/mt8192/mt8192-reg.h @@ -21,6 +21,11 @@ enum { /***************************************************************************** * R E G I S T E R D E F I N I T I O N *****************************************************************************/ +/* AUDIO_TOP_CON3 */ +#define BCK_INVERSE_SFT 3 +#define BCK_INVERSE_MASK 0x1 +#define BCK_INVERSE_MASK_SFT (0x1 << 3) + /* AFE_DAC_CON0 */ #define VUL12_ON_SFT 31 #define VUL12_ON_MASK 0x1 @@ -2079,9 +2084,6 @@ enum { #define TDM_EN_SFT 0 #define TDM_EN_MASK 0x1 #define TDM_EN_MASK_SFT (0x1 << 0) -#define BCK_INVERSE_SFT 1 -#define BCK_INVERSE_MASK 0x1 -#define BCK_INVERSE_MASK_SFT (0x1 << 1) #define LRCK_INVERSE_SFT 2 #define LRCK_INVERSE_MASK 0x1 #define LRCK_INVERSE_MASK_SFT (0x1 << 2) -- GitLab From 0efc4976e3da40b09c592b21f722022d8f12a16b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 12 Mar 2021 08:47:47 -0500 Subject: [PATCH 625/839] gfs2: bypass log flush if the journal is not live Patch fe3e397668775 ("gfs2: Rework the log space allocation logic") changed gfs2_log_flush to reserve a set of journal blocks in case no transaction is active. However, gfs2_log_flush also gets called in cases where we don't have an active journal, for example, for spectator mounts. In that case, trying to reserve blocks would sleep forever, but we want gfs2_log_flush to be a no-op instead. Fixes: fe3e397668775 ("gfs2: Rework the log space allocation logic") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 760af666576cc..6410281546f92 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1036,7 +1036,7 @@ repeat: * Do this check while holding the log_flush_lock to prevent new * buffers from being added to the ail via gfs2_pin() */ - if (gfs2_withdrawn(sdp)) + if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) goto out; /* Log might have been flushed while we waited for the flush lock */ -- GitLab From 7d717558dd5ef10d28866750d5c24ff892ea3778 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 10:00:15 +0000 Subject: [PATCH 626/839] KVM: arm64: Reject VM creation when the default IPA size is unsupported KVM/arm64 has forever used a 40bit default IPA space, partially due to its 32bit heritage (where the only choice is 40bit). However, there are implementations in the wild that have a *cough* much smaller *cough* IPA space, which leads to a misprogramming of VTCR_EL2, and a guest that is stuck on its first memory access if userspace dares to ask for the default IPA setting (which most VMMs do). Instead, blundly reject the creation of such VM, as we can't satisfy the requirements from userspace (with a one-off warning). Also clarify the boot warning, and document that the VM creation will fail when an unsupported IPA size is provided. Although this is an ABI change, it doesn't really change much for userspace: - the guest couldn't run before this change, but no error was returned. At least userspace knows what is happening. - a memory slot that was accepted because it did fit the default IPA space now doesn't even get a chance to be registered. The other thing that is left doing is to convince userspace to actually use the IPA space setting instead of relying on the antiquated default. Fixes: 233a7cb23531 ("kvm: arm64: Allow tuning the physical address size for VM") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andrew Jones Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20210311100016.3830038-2-maz@kernel.org --- Documentation/virt/kvm/api.rst | 3 +++ arch/arm64/kvm/reset.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1a2b5210cdbf5..38e327d4b4790 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION ioctl() at run-time. +Creation of the VM will fail if the requested IPA size (whether it is +implicit or explicit) is unsupported on the host. + Please note that configuring the IPA size does not affect the capability exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects size of the address translated by the stage2 level (guest physical to diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3eac..9d3d09a898945 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void) } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); - WARN(kvm_ipa_limit < KVM_PHYS_SHIFT, - "KVM IPA Size Limit (%d bits) is smaller than default size\n", - kvm_ipa_limit); - kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit); + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit, + ((kvm_ipa_limit < KVM_PHYS_SHIFT) ? + " (Reduced IPA size, limited VM/VMM compatibility)" : "")); return 0; } @@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } } mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); -- GitLab From 262b003d059c6671601a19057e9fe1a5e7f23722 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 10:00:16 +0000 Subject: [PATCH 627/839] KVM: arm64: Fix exclusive limit for IPA size When registering a memslot, we check the size and location of that memslot against the IPA size to ensure that we can provide guest access to the whole of the memory. Unfortunately, this check rejects memslot that end-up at the exact limit of the addressing capability for a given IPA size. For example, it refuses the creation of a 2GB memslot at 0x8000000 with a 32bit IPA space. Fix it by relaxing the check to accept a memslot reaching the limit of the IPA space. Fixes: c3058d5da222 ("arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE") Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20210311100016.3830038-3-maz@kernel.org --- arch/arm64/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 77cb2d28f2a43..8711894db8c22 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. */ - if (memslot->base_gfn + memslot->npages >= - (kvm_phys_size(kvm) >> PAGE_SHIFT)) + if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; mmap_read_lock(current->mm); -- GitLab From 26984fbf3ad9d1c1fb56a0c1e0cdf9fa3b806f0c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 11 Mar 2021 23:29:37 +0000 Subject: [PATCH 628/839] io_uring: prevent racy sqd->thread checks SQPOLL thread to which we're trying to attach may be going away, it's not nice but a more serious problem is if io_sq_offload_create() sees sqd->thread==NULL, and tries to init it with a new thread. There are tons of ways it can be exploited or fail. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6349374d715d9..cdec595104332 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7119,14 +7119,18 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) return sqd; } -static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) +static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, + bool *attached) { struct io_sq_data *sqd; + *attached = false; if (p->flags & IORING_SETUP_ATTACH_WQ) { sqd = io_attach_sq_data(p); - if (!IS_ERR(sqd)) + if (!IS_ERR(sqd)) { + *attached = true; return sqd; + } /* fall through for EPERM case, setup new sqd/task */ if (PTR_ERR(sqd) != -EPERM) return sqd; @@ -7799,12 +7803,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { struct task_struct *tsk; struct io_sq_data *sqd; + bool attached; ret = -EPERM; if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) goto err; - sqd = io_get_sq_data(p); + sqd = io_get_sq_data(p, &attached); if (IS_ERR(sqd)) { ret = PTR_ERR(sqd); goto err; @@ -7816,13 +7821,24 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, if (!ctx->sq_thread_idle) ctx->sq_thread_idle = HZ; + ret = 0; io_sq_thread_park(sqd); - list_add(&ctx->sqd_list, &sqd->ctx_list); - io_sqd_update_thread_idle(sqd); + /* don't attach to a dying SQPOLL thread, would be racy */ + if (attached && !sqd->thread) { + ret = -ENXIO; + } else { + list_add(&ctx->sqd_list, &sqd->ctx_list); + io_sqd_update_thread_idle(sqd); + } io_sq_thread_unpark(sqd); - if (sqd->thread) + if (ret < 0) { + io_put_sq_data(sqd); + ctx->sq_data = NULL; + return ret; + } else if (attached) { return 0; + } if (p->flags & IORING_SETUP_SQ_AFF) { int cpu = p->sq_thread_cpu; -- GitLab From 521d6a737a31c08dbab204a95cd4fb5bee725f0f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 11 Mar 2021 23:29:38 +0000 Subject: [PATCH 629/839] io_uring: cancel sqpoll via task_work 1) The first problem is io_uring_cancel_sqpoll() -> io_uring_cancel_task_requests() basically doing park(); park(); and so hanging. 2) Another one is more subtle, when the master task is doing cancellations, but SQPOLL task submits in-between the end of the cancellation but before finish() requests taking a ref to the ctx, and so eternally locking it up. 3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll() and same io_uring_cancel_sqpoll() from the owner task, they race for tctx->wait events. And there probably more of them. Instead do SQPOLL cancellations from within SQPOLL task context via task_work, see io_sqpoll_cancel_sync(). With that we don't need temporal park()/unpark() during cancellation, which is ugly, subtle and anyway doesn't allow to do io_run_task_work() properly. io_uring_cancel_sqpoll() is called only from SQPOLL task context and under sqd locking, so all parking is removed from there. And so, io_sq_thread_[un]park() and io_sq_thread_stop() are not used now by SQPOLL task, and that spare us from some headache. Also remove ctx->sqd_list early to avoid 2). And kill tctx->sqpoll, which is not used anymore. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 151 ++++++++++++++++++++++++-------------------------- 1 file changed, 71 insertions(+), 80 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index cdec595104332..70286b393c0e8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6665,6 +6665,7 @@ static int io_sq_thread(void *data) up_read(&sqd->rw_lock); cond_resched(); down_read(&sqd->rw_lock); + io_run_task_work(); timeout = jiffies + sqd->sq_thread_idle; continue; } @@ -6720,18 +6721,22 @@ static int io_sq_thread(void *data) finish_wait(&sqd->wait, &wait); timeout = jiffies + sqd->sq_thread_idle; } - - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - io_uring_cancel_sqpoll(ctx); up_read(&sqd->rw_lock); - + down_write(&sqd->rw_lock); + /* + * someone may have parked and added a cancellation task_work, run + * it first because we don't want it in io_uring_cancel_sqpoll() + */ io_run_task_work(); - down_write(&sqd->rw_lock); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + io_uring_cancel_sqpoll(ctx); sqd->thread = NULL; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); up_write(&sqd->rw_lock); + + io_run_task_work(); complete(&sqd->exited); do_exit(0); } @@ -7033,8 +7038,8 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->rw_lock) { - if (sqd->thread == current) - return; + WARN_ON_ONCE(sqd->thread == current); + clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); up_write(&sqd->rw_lock); } @@ -7042,8 +7047,8 @@ static void io_sq_thread_unpark(struct io_sq_data *sqd) static void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->rw_lock) { - if (sqd->thread == current) - return; + WARN_ON_ONCE(sqd->thread == current); + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); down_write(&sqd->rw_lock); /* set again for consistency, in case concurrent parks are happening */ @@ -7054,8 +7059,8 @@ static void io_sq_thread_park(struct io_sq_data *sqd) static void io_sq_thread_stop(struct io_sq_data *sqd) { - if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) - return; + WARN_ON_ONCE(sqd->thread == current); + down_write(&sqd->rw_lock); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); if (sqd->thread) @@ -7078,7 +7083,7 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx) if (sqd) { io_sq_thread_park(sqd); - list_del(&ctx->sqd_list); + list_del_init(&ctx->sqd_list); io_sqd_update_thread_idle(sqd); io_sq_thread_unpark(sqd); @@ -7760,7 +7765,6 @@ static int io_uring_alloc_task_context(struct task_struct *task, init_waitqueue_head(&tctx->wait); tctx->last = NULL; atomic_set(&tctx->in_idle, 0); - tctx->sqpoll = false; task->io_uring = tctx; spin_lock_init(&tctx->task_lock); INIT_WQ_LIST(&tctx->task_list); @@ -8719,43 +8723,12 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_uring_try_cancel_requests(ctx, task, files); - if (ctx->sq_data) - io_sq_thread_unpark(ctx->sq_data); prepare_to_wait(&task->io_uring->wait, &wait, TASK_UNINTERRUPTIBLE); if (inflight == io_uring_count_inflight(ctx, task, files)) schedule(); finish_wait(&task->io_uring->wait, &wait); - if (ctx->sq_data) - io_sq_thread_park(ctx->sq_data); - } -} - -/* - * We need to iteratively cancel requests, in case a request has dependent - * hard links. These persist even for failure of cancelations, hence keep - * looping until none are found. - */ -static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct files_struct *files) -{ - struct task_struct *task = current; - - if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - io_sq_thread_park(ctx->sq_data); - task = ctx->sq_data->thread; - if (task) - atomic_inc(&task->io_uring->in_idle); } - - io_uring_cancel_files(ctx, task, files); - if (!files) - io_uring_try_cancel_requests(ctx, task, NULL); - - if (task) - atomic_dec(&task->io_uring->in_idle); - if (ctx->sq_data) - io_sq_thread_unpark(ctx->sq_data); } /* @@ -8796,15 +8769,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx) } tctx->last = ctx; } - - /* - * This is race safe in that the task itself is doing this, hence it - * cannot be going through the exit/cancel paths at the same time. - * This cannot be modified while exit/cancel is running. - */ - if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL)) - tctx->sqpoll = true; - return 0; } @@ -8847,6 +8811,44 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) } } +static s64 tctx_inflight(struct io_uring_task *tctx) +{ + return percpu_counter_sum(&tctx->inflight); +} + +static void io_sqpoll_cancel_cb(struct callback_head *cb) +{ + struct io_tctx_exit *work = container_of(cb, struct io_tctx_exit, task_work); + struct io_ring_ctx *ctx = work->ctx; + struct io_sq_data *sqd = ctx->sq_data; + + if (sqd->thread) + io_uring_cancel_sqpoll(ctx); + complete(&work->completion); +} + +static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx) +{ + struct io_sq_data *sqd = ctx->sq_data; + struct io_tctx_exit work = { .ctx = ctx, }; + struct task_struct *task; + + io_sq_thread_park(sqd); + list_del_init(&ctx->sqd_list); + io_sqd_update_thread_idle(sqd); + task = sqd->thread; + if (task) { + init_completion(&work.completion); + init_task_work(&work.task_work, io_sqpoll_cancel_cb); + WARN_ON_ONCE(task_work_add(task, &work.task_work, TWA_SIGNAL)); + wake_up_process(task); + } + io_sq_thread_unpark(sqd); + + if (task) + wait_for_completion(&work.completion); +} + void __io_uring_files_cancel(struct files_struct *files) { struct io_uring_task *tctx = current->io_uring; @@ -8855,41 +8857,40 @@ void __io_uring_files_cancel(struct files_struct *files) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - xa_for_each(&tctx->xa, index, node) - io_uring_cancel_task_requests(node->ctx, files); + xa_for_each(&tctx->xa, index, node) { + struct io_ring_ctx *ctx = node->ctx; + + if (ctx->sq_data) { + io_sqpoll_cancel_sync(ctx); + continue; + } + io_uring_cancel_files(ctx, current, files); + if (!files) + io_uring_try_cancel_requests(ctx, current, NULL); + } atomic_dec(&tctx->in_idle); if (files) io_uring_clean_tctx(tctx); } -static s64 tctx_inflight(struct io_uring_task *tctx) -{ - return percpu_counter_sum(&tctx->inflight); -} - +/* should only be called by SQPOLL task */ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) { struct io_sq_data *sqd = ctx->sq_data; - struct io_uring_task *tctx; + struct io_uring_task *tctx = current->io_uring; s64 inflight; DEFINE_WAIT(wait); - if (!sqd) - return; - io_sq_thread_park(sqd); - if (!sqd->thread || !sqd->thread->io_uring) { - io_sq_thread_unpark(sqd); - return; - } - tctx = ctx->sq_data->thread->io_uring; + WARN_ON_ONCE(!sqd || ctx->sq_data->thread != current); + atomic_inc(&tctx->in_idle); do { /* read completions before cancelations */ inflight = tctx_inflight(tctx); if (!inflight) break; - io_uring_cancel_task_requests(ctx, NULL); + io_uring_try_cancel_requests(ctx, current, NULL); prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); /* @@ -8902,7 +8903,6 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) finish_wait(&tctx->wait, &wait); } while (1); atomic_dec(&tctx->in_idle); - io_sq_thread_unpark(sqd); } /* @@ -8917,15 +8917,6 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - if (tctx->sqpoll) { - struct io_tctx_node *node; - unsigned long index; - - xa_for_each(&tctx->xa, index, node) - io_uring_cancel_sqpoll(node->ctx); - } - do { /* read completions before cancelations */ inflight = tctx_inflight(tctx); -- GitLab From 58f99373834151e1ca7edc49bc5578d9d40db099 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 12 Mar 2021 16:25:55 +0000 Subject: [PATCH 630/839] io_uring: fix OP_ASYNC_CANCEL across tasks IORING_OP_ASYNC_CANCEL tries io-wq cancellation only for current task. If it fails go over tctx_list and try it out for every single tctx. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 70286b393c0e8..a4bce17af506b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5667,8 +5667,47 @@ static int io_async_cancel_prep(struct io_kiocb *req, static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; + u64 sqe_addr = req->cancel.addr; + struct io_tctx_node *node; + int ret; - io_async_find_and_cancel(ctx, req, req->cancel.addr, 0); + /* tasks should wait for their io-wq threads, so safe w/o sync */ + ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); + spin_lock_irq(&ctx->completion_lock); + if (ret != -ENOENT) + goto done; + ret = io_timeout_cancel(ctx, sqe_addr); + if (ret != -ENOENT) + goto done; + ret = io_poll_cancel(ctx, sqe_addr); + if (ret != -ENOENT) + goto done; + spin_unlock_irq(&ctx->completion_lock); + + /* slow path, try all io-wq's */ + io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + ret = -ENOENT; + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + if (!tctx || !tctx->io_wq) + continue; + ret = io_async_cancel_one(tctx, req->cancel.addr, ctx); + if (ret != -ENOENT) + break; + } + io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); + + spin_lock_irq(&ctx->completion_lock); +done: + io_cqring_fill_event(req, ret); + io_commit_cqring(ctx); + spin_unlock_irq(&ctx->completion_lock); + io_cqring_ev_posted(ctx); + + if (ret < 0) + req_set_fail_links(req); + io_put_req(req); return 0; } -- GitLab From 6fcd9cbc6a903f48eebaa14657aeccb003f69a3d Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sat, 6 Mar 2021 00:11:23 +0500 Subject: [PATCH 631/839] kvm: x86: annotate RCU pointers This patch adds the annotation to fix the following sparse errors: arch/x86/kvm//x86.c:8147:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:8147:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//x86.c:8147:15: struct kvm_apic_map * arch/x86/kvm//x86.c:10628:16: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:10628:16: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//x86.c:10628:16: struct kvm_apic_map * arch/x86/kvm//x86.c:10629:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//x86.c:10629:15: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//x86.c:10629:15: struct kvm_pmu_event_filter * arch/x86/kvm//lapic.c:267:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:267:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:267:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:269:9: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:269:9: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:269:9: struct kvm_apic_map * arch/x86/kvm//lapic.c:637:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:637:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:637:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:994:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:994:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:994:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:1036:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:1036:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:1036:15: struct kvm_apic_map * arch/x86/kvm//lapic.c:1173:15: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//lapic.c:1173:15: struct kvm_apic_map [noderef] __rcu * arch/x86/kvm//lapic.c:1173:15: struct kvm_apic_map * arch/x86/kvm//pmu.c:190:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:190:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:190:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:251:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:251:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:251:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:522:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter * arch/x86/kvm//pmu.c:522:18: error: incompatible types in comparison expression (different address spaces): arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter [noderef] __rcu * arch/x86/kvm//pmu.c:522:18: struct kvm_pmu_event_filter * Signed-off-by: Muhammad Usama Anjum Message-Id: <20210305191123.GA497469@LEGION> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 877a4025d8da0..9bc091ecaaeb9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -963,7 +963,7 @@ struct kvm_arch { struct kvm_pit *vpit; atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; - struct kvm_apic_map *apic_map; + struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; bool apic_access_page_done; @@ -1036,7 +1036,7 @@ struct kvm_arch { bool bus_lock_detection_enabled; - struct kvm_pmu_event_filter *pmu_event_filter; + struct kvm_pmu_event_filter __rcu *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; #ifdef CONFIG_X86_64 -- GitLab From d7eb79c6290c7ae4561418544072e0a3266e7384 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 24 Feb 2021 09:37:29 +0800 Subject: [PATCH 632/839] KVM: kvmclock: Fix vCPUs > 64 can't be online/hotpluged # lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 88 On-line CPU(s) list: 0-63 Off-line CPU(s) list: 64-87 # cat /proc/cmdline BOOT_IMAGE=/vmlinuz-5.10.0-rc3-tlinux2-0050+ root=/dev/mapper/cl-root ro rd.lvm.lv=cl/root rhgb quiet console=ttyS0 LANG=en_US .UTF-8 no-kvmclock-vsyscall # echo 1 > /sys/devices/system/cpu/cpu76/online -bash: echo: write error: Cannot allocate memory The per-cpu vsyscall pvclock data pointer assigns either an element of the static array hv_clock_boot (#vCPU <= 64) or dynamically allocated memory hvclock_mem (vCPU > 64), the dynamically memory will not be allocated if kvmclock vsyscall is disabled, this can result in cpu hotpluged fails in kvmclock_setup_percpu() which returns -ENOMEM. It's broken for no-vsyscall and sometimes you end up with vsyscall disabled if the host does something strange. This patch fixes it by allocating this dynamically memory unconditionally even if vsyscall is disabled. Fixes: 6a1cac56f4 ("x86/kvm: Use __bss_decrypted attribute in shared variables") Reported-by: Zelin Deng Cc: Brijesh Singh Cc: stable@vger.kernel.org#v4.19-rc5+ Signed-off-by: Wanpeng Li Message-Id: <1614130683-24137-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index aa593743acf67..1fc0962c89c08 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void) static int __init kvm_setup_vsyscall_timeinfo(void) { -#ifdef CONFIG_X86_64 - u8 flags; + kvmclock_init_mem(); - if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall) - return 0; +#ifdef CONFIG_X86_64 + if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) { + u8 flags; - flags = pvclock_read_flags(&hv_clock_boot[0].pvti); - if (!(flags & PVCLOCK_TSC_STABLE_BIT)) - return 0; + flags = pvclock_read_flags(&hv_clock_boot[0].pvti); + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) + return 0; - kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + } #endif - kvmclock_init_mem(); - return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); -- GitLab From 8df9f1af2eced9720f71cf310275d81c1bf07a06 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Mar 2021 16:30:29 -0800 Subject: [PATCH 633/839] KVM: x86/mmu: Skip !MMU-present SPTEs when removing SP in exclusive mode If mmu_lock is held for write, don't bother setting !PRESENT SPTEs to REMOVED_SPTE when recursively zapping SPTEs as part of shadow page removal. The concurrent write protections provided by REMOVED_SPTE are not needed, there are no backing page side effects to record, and MMIO SPTEs can be left as is since they are protected by the memslot generation, not by ensuring that the MMIO SPTE is unreachable (which is racy with respect to lockless walks regardless of zapping behavior). Skipping !PRESENT drastically reduces the number of updates needed to tear down sparsely populated MMUs, e.g. when tearing down a 6gb VM that didn't touch much memory, 6929/7168 (~96.6%) of SPTEs were '0' and could be skipped. Avoiding the write itself is likely close to a wash, but avoiding __handle_changed_spte() is a clear-cut win as that involves saving and restoring all non-volatile GPRs (it's a subtly big function), as well as several conditional branches before bailing out. Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210310003029.1250571-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c926c6b899a10..d78915019b082 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, cpu_relax(); } } else { + /* + * If the SPTE is not MMU-present, there is no backing + * page associated with the SPTE and so no side effects + * that need to be recorded, and exclusive ownership of + * mmu_lock ensures the SPTE can't be made present. + * Note, zapping MMIO SPTEs is also unnecessary as they + * are guarded by the memslots generation, not by being + * unreachable. + */ old_child_spte = READ_ONCE(*sptep); + if (!is_shadow_present_pte(old_child_spte)) + continue; /* * Marking the SPTE as a removed SPTE is not -- GitLab From 35737d2db2f4567106c90060ad110b27cb354fa4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 4 Mar 2021 08:35:18 +0800 Subject: [PATCH 634/839] KVM: LAPIC: Advancing the timer expiration on guest initiated write Advancing the timer expiration should only be necessary on guest initiated writes. When we cancel the timer and clear .pending during state restore, clear expired_tscdeadline as well. Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Message-Id: <1614818118-965-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cb8ebfaccfb6e..cc369b9ad8f11 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2604,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); + apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); -- GitLab From f4f9fc29e56b6fa9d7fa65ec51d3c82aff99c99b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Mar 2021 20:55:36 +0100 Subject: [PATCH 635/839] nvme: fix the nsid value to print in nvme_validate_or_alloc_ns ns can be NULL at this point, and my move of the check from the original patch by Chaitanya broke this. Fixes: 0ec84df4953b ("nvme-core: check ctrl css before setting up zns") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 82ad5eef9d0c3..a5653892d7739 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4099,7 +4099,7 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (!nvme_multi_css(ctrl)) { dev_warn(ctrl->device, "command set not reported for nsid: %d\n", - ns->head->ns_id); + nsid); break; } nvme_alloc_ns(ctrl, nsid, &ids); -- GitLab From 3b0c2d3eaa83da259d7726192cf55a137769012f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Mar 2021 15:07:09 -0600 Subject: [PATCH 636/839] Revert 95ebabde382c ("capabilities: Don't allow writing ambiguous v3 file capabilities") It turns out that there are in fact userspace implementations that care and this recent change caused a regression. https://github.com/containers/buildah/issues/3071 As the motivation for the original change was future development, and the impact is existing real world code just revert this change and allow the ambiguity in v3 file caps. Cc: stable@vger.kernel.org Fixes: 95ebabde382c ("capabilities: Don't allow writing ambiguous v3 file capabilities") Signed-off-by: Eric W. Biederman --- security/commoncap.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 28f4d25480dfa..1c519c8752176 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -543,8 +543,7 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, __u32 magic, nsmagic; struct inode *inode = d_backing_inode(dentry); struct user_namespace *task_ns = current_user_ns(), - *fs_ns = inode->i_sb->s_user_ns, - *ancestor; + *fs_ns = inode->i_sb->s_user_ns; kuid_t rootid; size_t newsize; @@ -567,15 +566,6 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, if (nsrootid == -1) return -EINVAL; - /* - * Do not allow allow adding a v3 filesystem capability xattr - * if the rootid field is ambiguous. - */ - for (ancestor = task_ns->parent; ancestor; ancestor = ancestor->parent) { - if (from_kuid(ancestor, rootid) == 0) - return -EINVAL; - } - newsize = sizeof(struct vfs_ns_cap_data); nscap = kmalloc(newsize, GFP_ATOMIC); if (!nscap) -- GitLab From bcbcf50f521843445c9ea320a0569874f88c4b7a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 13 Mar 2021 04:38:14 +0900 Subject: [PATCH 637/839] kbuild: fix ld-version.sh to not be affected by locale ld-version.sh checks the output from $(LD) --version, but it has a problem on some locales. For example, in Italian: $ LC_MESSAGES=it_IT.UTF-8 ld --version | head -n 1 ld di GNU (GNU Binutils for Debian) 2.35.2 This makes ld-version.sh fail because it expects "GNU ld" for the BFD linker case. Add LC_ALL=C to override the user's locale. BTW, setting LC_MESSAGES=C (or LANG=C) is not enough because it is ineffective if LC_ALL is set on the user's environment. Link: https://bugzilla.kernel.org/show_bug.cgi?id=212105 Reported-by: Marco Scardovi Reported-by: Andy Shevchenko Signed-off-by: Masahiro Yamada Recensito-da: Nick Desaulniers Reviewed-by: Nathan Chancellor --- scripts/ld-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh index 30debf78aa090..1bf3aadde9def 100755 --- a/scripts/ld-version.sh +++ b/scripts/ld-version.sh @@ -29,7 +29,7 @@ orig_args="$@" # Get the first line of the --version output. IFS=' ' -set -- $("$@" --version) +set -- $(LC_ALL=C "$@" --version) # Split the line on spaces. IFS=' ' -- GitLab From 15b2219facadec583c24523eed40fa45865f859f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Mar 2021 20:20:42 -0700 Subject: [PATCH 638/839] kernel: freezer should treat PF_IO_WORKER like PF_KTHREAD for freezing Don't send fake signals to PF_IO_WORKER threads, they don't accept signals. Just treat them like kthreads in this regard, all they need is a wakeup as no forced kernel/user transition is needed. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe --- kernel/freezer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/freezer.c b/kernel/freezer.c index dc520f01f99dd..1a2d57d1327cd 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -134,7 +134,7 @@ bool freeze_task(struct task_struct *p) return false; } - if (!(p->flags & PF_KTHREAD)) + if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) fake_signal_wake_up(p); else wake_up_state(p, TASK_INTERRUPTIBLE); -- GitLab From 16efa4fce3b7af17bb45d635c3e89992d721e0f3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Mar 2021 20:26:13 -0700 Subject: [PATCH 639/839] io_uring: allow IO worker threads to be frozen With the freezer using the proper signaling to notify us of when it's time to freeze a thread, we can re-enable normal freezer usage for the IO threads. Ensure that SQPOLL, io-wq, and the io-wq manager call try_to_freeze() appropriately, and remove the default setting of PF_NOFREEZE from create_io_thread(). Signed-off-by: Jens Axboe --- fs/io-wq.c | 6 +++++- fs/io_uring.c | 1 + kernel/fork.c | 1 - 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 0ae9ecadf295c..e05f996d088f1 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -488,6 +488,8 @@ static int io_wqe_worker(void *data) set_task_comm(current, buf); while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { + long ret; + set_current_state(TASK_INTERRUPTIBLE); loop: raw_spin_lock_irq(&wqe->lock); @@ -498,7 +500,8 @@ loop: __io_worker_idle(wqe, worker); raw_spin_unlock_irq(&wqe->lock); io_flush_signals(); - if (schedule_timeout(WORKER_IDLE_TIMEOUT)) + ret = schedule_timeout(WORKER_IDLE_TIMEOUT); + if (try_to_freeze() || ret) continue; if (fatal_signal_pending(current)) break; @@ -709,6 +712,7 @@ static int io_wq_manager(void *data) set_current_state(TASK_INTERRUPTIBLE); io_wq_check_workers(wq); schedule_timeout(HZ); + try_to_freeze(); if (fatal_signal_pending(current)) set_bit(IO_WQ_BIT_EXIT, &wq->state); } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); diff --git a/fs/io_uring.c b/fs/io_uring.c index a4bce17af506b..05adc4887ef38 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6752,6 +6752,7 @@ static int io_sq_thread(void *data) up_read(&sqd->rw_lock); schedule(); + try_to_freeze(); down_read(&sqd->rw_lock); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); diff --git a/kernel/fork.c b/kernel/fork.c index 72e444cd0ffed..d3171e8e88e5d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2436,7 +2436,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) if (!IS_ERR(tsk)) { sigfillset(&tsk->blocked); sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); - tsk->flags |= PF_NOFREEZE; } return tsk; } -- GitLab From d0dcd90b7f472691de122515eb0d1765808b6d91 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 8 Mar 2021 08:31:03 +0100 Subject: [PATCH 640/839] usb: cdnsp: Fixes incorrect value in ISOC TRB Fixes issue with priority of operator. Operator "|" priority is higher then "? :". To improve the readability the operator "? :" has been replaced with "if ()" statement. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Pawel Laszczak Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index f9170d177a899..5f0513c96c04e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -2197,7 +2197,10 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, * inverted in the first TDs isoc TRB. */ field = TRB_TYPE(TRB_ISOC) | TRB_TLBPC(last_burst_pkt) | - start_cycle ? 0 : 1 | TRB_SIA | TRB_TBC(burst_count); + TRB_SIA | TRB_TBC(burst_count); + + if (!start_cycle) + field |= TRB_CYCLE; /* Fill the rest of the TRB fields, and remaining normal TRBs. */ for (i = 0; i < trbs_per_td; i++) { -- GitLab From 34dc2efb39a231280fd6696a59bbe712bf3c5c4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:01 -0800 Subject: [PATCH 641/839] memblock: fix section mismatch warning The inlining logic in clang-13 is rewritten to often not inline some functions that were inlined by all earlier compilers. In case of the memblock interfaces, this exposed a harmless bug of a missing __init annotation: WARNING: modpost: vmlinux.o(.text+0x507c0a): Section mismatch in reference from the function memblock_bottom_up() to the variable .meminit.data:memblock The function memblock_bottom_up() references the variable __meminitdata memblock. This is often because memblock_bottom_up lacks a __meminitdata annotation or the annotation of memblock is wrong. Interestingly, these annotations were present originally, but got removed with the explanation that the __init annotation prevents the function from getting inlined. I checked this again and found that while this is the case with clang, gcc (version 7 through 10, did not test others) does inline the functions regardless. As the previous change was apparently intended to help the clang builds, reverting it to help the newer clang versions seems appropriate as well. gcc builds don't seem to care either way. Link: https://lkml.kernel.org/r/20210225133808.2188581-1-arnd@kernel.org Fixes: 5bdba520c1b3 ("mm: memblock: drop __init from memblock functions to make it inline") Reference: 2cfb3665e864 ("include/linux/memblock.h: add __init to memblock_set_bottom_up()") Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Faiyaz Mohammed Cc: Baoquan He Cc: Thomas Bogendoerfer Cc: Aslan Bakirov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c88bc24e31aa5..d13e3cd938b46 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline __init void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable) * if this is true, that said, memblock will allocate memory * in bottom-up direction. */ -static inline bool memblock_bottom_up(void) +static inline __init bool memblock_bottom_up(void) { return memblock.bottom_up; } -- GitLab From cbf78d85079cee662c45749ef4f744d41be85d48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:04 -0800 Subject: [PATCH 642/839] stop_machine: mark helpers __always_inline With clang-13, some functions only get partially inlined, with a specialized version referring to a global variable. This triggers a harmless build-time check for the intel-rng driver: WARNING: modpost: drivers/char/hw_random/intel-rng.o(.text+0xe): Section mismatch in reference from the function stop_machine() to the function .init.text:intel_rng_hw_init() The function stop_machine() references the function __init intel_rng_hw_init(). This is often because stop_machine lacks a __init annotation or the annotation of intel_rng_hw_init is wrong. In this instance, an easy workaround is to force the stop_machine() function to be inline, along with related interfaces that did not show the same behavior at the moment, but theoretically could. The combination of the two patches listed below triggers the behavior in clang-13, but individually these commits are correct. Link: https://lkml.kernel.org/r/20210225130153.1956990-1-arnd@kernel.org Fixes: fe5595c07400 ("stop_machine: Provide stop_machine_cpuslocked()") Fixes: ee527cd3a20c ("Use stop_machine_run in the Intel RNG driver") Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Ingo Molnar Cc: Prarit Bhargava Cc: Daniel Bristot de Oliveira Cc: Peter Zijlstra Cc: Valentin Schneider Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 30577c3aecf81..46fb3ebdd16e4 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ -static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, +static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, return ret; } -static inline int stop_machine(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { return stop_machine_cpuslocked(fn, data, cpus); } -static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { return stop_machine(fn, data, cpus); } -- GitLab From ea29b20a828511de3348334e529a3d046a180416 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 12 Mar 2021 21:07:08 -0800 Subject: [PATCH 643/839] init/Kconfig: make COMPILE_TEST depend on HAS_IOMEM I read the commit log of the following two: - bc083a64b6c0 ("init/Kconfig: make COMPILE_TEST depend on !UML") - 334ef6ed06fa ("init/Kconfig: make COMPILE_TEST depend on !S390") Both are talking about HAS_IOMEM dependency missing in many drivers. So, 'depends on HAS_IOMEM' seems the direct, sensible solution to me. This does not change the behavior of UML. UML still cannot enable COMPILE_TEST because it does not provide HAS_IOMEM. The current dependency for S390 is too strong. Under the condition of CONFIG_PCI=y, S390 provides HAS_IOMEM, hence can enable COMPILE_TEST. I also removed the meaningless 'default n'. Link: https://lkml.kernel.org/r/20210224140809.1067582-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Cc: Heiko Carstens Cc: Guenter Roeck Cc: Arnd Bergmann Cc: Kees Cook Cc: Daniel Borkmann Cc: Johannes Weiner Cc: KP Singh Cc: Nathan Chancellor Cc: Nick Terrell Cc: Quentin Perret Cc: Valentin Schneider Cc: "Enrico Weigelt, metux IT consult" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 22946fe5ded98..8f8a497aafe05 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -119,8 +119,7 @@ config INIT_ENV_ARG_LIMIT config COMPILE_TEST bool "Compile also drivers which will not load" - depends on !UML && !S390 - default n + depends on HAS_IOMEM help Some drivers can be compiled on a different platform than they are intended to be run on. Despite they cannot be loaded there (or even -- GitLab From 0740a50b9baa4472cfb12442df4b39e2712a64a4 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 12 Mar 2021 21:07:12 -0800 Subject: [PATCH 644/839] mm/page_alloc.c: refactor initialization of struct page for holes in memory layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There could be struct pages that are not backed by actual physical memory. This can happen when the actual memory bank is not a multiple of SECTION_SIZE or when an architecture does not register memory holes reserved by the firmware as memblock.memory. Such pages are currently initialized using init_unavailable_mem() function that iterates through PFNs in holes in memblock.memory and if there is a struct page corresponding to a PFN, the fields of this page are set to default values and it is marked as Reserved. init_unavailable_mem() does not take into account zone and node the page belongs to and sets both zone and node links in struct page to zero. Before commit 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") the holes inside a zone were re-initialized during memmap_init() and got their zone/node links right. However, after that commit nothing updates the struct pages representing such holes. On a system that has firmware reserved holes in a zone above ZONE_DMA, for instance in a configuration below: # grep -A1 E820 /proc/iomem 7a17b000-7a216fff : Unknown E820 type 7a217000-7bffffff : System RAM unset zone link in struct page will trigger VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); in set_pfnblock_flags_mask() when called with a struct page from a range other than E820_TYPE_RAM because there are pages in the range of ZONE_DMA32 but the unset zone link in struct page makes them appear as a part of ZONE_DMA. Interleave initialization of the unavailable pages with the normal initialization of memory map, so that zone and node information will be properly set on struct pages that are not backed by the actual memory. With this change the pages for holes inside a zone will get proper zone/node links and the pages that are not spanned by any node will get links to the adjacent zone/node. The holes between nodes will be prepended to the zone/node above the hole and the trailing pages in the last section that will be appended to the zone/node below. [akpm@linux-foundation.org: don't initialize static to zero, use %llu for u64] Link: https://lkml.kernel.org/r/20210225224351.7356-2-rppt@kernel.org Fixes: 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") Signed-off-by: Mike Rapoport Reported-by: Qian Cai Reported-by: Andrea Arcangeli Reviewed-by: Baoquan He Acked-by: Vlastimil Babka Reviewed-by: David Hildenbrand Cc: Borislav Petkov Cc: Chris Wilson Cc: "H. Peter Anvin" Cc: Łukasz Majczak Cc: Ingo Molnar Cc: Mel Gorman Cc: Michal Hocko Cc: "Sarvela, Tomi P" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 158 +++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 83 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e4b29ee2b1e4..d4580e4215d8a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6259,12 +6259,65 @@ static void __meminit zone_init_free_lists(struct zone *zone) } } +#if !defined(CONFIG_FLAT_NODE_MEM_MAP) +/* + * Only struct pages that correspond to ranges defined by memblock.memory + * are zeroed and initialized by going through __init_single_page() during + * memmap_init_zone(). + * + * But, there could be struct pages that correspond to holes in + * memblock.memory. This can happen because of the following reasons: + * - physical memory bank size is not necessarily the exact multiple of the + * arbitrary section size + * - early reserved memory may not be listed in memblock.memory + * - memory layouts defined with memmap= kernel parameter may not align + * nicely with memmap sections + * + * Explicitly initialize those struct pages so that: + * - PG_Reserved is set + * - zone and node links point to zone and node that span the page if the + * hole is in the middle of a zone + * - zone and node links point to adjacent zone/node if the hole falls on + * the zone boundary; the pages in such holes will be prepended to the + * zone/node above the hole except for the trailing pages in the last + * section that will be appended to the zone/node below. + */ +static u64 __meminit init_unavailable_range(unsigned long spfn, + unsigned long epfn, + int zone, int node) +{ + unsigned long pfn; + u64 pgcnt = 0; + + for (pfn = spfn; pfn < epfn; pfn++) { + if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { + pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + + pageblock_nr_pages - 1; + continue; + } + __init_single_page(pfn_to_page(pfn), pfn, zone, node); + __SetPageReserved(pfn_to_page(pfn)); + pgcnt++; + } + + return pgcnt; +} +#else +static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn, + int zone, int node) +{ + return 0; +} +#endif + void __meminit __weak memmap_init_zone(struct zone *zone) { unsigned long zone_start_pfn = zone->zone_start_pfn; unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages; int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone); + static unsigned long hole_pfn; unsigned long start_pfn, end_pfn; + u64 pgcnt = 0; for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn); @@ -6274,7 +6327,29 @@ void __meminit __weak memmap_init_zone(struct zone *zone) memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn, zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); + + if (hole_pfn < start_pfn) + pgcnt += init_unavailable_range(hole_pfn, start_pfn, + zone_id, nid); + hole_pfn = end_pfn; } + +#ifdef CONFIG_SPARSEMEM + /* + * Initialize the hole in the range [zone_end_pfn, section_end]. + * If zone boundary falls in the middle of a section, this hole + * will be re-initialized during the call to this function for the + * higher zone. + */ + end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION); + if (hole_pfn < end_pfn) + pgcnt += init_unavailable_range(hole_pfn, end_pfn, + zone_id, nid); +#endif + + if (pgcnt) + pr_info(" %s zone: %llu pages in unavailable ranges\n", + zone->name, pgcnt); } static int zone_batchsize(struct zone *zone) @@ -7071,88 +7146,6 @@ void __init free_area_init_memoryless_node(int nid) free_area_init_node(nid); } -#if !defined(CONFIG_FLAT_NODE_MEM_MAP) -/* - * Initialize all valid struct pages in the range [spfn, epfn) and mark them - * PageReserved(). Return the number of struct pages that were initialized. - */ -static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) -{ - unsigned long pfn; - u64 pgcnt = 0; - - for (pfn = spfn; pfn < epfn; pfn++) { - if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { - pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) - + pageblock_nr_pages - 1; - continue; - } - /* - * Use a fake node/zone (0) for now. Some of these pages - * (in memblock.reserved but not in memblock.memory) will - * get re-initialized via reserve_bootmem_region() later. - */ - __init_single_page(pfn_to_page(pfn), pfn, 0, 0); - __SetPageReserved(pfn_to_page(pfn)); - pgcnt++; - } - - return pgcnt; -} - -/* - * Only struct pages that are backed by physical memory are zeroed and - * initialized by going through __init_single_page(). But, there are some - * struct pages which are reserved in memblock allocator and their fields - * may be accessed (for example page_to_pfn() on some configuration accesses - * flags). We must explicitly initialize those struct pages. - * - * This function also addresses a similar issue where struct pages are left - * uninitialized because the physical address range is not covered by - * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=, or when the highest physical - * address (max_pfn) does not end on a section boundary. - */ -static void __init init_unavailable_mem(void) -{ - phys_addr_t start, end; - u64 i, pgcnt; - phys_addr_t next = 0; - - /* - * Loop through unavailable ranges not covered by memblock.memory. - */ - pgcnt = 0; - for_each_mem_range(i, &start, &end) { - if (next < start) - pgcnt += init_unavailable_range(PFN_DOWN(next), - PFN_UP(start)); - next = end; - } - - /* - * Early sections always have a fully populated memmap for the whole - * section - see pfn_valid(). If the last section has holes at the - * end and that section is marked "online", the memmap will be - * considered initialized. Make sure that memmap has a well defined - * state. - */ - pgcnt += init_unavailable_range(PFN_DOWN(next), - round_up(max_pfn, PAGES_PER_SECTION)); - - /* - * Struct pages that do not have backing memory. This could be because - * firmware is using some of this memory, or for some other reasons. - */ - if (pgcnt) - pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); -} -#else -static inline void __init init_unavailable_mem(void) -{ -} -#endif /* !CONFIG_FLAT_NODE_MEM_MAP */ - #if MAX_NUMNODES > 1 /* * Figure out the number of possible node ids. @@ -7576,7 +7569,6 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - init_unavailable_mem(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid); -- GitLab From 82e69a121be4b1597ce758534816a8ee04c8b761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 12 Mar 2021 21:07:15 -0800 Subject: [PATCH 645/839] mm/fork: clear PASID for new mm When a new mm is created, its PASID should be cleared, i.e. the PASID is initialized to its init state 0 on both ARM and X86. This patch was part of the series introducing mm->pasid, but got lost along the way [1]. It still makes sense to have it, because each address space has a different PASID. And the IOMMU code in iommu_sva_alloc_pasid() expects the pasid field of a new mm struct to be cleared. [1] https://lore.kernel.org/linux-iommu/YDgh53AcQHT+T3L0@otcwcpicx3.sc.intel.com/ Link: https://lkml.kernel.org/r/20210302103837.2562625-1-jean-philippe@linaro.org Signed-off-by: Fenghua Yu Signed-off-by: Jean-Philippe Brucker Reviewed-by: Tony Luck Cc: Jacob Pan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + kernel/fork.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0974ad501a47c..6613b26a88946 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) +#define INIT_PASID 0 struct address_space; struct mem_cgroup; diff --git a/kernel/fork.c b/kernel/fork.c index d3171e8e88e5d..54cc905e5fe09 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } +static void mm_init_pasid(struct mm_struct *mm) +{ +#ifdef CONFIG_IOMMU_SUPPORT + mm->pasid = INIT_PASID; +#endif +} + static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); -- GitLab From 2103cf9c3f3ba002feab8bfd9c5528ce676cf65a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:18 -0800 Subject: [PATCH 646/839] hugetlb: dedup the code to add a new file_region Patch series "mm/hugetlb: Early cow on fork, and a few cleanups", v5. As reported by Gal [1], we still miss the code clip to handle early cow for hugetlb case, which is true. Again, it still feels odd to fork() after using a few huge pages, especially if they're privately mapped to me.. However I do agree with Gal and Jason in that we should still have that since that'll complete the early cow on fork effort at least, and it'll still fix issues where buffers are not well under control and not easy to apply MADV_DONTFORK. The first two patches (1-2) are some cleanups I noticed when reading into the hugetlb reserve map code. I think it's good to have but they're not necessary for fixing the fork issue. The last two patches (3-4) are the real fix. I tested this with a fork() after some vfio-pci assignment, so I'm pretty sure the page copy path could trigger well (page will be accounted right after the fork()), but I didn't do data check since the card I assigned is some random nic. https://github.com/xzpeter/linux/tree/fork-cow-pin-huge [1] https://lore.kernel.org/lkml/27564187-4a08-f187-5a84-3df50009f6ca@amazon.com/ Introduce hugetlb_resv_map_add() helper to add a new file_region rather than duplication the similar code twice in add_reservation_in_range(). Link: https://lkml.kernel.org/r/20210217233547.93892-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210217233547.93892-2-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Miaohe Lin Cc: Gal Pressman Cc: Matthew Wilcox Cc: Wei Zhang Cc: Mike Rapoport Cc: Christoph Hellwig Cc: David Gibson Cc: Jason Gunthorpe Cc: Jann Horn Cc: Kirill Tkhai Cc: Kirill Shutemov Cc: Andrea Arcangeli Cc: Jan Kara Cc: Alexey Dobriyan Cc: Daniel Vetter Cc: David Airlie Cc: Roland Scheidegger Cc: VMware Graphics Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8fb42c6dd74bf..e92a176dbb914 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) } } +static inline long +hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from, + long to, struct hstate *h, struct hugetlb_cgroup *cg, + long *regions_needed) +{ + struct file_region *nrg; + + if (!regions_needed) { + nrg = get_file_region_entry_from_cache(map, from, to); + record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg); + list_add(&nrg->link, rg->link.prev); + coalesce_file_region(map, nrg); + } else + *regions_needed += 1; + + return to - from; +} + /* * Must be called with resv->lock held. * @@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, long add = 0; struct list_head *head = &resv->regions; long last_accounted_offset = f; - struct file_region *rg = NULL, *trg = NULL, *nrg = NULL; + struct file_region *rg = NULL, *trg = NULL; if (regions_needed) *regions_needed = 0; @@ -375,18 +393,11 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, /* Add an entry for last_accounted_offset -> rg->from, and * update last_accounted_offset. */ - if (rg->from > last_accounted_offset) { - add += rg->from - last_accounted_offset; - if (!regions_needed) { - nrg = get_file_region_entry_from_cache( - resv, last_accounted_offset, rg->from); - record_hugetlb_cgroup_uncharge_info(h_cg, h, - resv, nrg); - list_add(&nrg->link, rg->link.prev); - coalesce_file_region(resv, nrg); - } else - *regions_needed += 1; - } + if (rg->from > last_accounted_offset) + add += hugetlb_resv_map_add(resv, rg, + last_accounted_offset, + rg->from, h, h_cg, + regions_needed); last_accounted_offset = rg->to; } @@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, /* Handle the case where our range extends beyond * last_accounted_offset. */ - if (last_accounted_offset < t) { - add += t - last_accounted_offset; - if (!regions_needed) { - nrg = get_file_region_entry_from_cache( - resv, last_accounted_offset, t); - record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg); - list_add(&nrg->link, rg->link.prev); - coalesce_file_region(resv, nrg); - } else - *regions_needed += 1; - } + if (last_accounted_offset < t) + add += hugetlb_resv_map_add(resv, rg, last_accounted_offset, + t, h, h_cg, regions_needed); VM_BUG_ON(add < 0); return add; -- GitLab From ca7e0457efefca9eeee8c42a89a7f450651d555b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:22 -0800 Subject: [PATCH 647/839] hugetlb: break earlier in add_reservation_in_range() when we can All the regions maintained in hugetlb reserved map is inclusive on "from" but exclusive on "to". We can break earlier even if rg->from==t because it already means no possible intersection. This does not need a Fixes in all cases because when it happens (rg->from==t) we'll not break out of the loop while we should, however the next thing we'd do is still add the last file_region we'd need and quit the loop in the next round. So this change is not a bugfix (since the old code should still run okay iiuc), but we'd better still touch it up to make it logically sane. Link: https://lkml.kernel.org/r/20210217233547.93892-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Miaohe Lin Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Daniel Vetter Cc: David Airlie Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Roland Scheidegger Cc: VMware Graphics Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e92a176dbb914..b905143a523eb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -387,7 +387,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t, /* When we find a region that starts beyond our range, we've * finished. */ - if (rg->from > t) + if (rg->from >= t) break; /* Add an entry for last_accounted_offset -> rg->from, and -- GitLab From 97a7e4733b9b221d012ae68fcd3b3251febf6341 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:26 -0800 Subject: [PATCH 648/839] mm: introduce page_needs_cow_for_dma() for deciding whether cow We've got quite a few places (pte, pmd, pud) that explicitly checked against whether we should break the cow right now during fork(). It's easier to provide a helper, especially before we work the same thing on hugetlbfs. Since we'll reference is_cow_mapping() in mm.h, move it there too. Actually it suites mm.h more since internal.h is mm/ only, but mm.h is exported to the whole kernel. With that we should expect another patch to use is_cow_mapping() whenever we can across the kernel since we do use it quite a lot but it's always done with raw code against VM_* flags. Link: https://lkml.kernel.org/r/20210217233547.93892-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Daniel Vetter Cc: David Airlie Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mike Rapoport Cc: Roland Scheidegger Cc: VMware Graphics Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ mm/huge_memory.c | 8 ++------ mm/internal.h | 5 ----- mm/memory.c | 8 +------- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e64e3eac80b..64a71bf205367 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page) GUP_PIN_COUNTING_BIAS; } +static inline bool is_cow_mapping(vm_flags_t flags) +{ + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; +} + +/* + * This should most likely only be called during fork() to see whether we + * should break the cow immediately for a page on the src mm. + */ +static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, + struct page *page) +{ + if (!is_cow_mapping(vma->vm_flags)) + return false; + + if (!atomic_read(&vma->vm_mm->has_pinned)) + return false; + + return page_maybe_dma_pinned(page); +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 395c75111d335..da1d63a41aecb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, * best effort that the pinned pages won't be replaced by another * random page during the coming copy-on-write. */ - if (unlikely(is_cow_mapping(vma->vm_flags) && - atomic_read(&src_mm->has_pinned) && - page_maybe_dma_pinned(src_page))) { + if (unlikely(page_needs_cow_for_dma(vma, src_page))) { pte_free(dst_mm, pgtable); spin_unlock(src_ptl); spin_unlock(dst_ptl); @@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, } /* Please refer to comments in copy_huge_pmd() */ - if (unlikely(is_cow_mapping(vma->vm_flags) && - atomic_read(&src_mm->has_pinned) && - page_maybe_dma_pinned(pud_page(pud)))) { + if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) { spin_unlock(src_ptl); spin_unlock(dst_ptl); __split_huge_pud(vma, src_pud, addr); diff --git a/mm/internal.h b/mm/internal.h index 9902648f2206f..1432feec62df0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page) */ #define buddy_order_unsafe(page) READ_ONCE(page_private(page)) -static inline bool is_cow_mapping(vm_flags_t flags) -{ - return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; -} - /* * These three helpers classifies VMAs for virtual memory accounting. */ diff --git a/mm/memory.c b/mm/memory.c index c8e3576273186..523230005db16 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct page **prealloc, pte_t pte, struct page *page) { - struct mm_struct *src_mm = src_vma->vm_mm; struct page *new_page; - if (!is_cow_mapping(src_vma->vm_flags)) - return 1; - /* * What we want to do is to check whether this page may * have been pinned by the parent process. If so, @@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma * the page count. That might give false positives for * for pinning, but it will work correctly. */ - if (likely(!atomic_read(&src_mm->has_pinned))) - return 1; - if (likely(!page_maybe_dma_pinned(page))) + if (likely(!page_needs_cow_for_dma(src_vma, page))) return 1; new_page = *prealloc; -- GitLab From ca6eb14d6453bea85ac66fa4c6ab75dfe93eaf45 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:30 -0800 Subject: [PATCH 649/839] mm: use is_cow_mapping() across tree where proper After is_cow_mapping() is exported in mm.h, replace some manual checks elsewhere throughout the tree but start to use the new helper. Link: https://lkml.kernel.org/r/20210217233547.93892-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: VMware Graphics Cc: Roland Scheidegger Cc: David Airlie Cc: Daniel Vetter Cc: Mike Kravetz Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Rapoport Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 4 +--- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 2 +- fs/proc/task_mmu.c | 2 -- mm/hugetlb.c | 4 +--- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 0a900afc66ffd..45c9c6a7f1d69 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, vm_fault_t ret; pgoff_t fault_page_size; bool write = vmf->flags & FAULT_FLAG_WRITE; - bool is_cow_mapping = - (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; switch (pe_size) { case PE_SIZE_PMD: @@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, } /* Always do write dirty-tracking and COW on PTE level. */ - if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping)) + if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags))) return VM_FAULT_FALLBACK; ret = ttm_bo_vm_reserve(bo, vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index 3c03b1746661a..cb9975889e2ff 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vmw_vm_ops; /* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */ - if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) + if (!is_cow_mapping(vma->vm_flags)) vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP; return 0; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3cec6fbef725e..e862cab695838 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1036,8 +1036,6 @@ struct clear_refs_private { #ifdef CONFIG_MEM_SOFT_DIRTY -#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) - static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct page *page; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b905143a523eb..7786267da2fe3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3734,15 +3734,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; - int cow; + bool cow = is_cow_mapping(vma->vm_flags); struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct address_space *mapping = vma->vm_file->f_mapping; struct mmu_notifier_range range; int ret = 0; - cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - if (cow) { mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src, vma->vm_start, -- GitLab From 4eae4efa2c299f85b7ebfbeeda56c19c5eba2768 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:33 -0800 Subject: [PATCH 650/839] hugetlb: do early cow when page pinned on src mm This is the last missing piece of the COW-during-fork effort when there're pinned pages found. One can reference 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes", 2020-09-27) for more information, since we do similar things here rather than pte this time, but just for hugetlb. Note that after Jason's recent work on 57efa1fe5957 ("mm/gup: prevent gup_fast from racing with COW during fork", 2020-12-15) which is safer and easier to understand, we're safe now within the whole copy_page_range() against gup-fast, we don't need the wr-protect trick that proposed in 70e806e4e645 anymore. Link: https://lkml.kernel.org/r/20210217233547.93892-6-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Jason Gunthorpe Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Daniel Vetter Cc: David Airlie Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Rapoport Cc: Roland Scheidegger Cc: VMware Graphics Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7786267da2fe3..5b1ab1f427c57 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3728,6 +3728,18 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte) return false; } +static void +hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, + struct page *new_page) +{ + __SetPageUptodate(new_page); + set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1)); + hugepage_add_new_anon_rmap(new_page, vma, addr); + hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); + ClearHPageRestoreReserve(new_page); + SetHPageMigratable(new_page); +} + int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { @@ -3737,6 +3749,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, bool cow = is_cow_mapping(vma->vm_flags); struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); + unsigned long npages = pages_per_huge_page(h); struct address_space *mapping = vma->vm_file->f_mapping; struct mmu_notifier_range range; int ret = 0; @@ -3785,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); dst_entry = huge_ptep_get(dst_pte); +again: if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { /* * Skip if src entry none. Also, skip in the @@ -3808,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz); } else { + entry = huge_ptep_get(src_pte); + ptepage = pte_page(entry); + get_page(ptepage); + + /* + * This is a rare case where we see pinned hugetlb + * pages while they're prone to COW. We need to do the + * COW earlier during fork. + * + * When pre-allocating the page or copying data, we + * need to be without the pgtable locks since we could + * sleep during the process. + */ + if (unlikely(page_needs_cow_for_dma(vma, ptepage))) { + pte_t src_pte_old = entry; + struct page *new; + + spin_unlock(src_ptl); + spin_unlock(dst_ptl); + /* Do not use reserve as it's private owned */ + new = alloc_huge_page(vma, addr, 1); + if (IS_ERR(new)) { + put_page(ptepage); + ret = PTR_ERR(new); + break; + } + copy_user_huge_page(new, ptepage, addr, vma, + npages); + put_page(ptepage); + + /* Install the new huge page if src pte stable */ + dst_ptl = huge_pte_lock(h, dst, dst_pte); + src_ptl = huge_pte_lockptr(h, src, src_pte); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + entry = huge_ptep_get(src_pte); + if (!pte_same(src_pte_old, entry)) { + put_page(new); + /* dst_entry won't change as in child */ + goto again; + } + hugetlb_install_page(vma, dst_pte, addr, new); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); + continue; + } + if (cow) { /* * No need to notify as we are downgrading page @@ -3818,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, */ huge_ptep_set_wrprotect(src, addr, src_pte); } - entry = huge_ptep_get(src_pte); - ptepage = pte_page(entry); - get_page(ptepage); + page_dup_rmap(ptepage, true); set_huge_pte_at(dst, addr, dst_pte, entry); - hugetlb_count_add(pages_per_huge_page(h), dst); + hugetlb_count_add(npages, dst); } spin_unlock(src_ptl); spin_unlock(dst_ptl); -- GitLab From 184cee516f3e24019a08ac8eb5c7cf04c00933cb Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 12 Mar 2021 21:07:37 -0800 Subject: [PATCH 651/839] mm/highmem.c: fix zero_user_segments() with start > end zero_user_segments() is used from __block_write_begin_int(), for example like the following zero_user_segments(page, 4096, 1024, 512, 918) But new the zero_user_segments() implementation for for HIGHMEM + TRANSPARENT_HUGEPAGE doesn't handle "start > end" case correctly, and hits BUG_ON(). (we can fix __block_write_begin_int() instead though, it is the old and multiple usage) Also it calls kmap_atomic() unnecessarily while start == end == 0. Link: https://lkml.kernel.org/r/87v9ab60r4.fsf@mail.parknet.co.jp Fixes: 0060ef3b4e6d ("mm: support THPs in zero_user_segments") Signed-off-by: OGAWA Hirofumi Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/highmem.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/highmem.c b/mm/highmem.c index 874b732b120ce..86f2b9495f9cf 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, BUG_ON(end1 > page_size(page) || end2 > page_size(page)); + if (start1 >= end1) + start1 = end1 = 0; + if (start2 >= end2) + start2 = end2 = 0; + for (i = 0; i < compound_nr(page); i++) { void *kaddr = NULL; - if (start1 < PAGE_SIZE || start2 < PAGE_SIZE) - kaddr = kmap_atomic(page + i); - if (start1 >= PAGE_SIZE) { start1 -= PAGE_SIZE; end1 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); - if (end1 > start1) + if (end1 > start1) { + kaddr = kmap_atomic(page + i); memset(kaddr + start1, 0, this_end - start1); + } end1 -= this_end; start1 = 0; } @@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, } else { unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); - if (end2 > start2) + if (end2 > start2) { + if (!kaddr) + kaddr = kmap_atomic(page + i); memset(kaddr + start2, 0, this_end - start2); + } end2 -= this_end; start2 = 0; } -- GitLab From e7850f4d844e0acfac7e570af611d89deade3146 Mon Sep 17 00:00:00 2001 From: Lior Ribak Date: Fri, 12 Mar 2021 21:07:41 -0800 Subject: [PATCH 652/839] binfmt_misc: fix possible deadlock in bm_register_write There is a deadlock in bm_register_write: First, in the begining of the function, a lock is taken on the binfmt_misc root inode with inode_lock(d_inode(root)). Then, if the user used the MISC_FMT_OPEN_FILE flag, the function will call open_exec on the user-provided interpreter. open_exec will call a path lookup, and if the path lookup process includes the root of binfmt_misc, it will try to take a shared lock on its inode again, but it is already locked, and the code will get stuck in a deadlock To reproduce the bug: $ echo ":iiiii:E::ii::/proc/sys/fs/binfmt_misc/bla:F" > /proc/sys/fs/binfmt_misc/register backtrace of where the lock occurs (#5): 0 schedule () at ./arch/x86/include/asm/current.h:15 1 0xffffffff81b51237 in rwsem_down_read_slowpath (sem=0xffff888003b202e0, count=, state=state@entry=2) at kernel/locking/rwsem.c:992 2 0xffffffff81b5150a in __down_read_common (state=2, sem=) at kernel/locking/rwsem.c:1213 3 __down_read (sem=) at kernel/locking/rwsem.c:1222 4 down_read (sem=) at kernel/locking/rwsem.c:1355 5 0xffffffff811ee22a in inode_lock_shared (inode=) at ./include/linux/fs.h:783 6 open_last_lookups (op=0xffffc9000022fe34, file=0xffff888004098600, nd=0xffffc9000022fd10) at fs/namei.c:3177 7 path_openat (nd=nd@entry=0xffffc9000022fd10, op=op@entry=0xffffc9000022fe34, flags=flags@entry=65) at fs/namei.c:3366 8 0xffffffff811efe1c in do_filp_open (dfd=, pathname=pathname@entry=0xffff8880031b9000, op=op@entry=0xffffc9000022fe34) at fs/namei.c:3396 9 0xffffffff811e493f in do_open_execat (fd=fd@entry=-100, name=name@entry=0xffff8880031b9000, flags=, flags@entry=0) at fs/exec.c:913 10 0xffffffff811e4a92 in open_exec (name=) at fs/exec.c:948 11 0xffffffff8124aa84 in bm_register_write (file=, buffer=, count=19, ppos=) at fs/binfmt_misc.c:682 12 0xffffffff811decd2 in vfs_write (file=file@entry=0xffff888004098500, buf=buf@entry=0xa758d0 ":iiiii:E::ii::i:CF ", count=count@entry=19, pos=pos@entry=0xffffc9000022ff10) at fs/read_write.c:603 13 0xffffffff811defda in ksys_write (fd=, buf=0xa758d0 ":iiiii:E::ii::i:CF ", count=19) at fs/read_write.c:658 14 0xffffffff81b49813 in do_syscall_64 (nr=, regs=0xffffc9000022ff58) at arch/x86/entry/common.c:46 15 0xffffffff81c0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:120 To solve the issue, the open_exec call is moved to before the write lock is taken by bm_register_write Link: https://lkml.kernel.org/r/20210228224414.95962-1-liorribak@gmail.com Fixes: 948b701a607f1 ("binfmt_misc: add persistent opened binary handler for containers") Signed-off-by: Lior Ribak Acked-by: Helge Deller Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index c457334de43f8..e1eae7ea823ae 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, struct super_block *sb = file_inode(file)->i_sb; struct dentry *root = sb->s_root, *dentry; int err = 0; + struct file *f = NULL; e = create_entry(buffer, count); if (IS_ERR(e)) return PTR_ERR(e); + if (e->flags & MISC_FMT_OPEN_FILE) { + f = open_exec(e->interpreter); + if (IS_ERR(f)) { + pr_notice("register: failed to install interpreter file %s\n", + e->interpreter); + kfree(e); + return PTR_ERR(f); + } + e->interp_file = f; + } + inode_lock(d_inode(root)); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); @@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, goto out2; } - if (e->flags & MISC_FMT_OPEN_FILE) { - struct file *f; - - f = open_exec(e->interpreter); - if (IS_ERR(f)) { - err = PTR_ERR(f); - pr_notice("register: failed to install interpreter file %s\n", e->interpreter); - simple_release_fs(&bm_mnt, &entry_count); - iput(inode); - inode = NULL; - goto out2; - } - e->interp_file = f; - } - e->dentry = dget(dentry); inode->i_private = e; inode->i_fop = &bm_entry_operations; @@ -709,6 +706,8 @@ out: inode_unlock(d_inode(root)); if (err) { + if (f) + filp_close(f, NULL); kfree(e); return err; } -- GitLab From f0b15b6081291367634a8f3c557f7a68fdaa35e4 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 12 Mar 2021 21:07:44 -0800 Subject: [PATCH 653/839] MAINTAINERS: exclude uapi directories in API/ABI section Commit 7b4693e644cb ("MAINTAINERS: add uapi directories to API/ABI section") added include/uapi/ and arch/*/include/uapi/ so that patches modifying them CC linux-api. However that was already done in the past and resulted in too much noise and thus later removed, as explained in b14fd334ff3d ("MAINTAINERS: trim the file triggers for ABI/API") To prevent another round of addition and removal in the future, change the entries to X: (explicit exclusion) for documentation purposes, although they are not subdirectories of broader included directories, as there is apparently no defined way to add plain comments in subsystem sections. Link: https://lkml.kernel.org/r/20210301100255.25229-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Michael Kerrisk (man-pages) Acked-by: Michael Kerrisk (man-pages) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 72e5b9db5050b..1b3c05666071b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -261,8 +261,8 @@ ABI/API L: linux-api@vger.kernel.org F: include/linux/syscalls.h F: kernel/sys_ni.c -F: include/uapi/ -F: arch/*/include/uapi/ +X: include/uapi/ +X: arch/*/include/uapi/ ABIT UGURU 1,2 HARDWARE MONITOR DRIVER M: Hans de Goede -- GitLab From 97e4910232fa1f81e806aa60c25a0450276d99a2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:47 -0800 Subject: [PATCH 654/839] linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP* Separating compiler-clang.h from compiler-gcc.h inadventently dropped the definitions of the three HAVE_BUILTIN_BSWAP macros, which requires falling back to the open-coded version and hoping that the compiler detects it. Since all versions of clang support the __builtin_bswap interfaces, add back the flags and have the headers pick these up automatically. This results in a 4% improvement of compilation speed for arm defconfig. Note: it might also be worth revisiting which architectures set CONFIG_ARCH_USE_BUILTIN_BSWAP for one compiler or the other, today this is set on six architectures (arm32, csky, mips, powerpc, s390, x86), while another ten architectures define custom helpers (alpha, arc, ia64, m68k, mips, nios2, parisc, sh, sparc, xtensa), and the rest (arm64, h8300, hexagon, microblaze, nds32, openrisc, riscv) just get the unoptimized version and rely on the compiler to detect it. A long time ago, the compiler builtins were architecture specific, but nowadays, all compilers that are able to build the kernel have correct implementations of them, though some may not be as optimized as the inline asm versions. The patch that dropped the optimization landed in v4.19, so as discussed it would be fairly safe to backport this revert to stable kernels to the 4.19/5.4/5.10 stable kernels, but there is a remaining risk for regressions, and it has no known side-effects besides compile speed. Link: https://lkml.kernel.org/r/20210226161151.2629097-1-arnd@kernel.org Link: https://lore.kernel.org/lkml/20210225164513.3667778-1-arnd@kernel.org/ Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Acked-by: Miguel Ojeda Acked-by: Nick Desaulniers Acked-by: Luc Van Oostenryck Cc: Masahiro Yamada Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Guo Ren Cc: Randy Dunlap Cc: Sami Tolvanen Cc: Marco Elver Cc: Arvind Sankar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 04c0a5a717f7e..d217c382b02de 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -31,6 +31,12 @@ #define __no_sanitize_thread #endif +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + #if __has_feature(undefined_behavior_sanitizer) /* GCC does not have __SANITIZE_UNDEFINED__ */ #define __no_sanitize_undefined \ -- GitLab From 702b16d724a61cb97461f403d7a2da29324471b3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 12 Mar 2021 21:07:50 -0800 Subject: [PATCH 655/839] kfence: fix printk format for ptrdiff_t Use %td for ptrdiff_t. Link: https://lkml.kernel.org/r/3abbe4c9-16ad-c168-a90f-087978ccd8f7@csgroup.eu Link: https://lkml.kernel.org/r/20210303121157.3430807-1-elver@google.com Signed-off-by: Marco Elver Reported-by: Christophe Leroy Reviewed-by: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Andrey Konovalov Cc: Jann Horn Cc: Christophe Leroy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/report.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index ab83d5a59bb12..519f037720f5b 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -116,12 +116,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met lockdep_assert_held(&meta->lock); if (meta->state == KFENCE_OBJECT_UNUSED) { - seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata); + seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata); return; } seq_con_printf(seq, - "kfence-#%zd [0x%p-0x%p" + "kfence-#%td [0x%p-0x%p" ", size=%d, cache=%s] allocated by task %d:\n", meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size, (cache && cache->name) ? cache->name : "", meta->alloc_track.pid); @@ -204,7 +204,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n", + pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n", get_access_type(is_write), (void *)address, left_of_object ? meta->addr - address : address - meta->addr, left_of_object ? "left" : "right", object_index); @@ -213,14 +213,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r case KFENCE_ERROR_UAF: pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write), (void *)stack_entries[skipnr]); - pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n", + pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n", get_access_type(is_write), (void *)address, object_index); break; case KFENCE_ERROR_CORRUPTION: pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]); pr_err("Corrupted memory at 0x%p ", (void *)address); print_diff_canary(address, 16, meta); - pr_cont(" (in kfence-#%zd):\n", object_index); + pr_cont(" (in kfence-#%td):\n", object_index); break; case KFENCE_ERROR_INVALID: pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write), @@ -230,7 +230,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r break; case KFENCE_ERROR_INVALID_FREE: pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]); - pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address, + pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address, object_index); break; } -- GitLab From df3ae2c9941d38106afd67d7816b58f6dc7405e8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 12 Mar 2021 21:07:53 -0800 Subject: [PATCH 656/839] kfence, slab: fix cache_alloc_debugcheck_after() for bulk allocations cache_alloc_debugcheck_after() performs checks on an object, including adjusting the returned pointer. None of this should apply to KFENCE objects. While for non-bulk allocations, the checks are skipped when we allocate via KFENCE, for bulk allocations cache_alloc_debugcheck_after() is called via cache_alloc_debugcheck_after_bulk(). Fix it by skipping cache_alloc_debugcheck_after() for KFENCE objects. Link: https://lkml.kernel.org/r/20210304205256.2162309-1-elver@google.com Signed-off-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 51fd424e0d6d0..ae651bf540b70 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, void *objp, unsigned long caller) { WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); - if (!objp) + if (!objp || is_kfence_address(objp)) return objp; if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); -- GitLab From 0aa41cae92c1e2e61ae5b3a2dde8e674172e40ac Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 12 Mar 2021 21:08:00 -0800 Subject: [PATCH 657/839] kfence: fix reports if constant function prefixes exist Some architectures prefix all functions with a constant string ('.' on ppc64). Add ARCH_FUNC_PREFIX, which may optionally be defined in , so that get_stack_skipnr() can work properly. Link: https://lkml.kernel.org/r/f036c53d-7e81-763c-47f4-6024c6c5f058@csgroup.eu Link: https://lkml.kernel.org/r/20210304144000.1148590-1-elver@google.com Signed-off-by: Marco Elver Reported-by: Christophe Leroy Tested-by: Christophe Leroy Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/report.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 519f037720f5b..e3f71451ad9e2 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -20,6 +20,11 @@ #include "kfence.h" +/* May be overridden by . */ +#ifndef ARCH_FUNC_PREFIX +#define ARCH_FUNC_PREFIX "" +#endif + extern bool no_hash_pointers; /* Helper function to either print to a seq_file or to console. */ @@ -67,8 +72,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries for (skipnr = 0; skipnr < num_entries; skipnr++) { int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]); - if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") || - !strncmp(buf, "__slab_free", len)) { + if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* * In case of tail calls from any of the below * to any of the above. @@ -77,10 +83,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries } /* Also the *_bulk() variants by only checking prefixes. */ - if (str_has_prefix(buf, "kfree") || - str_has_prefix(buf, "kmem_cache_free") || - str_has_prefix(buf, "__kmalloc") || - str_has_prefix(buf, "kmem_cache_alloc")) + if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; } if (fallback < num_entries) -- GitLab From 149fc787353f65b7e72e05e7b75d34863266c3e2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 12 Mar 2021 21:08:03 -0800 Subject: [PATCH 658/839] include/linux/sched/mm.h: use rcu_dereference in in_vfork() Fix a sparse warning by using rcu_dereference(). Technically this is a bug and a sufficiently aggressive compiler could reload the `real_parent' pointer outside the protection of the rcu lock (and access freed memory), but I think it's pretty unlikely to happen. Link: https://lkml.kernel.org/r/20210221194207.1351703-1-willy@infradead.org Fixes: b18dc5f291c0 ("mm, oom: skip vforked tasks from being selected") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Miaohe Lin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1ae08b8462a41..90b2a0bce11ca 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk) * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + ret = tsk->vfork_done && + rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; -- GitLab From 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 12 Mar 2021 21:08:06 -0800 Subject: [PATCH 659/839] mm/madvise: replace ptrace attach requirement for process_madvise process_madvise currently requires ptrace attach capability. PTRACE_MODE_ATTACH gives one process complete control over another process. It effectively removes the security boundary between the two processes (in one direction). Granting ptrace attach capability even to a system process is considered dangerous since it creates an attack surface. This severely limits the usage of this API. The operations process_madvise can perform do not affect the correctness of the operation of the target process; they only affect where the data is physically located (and therefore, how fast it can be accessed). What we want is the ability for one process to influence another process in order to optimize performance across the entire system while leaving the security boundary intact. Replace PTRACE_MODE_ATTACH with a combination of PTRACE_MODE_READ and CAP_SYS_NICE. PTRACE_MODE_READ to prevent leaking ASLR metadata and CAP_SYS_NICE for influencing process performance. Link: https://lkml.kernel.org/r/20210303185807.2160264-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Acked-by: Minchan Kim Acked-by: David Rientjes Cc: Jann Horn Cc: Jeff Vander Stoep Cc: Michal Hocko Cc: Shakeel Butt Cc: Tim Murray Cc: Florian Weimer Cc: Oleg Nesterov Cc: James Morris Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index df692d2e35d4a..01fef79ac761b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1198,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, goto release_task; } - mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) { ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; goto release_task; } + /* + * Require CAP_SYS_NICE for influencing process performance. Note that + * only non-destructive hints are currently supported. + */ + if (!capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + total_len = iov_iter_count(&iter); while (iov_iter_count(&iter)) { @@ -1218,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, if (ret == 0) ret = total_len - iov_iter_count(&iter); +release_mm: mmput(mm); release_task: put_task_struct(task); -- GitLab From f9d79e8dce4077d3c6ab739c808169dfa99af9ef Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 12 Mar 2021 21:08:10 -0800 Subject: [PATCH 660/839] kasan, mm: fix crash with HW_TAGS and DEBUG_PAGEALLOC Currently, kasan_free_nondeferred_pages()->kasan_free_pages() is called after debug_pagealloc_unmap_pages(). This causes a crash when debug_pagealloc is enabled, as HW_TAGS KASAN can't set tags on an unmapped page. This patch puts kasan_free_nondeferred_pages() before debug_pagealloc_unmap_pages() and arch_free_page(), which can also make the page unavailable. Link: https://lkml.kernel.org/r/24cd7db274090f0e5bc3adcdc7399243668e3171.1614987311.git.andreyknvl@google.com Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS") Signed-off-by: Andrey Konovalov Cc: Catalin Marinas Cc: Will Deacon Cc: Vincenzo Frascino Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Marco Elver Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4580e4215d8a..3cd1c0ce4d061 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1281,6 +1281,12 @@ static __always_inline bool free_pages_prepare(struct page *page, kernel_poison_pages(page, 1 << order); + /* + * With hardware tag-based KASAN, memory tags must be set before the + * page becomes unavailable via debug_pagealloc or arch_free_page. + */ + kasan_free_nondeferred_pages(page, order); + /* * arch_free_page() can make the page's contents inaccessible. s390 * does this. So nothing which can access the page's contents should @@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page, debug_pagealloc_unmap_pages(page, 1 << order); - kasan_free_nondeferred_pages(page, order); - return true; } -- GitLab From d9b571c885a8974fbb7d4ee639dbc643fd000f9e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 12 Mar 2021 21:08:13 -0800 Subject: [PATCH 661/839] kasan: fix KASAN_STACK dependency for HW_TAGS There's a runtime failure when running HW_TAGS-enabled kernel built with GCC on hardware that doesn't support MTE. GCC-built kernels always have CONFIG_KASAN_STACK enabled, even though stack instrumentation isn't supported by HW_TAGS. Having that config enabled causes KASAN to issue MTE-only instructions to unpoison kernel stacks, which causes the failure. Fix the issue by disallowing CONFIG_KASAN_STACK when HW_TAGS is used. (The commit that introduced CONFIG_KASAN_HW_TAGS specified proper dependency for CONFIG_KASAN_STACK_ENABLE but not for CONFIG_KASAN_STACK.) Link: https://lkml.kernel.org/r/59e75426241dbb5611277758c8d4d6f5f9298dac.1615215441.git.andreyknvl@google.com Fixes: 6a63a63ff1ac ("kasan: introduce CONFIG_KASAN_HW_TAGS") Signed-off-by: Andrey Konovalov Reported-by: Catalin Marinas Cc: Cc: Will Deacon Cc: Vincenzo Frascino Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Marco Elver Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 624ae1df7984c..fba9909e31b71 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE config KASAN_STACK int + depends on KASAN_GENERIC || KASAN_SW_TAGS default 1 if KASAN_STACK_ENABLE || CC_IS_GCC default 0 -- GitLab From 6ce64428d62026a10cb5d80138ff2f90cc21d367 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 12 Mar 2021 21:08:17 -0800 Subject: [PATCH 662/839] mm/userfaultfd: fix memory corruption due to writeprotect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userfaultfd self-test fails occasionally, indicating a memory corruption. Analyzing this problem indicates that there is a real bug since mmap_lock is only taken for read in mwriteprotect_range() and defers flushes, and since there is insufficient consideration of concurrent deferred TLB flushes in wp_page_copy(). Although the PTE is flushed from the TLBs in wp_page_copy(), this flush takes place after the copy has already been performed, and therefore changes of the page are possible between the time of the copy and the time in which the PTE is flushed. To make matters worse, memory-unprotection using userfaultfd also poses a problem. Although memory unprotection is logically a promotion of PTE permissions, and therefore should not require a TLB flush, the current userrfaultfd code might actually cause a demotion of the architectural PTE permission: when userfaultfd_writeprotect() unprotects memory region, it unintentionally *clears* the RW-bit if it was already set. Note that this unprotecting a PTE that is not write-protected is a valid use-case: the userfaultfd monitor might ask to unprotect a region that holds both write-protected and write-unprotected PTEs. The scenario that happens in selftests/vm/userfaultfd is as follows: cpu0 cpu1 cpu2 ---- ---- ---- [ Writable PTE cached in TLB ] userfaultfd_writeprotect() [ write-*unprotect* ] mwriteprotect_range() mmap_read_lock() change_protection() change_protection_range() ... change_pte_range() [ *clear* “write”-bit ] [ defer TLB flushes ] [ page-fault ] ... wp_page_copy() cow_user_page() [ copy page ] [ write to old page ] ... set_pte_at_notify() A similar scenario can happen: cpu0 cpu1 cpu2 cpu3 ---- ---- ---- ---- [ Writable PTE cached in TLB ] userfaultfd_writeprotect() [ write-protect ] [ deferred TLB flush ] userfaultfd_writeprotect() [ write-unprotect ] [ deferred TLB flush] [ page-fault ] wp_page_copy() cow_user_page() [ copy page ] ... [ write to page ] set_pte_at_notify() This race exists since commit 292924b26024 ("userfaultfd: wp: apply _PAGE_UFFD_WP bit"). Yet, as Yu Zhao pointed, these races became apparent since commit 09854ba94c6a ("mm: do_wp_page() simplification") which made wp_page_copy() more likely to take place, specifically if page_count(page) > 1. To resolve the aforementioned races, check whether there are pending flushes on uffd-write-protected VMAs, and if there are, perform a flush before doing the COW. Further optimizations will follow to avoid during uffd-write-unprotect unnecassary PTE write-protection and TLB flushes. Link: https://lkml.kernel.org/r/20210304095423.3825684-1-namit@vmware.com Fixes: 09854ba94c6a ("mm: do_wp_page() simplification") Signed-off-by: Nadav Amit Suggested-by: Yu Zhao Reviewed-by: Peter Xu Tested-by: Peter Xu Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Pavel Emelyanov Cc: Mike Kravetz Cc: Mike Rapoport Cc: Minchan Kim Cc: Will Deacon Cc: Peter Zijlstra Cc: [5.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 523230005db16..5efa07fb6cdc1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3097,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) return handle_userfault(vmf, VM_UFFD_WP); } + /* + * Userfaultfd write-protect can defer flushes. Ensure the TLB + * is flushed in this case before copying. + */ + if (unlikely(userfaultfd_wp(vmf->vma) && + mm_tlb_flush_pending(vmf->vma->vm_mm))) + flush_tlb_page(vmf->vma, vmf->address); + vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* -- GitLab From 0ceb1ace4a2778e34a5414e5349712ae4dc41d85 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 12 Mar 2021 21:08:23 -0800 Subject: [PATCH 663/839] ia64: fix ia64_syscall_get_set_arguments() for break-based syscalls In https://bugs.gentoo.org/769614 Dmitry noticed that `ptrace(PTRACE_GET_SYSCALL_INFO)` does not work for syscalls called via glibc's syscall() wrapper. ia64 has two ways to call syscalls from userspace: via `break` and via `eps` instructions. The difference is in stack layout: 1. `eps` creates simple stack frame: no locals, in{0..7} == out{0..8} 2. `break` uses userspace stack frame: may be locals (glibc provides one), in{0..7} == out{0..8}. Both work fine in syscall handling cde itself. But `ptrace(PTRACE_GET_SYSCALL_INFO)` uses unwind mechanism to re-extract syscall arguments but it does not account for locals. The change always skips locals registers. It should not change `eps` path as kernel's handler already enforces locals=0 and fixes `break`. Tested on v5.10 on rx3600 machine (ia64 9040 CPU). Link: https://lkml.kernel.org/r/20210221002554.333076-1-slyfox@gentoo.org Link: https://bugs.gentoo.org/769614 Signed-off-by: Sergei Trofimovich Reported-by: Dmitry V. Levin Cc: Oleg Nesterov Cc: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/ptrace.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index c3490ee2daa58..e14f5653393ac 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) { struct syscall_get_set_args *args = data; struct pt_regs *pt = args->regs; - unsigned long *krbs, cfm, ndirty; + unsigned long *krbs, cfm, ndirty, nlocals, nouts; int i, count; if (unw_unwind_to_user(info) < 0) return; + /* + * We get here via a few paths: + * - break instruction: cfm is shared with caller. + * syscall args are in out= regs, locals are non-empty. + * - epsinstruction: cfm is set by br.call + * locals don't exist. + * + * For both cases argguments are reachable in cfm.sof - cfm.sol. + * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ] + */ cfm = pt->cr_ifs; + nlocals = (cfm >> 7) & 0x7f; /* aka sol */ + nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); count = 0; if (in_syscall(pt)) - count = min_t(int, args->n, cfm & 0x7f); + count = min_t(int, args->n, nouts); + /* Iterate over outs. */ for (i = 0; i < count; i++) { + int j = ndirty + nlocals + i + args->i; if (args->rw) - *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = - args->args[i]; + *ia64_rse_skip_regs(krbs, j) = args->args[i]; else - args->args[i] = *ia64_rse_skip_regs(krbs, - ndirty + i + args->i); + args->args[i] = *ia64_rse_skip_regs(krbs, j); } if (!args->rw) { -- GitLab From 61bf318eac2c13356f7bd1c6a05421ef504ccc8a Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 12 Mar 2021 21:08:27 -0800 Subject: [PATCH 664/839] ia64: fix ptrace(PTRACE_SYSCALL_INFO_EXIT) sign In https://bugs.gentoo.org/769614 Dmitry noticed that `ptrace(PTRACE_GET_SYSCALL_INFO)` does not return error sign properly. The bug is in mismatch between get/set errors: static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { return regs->r10 == -1 ? regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->r8; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { if (error) { /* error < 0, but ia64 uses > 0 return value */ regs->r8 = -error; regs->r10 = -1; } else { regs->r8 = val; regs->r10 = 0; } } Tested on v5.10 on rx3600 machine (ia64 9040 CPU). Link: https://lkml.kernel.org/r/20210221002554.333076-2-slyfox@gentoo.org Link: https://bugs.gentoo.org/769614 Signed-off-by: Sergei Trofimovich Reported-by: Dmitry V. Levin Reviewed-by: Dmitry V. Levin Cc: John Paul Adrian Glaubitz Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 6c6f16e409a87..0d23c00493018 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return regs->r10 == -1 ? regs->r8:0; + return regs->r10 == -1 ? -regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, -- GitLab From be6c8982e4ab9a41907555f601b711a7e2a17d4c Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Fri, 12 Mar 2021 21:08:30 -0800 Subject: [PATCH 665/839] mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument Rename mem_cgroup_split_huge_fixup to split_page_memcg and explicitly pass in page number argument. In this way, the interface name is more common and can be used by potential users. In addition, the complete info(memcg and flag) of the memcg needs to be set to the tail pages. Link: https://lkml.kernel.org/r/20210304074053.65527-2-zhouguanghui1@huawei.com Signed-off-by: Zhou Guanghui Acked-by: Johannes Weiner Reviewed-by: Zi Yan Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Cc: Tianhong Ding Cc: Weilong Chen Cc: Rui Xiang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- mm/huge_memory.c | 2 +- mm/memcontrol.c | 15 ++++++--------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e6dc793d587de..0c04d39a79676 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -void mem_cgroup_split_huge_fixup(struct page *head); -#endif +void split_page_memcg(struct page *head, unsigned int nr); #else /* CONFIG_MEMCG */ @@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void mem_cgroup_split_huge_fixup(struct page *head) +static inline void split_page_memcg(struct page *head, unsigned int nr) { } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index da1d63a41aecb..ae907a9c20506 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2467,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, int i; /* complete memcg works before add pages to LRU */ - mem_cgroup_split_huge_fixup(head); + split_page_memcg(head, nr); if (PageAnon(head) && PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 845eec01ef9d0..e064ac0d850ac 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) #endif /* CONFIG_MEMCG_KMEM */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* - * Because page_memcg(head) is not set on compound tails, set it now. + * Because page_memcg(head) is not set on tails, set it now. */ -void mem_cgroup_split_huge_fixup(struct page *head) +void split_page_memcg(struct page *head, unsigned int nr) { struct mem_cgroup *memcg = page_memcg(head); int i; - if (mem_cgroup_disabled()) + if (mem_cgroup_disabled() || !memcg) return; - for (i = 1; i < HPAGE_PMD_NR; i++) { - css_get(&memcg->css); - head[i].memcg_data = (unsigned long)memcg; - } + for (i = 1; i < nr; i++) + head[i].memcg_data = head->memcg_data; + css_get_many(&memcg->css, nr - 1); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_MEMCG_SWAP /** -- GitLab From e1baddf8475b06cc56f4bafecf9a32a124343d9f Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Fri, 12 Mar 2021 21:08:33 -0800 Subject: [PATCH 666/839] mm/memcg: set memcg when splitting page As described in the split_page() comment, for the non-compound high order page, the sub-pages must be freed individually. If the memcg of the first page is valid, the tail pages cannot be uncharged when be freed. For example, when alloc_pages_exact is used to allocate 1MB continuous physical memory, 2MB is charged(kmemcg is enabled and __GFP_ACCOUNT is set). When make_alloc_exact free the unused 1MB and free_pages_exact free the applied 1MB, actually, only 4KB(one page) is uncharged. Therefore, the memcg of the tail page needs to be set when splitting a page. Michel: There are at least two explicit users of __GFP_ACCOUNT with alloc_exact_pages added recently. See 7efe8ef274024 ("KVM: arm64: Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT") and c419621873713 ("KVM: s390: Add memcg accounting to KVM allocations"), so this is not just a theoretical issue. Link: https://lkml.kernel.org/r/20210304074053.65527-3-zhouguanghui1@huawei.com Signed-off-by: Zhou Guanghui Acked-by: Johannes Weiner Reviewed-by: Zi Yan Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Hanjun Guo Cc: Hugh Dickins Cc: Kefeng Wang Cc: "Kirill A. Shutemov" Cc: Nicholas Piggin Cc: Rui Xiang Cc: Tianhong Ding Cc: Weilong Chen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3cd1c0ce4d061..cfc72873961d9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3314,6 +3314,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, 1 << order); + split_page_memcg(page, 1 << order); } EXPORT_SYMBOL_GPL(split_page); -- GitLab From 57e0076e6575a7b7cef620a0bd2ee2549ef77818 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 12 Mar 2021 21:08:38 -0800 Subject: [PATCH 667/839] zram: fix return value on writeback_store writeback_store's return value is overwritten by submit_bio_wait's return value. Thus, writeback_store will return zero since there was no IO error. In the end, write syscall from userspace will see the zero as return value, which could make the process stall to keep trying the write until it will succeed. Link: https://lkml.kernel.org/r/20210312173949.2197662-1-minchan@kernel.org Fixes: 3b82a051c101("drivers/block/zram/zram_drv.c: fix error return codes not being returned in writeback_store") Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Colin Ian King Cc: John Dias Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a711a2e2a7946..63bbefdffc818 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev, struct bio_vec bio_vec; struct page *page; ssize_t ret = len; - int mode; + int mode, err; unsigned long blk_idx = 0; if (sysfs_streq(buf, "idle")) @@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev, * XXX: A single page IO would be inefficient for write * but it would be not bad as starter. */ - ret = submit_bio_wait(&bio); - if (ret) { + err = submit_bio_wait(&bio); + if (err) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); + /* + * Return last IO error unless every IO were + * not suceeded. + */ + ret = err; continue; } -- GitLab From 2766f1821600cc7562bae2128ad0b163f744c5d9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 12 Mar 2021 21:08:41 -0800 Subject: [PATCH 668/839] zram: fix broken page writeback commit 0d8359620d9b ("zram: support page writeback") introduced two problems. It overwrites writeback_store's return value as kstrtol's return value, which makes return value zero so user could see zero as return value of write syscall even though it wrote data successfully. It also breaks index value in the loop in that it doesn't increase the index any longer. It means it can write only first starting block index so user couldn't write all idle pages in the zram so lose memory saving chance. This patch fixes those issues. Link: https://lkml.kernel.org/r/20210312173949.2197662-2-minchan@kernel.org Fixes: 0d8359620d9b("zram: support page writeback") Signed-off-by: Minchan Kim Reported-by: Amos Bianchi Cc: Sergey Senozhatsky Cc: John Dias Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 63bbefdffc818..cf8deecc39efb 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev, if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) return -EINVAL; - ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index); - if (ret || index >= nr_pages) + if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || + index >= nr_pages) return -EINVAL; nr_pages = 1; @@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - while (nr_pages--) { + for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; bvec.bv_page = page; -- GitLab From e1c86210fe27428399643861b81b080eccd79f87 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yu Date: Sat, 13 Mar 2021 07:54:53 +0800 Subject: [PATCH 669/839] ALSA: hda/realtek: Apply headset-mic quirks for Xiaomi Redmibook Air There is another fix for headset-mic problem on Redmibook (1d72:1602), it also works on Redmibook Air (1d72:1947), which has the same issue. Signed-off-by: Xiaoliang Yu Cc: Link: https://lore.kernel.org/r/TYBP286MB02856DC016849DEA0F9B6A37EE6F9@TYBP286MB0285.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b47504fa8dfd0..c6d916177d5cd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8243,6 +8243,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), -- GitLab From da98b54d02981de5b07d8044b2a632bf6ba3ac45 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Mon, 22 Feb 2021 13:57:24 +0800 Subject: [PATCH 670/839] virtio-mmio: Use to_virtio_mmio_device() to simply code The file virtio_mmio.c has defined the function to_virtio_mmio_device, so use it instead of container_of() to simply code. Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20210222055724.220-1-tangbin@cmss.chinamobile.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index a286d22b6551c..56128b9c46eba 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -548,8 +548,7 @@ static void virtio_mmio_release_dev(struct device *_d) { struct virtio_device *vdev = container_of(_d, struct virtio_device, dev); - struct virtio_mmio_device *vm_dev = - container_of(vdev, struct virtio_mmio_device, vdev); + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct platform_device *pdev = vm_dev->pdev; devm_kfree(&pdev->dev, vm_dev); -- GitLab From bc22ed2ea1121f9d9ba3f85c524cb857d54a2d00 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Sat, 20 Feb 2021 11:28:18 -0500 Subject: [PATCH 671/839] virtio: remove export for virtio_config_{enable, disable} virtio_config_enable(), virtio_config_disable() are only used inside drivers/virtio/virtio.c, so it doesn't need export the symbols. Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/1613838498-8791-1-git-send-email-xianting_tian@126.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio.c | 6 ++---- include/linux/virtio.h | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 42e09cc1b8ac5..4b15c00c0a0af 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -141,15 +141,14 @@ void virtio_config_changed(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_config_changed); -void virtio_config_disable(struct virtio_device *dev) +static void virtio_config_disable(struct virtio_device *dev) { spin_lock_irq(&dev->config_lock); dev->config_enabled = false; spin_unlock_irq(&dev->config_lock); } -EXPORT_SYMBOL_GPL(virtio_config_disable); -void virtio_config_enable(struct virtio_device *dev) +static void virtio_config_enable(struct virtio_device *dev) { spin_lock_irq(&dev->config_lock); dev->config_enabled = true; @@ -158,7 +157,6 @@ void virtio_config_enable(struct virtio_device *dev) dev->config_change_pending = false; spin_unlock_irq(&dev->config_lock); } -EXPORT_SYMBOL_GPL(virtio_config_enable); void virtio_add_status(struct virtio_device *dev, unsigned int status) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 55ea329fe72a4..b1894e0323fae 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -132,8 +132,6 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); -void virtio_config_disable(struct virtio_device *dev); -void virtio_config_enable(struct virtio_device *dev); int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); -- GitLab From aa443ac20445ad79afc9aa589727e5d9ee88dc2f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 13 Feb 2021 20:39:51 +0200 Subject: [PATCH 672/839] vdpa_sim: Skip typecasting from void* Typecasting from void* to virtio_net_config* is not needed. Remove it. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210213183951.248324-1-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 702be74877d27..a1ab6163f7d13 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -110,8 +110,7 @@ out: static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) { - struct virtio_net_config *net_config = - (struct virtio_net_config *)config; + struct virtio_net_config *net_config = config; net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); -- GitLab From 4c050286bb202cffd5467c1cba982dff391d62e1 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 24 Feb 2021 17:18:45 +0530 Subject: [PATCH 673/839] vhost_vdpa: fix the missing irq_bypass_unregister_producer() invocation When qemu with vhost-vdpa netdevice is run for the first time, it works well. But after the VM is powered off, the next qemu run causes kernel panic due to a NULL pointer dereference in irq_bypass_register_producer(). When the VM is powered off, vhost_vdpa_clean_irq() misses on calling irq_bypass_unregister_producer() for irq 0 because of the existing check. This leaves stale producer nodes, which are reset in vhost_vring_call_reset() when vhost_dev_init() is invoked during the second qemu run. As the node member of struct irq_bypass_producer is also initialized to zero, traversal on the producers list causes crash due to NULL pointer dereference. Fixes: 2cf1ba9a4d15c ("vhost_vdpa: implement IRQ offloading in vhost_vdpa") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211711 Signed-off-by: Gautam Dawar Acked-by: Jason Wang Link: https://lore.kernel.org/r/20210224114845.104173-1-gdawar.xilinx@gmail.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ef688c8c0e0e6..dfdd8e4a8b08e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -900,14 +900,10 @@ err: static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) { - struct vhost_virtqueue *vq; int i; - for (i = 0; i < v->nvqs; i++) { - vq = &v->vqs[i]; - if (vq->call_ctx.producer.irq) - irq_bypass_unregister_producer(&vq->call_ctx.producer); - } + for (i = 0; i < v->nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); } static int vhost_vdpa_release(struct inode *inode, struct file *filep) -- GitLab From 08c18b63d9656e0389087d1956d2b37fd7019172 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Mar 2021 06:19:30 +0000 Subject: [PATCH 674/839] powerpc/vdso32: Add missing _restgpr_31_x to fix build failure With some defconfig including CONFIG_CC_OPTIMIZE_FOR_SIZE, (for instance mvme5100_defconfig and ps3_defconfig), gcc 5 generates a call to _restgpr_31_x. Until recently it went unnoticed, but commit 42ed6d56ade2 ("powerpc/vdso: Block R_PPC_REL24 relocations") made it rise to the surface. Provide that function (copied from lib/crtsavres.S) in gettimeofday.S Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a7aa198a88bcd33c6e35e99f70f86c7b7f2f9440.1615270757.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a6e29f880e0e3..d21d08140a5eb 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) + +/* Routines for restoring integer registers, called by the compiler. */ +/* Called with r11 pointing to the stack header word of the caller of the */ +/* function, just beyond the end of the integer restore area. */ +_GLOBAL(_restgpr_31_x) +_GLOBAL(_rest32gpr_31_x) + lwz r0,4(r11) + lwz r31,-4(r11) + mtlr r0 + mr r1,r11 + blr -- GitLab From eed5fae00593ab9d261a0c1ffc1bdb786a87a55a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 10 Mar 2021 12:10:34 +0000 Subject: [PATCH 675/839] powerpc: Force inlining of cpu_has_feature() to avoid build failure The code relies on constant folding of cpu_has_feature() based on possible and always true values as defined per CPU_FTRS_ALWAYS and CPU_FTRS_POSSIBLE. Build failure is encountered with for instance book3e_all_defconfig on kisskb in the AMDGPU driver which uses cpu_has_feature(CPU_FTR_VSX_COMP) to decide whether calling kernel_enable_vsx() or not. The failure is due to cpu_has_feature() not being inlined with that configuration with gcc 4.9. In the same way as commit acdad8fb4a15 ("powerpc: Force inlining of mmu_has_feature to fix build failure"), for inlining of cpu_has_feature(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b231dfa040ce4cc37f702f5c3a595fdeabfe0462.1615378209.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cpu_has_feature.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e09904..727d4b3219379 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include #include -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } -- GitLab From 9e15c3a0ced5a61f320b989072c24983cb1620c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Mar 2021 12:29:43 -0700 Subject: [PATCH 676/839] io_uring: convert io_buffer_idr to XArray Like we did for the personality idr, convert the IO buffer idr to use XArray. This avoids a use-after-free on removal of entries, since idr doesn't like doing so from inside an iterator, and it nicely reduces the amount of code we need to support this feature. Fixes: 5a2e745d4d43 ("io_uring: buffer registration infrastructure") Cc: stable@vger.kernel.org Cc: Matthew Wilcox Cc: yangerkun Reported-by: Hulk Robot Signed-off-by: Jens Axboe --- fs/io_uring.c | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 05adc4887ef38..58d62dd9f8e4b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -402,7 +402,7 @@ struct io_ring_ctx { struct socket *ring_sock; #endif - struct idr io_buffer_idr; + struct xarray io_buffers; struct xarray personalities; u32 pers_next; @@ -1135,7 +1135,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) init_waitqueue_head(&ctx->cq_wait); INIT_LIST_HEAD(&ctx->cq_overflow_list); init_completion(&ctx->ref_comp); - idr_init(&ctx->io_buffer_idr); + xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); @@ -2843,7 +2843,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len, lockdep_assert_held(&req->ctx->uring_lock); - head = idr_find(&req->ctx->io_buffer_idr, bgid); + head = xa_load(&req->ctx->io_buffers, bgid); if (head) { if (!list_empty(&head->list)) { kbuf = list_last_entry(&head->list, struct io_buffer, @@ -2851,7 +2851,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len, list_del(&kbuf->list); } else { kbuf = head; - idr_remove(&req->ctx->io_buffer_idr, bgid); + xa_erase(&req->ctx->io_buffers, bgid); } if (*len > kbuf->len) *len = kbuf->len; @@ -3892,7 +3892,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, } i++; kfree(buf); - idr_remove(&ctx->io_buffer_idr, bgid); + xa_erase(&ctx->io_buffers, bgid); return i; } @@ -3910,7 +3910,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) lockdep_assert_held(&ctx->uring_lock); ret = -ENOENT; - head = idr_find(&ctx->io_buffer_idr, p->bgid); + head = xa_load(&ctx->io_buffers, p->bgid); if (head) ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); if (ret < 0) @@ -3993,21 +3993,14 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) lockdep_assert_held(&ctx->uring_lock); - list = head = idr_find(&ctx->io_buffer_idr, p->bgid); + list = head = xa_load(&ctx->io_buffers, p->bgid); ret = io_add_buffers(p, &head); - if (ret < 0) - goto out; - - if (!list) { - ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1, - GFP_KERNEL); - if (ret < 0) { + if (ret >= 0 && !list) { + ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL); + if (ret < 0) __io_remove_buffers(ctx, head, p->bgid, -1U); - goto out; - } } -out: if (ret < 0) req_set_fail_links(req); @@ -8333,19 +8326,13 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx) return -ENXIO; } -static int __io_destroy_buffers(int id, void *p, void *data) -{ - struct io_ring_ctx *ctx = data; - struct io_buffer *buf = p; - - __io_remove_buffers(ctx, buf, id, -1U); - return 0; -} - static void io_destroy_buffers(struct io_ring_ctx *ctx) { - idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx); - idr_destroy(&ctx->io_buffer_idr); + struct io_buffer *buf; + unsigned long index; + + xa_for_each(&ctx->io_buffers, index, buf) + __io_remove_buffers(ctx, buf, index, -1U); } static void io_req_cache_free(struct list_head *list, struct task_struct *tsk) -- GitLab From c995f12ad8842dbf5cfed113fb52cdd083f5afd1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 14 Mar 2021 23:51:14 +0300 Subject: [PATCH 677/839] prctl: fix PR_SET_MM_AUXV kernel stack leak Doing a prctl(PR_SET_MM, PR_SET_MM_AUXV, addr, 1); will copy 1 byte from userspace to (quite big) on-stack array and then stash everything to mm->saved_auxv. AT_NULL terminator will be inserted at the very end. /proc/*/auxv handler will find that AT_NULL terminator and copy original stack contents to userspace. This devious scheme requires CAP_SYS_RESOURCE. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index b09fe21e88ff5..2e2e3f378d97f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2079,7 +2079,7 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, * up to the caller to provide sane values here, otherwise userspace * tools which use this vector might be unhappy. */ - unsigned long user_auxv[AT_VECTOR_SIZE]; + unsigned long user_auxv[AT_VECTOR_SIZE] = {}; if (len > sizeof(user_auxv)) return -EINVAL; -- GitLab From 1e28eed17697bcf343c6743f0028cc3b5dd88bf0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Mar 2021 14:41:02 -0700 Subject: [PATCH 678/839] Linux 5.12-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 10bb0a62cc7dd..a28bb374663d2 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Frozen Wasteland # *DOCUMENTATION* -- GitLab From beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 12 Mar 2021 15:09:13 +0100 Subject: [PATCH 679/839] vhost: Fix vhost_vq_reset() vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a262e12c6dc26..5ccb0705beae1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,8 +332,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->error_ctx = NULL; vq->kick = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- GitLab From f6bbf0010ba004f5e90c7aefdebc0ee4bd3283b9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 11 Mar 2021 14:52:56 +0100 Subject: [PATCH 680/839] vhost-vdpa: fix use-after-free of v->config_ctx When the 'v->config_ctx' eventfd_ctx reference is released we didn't set it to NULL. So if the same character device (e.g. /dev/vhost-vdpa-0) is re-opened, the 'v->config_ctx' is invalid and calling again vhost_vdpa_config_put() causes use-after-free issues like the following refcount_t underflow: refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 872 at lib/refcount.c:28 refcount_warn_saturate+0xae/0xf0 RIP: 0010:refcount_warn_saturate+0xae/0xf0 Call Trace: eventfd_ctx_put+0x5b/0x70 vhost_vdpa_release+0xcd/0x150 [vhost_vdpa] __fput+0x8e/0x240 ____fput+0xe/0x10 task_work_run+0x66/0xa0 exit_to_user_mode_prepare+0x118/0x120 syscall_exit_to_user_mode+0x21/0x50 ? __x64_sys_close+0x12/0x40 do_syscall_64+0x45/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 776f395004d8 ("vhost_vdpa: Support config interrupt in vdpa") Cc: lingshan.zhu@intel.com Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210311135257.109460-2-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Zhu Lingshan Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index dfdd8e4a8b08e..4fc768813c4ad 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -308,8 +308,10 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) static void vhost_vdpa_config_put(struct vhost_vdpa *v) { - if (v->config_ctx) + if (v->config_ctx) { eventfd_ctx_put(v->config_ctx); + v->config_ctx = NULL; + } } static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) -- GitLab From 0bde59c1723a29e294765c96dbe5c7fb639c2f96 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 11 Mar 2021 14:52:57 +0100 Subject: [PATCH 681/839] vhost-vdpa: set v->config_ctx to NULL if eventfd_ctx_fdget() fails In vhost_vdpa_set_config_call() if eventfd_ctx_fdget() fails the 'v->config_ctx' contains an error instead of a valid pointer. Since we consider 'v->config_ctx' valid if it is not NULL, we should set it to NULL in this case to avoid to use an invalid pointer in other functions such as vhost_vdpa_config_put(). Fixes: 776f395004d8 ("vhost_vdpa: Support config interrupt in vdpa") Cc: lingshan.zhu@intel.com Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210311135257.109460-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 4fc768813c4ad..e0a27e3362935 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -331,8 +331,12 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) if (!IS_ERR_OR_NULL(ctx)) eventfd_ctx_put(ctx); - if (IS_ERR(v->config_ctx)) - return PTR_ERR(v->config_ctx); + if (IS_ERR(v->config_ctx)) { + long ret = PTR_ERR(v->config_ctx); + + v->config_ctx = NULL; + return ret; + } v->vdpa->config->set_config_cb(v->vdpa, &cb); -- GitLab From 5171317dfd9afcf729799d31fffdbb9e71e45402 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 10 Mar 2021 10:22:27 +0000 Subject: [PATCH 682/839] cifs: update new ACE pointer after populate_new_aces. After the fix for retaining externally set ACEs with cifsacl and modefromsid,idsfromsid, there was an issue in populating the inherited ACEs after setting the ACEs introduced by these two modes. Fixed this by updating the ACE pointer again after the call to populate_new_aces. Signed-off-by: Shyam Prasad N Reviewed-by: Rohith Surabattula Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 9d29eb9660c28..2be22a5c690f8 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1118,7 +1118,6 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, /* Retain old ACEs which we can retain */ for (i = 0; i < src_num_aces; ++i) { pntace = (struct cifs_ace *) (acl_base + size); - pnntace = (struct cifs_ace *) (nacl_base + nsize); if (!new_aces_set && (pntace->flags & INHERITED_ACE)) { /* Place the new ACEs in between existing explicit and inherited */ @@ -1131,14 +1130,18 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, } /* If it's any one of the ACE we're replacing, skip! */ - if ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) || + if (!mode_from_sid && + ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) || (compare_sids(&pntace->sid, pownersid) == 0) || (compare_sids(&pntace->sid, pgrpsid) == 0) || (compare_sids(&pntace->sid, &sid_everyone) == 0) || - (compare_sids(&pntace->sid, &sid_authusers) == 0)) { + (compare_sids(&pntace->sid, &sid_authusers) == 0))) { goto next_ace; } + /* update the pointer to the next ACE to populate*/ + pnntace = (struct cifs_ace *) (nacl_base + nsize); + nsize += cifs_copy_ace(pnntace, pntace, NULL); num_aces++; -- GitLab From 05946d4b7a7349ae58bfa2d51ae832e64a394c2d Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 10 Mar 2021 13:20:40 +0100 Subject: [PATCH 683/839] cifs: Fix preauth hash corruption smb311_update_preauth_hash() uses the shash in server->secmech without appropriate locking, and this can lead to sessions corrupting each other's preauth hashes. The following script can easily trigger the problem: #!/bin/sh -e NMOUNTS=10 for i in $(seq $NMOUNTS); mkdir -p /tmp/mnt$i umount /tmp/mnt$i 2>/dev/null || : done while :; do for i in $(seq $NMOUNTS); do mount -t cifs //192.168.0.1/test /tmp/mnt$i -o ... & done wait for i in $(seq $NMOUNTS); do umount /tmp/mnt$i done done Usually within seconds this leads to one or more of the mounts failing with the following errors, and a "Bad SMB2 signature for message" is seen in the server logs: CIFS: VFS: \\192.168.0.1 failed to connect to IPC (rc=-13) CIFS: VFS: cifs_mount failed w/return code = -13 Fix it by holding the server mutex just like in the other places where the shashes are used. Fixes: 8bd68c6e47abff34e4 ("CIFS: implement v3.11 preauth integrity") Signed-off-by: Vincent Whitchurch CC: Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 007d99437c771..c1725b55f3648 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1196,9 +1196,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, /* * Compounding is never used during session establish. */ - if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { + mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, rqst[0].rq_iov, rqst[0].rq_nvec); + mutex_unlock(&server->srv_mutex); + } for (i = 0; i < num_rqst; i++) { rc = wait_for_response(server, midQ[i]); @@ -1266,7 +1269,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, .iov_base = resp_iov[0].iov_base, .iov_len = resp_iov[0].iov_len }; + mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, &iov, 1); + mutex_unlock(&server->srv_mutex); } out: -- GitLab From 6c5403173a13a08ff61dbdafa4c0ed4a9dedbfe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 12 Mar 2021 09:34:39 +0100 Subject: [PATCH 684/839] drm/ttm: make ttm_bo_unpin more defensive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We seem to have some more driver bugs than thought. Signed-off-by: Christian König Fixes: deb0814b43f3 ("drm/ttm: add ttm_bo_pin()/ttm_bo_unpin() v2") Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210312093810.2202-1-christian.koenig@amd.com --- include/drm/ttm/ttm_bo_api.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index e17be324d95f5..b8ca13664fa20 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -612,9 +612,11 @@ static inline void ttm_bo_pin(struct ttm_buffer_object *bo) static inline void ttm_bo_unpin(struct ttm_buffer_object *bo) { dma_resv_assert_held(bo->base.resv); - WARN_ON_ONCE(!bo->pin_count); WARN_ON_ONCE(!kref_read(&bo->kref)); - --bo->pin_count; + if (bo->pin_count) + --bo->pin_count; + else + WARN_ON_ONCE(true); } int ttm_mem_evict_first(struct ttm_bo_device *bdev, -- GitLab From efe814a471e0e58f28f1efaf430c8784a4f36626 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 14 Mar 2021 20:57:08 +0000 Subject: [PATCH 685/839] io_uring: fix ->flags races by linked timeouts It's racy to modify req->flags from a not owning context, e.g. linked timeout calling req_set_fail_links() for the master request might race with that request setting/clearing flags while being executed concurrently. Just remove req_set_fail_links(prev) from io_link_timeout_fn(), io_async_find_and_cancel() and functions down the line take care of setting the fail bit. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 58d62dd9f8e4b..217f72d50ff5a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6197,7 +6197,6 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { - req_set_fail_links(prev); io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); io_put_req_deferred(prev, 1); } else { -- GitLab From 180f829fe4026bd192447d261e712b6cb84f6202 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 14 Mar 2021 20:57:09 +0000 Subject: [PATCH 686/839] io_uring: fix complete_post use ctx after free If io_req_complete_post() put not a final ref, we can't rely on the request's ctx ref, and so ctx may potentially be freed while complete_post() is in io_cqring_ev_posted()/etc. In that case get an additional ctx reference, and put it in the end, so protecting following io_cqring_ev_posted(). And also prolong ctx lifetime until spin_unlock happens, as we do with mutexes, so added percpu_ref_get() doesn't race with ctx free. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 217f72d50ff5a..f8a683607b257 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1550,14 +1550,17 @@ static void io_req_complete_post(struct io_kiocb *req, long res, io_put_task(req->task, 1); list_add(&req->compl.list, &cs->locked_free_list); cs->locked_free_nr++; - } else - req = NULL; + } else { + if (!percpu_ref_tryget(&ctx->refs)) + req = NULL; + } io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); - if (req) + if (req) { + io_cqring_ev_posted(ctx); percpu_ref_put(&ctx->refs); + } } static void io_req_complete_state(struct io_kiocb *req, long res, @@ -8373,11 +8376,13 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) { /* * Some may use context even when all refs and requests have been put, - * and they are free to do so while still holding uring_lock, see - * __io_req_task_submit(). Wait for them to finish. + * and they are free to do so while still holding uring_lock or + * completion_lock, see __io_req_task_submit(). Wait for them to finish. */ mutex_lock(&ctx->uring_lock); mutex_unlock(&ctx->uring_lock); + spin_lock_irq(&ctx->completion_lock); + spin_unlock_irq(&ctx->completion_lock); io_sq_thread_finish(ctx); io_sqe_buffers_unregister(ctx); -- GitLab From 09a6f4efaa6536e760385f949e24078fd78305ad Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 14 Mar 2021 20:57:10 +0000 Subject: [PATCH 687/839] io_uring: replace sqd rw_semaphore with mutex The only user of read-locking of sqd->rw_lock is sq_thread itself, which is by definition alone, so we don't really need rw_semaphore, but mutex will do. Replace it with a mutex, and kill read-to-write upgrading and extra task_work handling in io_sq_thread(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f8a683607b257..6de779aafd339 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -258,7 +258,7 @@ enum { struct io_sq_data { refcount_t refs; - struct rw_semaphore rw_lock; + struct mutex lock; /* ctx's that are using this sqd */ struct list_head ctx_list; @@ -6689,16 +6689,15 @@ static int io_sq_thread(void *data) set_cpus_allowed_ptr(current, cpu_online_mask); current->flags |= PF_NO_SETAFFINITY; - down_read(&sqd->rw_lock); - + mutex_lock(&sqd->lock); while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { int ret; bool cap_entries, sqt_spin, needs_sched; if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) { - up_read(&sqd->rw_lock); + mutex_unlock(&sqd->lock); cond_resched(); - down_read(&sqd->rw_lock); + mutex_lock(&sqd->lock); io_run_task_work(); timeout = jiffies + sqd->sq_thread_idle; continue; @@ -6745,10 +6744,10 @@ static int io_sq_thread(void *data) list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); - up_read(&sqd->rw_lock); + mutex_unlock(&sqd->lock); schedule(); try_to_freeze(); - down_read(&sqd->rw_lock); + mutex_lock(&sqd->lock); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); } @@ -6756,20 +6755,13 @@ static int io_sq_thread(void *data) finish_wait(&sqd->wait, &wait); timeout = jiffies + sqd->sq_thread_idle; } - up_read(&sqd->rw_lock); - down_write(&sqd->rw_lock); - /* - * someone may have parked and added a cancellation task_work, run - * it first because we don't want it in io_uring_cancel_sqpoll() - */ - io_run_task_work(); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_uring_cancel_sqpoll(ctx); sqd->thread = NULL; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); - up_write(&sqd->rw_lock); + mutex_unlock(&sqd->lock); io_run_task_work(); complete(&sqd->exited); @@ -7071,21 +7063,21 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) } static void io_sq_thread_unpark(struct io_sq_data *sqd) - __releases(&sqd->rw_lock) + __releases(&sqd->lock) { WARN_ON_ONCE(sqd->thread == current); clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - up_write(&sqd->rw_lock); + mutex_unlock(&sqd->lock); } static void io_sq_thread_park(struct io_sq_data *sqd) - __acquires(&sqd->rw_lock) + __acquires(&sqd->lock) { WARN_ON_ONCE(sqd->thread == current); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - down_write(&sqd->rw_lock); + mutex_lock(&sqd->lock); /* set again for consistency, in case concurrent parks are happening */ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); if (sqd->thread) @@ -7096,11 +7088,11 @@ static void io_sq_thread_stop(struct io_sq_data *sqd) { WARN_ON_ONCE(sqd->thread == current); - down_write(&sqd->rw_lock); + mutex_lock(&sqd->lock); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); if (sqd->thread) wake_up_process(sqd->thread); - up_write(&sqd->rw_lock); + mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -7182,7 +7174,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, refcount_set(&sqd->refs, 1); INIT_LIST_HEAD(&sqd->ctx_list); - init_rwsem(&sqd->rw_lock); + mutex_init(&sqd->lock); init_waitqueue_head(&sqd->wait); init_completion(&sqd->exited); return sqd; -- GitLab From f6d54255f4235448d4bbe442362d4caa62da97d5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 14 Mar 2021 20:57:11 +0000 Subject: [PATCH 688/839] io_uring: halt SQO submission on ctx exit io_sq_thread_finish() is called in io_ring_ctx_free(), so SQPOLL task is potentially running submitting new requests. It's not a disaster because of using a "try" variant of percpu_ref_get, but is far from nice. Remove ctx from the sqd ctx list earlier, before cancellation loop, so SQPOLL can't find it and so won't submit new requests. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6de779aafd339..b87012a217758 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8564,6 +8564,14 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_unregister_personality(ctx, index); mutex_unlock(&ctx->uring_lock); + /* prevent SQPOLL from submitting new requests */ + if (ctx->sq_data) { + io_sq_thread_park(ctx->sq_data); + list_del_init(&ctx->sqd_list); + io_sqd_update_thread_idle(ctx->sq_data); + io_sq_thread_unpark(ctx->sq_data); + } + io_kill_timeouts(ctx, NULL, NULL); io_poll_remove_all(ctx, NULL, NULL); -- GitLab From 9e138a48345427fa42f6076396ea069cebf3c08f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 14 Mar 2021 20:57:12 +0000 Subject: [PATCH 689/839] io_uring: fix concurrent parking If io_sq_thread_park() of one task got rescheduled right after set_bit(), before it gets back to mutex_lock() there can happen park()/unpark() by another task with SQPOLL locking again and continuing running never seeing that first set_bit(SHOULD_PARK), so won't even try to put the mutex down for parking. It will get parked eventually when SQPOLL drops the lock for reschedule, but may be problematic and will get in the way of further fixes. Account number of tasks waiting for parking with a new atomic variable park_pending and adjust SHOULD_PARK accordingly. It doesn't entirely replaces SHOULD_PARK bit with this atomic var because it's convenient to have it as a bit in the state and will help to do optimisations later. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b87012a217758..70ceb8ed59500 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -258,6 +258,7 @@ enum { struct io_sq_data { refcount_t refs; + atomic_t park_pending; struct mutex lock; /* ctx's that are using this sqd */ @@ -7067,7 +7068,13 @@ static void io_sq_thread_unpark(struct io_sq_data *sqd) { WARN_ON_ONCE(sqd->thread == current); + /* + * Do the dance but not conditional clear_bit() because it'd race with + * other threads incrementing park_pending and setting the bit. + */ clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + if (atomic_dec_return(&sqd->park_pending)) + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_unlock(&sqd->lock); } @@ -7076,10 +7083,9 @@ static void io_sq_thread_park(struct io_sq_data *sqd) { WARN_ON_ONCE(sqd->thread == current); + atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - /* set again for consistency, in case concurrent parks are happening */ - set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); if (sqd->thread) wake_up_process(sqd->thread); } @@ -7099,6 +7105,8 @@ static void io_sq_thread_stop(struct io_sq_data *sqd) static void io_put_sq_data(struct io_sq_data *sqd) { if (refcount_dec_and_test(&sqd->refs)) { + WARN_ON_ONCE(atomic_read(&sqd->park_pending)); + io_sq_thread_stop(sqd); kfree(sqd); } @@ -7172,6 +7180,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, if (!sqd) return ERR_PTR(-ENOMEM); + atomic_set(&sqd->park_pending, 0); refcount_set(&sqd->refs, 1); INIT_LIST_HEAD(&sqd->ctx_list); mutex_init(&sqd->lock); -- GitLab From 9b46571142e47503ed4f3ae3be5ed3968d8cb9cc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Mar 2021 14:23:07 +0000 Subject: [PATCH 690/839] io_uring: add generic callback_head helpers We already have helpers to run/add callback_head but taking ctx and working with ctx->exit_task_work. Extract generic versions of them implemented in terms of struct callback_head, it will be used later. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 70ceb8ed59500..5c2de43b99f53 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1929,17 +1929,44 @@ static int io_req_task_work_add(struct io_kiocb *req) return ret; } -static void io_req_task_work_add_fallback(struct io_kiocb *req, - task_work_func_t cb) +static bool io_run_task_work_head(struct callback_head **work_head) +{ + struct callback_head *work, *next; + bool executed = false; + + do { + work = xchg(work_head, NULL); + if (!work) + break; + + do { + next = work->next; + work->func(work); + work = next; + cond_resched(); + } while (work); + executed = true; + } while (1); + + return executed; +} + +static void io_task_work_add_head(struct callback_head **work_head, + struct callback_head *task_work) { - struct io_ring_ctx *ctx = req->ctx; struct callback_head *head; - init_task_work(&req->task_work, cb); do { - head = READ_ONCE(ctx->exit_task_work); - req->task_work.next = head; - } while (cmpxchg(&ctx->exit_task_work, head, &req->task_work) != head); + head = READ_ONCE(*work_head); + task_work->next = head; + } while (cmpxchg(work_head, head, task_work) != head); +} + +static void io_req_task_work_add_fallback(struct io_kiocb *req, + task_work_func_t cb) +{ + init_task_work(&req->task_work, cb); + io_task_work_add_head(&req->ctx->exit_task_work, &req->task_work); } static void __io_req_task_cancel(struct io_kiocb *req, int error) @@ -8471,26 +8498,9 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) return -EINVAL; } -static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) +static inline bool io_run_ctx_fallback(struct io_ring_ctx *ctx) { - struct callback_head *work, *next; - bool executed = false; - - do { - work = xchg(&ctx->exit_task_work, NULL); - if (!work) - break; - - do { - next = work->next; - work->func(work); - work = next; - cond_resched(); - } while (work); - executed = true; - } while (1); - - return executed; + return io_run_task_work_head(&ctx->exit_task_work); } struct io_tctx_exit { -- GitLab From b7f5a0bfe2061b2c7b2164de06fa4072d7373a45 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 15 Mar 2021 14:23:08 +0000 Subject: [PATCH 691/839] io_uring: fix sqpoll cancellation via task_work Running sqpoll cancellations via task_work_run() is a bad idea because it depends on other task works to be run, but those may be locked in currently running task_work_run() because of how it's (splicing the list in batches). Enqueue and run them through a separate callback head, namely struct io_sq_data::park_task_work. As a nice bonus we now precisely control where it's run, that's much safer than guessing where it can happen as it was before. Reported-by: Jens Axboe Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5c2de43b99f53..5f4e312111ea7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -274,6 +274,7 @@ struct io_sq_data { unsigned long state; struct completion exited; + struct callback_head *park_task_work; }; #define IO_IOPOLL_BATCH 8 @@ -6727,6 +6728,7 @@ static int io_sq_thread(void *data) cond_resched(); mutex_lock(&sqd->lock); io_run_task_work(); + io_run_task_work_head(&sqd->park_task_work); timeout = jiffies + sqd->sq_thread_idle; continue; } @@ -6781,6 +6783,7 @@ static int io_sq_thread(void *data) } finish_wait(&sqd->wait, &wait); + io_run_task_work_head(&sqd->park_task_work); timeout = jiffies + sqd->sq_thread_idle; } @@ -6792,6 +6795,7 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); io_run_task_work(); + io_run_task_work_head(&sqd->park_task_work); complete(&sqd->exited); do_exit(0); } @@ -8890,7 +8894,7 @@ static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx) if (task) { init_completion(&work.completion); init_task_work(&work.task_work, io_sqpoll_cancel_cb); - WARN_ON_ONCE(task_work_add(task, &work.task_work, TWA_SIGNAL)); + io_task_work_add_head(&sqd->park_task_work, &work.task_work); wake_up_process(task); } io_sq_thread_unpark(sqd); -- GitLab From d336f7ebc65007f5831e2297e6f3383ae8dbf8ed Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 Mar 2021 09:32:53 -0800 Subject: [PATCH 692/839] xfs: force log and push AIL to clear pinned inodes when aborting mount If we allocate quota inodes in the process of mounting a filesystem but then decide to abort the mount, it's possible that the quota inodes are sitting around pinned by the log. Now that inode reclaim relies on the AIL to flush inodes, we have to force the log and push the AIL in between releasing the quota inodes and kicking off reclaim to tear down all the incore inodes. Do this by extracting the bits we need from the unmount path and reusing them. As an added bonus, failed writes during a failed mount will not retry forever now. This was originally found during a fuzz test of metadata directories (xfs/1546), but the actual symptom was that reclaim hung up on the quota inodes. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_mount.c | 90 +++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 52370d0a3f434..1c97b155a8ee7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -634,6 +634,47 @@ xfs_check_summary_counts( return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount); } +/* + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and + * internal inode structures can be sitting in the CIL and AIL at this point, + * so we need to unpin them, write them back and/or reclaim them before unmount + * can proceed. + * + * An inode cluster that has been freed can have its buffer still pinned in + * memory because the transaction is still sitting in a iclog. The stale inodes + * on that buffer will be pinned to the buffer until the transaction hits the + * disk and the callbacks run. Pushing the AIL will skip the stale inodes and + * may never see the pinned buffer, so nothing will push out the iclog and + * unpin the buffer. + * + * Hence we need to force the log to unpin everything first. However, log + * forces don't wait for the discards they issue to complete, so we have to + * explicitly wait for them to complete here as well. + * + * Then we can tell the world we are unmounting so that error handling knows + * that the filesystem is going away and we should error out anything that we + * have been retrying in the background. This will prevent never-ending + * retries in AIL pushing from hanging the unmount. + * + * Finally, we can push the AIL to clean all the remaining dirty objects, then + * reclaim the remaining inodes that are still in memory at this point in time. + */ +static void +xfs_unmount_flush_inodes( + struct xfs_mount *mp) +{ + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_extent_busy_wait_all(mp); + flush_workqueue(xfs_discard_wq); + + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + xfs_ail_push_all_sync(mp->m_ail); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp); + xfs_health_unmount(mp); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1008,7 +1049,7 @@ xfs_mountfs( /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); /* - * Cancel all delayed reclaim work and reclaim the inodes directly. + * Flush all inode reclamation work and flush the log. * We have to do this /after/ rtunmount and qm_unmount because those * two will have scheduled delayed reclaim for the rt/quota inodes. * @@ -1018,11 +1059,8 @@ xfs_mountfs( * qm_unmount_quotas and therefore rely on qm_unmount to release the * quota inodes. */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); out_log_dealloc: - mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1063,47 +1101,7 @@ xfs_unmountfs( xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); - /* - * We can potentially deadlock here if we have an inode cluster - * that has been freed has its buffer still pinned in memory because - * the transaction is still sitting in a iclog. The stale inodes - * on that buffer will be pinned to the buffer until the - * transaction hits the disk and the callbacks run. Pushing the AIL will - * skip the stale inodes and may never see the pinned buffer, so - * nothing will push out the iclog and unpin the buffer. Hence we - * need to force the log here to ensure all items are flushed into the - * AIL before we go any further. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Wait for all busy extents to be freed, including completion of - * any discard operation. - */ - xfs_extent_busy_wait_all(mp); - flush_workqueue(xfs_discard_wq); - - /* - * We now need to tell the world we are unmounting. This will allow - * us to detect that the filesystem is going away and we should error - * out anything that we have been retrying in the background. This will - * prevent neverending retries in AIL pushing from hanging the unmount. - */ - mp->m_flags |= XFS_MOUNT_UNMOUNTING; - - /* - * Flush all pending changes from the AIL. - */ - xfs_ail_push_all_sync(mp->m_ail); - - /* - * Reclaim all inodes. At this point there should be no dirty inodes and - * none should be pinned or locked. Stop background inode reclaim here - * if it is still running. - */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); xfs_qm_unmount(mp); -- GitLab From 08a204387e8063ba7375481281701137bd553dee Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 9 Mar 2021 09:45:51 -0800 Subject: [PATCH 693/839] docs: ABI: Fix the spelling oustanding to outstanding in the file sysfs-fs-xfs s/oustanding/outstanding/ Signed-off-by: Bhaskar Chowdhury Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- Documentation/ABI/testing/sysfs-fs-xfs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs index ea0cc8c42093f..f704925f6fe93 100644 --- a/Documentation/ABI/testing/sysfs-fs-xfs +++ b/Documentation/ABI/testing/sysfs-fs-xfs @@ -33,7 +33,7 @@ Contact: xfs@oss.sgi.com Description: The current state of the log write grant head. It represents the total log reservation of all currently - oustanding transactions, including regrants due to + outstanding transactions, including regrants due to rolling transactions. The grant head is exported in "cycle:bytes" format. Users: xfstests -- GitLab From 8723d5ba8bdae1c41be7a6fc8469dc9aa551e7d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 14 Mar 2021 10:59:39 -0700 Subject: [PATCH 694/839] xfs: also reject BULKSTAT_SINGLE in a mount user namespace BULKSTAT_SINGLE exposed the ondisk uids/gids just like bulkstat, and can be called on any inode, including ones not visible in the current mount. Fixes: f736d93d76d3 ("xfs: support idmapped mounts") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_itable.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index ca310a125d1e1..3498b97fb06d3 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -168,6 +168,12 @@ xfs_bulkstat_one( }; int error; + if (breq->mnt_userns != &init_user_ns) { + xfs_warn_ratelimited(breq->mp, + "bulkstat not supported inside of idmapped mounts."); + return -EINVAL; + } + ASSERT(breq->icount == 1); bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), -- GitLab From d2dcc8ed8ec650a793e81d8b2222146eb6ddd84f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 8 Mar 2021 17:20:17 +0800 Subject: [PATCH 695/839] btrfs: fix wrong offset to zero out range beyond i_size [BUG] The test generic/091 fails , with the following output: fsx -N 10000 -o 128000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -W mapped writes DISABLED Seed set to 1 main: filesystem does not support fallocate mode FALLOC_FL_COLLAPSE_RANGE, disabling! main: filesystem does not support fallocate mode FALLOC_FL_INSERT_RANGE, disabling! skipping zero size read truncating to largest ever: 0xe400 copying to largest ever: 0x1f400 cloning to largest ever: 0x70000 cloning to largest ever: 0x77000 fallocating to largest ever: 0x7a120 Mapped Read: non-zero data past EOF (0x3a7ff) page offset 0x800 is 0xf2e1 <<< ... [CAUSE] In commit c28ea613fafa ("btrfs: subpage: fix the false data csum mismatch error") end_bio_extent_readpage() changes to only zero the range inside the bvec for incoming subpage support. But that commit is using incorrect offset to calculate the start. For subpage, we can have a case that the whole bvec is beyond isize, thus we need to calculate the correct offset. But the offending commit is using @end (bvec end), other than @start (bvec start) to calculate the start offset. This means, we only zero the last byte of the bvec, not from the isize. This stupid bug makes the range beyond isize is not properly zeroed, and failed above test. [FIX] Use correct @start to calculate the range start. Reported-by: kernel test robot Fixes: c28ea613fafa ("btrfs: subpage: fix the false data csum mismatch error") Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4671c99d468dd..f3d7be975c3a5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3020,7 +3020,7 @@ readpage_ok: */ if (page->index == end_index && i_size <= end) { u32 zero_start = max(offset_in_page(i_size), - offset_in_page(end)); + offset_in_page(start)); zero_user_segment(page, zero_start, offset_in_page(end) + 1); -- GitLab From fbf48bb0b197e6894a04c714728c952af7153bf3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 3 Mar 2021 18:41:51 +0800 Subject: [PATCH 696/839] btrfs: track qgroup released data in own variable in insert_prealloc_file_extent There is a piece of weird code in insert_prealloc_file_extent(), which looks like: ret = btrfs_qgroup_release_data(inode, file_offset, len); if (ret < 0) return ERR_PTR(ret); if (trans) { ret = insert_reserved_file_extent(trans, inode, file_offset, &stack_fi, true, ret); ... } extent_info.is_new_extent = true; extent_info.qgroup_reserved = ret; ... Note how the variable @ret is abused here, and if anyone is adding code just after btrfs_qgroup_release_data() call, it's super easy to overwrite the @ret and cause tons of qgroup related bugs. Fix such abuse by introducing new variable @qgroup_released, so that we won't reuse the existing variable @ret. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c35b724a56116..77182be403c56 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9873,6 +9873,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; + int qgroup_released; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9885,14 +9886,14 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - ret = btrfs_qgroup_release_data(inode, file_offset, len); - if (ret < 0) - return ERR_PTR(ret); + qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); + if (qgroup_released < 0) + return ERR_PTR(qgroup_released); if (trans) { ret = insert_reserved_file_extent(trans, inode, file_offset, &stack_fi, - true, ret); + true, qgroup_released); if (ret) return ERR_PTR(ret); return trans; @@ -9905,7 +9906,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( extent_info.file_offset = file_offset; extent_info.extent_buf = (char *)&stack_fi; extent_info.is_new_extent = true; - extent_info.qgroup_reserved = ret; + extent_info.qgroup_reserved = qgroup_released; extent_info.insertions = 0; path = btrfs_alloc_path(); -- GitLab From a3ee79bd8fe17812d2305ccc4bf81bfeab395576 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 3 Mar 2021 18:41:52 +0800 Subject: [PATCH 697/839] btrfs: fix qgroup data rsv leak caused by falloc failure [BUG] When running fsstress with only falloc workload, and a very low qgroup limit set, we can get qgroup data rsv leak at unmount time. BTRFS warning (device dm-0): qgroup 0/5 has unreleased space, type 0 rsv 20480 BTRFS error (device dm-0): qgroup reserved space leaked The minimal reproducer looks like: #!/bin/bash dev=/dev/test/test mnt="/mnt/btrfs" fsstress=~/xfstests-dev/ltp/fsstress runtime=8 workload() { umount $dev &> /dev/null umount $mnt &> /dev/null mkfs.btrfs -f $dev > /dev/null mount $dev $mnt btrfs quota en $mnt btrfs quota rescan -w $mnt btrfs qgroup limit 16m 0/5 $mnt $fsstress -w -z -f creat=10 -f fallocate=10 -p 2 -n 100 \ -d $mnt -v > /tmp/fsstress umount $mnt if dmesg | grep leak ; then echo "!!! FAILED !!!" exit 1 fi } for (( i=0; i < $runtime; i++)); do echo "=== $i/$runtime===" workload done Normally it would fail before round 4. [CAUSE] In function insert_prealloc_file_extent(), we first call btrfs_qgroup_release_data() to know how many bytes are reserved for qgroup data rsv. Then use that @qgroup_released number to continue our work. But after we call btrfs_qgroup_release_data(), we should either queue @qgroup_released to delayed ref or free them manually in error path. Unfortunately, we lack the error handling to free the released bytes, leaking qgroup data rsv. All the error handling function outside won't help at all, as we have released the range, meaning in inode io tree, the EXTENT_QGROUP_RESERVED bit is already cleared, thus all btrfs_qgroup_free_data() call won't free any data rsv. [FIX] Add free_qgroup tag to manually free the released qgroup data rsv. Reported-by: Nikolay Borisov Reported-by: David Sterba Fixes: 9729f10a608f ("btrfs: inode: move qgroup reserved space release to the callers of insert_reserved_file_extent()") CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 77182be403c56..ea5ede619220c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9895,7 +9895,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( file_offset, &stack_fi, true, qgroup_released); if (ret) - return ERR_PTR(ret); + goto free_qgroup; return trans; } @@ -9910,17 +9910,31 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( extent_info.insertions = 0; path = btrfs_alloc_path(); - if (!path) - return ERR_PTR(-ENOMEM); + if (!path) { + ret = -ENOMEM; + goto free_qgroup; + } ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset, file_offset + len - 1, &extent_info, &trans); btrfs_free_path(path); if (ret) - return ERR_PTR(ret); - + goto free_qgroup; return trans; + +free_qgroup: + /* + * We have released qgroup data range at the beginning of the function, + * and normally qgroup_released bytes will be freed when committing + * transaction. + * But if we error out early, we have to free what we have released + * or we leak qgroup data reservation. + */ + btrfs_qgroup_free_refroot(inode->root->fs_info, + inode->root->root_key.objectid, qgroup_released, + BTRFS_QGROUP_RSV_DATA); + return ERR_PTR(ret); } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, -- GitLab From e3d3b4157610164b0ec43d968b0dfedfe7c68992 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Mar 2021 15:13:30 +0000 Subject: [PATCH 698/839] btrfs: zoned: fix linked list corruption after log root tree allocation failure When using a zoned filesystem, while syncing the log, if we fail to allocate the root node for the log root tree, we are not removing the log context we allocated on stack from the list of log contexts of the log root tree. This means after the return from btrfs_sync_log() we get a corrupted linked list. Fix this by allocating the node before adding our stack allocated context to the list of log contexts of the log root tree. Fixes: 3ddebf27fcd3a9 ("btrfs: zoned: reorder log node allocation on zoned filesystem") Reviewed-by: Johannes Thumshirn Reviewed-by: Naohiro Aota Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2f1acc9aea9ea..92a3686277918 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3169,10 +3169,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_lock(&log_root_tree->log_mutex); - index2 = log_root_tree->log_transid % 2; - list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); - root_log_ctx.log_transid = log_root_tree->log_transid; - if (btrfs_is_zoned(fs_info)) { if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); @@ -3183,6 +3179,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } } + index2 = log_root_tree->log_transid % 2; + list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + root_log_ctx.log_transid = log_root_tree->log_transid; + /* * Now we are safe to update the log_root_tree because we're under the * log_mutex, and we're a current writer so we're holding the commit -- GitLab From 64fcbb6158ecc684d84c64424830a9c37c77c5b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Mar 2021 10:26:45 +0000 Subject: [PATCH 699/839] afs: Fix accessing YFS xattrs on a non-YFS server If someone attempts to access YFS-related xattrs (e.g. afs.yfs.acl) on a file on a non-YFS AFS server (such as OpenAFS), then the kernel will jump to a NULL function pointer because the afs_fetch_acl_operation descriptor doesn't point to a function for issuing an operation on a non-YFS server[1]. Fix this by making afs_wait_for_operation() check that the issue_afs_rpc method is set before jumping to it and setting -ENOTSUPP if not. This fix also covers other potential operations that also only exist on YFS servers. afs_xattr_get/set_yfs() then need to translate -ENOTSUPP to -ENODATA as the former error is internal to the kernel. The bug shows up as an oops like the following: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. [...] Call Trace: afs_wait_for_operation+0x83/0x1b0 [kafs] afs_xattr_get_yfs+0xe6/0x270 [kafs] __vfs_getxattr+0x59/0x80 vfs_getxattr+0x11c/0x140 getxattr+0x181/0x250 ? __check_object_size+0x13f/0x150 ? __fput+0x16d/0x250 __x64_sys_fgetxattr+0x64/0xb0 do_syscall_64+0x49/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fb120a9defe This was triggered with "cp -a" which attempts to copy xattrs, including afs ones, but is easier to reproduce with getfattr, e.g.: getfattr -d -m ".*" /afs/openafs.org/ Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Reported-by: Gaja Sophie Peters Signed-off-by: David Howells Tested-by: Gaja Sophie Peters Reviewed-by: Marc Dionne Reviewed-by: Jeffrey Altman cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003498.html [1] Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003566.html # v1 Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003572.html # v2 --- fs/afs/fs_operation.c | 7 +++++-- fs/afs/xattr.c | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 97cab12b0a6c2..71c58723763d2 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -181,10 +181,13 @@ void afs_wait_for_operation(struct afs_operation *op) if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && op->ops->issue_yfs_rpc) op->ops->issue_yfs_rpc(op); - else + else if (op->ops->issue_afs_rpc) op->ops->issue_afs_rpc(op); + else + op->ac.error = -ENOTSUPP; - op->error = afs_wait_for_call_to_complete(op->call, &op->ac); + if (op->call) + op->error = afs_wait_for_call_to_complete(op->call, &op->ac); } switch (op->error) { diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index c629caae50028..4934e325a14a8 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -231,6 +231,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else ret = -ERANGE; } + } else if (ret == -ENOTSUPP) { + ret = -ENODATA; } error_yacl: @@ -256,6 +258,7 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, { struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; if (flags == XATTR_CREATE || strcmp(name, "acl") != 0) @@ -270,7 +273,10 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, return afs_put_operation(op); op->ops = &yfs_store_opaque_acl2_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + if (ret == -ENOTSUPP) + ret = -ENODATA; + return ret; } static const struct xattr_handler afs_xattr_yfs_handler = { -- GitLab From a7889c6320b9200e3fe415238f546db677310fa9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Mar 2021 08:27:39 +0000 Subject: [PATCH 700/839] afs: Stop listxattr() from listing "afs.*" attributes afs_listxattr() lists all the available special afs xattrs (i.e. those in the "afs.*" space), no matter what type of server we're dealing with. But OpenAFS servers, for example, cannot deal with some of the extra-capable attributes that AuriStor (YFS) servers provide. Unfortunately, the presence of the afs.yfs.* attributes causes errors[1] for anything that tries to read them if the server is of the wrong type. Fix the problem by removing afs_listxattr() so that none of the special xattrs are listed (AFS doesn't support xattrs). It does mean, however, that getfattr won't list them, though they can still be accessed with getxattr() and setxattr(). This can be tested with something like: getfattr -d -m ".*" /afs/example.com/path/to/file With this change, none of the afs.* attributes should be visible. Changes: ver #2: - Hide all of the afs.* xattrs, not just the ACL ones. Fixes: ae46578b963f ("afs: Get YFS ACLs and information through xattrs") Reported-by: Gaja Sophie Peters Signed-off-by: David Howells Tested-by: Gaja Sophie Peters Reviewed-by: Jeffrey Altman Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003502.html [1] Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003567.html # v1 Link: http://lists.infradead.org/pipermail/linux-afs/2021-March/003573.html # v2 --- fs/afs/dir.c | 1 - fs/afs/file.c | 1 - fs/afs/inode.c | 1 - fs/afs/internal.h | 1 - fs/afs/mntpt.c | 1 - fs/afs/xattr.c | 23 ----------------------- 6 files changed, 28 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 714fcca9af990..17548c1faf029 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -70,7 +70,6 @@ const struct inode_operations afs_dir_inode_operations = { .permission = afs_permission, .getattr = afs_getattr, .setattr = afs_setattr, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_dir_aops = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 85f5adf21aa08..960b64268623e 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -43,7 +43,6 @@ const struct inode_operations afs_file_inode_operations = { .getattr = afs_getattr, .setattr = afs_setattr, .permission = afs_permission, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_fs_aops = { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1156b2df28d36..12be88716e4c9 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -27,7 +27,6 @@ static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, - .listxattr = afs_listxattr, }; static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b626e38e9ab5d..1627b18728125 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1509,7 +1509,6 @@ extern int afs_launder_page(struct page *); * xattr.c */ extern const struct xattr_handler *afs_xattr_handlers[]; -extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 052dab2f5c03a..bbb2c210d139d 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .readlink = page_readlink, .getattr = afs_getattr, - .listxattr = afs_listxattr, }; const struct inode_operations afs_autocell_inode_operations = { diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 4934e325a14a8..7751b0b3f81d4 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -11,29 +11,6 @@ #include #include "internal.h" -static const char afs_xattr_list[] = - "afs.acl\0" - "afs.cell\0" - "afs.fid\0" - "afs.volume\0" - "afs.yfs.acl\0" - "afs.yfs.acl_inherited\0" - "afs.yfs.acl_num_cleaned\0" - "afs.yfs.vol_acl"; - -/* - * Retrieve a list of the supported xattrs. - */ -ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - if (size == 0) - return sizeof(afs_xattr_list); - if (size < sizeof(afs_xattr_list)) - return -ERANGE; - memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list)); - return sizeof(afs_xattr_list); -} - /* * Deal with the result of a successful fetch ACL operation. */ -- GitLab From 73076790e25717b7d452c2eab0bfb118826e5b61 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 4 Feb 2021 08:21:15 -0500 Subject: [PATCH 701/839] drm/amd/display: Copy over soc values before bounding box creation [Why] With certain fclock overclocks, state 1 may be chosen as the closest clock level. This may result in this state being empty if not populated beforehand, resulting in black screens and screen corruption. [How] Copy over all soc states to clock_limits before bounding box creation to avoid any cases with empty states. Fixes: f2459c52c84449 ("drm/amd/display: Add Bounding Box State for Low DF PState but High Voltage State") Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1514 Signed-off-by: Sung Lee Reviewed-by: Tony Cheng Reviewed-by: Yongqiang Sun Acked-by: Qingqing Zhuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 173488ab787ac..4a3df13c9e49a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1595,6 +1595,11 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn2_1_soc.num_chans = bw_params->num_channels; ASSERT(clk_table->num_entries); + /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ + for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { + clock_limits[i] = dcn2_1_soc.clock_limits[i]; + } + for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) { -- GitLab From c79f01b6eb5dc708573002fb3ba270918bcd1d32 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 10 Mar 2021 14:26:48 +0100 Subject: [PATCH 702/839] s390/cpumf: disable preemption when accessing per-cpu variable The following BUG message was triggered repeatedly when complete counter sets are extracted from the CPUMF: BUG: using smp_processor_id() in preemptible [00000000] code: psvc-readsets/7759 caller is cf_diag_needspace+0x2c/0x100 CPU: 7 PID: 7759 Comm: psvc-readsets Not tainted 5.12.0 Hardware name: IBM 3906 M03 703 (LPAR) Call Trace: [<00000000c7043f78>] show_stack+0x90/0xf8 [<00000000c705776a>] dump_stack+0xba/0x108 [<00000000c705d91c>] check_preemption_disabled+0xec/0xf0 [<00000000c63eb1c4>] cf_diag_needspace+0x2c/0x100 [<00000000c63ecbcc>] cf_diag_ioctl_start+0x10c/0x240 [<00000000c63ece9a>] cf_diag_ioctl+0x19a/0x238 [<00000000c675f3f4>] __s390x_sys_ioctl+0xc4/0x100 [<00000000c63ca762>] do_syscall+0x82/0xd0 [<00000000c705bdd8>] __do_syscall+0xc0/0xd8 [<00000000c706d532>] system_call+0x72/0x98 2 locks held by psvc-readsets/7759: #0: 00000000c75a57c0 (cpu_hotplug_lock){++++}-{0:0}, at: cf_diag_ioctl+0x44/0x238 #1: 00000000c75a3078 (cf_diag_ctrset_mutex){+.+.}-{3:3}, at: cf_diag_ioctl+0x54/0x238 This issue is a missing get_cpu_ptr/put_cpu_ptr pair in function cf_diag_needspace. Add it. Fixes: cf6acb8bdb1d ("s390/cpumf: Add support for complete counter set extraction") Reviewed-by: Heiko Carstens Signed-off-by: Thomas Richter Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_diag.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index bc302b86ce28f..2e3e7edbe3a0c 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -968,7 +968,7 @@ static int cf_diag_all_start(void) */ static size_t cf_diag_needspace(unsigned int sets) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); size_t bytes = 0; int i; @@ -984,6 +984,7 @@ static size_t cf_diag_needspace(unsigned int sets) sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, bytes); + put_cpu_ptr(&cpu_cf_events); return bytes; } -- GitLab From d54cb7d54877d529bc1e0e1f47a3dd082f73add3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 10 Mar 2021 14:23:37 +0100 Subject: [PATCH 703/839] s390/vtime: fix increased steal time accounting Commit 152e9b8676c6e ("s390/vtime: steal time exponential moving average") inadvertently changed the input value for account_steal_time() from "cputime_to_nsecs(steal)" to just "steal", resulting in broken increased steal time accounting. Fix this by changing it back to "cputime_to_nsecs(steal)". Fixes: 152e9b8676c6e ("s390/vtime: steal time exponential moving average") Cc: # 5.1 Reported-by: Sabine Forkel Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/kernel/vtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 73c7afcc0527a..f216a1b2f8257 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -214,7 +214,7 @@ void vtime_flush(struct task_struct *tsk) avg_steal = S390_lowcore.avg_steal_timer / 2; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } S390_lowcore.avg_steal_timer = avg_steal; -- GitLab From 0b13525c20febcfecccf6fc1db5969727401317d Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 10 Mar 2021 13:46:26 +0100 Subject: [PATCH 704/839] s390/pci: fix leak of PCI device structure In commit 05bc1be6db4b2 ("s390/pci: create zPCI bus") we removed the pci_dev_put() call matching the earlier pci_get_slot() done as part of __zpci_event_availability(). This was based on the wrong understanding that the device_put() done as part of pci_destroy_device() would counter the pci_get_slot() when it only counters the initial reference. This same understanding and existing bad example also lead to not doing a pci_dev_put() in zpci_remove_device(). Since releasing the PCI devices, unlike releasing the PCI slot, does not print any debug message for testing I added one in pci_release_dev(). This revealed that we are indeed leaking the PCI device on PCI hotunplug. Further testing also revealed another missing pci_dev_put() in disable_slot(). Fix this by adding the missing pci_dev_put() in disable_slot() and fix zpci_remove_device() with the correct pci_dev_put() calls. Also instead of calling pci_get_slot() in __zpci_event_availability() to determine if a PCI device is registered and then doing the same again in zpci_remove_device() do this once in zpci_remove_device() which makes sure that the pdev in __zpci_event_availability() is only used for the result of pci_scan_single_device() which does not need a reference count decremnt as its ownership goes to the PCI bus. Also move the check if zdev->zbus->bus is set into zpci_remove_device() since it may be that we're removing a device with devfn != 0 which never had a PCI bus. So we can still set the pdev->error_state to indicate that the device is not usable anymore, add a flag to set the error state. Fixes: 05bc1be6db4b2 ("s390/pci: create zPCI bus") Cc: # 5.8+: e1bff843cde6 s390/pci: remove superfluous zdev->zbus check Cc: # 5.8+: ba764dd703fe s390/pci: refactor zpci_create_device() Cc: # 5.8+ Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci.h | 2 +- arch/s390/pci/pci.c | 28 ++++++++++++++++++++++++---- arch/s390/pci/pci_event.c | 18 ++++++------------ drivers/pci/hotplug/s390_pci_hpc.c | 3 ++- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 053fe8b8dec71..a75d94a9bcb2f 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -202,7 +202,7 @@ extern unsigned int s390_pci_no_rid; ----------------------------------------------------------------------------- */ /* Base stuff */ int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); -void zpci_remove_device(struct zpci_dev *zdev); +void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 600881d894dd0..91064077526df 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -682,16 +682,36 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); -void zpci_remove_device(struct zpci_dev *zdev) +/* zpci_remove_device - Removes the given zdev from the PCI core + * @zdev: the zdev to be removed from the PCI core + * @set_error: if true the device's error state is set to permanent failure + * + * Sets a zPCI device to a configured but offline state; the zPCI + * device is still accessible through its hotplug slot and the zPCI + * API but is removed from the common code PCI bus, making it + * no longer available to drivers. + */ +void zpci_remove_device(struct zpci_dev *zdev, bool set_error) { struct zpci_bus *zbus = zdev->zbus; struct pci_dev *pdev; + if (!zdev->zbus->bus) + return; + pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { - if (pdev->is_virtfn) - return zpci_iov_remove_virtfn(pdev, zdev->vfn); + if (set_error) + pdev->error_state = pci_channel_io_perm_failure; + if (pdev->is_virtfn) { + zpci_iov_remove_virtfn(pdev, zdev->vfn); + /* balance pci_get_slot */ + pci_dev_put(pdev); + return; + } pci_stop_and_remove_bus_device_locked(pdev); + /* balance pci_get_slot */ + pci_dev_put(pdev); } } @@ -765,7 +785,7 @@ void zpci_release_device(struct kref *kref) struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); if (zdev->zbus->bus) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b4162da4e8a20..ac0c65cdd69d9 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,13 +76,10 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = NULL; enum zpci_state state; + struct pci_dev *pdev; int ret; - if (zdev && zdev->zbus->bus) - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); @@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0303: /* Deconfiguration requested */ if (!zdev) break; - if (pdev) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); ret = zpci_disable_device(zdev); if (ret) @@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0304: /* Configured -> Standby|Reserved */ if (!zdev) break; - if (pdev) { - /* Give the driver a hint that the function is - * already unusable. */ - pdev->error_state = pci_channel_io_perm_failure; - zpci_remove_device(zdev); - } + /* Give the driver a hint that the function is + * already unusable. + */ + zpci_remove_device(zdev, true); zdev->fh = ccdf->fh; zpci_disable_device(zdev); diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index c9e790c74051f..a047c421debe2 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -93,8 +93,9 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pci_dev_put(pdev); return -EBUSY; } + pci_dev_put(pdev); - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); rc = zpci_disable_device(zdev); if (rc) -- GitLab From b95bc12e0412d14d5fc764f0b82631c7bcaf1959 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yu Date: Tue, 16 Mar 2021 00:49:00 +0800 Subject: [PATCH 705/839] ALSA: hda/realtek: apply pin quirk for XiaomiNotebook Pro Built-in microphone and combojack on Xiaomi Notebook Pro (1d72:1701) needs to be fixed, the existing quirk for Dell works well on that machine. Signed-off-by: Xiaoliang Yu Cc: Link: https://lore.kernel.org/r/OS0P286MB02749B9E13920E6899902CD8EE6C9@OS0P286MB0274.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c6d916177d5cd..345a447aba813 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8242,6 +8242,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), -- GitLab From febb0cc847e7efd0b2b6cabf9f0e82b13dbadbec Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 10 Mar 2021 19:22:12 -0600 Subject: [PATCH 706/839] scsi: ibmvfc: Free channel_setup_buf during device tear down The buffer for negotiating channel setup is DMA allocated at device probe time. However, the remove path fails to free this allocation which will prevent the hypervisor from releasing the virtual device in the case of a hotplug remove. Fix this issue by freeing the buffer allocation in ibmvfc_free_mem(). Link: https://lore.kernel.org/r/20210311012212.428068-1-tyreld@linux.ibm.com Fixes: e95eef3fc0bc ("scsi: ibmvfc: Implement channel enquiry and setup commands") Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 338e29d3e025f..6a92891ac488d 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5784,6 +5784,8 @@ static void ibmvfc_free_mem(struct ibmvfc_host *vhost) vhost->disc_buf_dma); dma_free_coherent(vhost->dev, sizeof(*vhost->login_buf), vhost->login_buf, vhost->login_buf_dma); + dma_free_coherent(vhost->dev, sizeof(*vhost->channel_setup_buf), + vhost->channel_setup_buf, vhost->channel_setup_dma); dma_pool_destroy(vhost->sg_pool); ibmvfc_free_queue(vhost, async_q); LEAVE; -- GitLab From 2bb817712e2f77486d6ee17e7efaf91997a685f8 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 10 Mar 2021 22:30:05 -0800 Subject: [PATCH 707/839] scsi: myrs: Fix a double free in myrs_cleanup() In myrs_cleanup(), cs->mmio_base will be freed twice by iounmap(). Link: https://lore.kernel.org/r/20210311063005.9963-1-lyl2019@mail.ustc.edu.cn Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)") Reviewed-by: Hannes Reinecke Signed-off-by: Lv Yunlong Signed-off-by: Martin K. Petersen --- drivers/scsi/myrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 4adf9ded296aa..329fd025c7189 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2273,12 +2273,12 @@ static void myrs_cleanup(struct myrs_hba *cs) if (cs->mmio_base) { cs->disable_intr(cs); iounmap(cs->mmio_base); + cs->mmio_base = NULL; } if (cs->irq) free_irq(cs->irq, cs); if (cs->io_addr) release_region(cs->io_addr, 0x80); - iounmap(cs->mmio_base); pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); scsi_host_put(cs->host); -- GitLab From c8c165dea4c8f5ad67b1240861e4f6c5395fa4ac Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 10 Mar 2021 22:46:36 -0800 Subject: [PATCH 708/839] scsi: st: Fix a use after free in st_open() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In st_open(), if STp->in_use is true, STp will be freed by scsi_tape_put(). However, STp is still used by DEBC_printk() after. It is better to DEBC_printk() before scsi_tape_put(). Link: https://lore.kernel.org/r/20210311064636.10522-1-lyl2019@mail.ustc.edu.cn Acked-by: Kai Mäkisara Signed-off-by: Lv Yunlong Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 841ad2fc369a0..9ca536aae7849 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -1269,8 +1269,8 @@ static int st_open(struct inode *inode, struct file *filp) spin_lock(&st_use_lock); if (STp->in_use) { spin_unlock(&st_use_lock); - scsi_tape_put(STp); DEBC_printk(STp, "Device already in use.\n"); + scsi_tape_put(STp); return (-EBUSY); } -- GitLab From 5999b9e5b1f8a2f5417b755130919b3ac96f5550 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 14 Mar 2021 18:32:46 +0300 Subject: [PATCH 709/839] scsi: qla2xxx: Fix broken #endif placement Only half of the file is under include guard because terminating #endif is placed too early. Link: https://lore.kernel.org/r/YE4snvoW1SuwcXAn@localhost.localdomain Reviewed-by: Himanshu Madhani Signed-off-by: Alexey Dobriyan Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 10e5e6c8087dc..01620f3eab39f 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -116,7 +116,6 @@ (min(1270, ((ql) > 0) ? (QLA_TGT_DATASEGS_PER_CMD_24XX + \ QLA_TGT_DATASEGS_PER_CONT_24XX*((ql) - 1)) : 0)) #endif -#endif #define GET_TARGET_ID(ha, iocb) ((HAS_EXTENDED_IDS(ha)) \ ? le16_to_cpu((iocb)->u.isp2x.target.extended) \ @@ -244,6 +243,7 @@ struct ctio_to_2xxx { #ifndef CTIO_RET_TYPE #define CTIO_RET_TYPE 0x17 /* CTIO return entry */ #define ATIO_TYPE7 0x06 /* Accept target I/O entry for 24xx */ +#endif struct fcp_hdr { uint8_t r_ctl; -- GitLab From 19f1bc7edf0f97186810e13a88f5b62069d89097 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Mar 2021 10:42:11 +0300 Subject: [PATCH 710/839] scsi: lpfc: Fix some error codes in debugfs If copy_from_user() or kstrtoull() fail then the correct behavior is to return a negative error code. Link: https://lore.kernel.org/r/YEsbU/UxYypVrC7/@mwanda Fixes: f9bb2da11db8 ("[SCSI] lpfc 8.3.27: T10 additions for SLI4") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index bc79a017e1a21..46a8f2d1d2b83 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2421,7 +2421,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf, memset(dstbuf, 0, 33); size = (nbytes < 32) ? nbytes : 32; if (copy_from_user(dstbuf, buf, size)) - return 0; + return -EFAULT; if (dent == phba->debug_InjErrLBA) { if ((dstbuf[0] == 'o') && (dstbuf[1] == 'f') && @@ -2430,7 +2430,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf, } if ((tmp == 0) && (kstrtoull(dstbuf, 0, &tmp))) - return 0; + return -EINVAL; if (dent == phba->debug_writeGuard) phba->lpfc_injerr_wgrd_cnt = (uint32_t)tmp; -- GitLab From ca6883393f0fa7f13ec8b860dbcef423a759c4a2 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Tue, 16 Mar 2021 14:54:50 +0800 Subject: [PATCH 711/839] ALSA: hda/realtek: fix mute/micmute LEDs for HP 840 G8 The HP EliteBook 840 G8 Notebook PC is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210316065452.75659-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 345a447aba813..f06bcb1ac4f95 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8048,6 +8048,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- GitLab From e7d66cf799390166e90f9a5715f2eede4fe06d51 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Tue, 16 Mar 2021 15:46:24 +0800 Subject: [PATCH 712/839] ALSA: hda/realtek: fix mute/micmute LEDs for HP 440 G8 The HP EliteBook 840 G8 Notebook PC is using ALC236 codec which is using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210316074626.79895-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f06bcb1ac4f95..145d6cf4939e3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4225,6 +4225,12 @@ static void alc_fixup_hp_gpio_led(struct hda_codec *codec, } } +static void alc236_fixup_hp_gpio_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc_fixup_hp_gpio_led(codec, action, 0x02, 0x01); +} + static void alc269_fixup_hp_gpio_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6381,6 +6387,7 @@ enum { ALC294_FIXUP_ASUS_GX502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -7616,6 +7623,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC236_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_gpio_led, + }, [ALC236_FIXUP_HP_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led, @@ -8045,6 +8056,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), -- GitLab From 1601ea068b886da1f8f8d4e18b9403e9e24adef6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 15 Mar 2021 12:43:55 +0900 Subject: [PATCH 713/839] zonefs: prevent use of seq files as swap file The sequential write constraint of sequential zone file prevent their use as swap files. Only allow conventional zone files to be used as swap files. Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 0fe76f376dee2..a3d074f986604 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -165,6 +165,21 @@ static int zonefs_writepages(struct address_space *mapping, return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); } +static int zonefs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct inode *inode = file_inode(swap_file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { + zonefs_err(inode->i_sb, + "swap file: not a conventional zone file\n"); + return -EINVAL; + } + + return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); +} + static const struct address_space_operations zonefs_file_aops = { .readpage = zonefs_readpage, .readahead = zonefs_readahead, @@ -177,6 +192,7 @@ static const struct address_space_operations zonefs_file_aops = { .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .direct_IO = noop_direct_IO, + .swap_activate = zonefs_swap_activate, }; static void zonefs_update_stats(struct inode *inode, loff_t new_isize) -- GitLab From ebfd68cd0c1e81267c757332385cb96df30dacce Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 10 Mar 2021 15:20:28 +0900 Subject: [PATCH 714/839] zonefs: Fix O_APPEND async write handling zonefs updates the size of a sequential zone file inode only on completion of direct writes. When executing asynchronous append writes (with a file open with O_APPEND or using RWF_APPEND), the use of the current inode size in generic_write_checks() to set an iocb offset thus leads to unaligned write if an application issues an append write operation with another write already being executed. Fix this problem by introducing zonefs_write_checks() as a modified version of generic_write_checks() using the file inode wp_offset for an append write iocb offset. Also introduce zonefs_write_check_limits() to replace generic_write_check_limits() call. This zonefs special helper makes sure that the maximum file limit used is the maximum size of the file being accessed. Since zonefs_write_checks() already truncates the iov_iter, the calls to iov_iter_truncate() in zonefs_file_dio_write() and zonefs_file_buffered_write() are removed. Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 78 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index a3d074f986604..3427c99abb4d5 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -743,6 +743,68 @@ out_release: return ret; } +/* + * Do not exceed the LFS limits nor the file zone size. If pos is under the + * limit it becomes a short access. If it exceeds the limit, return -EFBIG. + */ +static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, + loff_t count) +{ + struct inode *inode = file_inode(file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t limit = rlimit(RLIMIT_FSIZE); + loff_t max_size = zi->i_max_size; + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + count = min(count, limit - pos); + } + + if (!(file->f_flags & O_LARGEFILE)) + max_size = min_t(loff_t, MAX_NON_LFS, max_size); + + if (unlikely(pos >= max_size)) + return -EFBIG; + + return min(count, max_size - pos); +} + +static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t count; + + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + + if (!iov_iter_count(from)) + return 0; + + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (iocb->ki_flags & IOCB_APPEND) { + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return -EINVAL; + mutex_lock(&zi->i_truncate_mutex); + iocb->ki_pos = zi->i_wpoffset; + mutex_unlock(&zi->i_truncate_mutex); + } + + count = zonefs_write_check_limits(file, iocb->ki_pos, + iov_iter_count(from)); + if (count < 0) + return count; + + iov_iter_truncate(from, count); + return iov_iter_count(from); +} + /* * Handle direct writes. For sequential zone files, this is the only possible * write path. For these files, check that the user is issuing writes @@ -760,8 +822,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) struct super_block *sb = inode->i_sb; bool sync = is_sync_kiocb(iocb); bool append = false; - size_t count; - ssize_t ret; + ssize_t ret, count; /* * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT @@ -779,12 +840,11 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } - ret = generic_write_checks(iocb, from); - if (ret <= 0) + count = zonefs_write_checks(iocb, from); + if (count <= 0) { + ret = count; goto inode_unlock; - - iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); - count = iov_iter_count(from); + } if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ret = -EINVAL; @@ -844,12 +904,10 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, inode_lock(inode); } - ret = generic_write_checks(iocb, from); + ret = zonefs_write_checks(iocb, from); if (ret <= 0) goto inode_unlock; - iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); - ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); if (ret > 0) iocb->ki_pos += ret; -- GitLab From 6cb59afe9e5b45a035bd6b97da6593743feefc72 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 15 Mar 2021 20:44:30 +0200 Subject: [PATCH 715/839] gpiolib: Assign fwnode to parent's if no primary one provided In case when the properties are supplied in the secondary fwnode (for example, built-in device properties) the fwnode pointer left unassigned. This makes unable to retrieve them. Assign fwnode to parent's if no primary one provided. Fixes: 7cba1a4d5e16 ("gpiolib: generalize devprop_gpiochip_set_names() for device properties") Fixes: 2afa97e9868f ("gpiolib: Read "gpio-line-names" from a firmware node") Reported-by: Bartosz Golaszewski Tested-by: Bartosz Golaszewski Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6635e4ec60cef..6367646dce83b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -571,6 +571,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key) { + struct fwnode_handle *fwnode = gc->parent ? dev_fwnode(gc->parent) : NULL; unsigned long flags; int ret = 0; unsigned i; @@ -594,6 +595,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, of_gpio_dev_init(gc, gdev); + /* + * Assign fwnode depending on the result of the previous calls, + * if none of them succeed, assign it to the parent's one. + */ + gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; + gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { ret = gdev->id; -- GitLab From d9bb77d51e668a1a6d4530c1ea471574d0ce465f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Mar 2021 13:39:14 +0800 Subject: [PATCH 716/839] btrfs: subpage: fix wild pointer access during metadata read failure [BUG] When running fstests for btrfs subpage read-write test, it has a very high chance to crash at generic/475 with the following stack: BTRFS warning (device dm-8): direct IO failed ino 510 rw 1,34817 sector 0xcdf0 len 94208 err no 10 Unable to handle kernel paging request at virtual address ffff80001157e7c0 CPU: 2 PID: 687125 Comm: kworker/u12:4 Tainted: G WC 5.12.0-rc2-custom+ #5 Hardware name: Khadas VIM3 (DT) Workqueue: btrfs-endio-meta btrfs_work_helper [btrfs] pc : queued_spin_lock_slowpath+0x1a0/0x390 lr : do_raw_spin_lock+0xc4/0x11c Call trace: queued_spin_lock_slowpath+0x1a0/0x390 _raw_spin_lock+0x68/0x84 btree_readahead_hook+0x38/0xc0 [btrfs] end_bio_extent_readpage+0x504/0x5f4 [btrfs] bio_endio+0x170/0x1a4 end_workqueue_fn+0x3c/0x60 [btrfs] btrfs_work_helper+0x1b0/0x1b4 [btrfs] process_one_work+0x22c/0x430 worker_thread+0x70/0x3a0 kthread+0x13c/0x140 ret_from_fork+0x10/0x30 Code: 910020e0 8b0200c2 f861d884 aa0203e1 (f8246827) [CAUSE] In end_bio_extent_readpage(), if we hit an error during read, we will handle the error differently for data and metadata. For data we queue a repair, while for metadata, we record the error and let the caller choose what to do. But the code is still using page->private to grab extent buffer, which no longer points to extent buffer for subpage metadata pages. Thus this wild pointer access leads to above crash. [FIX] Introduce a helper, find_extent_buffer_readpage(), to grab extent buffer. The difference against find_extent_buffer_nospinlock() is: - Also handles regular sectorsize == PAGE_SIZE case - No extent buffer refs increase/decrease As extent buffer under IO must have non-zero refs, so this is safe Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f3d7be975c3a5..0d00a9cd123a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2885,6 +2885,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) btrfs_subpage_end_reader(fs_info, page, start, len); } +/* + * Find extent buffer for a givne bytenr. + * + * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking + * in endio context. + */ +static struct extent_buffer *find_extent_buffer_readpage( + struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) +{ + struct extent_buffer *eb; + + /* + * For regular sectorsize, we can use page->private to grab extent + * buffer + */ + if (fs_info->sectorsize == PAGE_SIZE) { + ASSERT(PagePrivate(page) && page->private); + return (struct extent_buffer *)page->private; + } + + /* For subpage case, we need to lookup buffer radix tree */ + rcu_read_lock(); + eb = radix_tree_lookup(&fs_info->buffer_radix, + bytenr >> fs_info->sectorsize_bits); + rcu_read_unlock(); + ASSERT(eb); + return eb; +} + /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio) } else { struct extent_buffer *eb; - eb = (struct extent_buffer *)page->private; + eb = find_extent_buffer_readpage(fs_info, page, start); set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = mirror; atomic_dec(&eb->io_pages); -- GitLab From 60484cd9d50117017cf53d5310c6cd629600dc69 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Mar 2021 13:39:15 +0800 Subject: [PATCH 717/839] btrfs: subpage: make readahead work properly In readahead infrastructure, we are using a lot of hard coded PAGE_SHIFT while we're not doing anything specific to PAGE_SIZE. One of the most affected part is the radix tree operation of btrfs_fs_info::reada_tree. If using PAGE_SHIFT, subpage metadata readahead is broken and does no help reading metadata ahead. Fix the problem by using btrfs_fs_info::sectorsize_bits so that readahead could work for subpage. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/reada.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20fd4aa48a8ce..06713a8fe26b4 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err) /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, - eb->start >> PAGE_SHIFT); + eb->start >> fs_info->sectorsize_bits); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); @@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)(zone->end >> PAGE_SHIFT), - zone); + (unsigned long)(zone->end >> fs_info->sectorsize_bits), + zone); if (ret == -EEXIST) { kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else @@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, u64 length; int real_stripes; int nzones = 0; - unsigned long index = logical >> PAGE_SHIFT; + unsigned long index = logical >> fs_info->sectorsize_bits; int dev_replace_is_ongoing; int have_zone = 0; @@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { int i; - unsigned long index = re->logical >> PAGE_SHIFT; + unsigned long index = re->logical >> fs_info->sectorsize_bits; spin_lock(&fs_info->reada_lock); if (--re->refcnt) { @@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, static void reada_zone_release(struct kref *kref) { struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); + struct btrfs_fs_info *fs_info = zone->device->fs_info; - lockdep_assert_held(&zone->device->fs_info->reada_lock); + lockdep_assert_held(&fs_info->reada_lock); radix_tree_delete(&zone->device->reada_zones, - zone->end >> PAGE_SHIFT); + zone->end >> fs_info->sectorsize_bits); kfree(zone); } @@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical, static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) { int i; - unsigned long index = zone->end >> PAGE_SHIFT; + unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits; for (i = 0; i < zone->ndevs; ++i) { struct reada_zone *peer; @@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev) (void **)&zone, index, 1); if (ret == 0) break; - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> dev->fs_info->sectorsize_bits) + 1; if (zone->locked) { if (zone->elems > top_locked_elems) { top_locked_elems = zone->elems; @@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) * plugging to speed things up */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { @@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) } re = NULL; ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); } if (ret == 0) { spin_unlock(&fs_info->reada_lock); @@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) pr_cont(" curr off %llu", device->reada_next - zone->start); pr_cont("\n"); - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> fs_info->sectorsize_bits) + 1; } cnt = 0; index = 0; @@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; if (++cnt > 15) break; } @@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; if (!re->scheduled) { - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; continue; } pr_debug("re: logical %llu size %u list empty %d scheduled %d", @@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; } spin_unlock(&fs_info->reada_lock); } -- GitLab From f8d70fd6a5a7a38a95eb8021e00d2e547f88efec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Mar 2021 14:58:02 +0100 Subject: [PATCH 718/839] MAINTAINERS: move some real subsystems off of the staging mailing list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VME and Android drivers still have their MAINTAINERS entries pointing to the "driverdevel" mailing list, due to them having their codebase move out of the drivers/staging/ directory, but no one remembered to change the mailing list entries. Move them both to linux-kernel for lack of a more specific place at the moment. These are both low-volume areas of the kernel, so this shouldn't be an issue. Cc: Martyn Welch Cc: Manohar Vanga Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Joel Fernandes Cc: Christian Brauner Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Reported-by: Konstantin Ryabitsev Link: https://lore.kernel.org/r/YEzE6u6U1jkBatmr@kroah.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa84121c56117..d7c25c0fc08a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1181,7 +1181,7 @@ M: Joel Fernandes M: Christian Brauner M: Hridya Valsaraju M: Suren Baghdasaryan -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/android/ @@ -19135,7 +19135,7 @@ VME SUBSYSTEM M: Martyn Welch M: Manohar Vanga M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: Documentation/driver-api/vme.rst -- GitLab From e06da9ea3e3f6746a849edeae1d09ee821f5c2ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Mar 2021 11:23:11 +0100 Subject: [PATCH 719/839] MAINTAINERS: move the staging subsystem to lists.linux.dev The drivers/staging/ tree has a new mailing list, linux-staging@lists.linux.dev, so move the MAINTAINER entry to point to it so that we get patches sent to the proper place. There was no need to specify a list for the hikey9xx driver, the tools pick up the "base" list for drivers/staging/* so remove that line to make the file simpler. Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210316102311.182375-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d7c25c0fc08a0..9e876927c60d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8116,7 +8116,6 @@ F: drivers/crypto/hisilicon/sec2/sec_main.c HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab -L: devel@driverdev.osuosl.org S: Maintained F: drivers/staging/hikey9xx/ @@ -17040,7 +17039,7 @@ F: drivers/staging/vt665?/ STAGING SUBSYSTEM M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-staging@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/staging/ -- GitLab From 2e5848a3d86f03024ae096478bdb892ab3d79131 Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Mon, 15 Mar 2021 15:59:14 -0400 Subject: [PATCH 720/839] staging: comedi: cb_pcidas: fix request_irq() warn request_irq() wont accept a name which contains slash so we need to repalce it with something else -- otherwise it will trigger a warning and the entry in /proc/irq/ will not be created since the .name might be used by userspace and we don't want to break userspace, so we are changing the parameters passed to request_irq() [ 1.630764] name 'pci-das1602/16' [ 1.630950] WARNING: CPU: 0 PID: 181 at fs/proc/generic.c:180 __xlate_proc_name+0x93/0xb0 [ 1.634009] RIP: 0010:__xlate_proc_name+0x93/0xb0 [ 1.639441] Call Trace: [ 1.639976] proc_mkdir+0x18/0x20 [ 1.641946] request_threaded_irq+0xfe/0x160 [ 1.642186] cb_pcidas_auto_attach+0xf4/0x610 [cb_pcidas] Suggested-by: Ian Abbott Reviewed-by: Ian Abbott Signed-off-by: Tong Zhang Link: https://lore.kernel.org/r/20210315195914.4801-1-ztong0001@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c index d740c47827751..2f20bd56ec6ca 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas.c +++ b/drivers/staging/comedi/drivers/cb_pcidas.c @@ -1281,7 +1281,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, devpriv->amcc + AMCC_OP_REG_INTCSR); ret = request_irq(pcidev->irq, cb_pcidas_interrupt, IRQF_SHARED, - dev->board_name, dev); + "cb_pcidas", dev); if (ret) { dev_dbg(dev->class_dev, "unable to allocate irq %d\n", pcidev->irq); -- GitLab From d2d106fe3badfc3bf0dd3899d1c3f210c7203eab Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Mon, 15 Mar 2021 15:58:12 -0400 Subject: [PATCH 721/839] staging: comedi: cb_pcidas64: fix request_irq() warn request_irq() wont accept a name which contains slash so we need to repalce it with something else -- otherwise it will trigger a warning and the entry in /proc/irq/ will not be created since the .name might be used by userspace and we don't want to break userspace, so we are changing the parameters passed to request_irq() [ 1.565966] name 'pci-das6402/16' [ 1.566149] WARNING: CPU: 0 PID: 184 at fs/proc/generic.c:180 __xlate_proc_name+0x93/0xb0 [ 1.568923] RIP: 0010:__xlate_proc_name+0x93/0xb0 [ 1.574200] Call Trace: [ 1.574722] proc_mkdir+0x18/0x20 [ 1.576629] request_threaded_irq+0xfe/0x160 [ 1.576859] auto_attach+0x60a/0xc40 [cb_pcidas64] Suggested-by: Ian Abbott Reviewed-by: Ian Abbott Signed-off-by: Tong Zhang Link: https://lore.kernel.org/r/20210315195814.4692-1-ztong0001@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/cb_pcidas64.c b/drivers/staging/comedi/drivers/cb_pcidas64.c index fa987bb0e7cd4..6d3ba399a7f0b 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas64.c +++ b/drivers/staging/comedi/drivers/cb_pcidas64.c @@ -4035,7 +4035,7 @@ static int auto_attach(struct comedi_device *dev, init_stc_registers(dev); retval = request_irq(pcidev->irq, handle_interrupt, IRQF_SHARED, - dev->board_name, dev); + "cb_pcidas64", dev); if (retval) { dev_dbg(dev->class_dev, "unable to allocate irq %u\n", pcidev->irq); -- GitLab From 8ca88d53351cc58d535b2bfc7386835378fb0db2 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Mon, 15 Mar 2021 23:01:31 +0530 Subject: [PATCH 722/839] ASoC: simple-card-utils: Do not handle device clock This reverts commit 1e30f642cf29 ("ASoC: simple-card-utils: Fix device module clock"). The original patch ended up breaking following platform, which depends on set_sysclk() to configure internal PLL on wm8904 codec and expects simple-card-utils to not update the MCLK rate. - "arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var3-ads2.dts" It would be best if codec takes care of setting MCLK clock via DAI set_sysclk() callback. Reported-by: Michael Walle Suggested-by: Mark Brown Suggested-by: Michael Walle Fixes: 1e30f642cf29 ("ASoC: simple-card-utils: Fix device module clock") Signed-off-by: Sameer Pujar Tested-by: Michael Walle Link: https://lore.kernel.org/r/1615829492-8972-2-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index ab31045cfc952..6cada4c1e283b 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -172,15 +172,16 @@ int asoc_simple_parse_clk(struct device *dev, * or device's module clock. */ clk = devm_get_clk_from_child(dev, node, NULL); - if (IS_ERR(clk)) - clk = devm_get_clk_from_child(dev, dlc->of_node, NULL); - if (!IS_ERR(clk)) { - simple_dai->clk = clk; simple_dai->sysclk = clk_get_rate(clk); - } else if (!of_property_read_u32(node, "system-clock-frequency", - &val)) { + + simple_dai->clk = clk; + } else if (!of_property_read_u32(node, "system-clock-frequency", &val)) { simple_dai->sysclk = val; + } else { + clk = devm_get_clk_from_child(dev, dlc->of_node, NULL); + if (!IS_ERR(clk)) + simple_dai->sysclk = clk_get_rate(clk); } if (of_property_read_bool(node, "system-clock-direction-out")) -- GitLab From dbf54a9534350d6aebbb34f5c1c606b81a4f35dd Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Mon, 15 Mar 2021 23:01:32 +0530 Subject: [PATCH 723/839] ASoC: rt5659: Update MCLK rate in set_sysclk() Simple-card/audio-graph-card drivers do not handle MCLK clock when it is specified in the codec device node. The expectation here is that, the codec should actually own up the MCLK clock and do necessary setup in the driver. Suggested-by: Mark Brown Suggested-by: Michael Walle Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1615829492-8972-3-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5659.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/rt5659.c b/sound/soc/codecs/rt5659.c index 41e5917b16a5e..91a4ef7f620ca 100644 --- a/sound/soc/codecs/rt5659.c +++ b/sound/soc/codecs/rt5659.c @@ -3426,12 +3426,17 @@ static int rt5659_set_component_sysclk(struct snd_soc_component *component, int { struct rt5659_priv *rt5659 = snd_soc_component_get_drvdata(component); unsigned int reg_val = 0; + int ret; if (freq == rt5659->sysclk && clk_id == rt5659->sysclk_src) return 0; switch (clk_id) { case RT5659_SCLK_S_MCLK: + ret = clk_set_rate(rt5659->mclk, freq); + if (ret) + return ret; + reg_val |= RT5659_SCLK_SRC_MCLK; break; case RT5659_SCLK_S_PLL1: -- GitLab From 899b12542b0897f92de9ba30944937c39ebb246d Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 16 Mar 2021 08:52:54 +0800 Subject: [PATCH 724/839] ASoC: rt711: add snd_soc_component remove callback We do some IO operations in the snd_soc_component_set_jack callback function and snd_soc_component_set_jack() will be called when soc component is removed. However, we should not access SoundWire registers when the bus is suspended. So set regcache_cache_only(regmap, true) to avoid accessing in the soc component removal process. Signed-off-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Rander Wang Link: https://lore.kernel.org/r/20210316005254.29699-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 85f744184a60f..047f4e677d78c 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -895,6 +895,13 @@ static int rt711_probe(struct snd_soc_component *component) return 0; } +static void rt711_remove(struct snd_soc_component *component) +{ + struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component); + + regcache_cache_only(rt711->regmap, true); +} + static const struct snd_soc_component_driver soc_codec_dev_rt711 = { .probe = rt711_probe, .set_bias_level = rt711_set_bias_level, @@ -905,6 +912,7 @@ static const struct snd_soc_component_driver soc_codec_dev_rt711 = { .dapm_routes = rt711_audio_map, .num_dapm_routes = ARRAY_SIZE(rt711_audio_map), .set_jack = rt711_set_jack_detect, + .remove = rt711_remove, }; static int rt711_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream, -- GitLab From 9deef665f5811a7ad22b5e6eb80fe2a14ba4494c Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 16 Mar 2021 16:40:53 +0800 Subject: [PATCH 725/839] ASoC: dt-bindings: fsl_spdif: Add compatible string for new platforms Add compatible string for new added platforms which support spdif module. They are i.MX8QXP, i.MX8MM, i.MX8MN, i.MX8MQ. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1615884053-4264-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/fsl,spdif.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/fsl,spdif.yaml b/Documentation/devicetree/bindings/sound/fsl,spdif.yaml index 50449b6d10484..4454aca34d56d 100644 --- a/Documentation/devicetree/bindings/sound/fsl,spdif.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,spdif.yaml @@ -21,6 +21,10 @@ properties: - fsl,vf610-spdif - fsl,imx6sx-spdif - fsl,imx8qm-spdif + - fsl,imx8qxp-spdif + - fsl,imx8mq-spdif + - fsl,imx8mm-spdif + - fsl,imx8mn-spdif reg: maxItems: 1 -- GitLab From 53b861bec737c189cc14ec3b5785d0f13445ac0f Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Tue, 16 Mar 2021 17:42:35 +0800 Subject: [PATCH 726/839] ALSA: hda/realtek: fix mute/micmute LEDs for HP 850 G8 The HP EliteBook 850 G8 Notebook PC is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210316094236.89028-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 145d6cf4939e3..316b9b4ccb32d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8060,6 +8060,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), -- GitLab From f8425c9396639cc462bcce44b1051f8b4e62fddb Mon Sep 17 00:00:00 2001 From: Alessio Balsini Date: Mon, 25 Jan 2021 15:30:51 +0000 Subject: [PATCH 727/839] fuse: 32-bit user space ioctl compat for fuse device With a 64-bit kernel build the FUSE device cannot handle ioctl requests coming from 32-bit user space. This is due to the ioctl command translation that generates different command identifiers that thus cannot be used for direct comparisons without proper manipulation. Explicitly extract type and number from the ioctl command to enable 32-bit user space compatibility on 64-bit kernel builds. Signed-off-by: Alessio Balsini Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 26 ++++++++++++++++---------- include/uapi/linux/fuse.h | 3 ++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c6636b4c4ccf2..c0fee830a34ed 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2229,19 +2229,21 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int err = -ENOTTY; + int res; + int oldfd; + struct fuse_dev *fud = NULL; - if (cmd == FUSE_DEV_IOC_CLONE) { - int oldfd; + if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC) + return -ENOTTY; - err = -EFAULT; - if (!get_user(oldfd, (__u32 __user *) arg)) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FUSE_DEV_IOC_CLONE): + res = -EFAULT; + if (!get_user(oldfd, (__u32 __user *)arg)) { struct file *old = fget(oldfd); - err = -EINVAL; + res = -EINVAL; if (old) { - struct fuse_dev *fud = NULL; - /* * Check against file->f_op because CUSE * uses the same ioctl handler. @@ -2252,14 +2254,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, if (fud) { mutex_lock(&fuse_mutex); - err = fuse_device_clone(fud->fc, file); + res = fuse_device_clone(fud->fc, file); mutex_unlock(&fuse_mutex); } fput(old); } } + break; + default: + res = -ENOTTY; + break; } - return err; + return res; } const struct file_operations fuse_dev_operations = { diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 98ca64d1beb6f..54442612c48bd 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -903,7 +903,8 @@ struct fuse_notify_retrieve_in { }; /* Device ioctls: */ -#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_MAGIC 229 +#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) struct fuse_lseek_in { uint64_t fh; -- GitLab From ef4cb70a4c22bf301cd757dcc838dc8ca9526477 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Mar 2021 18:14:53 +0200 Subject: [PATCH 728/839] genirq/irq_sim: Fix typos in kernel doc (fnode -> fwnode) Fix typos in kernel doc, otherwise validation script complains: .../irq_sim.c:170: warning: Function parameter or member 'fwnode' not described in 'irq_domain_create_sim' .../irq_sim.c:170: warning: Excess function parameter 'fnode' description in 'irq_domain_create_sim' .../irq_sim.c:240: warning: Function parameter or member 'fwnode' not described in 'devm_irq_domain_create_sim' .../irq_sim.c:240: warning: Excess function parameter 'fnode' description in 'devm_irq_domain_create_sim' Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210302161453.28540-1-andriy.shevchenko@linux.intel.com --- kernel/irq/irq_sim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 48006608baf0c..40880c350b95d 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -159,7 +159,7 @@ static const struct irq_domain_ops irq_sim_domain_ops = { * irq_domain_create_sim - Create a new interrupt simulator irq_domain and * allocate a range of dummy interrupts. * - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate. * * On success: return a new irq_domain object. @@ -228,7 +228,7 @@ static void devm_irq_domain_release_sim(struct device *dev, void *res) * a managed device. * * @dev: Device to initialize the simulator object for. - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate * * On success: return a new irq_domain object. -- GitLab From 7dc4b2fdb27242faf40fc20ef83372b7033af050 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Thu, 25 Feb 2021 11:25:02 -0800 Subject: [PATCH 729/839] vfio/type1: fix unmap all on ILP32 Some ILP32 architectures support mapping a 32-bit vaddr within a 64-bit iova space. The unmap-all code uses 32-bit SIZE_MAX as an upper bound on the extent of the mappings within iova space, so mappings above 4G cannot be found and unmapped. Use U64_MAX instead, and use u64 for size variables. This also fixes a static analysis bug found by the kernel test robot running smatch for ILP32. Fixes: 0f53afa12bae ("vfio/type1: unmap cleanup") Fixes: c19650995374 ("vfio/type1: implement unmap all") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Steve Sistare Message-Id: <1614281102-230747-1-git-send-email-steven.sistare@oracle.com> Link: https://lore.kernel.org/linux-mm/20210222141043.GW2222@kadam Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 4bb162c1d649b..12d9905b429fd 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -189,7 +189,7 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, } static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, - dma_addr_t start, size_t size) + dma_addr_t start, u64 size) { struct rb_node *res = NULL; struct rb_node *node = iommu->dma_list.rb_node; @@ -1288,7 +1288,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, int ret = -EINVAL, retries = 0; unsigned long pgshift; dma_addr_t iova = unmap->iova; - unsigned long size = unmap->size; + u64 size = unmap->size; bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR; struct rb_node *n, *first_n; @@ -1304,14 +1304,12 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap_all) { if (iova || size) goto unlock; - size = SIZE_MAX; - } else if (!size || size & (pgsize - 1)) { + size = U64_MAX; + } else if (!size || size & (pgsize - 1) || + iova + size - 1 < iova || size > SIZE_MAX) { goto unlock; } - if (iova + size - 1 < iova || size > SIZE_MAX) - goto unlock; - /* When dirty tracking is enabled, allow only min supported pgsize */ if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) { -- GitLab From 179209fa12709a3df8888c323b37315da2683c24 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 23 Feb 2021 15:17:46 -0400 Subject: [PATCH 730/839] vfio: IOMMU_API should be selected As IOMMU_API is a kconfig without a description (eg does not show in the menu) the correct operator is select not 'depends on'. Using 'depends on' for this kind of symbol means VFIO is not selectable unless some other random kconfig has already enabled IOMMU_API for it. Fixes: cba3345cc494 ("vfio: VFIO core") Signed-off-by: Jason Gunthorpe Message-Id: <1-v1-df057e0f92c3+91-vfio_arm_compile_test_jgg@nvidia.com> Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 5533df91b257d..90c0525b1e0cf 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -21,7 +21,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" - depends on IOMMU_API + select IOMMU_API select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) help VFIO provides a framework for secure userspace device drivers. -- GitLab From d3d72a6dfffd3fcaac969786118162b596227f70 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 23 Feb 2021 15:17:47 -0400 Subject: [PATCH 731/839] vfio-platform: Add COMPILE_TEST to VFIO_PLATFORM x86 can build platform bus code too, so vfio-platform and all the platform reset implementations compile successfully on x86. Signed-off-by: Jason Gunthorpe Message-Id: <2-v1-df057e0f92c3+91-vfio_arm_compile_test_jgg@nvidia.com> Signed-off-by: Alex Williamson --- drivers/vfio/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index dc1a3c44f2c62..233efde219cc1 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && EVENTFD && (ARM || ARM64) + depends on VFIO && EVENTFD && (ARM || ARM64 || COMPILE_TEST) select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make -- GitLab From 3b49dfb08c750d4745ad42ec042288aba932b9d5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 23 Feb 2021 15:17:48 -0400 Subject: [PATCH 732/839] ARM: amba: Allow some ARM_AMBA users to compile with COMPILE_TEST CONFIG_VFIO_AMBA has a light use of AMBA, adding some inline fallbacks when AMBA is disabled will allow it to be compiled under COMPILE_TEST and make VFIO easier to maintain. Signed-off-by: Jason Gunthorpe Message-Id: <3-v1-df057e0f92c3+91-vfio_arm_compile_test_jgg@nvidia.com> Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/Kconfig | 2 +- include/linux/amba/bus.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 233efde219cc1..ab341108a0be9 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -12,7 +12,7 @@ config VFIO_PLATFORM config VFIO_AMBA tristate "VFIO support for AMBA devices" - depends on VFIO_PLATFORM && ARM_AMBA + depends on VFIO_PLATFORM && (ARM_AMBA || COMPILE_TEST) help Support for ARM AMBA devices with VFIO. This is required to make use of ARM AMBA devices present on the system using the VFIO diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6cc93ab5b8096..c68d87b872836 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -105,8 +105,19 @@ extern struct bus_type amba_bustype; #define amba_get_drvdata(d) dev_get_drvdata(&d->dev) #define amba_set_drvdata(d,p) dev_set_drvdata(&d->dev, p) +#ifdef CONFIG_ARM_AMBA int amba_driver_register(struct amba_driver *); void amba_driver_unregister(struct amba_driver *); +#else +static inline int amba_driver_register(struct amba_driver *drv) +{ + return -EINVAL; +} +static inline void amba_driver_unregister(struct amba_driver *drv) +{ +} +#endif + struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t); void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); -- GitLab From b2b12db53507bc97d96f6b7cb279e831e5eafb00 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 4 Mar 2021 21:30:03 -0400 Subject: [PATCH 733/839] vfio: Depend on MMU VFIO_IOMMU_TYPE1 does not compile with !MMU: ../drivers/vfio/vfio_iommu_type1.c: In function 'follow_fault_pfn': ../drivers/vfio/vfio_iommu_type1.c:536:22: error: implicit declaration of function 'pte_write'; did you mean 'vfs_write'? [-Werror=implicit-function-declaration] So require it. Suggested-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <0-v1-02cb5500df6e+78-vfio_no_mmu_jgg@nvidia.com> Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 90c0525b1e0cf..67d0bf4efa160 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -22,7 +22,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) help VFIO provides a framework for secure userspace device drivers. See Documentation/driver-api/vfio.rst for more details. -- GitLab From 4ab4fcfce5b540227d80eb32f1db45ab615f7c92 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Mon, 8 Mar 2021 12:24:52 -0500 Subject: [PATCH 734/839] vfio/type1: fix vaddr_get_pfns() return in vfio_pin_page_external() vaddr_get_pfns() now returns the positive number of pfns successfully gotten instead of zero. vfio_pin_page_external() might return 1 to vfio_iommu_type1_pin_pages(), which will treat it as an error, if vaddr_get_pfns() is successful but vfio_pin_page_external() doesn't reach vfio_lock_acct(). Fix it up in vfio_pin_page_external(). Found by inspection. Fixes: be16c1fd99f4 ("vfio/type1: Change success value of vaddr_get_pfn()") Signed-off-by: Daniel Jordan Message-Id: <20210308172452.38864-1-daniel.m.jordan@oracle.com> Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 12d9905b429fd..be444407664af 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -785,7 +785,12 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, return -ENODEV; ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); - if (ret == 1 && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { + if (ret != 1) + goto out; + + ret = 0; + + if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { ret = vfio_lock_acct(dma, 1, true); if (ret) { put_pfn(*pfn_base, dma->prot); @@ -797,6 +802,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, } } +out: mmput(mm); return ret; } -- GitLab From 70fb3e41a97a5fecc0aedc9a429479d702c3ab66 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 15 Mar 2021 16:38:00 -0700 Subject: [PATCH 735/839] KVM: x86/mmu: Fix RCU usage in handle_removed_tdp_mmu_page The pt passed into handle_removed_tdp_mmu_page does not need RCU protection, as it is not at any risk of being freed by another thread at that point. However, the implicit cast from tdp_sptep_t to u64 * dropped the __rcu annotation without a proper rcu_derefrence. Fix this by passing the pt as a tdp_ptep_t and then rcu_dereferencing it in the function. Suggested-by: Sean Christopherson Reported-by: kernel test robot Signed-off-by: Ben Gardon Message-Id: <20210315233803.2706477-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d78915019b082..db2936cca4bfa 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -301,11 +301,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp, * * Given a page table that has been removed from the TDP paging structure, * iterates through the page table to clear SPTEs and free child page tables. + * + * Note that pt is passed in as a tdp_ptep_t, but it does not need RCU + * protection. Since this thread removed it from the paging structure, + * this thread will be responsible for ensuring the page is freed. Hence the + * early rcu_dereferences in the function. */ -static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, +static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, bool shared) { - struct kvm_mmu_page *sp = sptep_to_sp(pt); + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); int level = sp->role.level; gfn_t base_gfn = sp->gfn; u64 old_child_spte; @@ -318,7 +323,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, tdp_mmu_unlink_page(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - sptep = pt + i; + sptep = rcu_dereference(pt) + i; gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)); if (shared) { -- GitLab From 14f6fec2e8e04b83c87c339b8d8ff4cc62b23d35 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 15 Mar 2021 16:38:01 -0700 Subject: [PATCH 736/839] KVM: x86/mmu: Fix RCU usage when atomically zapping SPTEs Fix a missing rcu_dereference in tdp_mmu_zap_spte_atomic. Reported-by: kernel test robot Signed-off-by: Ben Gardon Message-Id: <20210315233803.2706477-3-bgardon@google.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index db2936cca4bfa..946da74e069cf 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -543,7 +543,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * here since the SPTE is going from non-present * to non-present. */ - WRITE_ONCE(*iter->sptep, 0); + WRITE_ONCE(*rcu_dereference(iter->sptep), 0); return true; } -- GitLab From b601c3bc9d5053065acdaa1481c21481d0dc3f10 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 15 Mar 2021 16:38:02 -0700 Subject: [PATCH 737/839] KVM: x86/mmu: Factor out tdp_iter_return_to_root In tdp_mmu_iter_cond_resched there is a call to tdp_iter_start which causes the iterator to continue its walk over the paging structure from the root. This is needed after a yield as paging structure could have been freed in the interim. The tdp_iter_start call is not very clear and something of a hack. It requires exposing tdp_iter fields not used elsewhere in tdp_mmu.c and the effect is not obvious from the function name. Factor a more aptly named function out of tdp_iter_start and call it from tdp_mmu_iter_cond_resched and tdp_iter_start. No functional change intended. Signed-off-by: Ben Gardon Message-Id: <20210315233803.2706477-4-bgardon@google.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_iter.c | 24 +++++++++++++++++------- arch/x86/kvm/mmu/tdp_iter.h | 1 + arch/x86/kvm/mmu/tdp_mmu.c | 4 +--- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index e5f148106e206..f7f94ea65243c 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -20,6 +20,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level) return gfn & -KVM_PAGES_PER_HPAGE(level); } +/* + * Return the TDP iterator to the root PT and allow it to continue its + * traversal over the paging structure from there. + */ +void tdp_iter_restart(struct tdp_iter *iter) +{ + iter->yielded_gfn = iter->next_last_level_gfn; + iter->level = iter->root_level; + + iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + tdp_iter_refresh_sptep(iter); + + iter->valid = true; +} + /* * Sets a TDP iterator to walk a pre-order traversal of the paging structure * rooted at root_pt, starting with the walk to translate next_last_level_gfn. @@ -31,16 +46,11 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); iter->next_last_level_gfn = next_last_level_gfn; - iter->yielded_gfn = iter->next_last_level_gfn; iter->root_level = root_level; iter->min_level = min_level; - iter->level = root_level; - iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt; + iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt; - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); - tdp_iter_refresh_sptep(iter); - - iter->valid = true; + tdp_iter_restart(iter); } /* diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 4cc177d75c4ae..8eb424d17c915 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -63,5 +63,6 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, int min_level, gfn_t next_last_level_gfn); void tdp_iter_next(struct tdp_iter *iter); tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter); +void tdp_iter_restart(struct tdp_iter *iter); #endif /* __KVM_X86_MMU_TDP_ITER_H */ diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 946da74e069cf..38b6b6936171b 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -664,9 +664,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, WARN_ON(iter->gfn > iter->next_last_level_gfn); - tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], - iter->root_level, iter->min_level, - iter->next_last_level_gfn); + tdp_iter_restart(iter); return true; } -- GitLab From 08889894cc82bc3b213bdb192f274358e5a6b78d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 15 Mar 2021 16:38:03 -0700 Subject: [PATCH 738/839] KVM: x86/mmu: Store the address space ID in the TDP iterator Store the address space ID in the TDP iterator so that it can be retrieved without having to bounce through the root shadow page. This streamlines the code and fixes a Sparse warning about not properly using rcu_dereference() when grabbing the ID from the root on the fly. Reported-by: kernel test robot Signed-off-by: Sean Christopherson Signed-off-by: Ben Gardon Message-Id: <20210315233803.2706477-5-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 5 +++++ arch/x86/kvm/mmu/tdp_iter.c | 6 +----- arch/x86/kvm/mmu/tdp_iter.h | 3 ++- arch/x86/kvm/mmu/tdp_mmu.c | 23 +++++------------------ 4 files changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index ec4fc28b325a0..1f6f98c76bdf1 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) return to_shadow_page(__pa(sptep)); } +static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) +{ + return sp->role.smm ? 1 : 0; +} + static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) { /* diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index f7f94ea65243c..b3ed302c1a359 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -49,6 +49,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, iter->root_level = root_level; iter->min_level = min_level; iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt; + iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt)); tdp_iter_restart(iter); } @@ -169,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter) iter->valid = false; } -tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter) -{ - return iter->pt_path[iter->root_level - 1]; -} - diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 8eb424d17c915..b1748b988d3ae 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -36,6 +36,8 @@ struct tdp_iter { int min_level; /* The iterator's current level within the paging structure */ int level; + /* The address space ID, i.e. SMM vs. regular. */ + int as_id; /* A snapshot of the value at sptep */ u64 old_spte; /* @@ -62,7 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level); void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, int min_level, gfn_t next_last_level_gfn); void tdp_iter_next(struct tdp_iter *iter); -tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter); void tdp_iter_restart(struct tdp_iter *iter); #endif /* __KVM_X86_MMU_TDP_ITER_H */ diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 38b6b6936171b..462b1f71c77f9 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, u64 old_spte, u64 new_spte, int level, bool shared); -static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) -{ - return sp->role.smm ? 1 : 0; -} - static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level) { bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); @@ -497,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte) { - u64 *root_pt = tdp_iter_root_pt(iter); - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - lockdep_assert_held_read(&kvm->mmu_lock); /* @@ -514,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, new_spte) != iter->old_spte) return false; - handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level, true); + handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + new_spte, iter->level, true); return true; } @@ -569,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte, bool record_acc_track, bool record_dirty_log) { - tdp_ptep_t root_pt = tdp_iter_root_pt(iter); - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - lockdep_assert_held_write(&kvm->mmu_lock); /* @@ -586,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level, false); + __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + new_spte, iter->level, false); if (record_acc_track) handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level); if (record_dirty_log) - handle_changed_spte_dirty_log(kvm, as_id, iter->gfn, + handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, iter->old_spte, new_spte, iter->level); } -- GitLab From 34e49994d0dcdb2d31d4d2908d04f4e9ce57e4d7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 15 Mar 2021 15:18:24 +0100 Subject: [PATCH 739/839] btrfs: fix slab cache flags for free space tree bitmap The free space tree bitmap slab cache is created with SLAB_RED_ZONE but that's a debugging flag and not always enabled. Also the other slabs are created with at least SLAB_MEM_SPREAD that we want as well to average the memory placement cost. Reported-by: Vlastimil Babka Fixes: 3acd48507dc4 ("btrfs: fix allocation of free space cache v1 bitmap pages") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ea5ede619220c..5092dea21448c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9005,7 +9005,7 @@ int __init btrfs_init_cachep(void) btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", PAGE_SIZE, PAGE_SIZE, - SLAB_RED_ZONE, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_free_space_bitmap_cachep) goto fail; -- GitLab From dbcc7d57bffc0c8cac9dac11bec548597d59a6a5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Mar 2021 14:31:05 +0000 Subject: [PATCH 740/839] btrfs: fix race when cloning extent buffer during rewind of an old root While resolving backreferences, as part of a logical ino ioctl call or fiemap, we can end up hitting a BUG_ON() when replaying tree mod log operations of a root, triggering a stack trace like the following: ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.c:1210! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 19054 Comm: crawl_335 Tainted: G W 5.11.0-2d11c0084b02-misc-next+ #89 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:__tree_mod_log_rewind+0x3b1/0x3c0 Code: 05 48 8d 74 10 (...) RSP: 0018:ffffc90001eb70b8 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffff88812344e400 RCX: ffffffffb28933b6 RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff88812344e42c RBP: ffffc90001eb7108 R08: 1ffff11020b60a20 R09: ffffed1020b60a20 R10: ffff888105b050f9 R11: ffffed1020b60a1f R12: 00000000000000ee R13: ffff8880195520c0 R14: ffff8881bc958500 R15: ffff88812344e42c FS: 00007fd1955e8700(0000) GS:ffff8881f5600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007efdb7928718 CR3: 000000010103a006 CR4: 0000000000170ee0 Call Trace: btrfs_search_old_slot+0x265/0x10d0 ? lock_acquired+0xbb/0x600 ? btrfs_search_slot+0x1090/0x1090 ? free_extent_buffer.part.61+0xd7/0x140 ? free_extent_buffer+0x13/0x20 resolve_indirect_refs+0x3e9/0xfc0 ? lock_downgrade+0x3d0/0x3d0 ? __kasan_check_read+0x11/0x20 ? add_prelim_ref.part.11+0x150/0x150 ? lock_downgrade+0x3d0/0x3d0 ? __kasan_check_read+0x11/0x20 ? lock_acquired+0xbb/0x600 ? __kasan_check_write+0x14/0x20 ? do_raw_spin_unlock+0xa8/0x140 ? rb_insert_color+0x30/0x360 ? prelim_ref_insert+0x12d/0x430 find_parent_nodes+0x5c3/0x1830 ? resolve_indirect_refs+0xfc0/0xfc0 ? lock_release+0xc8/0x620 ? fs_reclaim_acquire+0x67/0xf0 ? lock_acquire+0xc7/0x510 ? lock_downgrade+0x3d0/0x3d0 ? lockdep_hardirqs_on_prepare+0x160/0x210 ? lock_release+0xc8/0x620 ? fs_reclaim_acquire+0x67/0xf0 ? lock_acquire+0xc7/0x510 ? poison_range+0x38/0x40 ? unpoison_range+0x14/0x40 ? trace_hardirqs_on+0x55/0x120 btrfs_find_all_roots_safe+0x142/0x1e0 ? find_parent_nodes+0x1830/0x1830 ? btrfs_inode_flags_to_xflags+0x50/0x50 iterate_extent_inodes+0x20e/0x580 ? tree_backref_for_extent+0x230/0x230 ? lock_downgrade+0x3d0/0x3d0 ? read_extent_buffer+0xdd/0x110 ? lock_downgrade+0x3d0/0x3d0 ? __kasan_check_read+0x11/0x20 ? lock_acquired+0xbb/0x600 ? __kasan_check_write+0x14/0x20 ? _raw_spin_unlock+0x22/0x30 ? __kasan_check_write+0x14/0x20 iterate_inodes_from_logical+0x129/0x170 ? iterate_inodes_from_logical+0x129/0x170 ? btrfs_inode_flags_to_xflags+0x50/0x50 ? iterate_extent_inodes+0x580/0x580 ? __vmalloc_node+0x92/0xb0 ? init_data_container+0x34/0xb0 ? init_data_container+0x34/0xb0 ? kvmalloc_node+0x60/0x80 btrfs_ioctl_logical_to_ino+0x158/0x230 btrfs_ioctl+0x205e/0x4040 ? __might_sleep+0x71/0xe0 ? btrfs_ioctl_get_supported_features+0x30/0x30 ? getrusage+0x4b6/0x9c0 ? __kasan_check_read+0x11/0x20 ? lock_release+0xc8/0x620 ? __might_fault+0x64/0xd0 ? lock_acquire+0xc7/0x510 ? lock_downgrade+0x3d0/0x3d0 ? lockdep_hardirqs_on_prepare+0x210/0x210 ? lockdep_hardirqs_on_prepare+0x210/0x210 ? __kasan_check_read+0x11/0x20 ? do_vfs_ioctl+0xfc/0x9d0 ? ioctl_file_clone+0xe0/0xe0 ? lock_downgrade+0x3d0/0x3d0 ? lockdep_hardirqs_on_prepare+0x210/0x210 ? __kasan_check_read+0x11/0x20 ? lock_release+0xc8/0x620 ? __task_pid_nr_ns+0xd3/0x250 ? lock_acquire+0xc7/0x510 ? __fget_files+0x160/0x230 ? __fget_light+0xf2/0x110 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fd1976e2427 Code: 00 00 90 48 8b 05 (...) RSP: 002b:00007fd1955e5cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fd1955e5f40 RCX: 00007fd1976e2427 RDX: 00007fd1955e5f48 RSI: 00000000c038943b RDI: 0000000000000004 RBP: 0000000001000000 R08: 0000000000000000 R09: 00007fd1955e6120 R10: 0000557835366b00 R11: 0000000000000246 R12: 0000000000000004 R13: 00007fd1955e5f48 R14: 00007fd1955e5f40 R15: 00007fd1955e5ef8 Modules linked in: ---[ end trace ec8931a1c36e57be ]--- (gdb) l *(__tree_mod_log_rewind+0x3b1) 0xffffffff81893521 is in __tree_mod_log_rewind (fs/btrfs/ctree.c:1210). 1205 * the modification. as we're going backwards, we do the 1206 * opposite of each operation here. 1207 */ 1208 switch (tm->op) { 1209 case MOD_LOG_KEY_REMOVE_WHILE_FREEING: 1210 BUG_ON(tm->slot < n); 1211 fallthrough; 1212 case MOD_LOG_KEY_REMOVE_WHILE_MOVING: 1213 case MOD_LOG_KEY_REMOVE: 1214 btrfs_set_node_key(eb, &tm->key, tm->slot); Here's what happens to hit that BUG_ON(): 1) We have one tree mod log user (through fiemap or the logical ino ioctl), with a sequence number of 1, so we have fs_info->tree_mod_seq == 1; 2) Another task is at ctree.c:balance_level() and we have eb X currently as the root of the tree, and we promote its single child, eb Y, as the new root. Then, at ctree.c:balance_level(), we call: tree_mod_log_insert_root(eb X, eb Y, 1); 3) At tree_mod_log_insert_root() we create tree mod log elements for each slot of eb X, of operation type MOD_LOG_KEY_REMOVE_WHILE_FREEING each with a ->logical pointing to ebX->start. These are placed in an array named tm_list. Lets assume there are N elements (N pointers in eb X); 4) Then, still at tree_mod_log_insert_root(), we create a tree mod log element of operation type MOD_LOG_ROOT_REPLACE, ->logical set to ebY->start, ->old_root.logical set to ebX->start, ->old_root.level set to the level of eb X and ->generation set to the generation of eb X; 5) Then tree_mod_log_insert_root() calls tree_mod_log_free_eb() with tm_list as argument. After that, tree_mod_log_free_eb() calls __tree_mod_log_insert() for each member of tm_list in reverse order, from highest slot in eb X, slot N - 1, to slot 0 of eb X; 6) __tree_mod_log_insert() sets the sequence number of each given tree mod log operation - it increments fs_info->tree_mod_seq and sets fs_info->tree_mod_seq as the sequence number of the given tree mod log operation. This means that for the tm_list created at tree_mod_log_insert_root(), the element corresponding to slot 0 of eb X has the highest sequence number (1 + N), and the element corresponding to the last slot has the lowest sequence number (2); 7) Then, after inserting tm_list's elements into the tree mod log rbtree, the MOD_LOG_ROOT_REPLACE element is inserted, which gets the highest sequence number, which is N + 2; 8) Back to ctree.c:balance_level(), we free eb X by calling btrfs_free_tree_block() on it. Because eb X was created in the current transaction, has no other references and writeback did not happen for it, we add it back to the free space cache/tree; 9) Later some other task T allocates the metadata extent from eb X, since it is marked as free space in the space cache/tree, and uses it as a node for some other btree; 10) The tree mod log user task calls btrfs_search_old_slot(), which calls get_old_root(), and finally that calls __tree_mod_log_oldest_root() with time_seq == 1 and eb_root == eb Y; 11) First iteration of the while loop finds the tree mod log element with sequence number N + 2, for the logical address of eb Y and of type MOD_LOG_ROOT_REPLACE; 12) Because the operation type is MOD_LOG_ROOT_REPLACE, we don't break out of the loop, and set root_logical to point to tm->old_root.logical which corresponds to the logical address of eb X; 13) On the next iteration of the while loop, the call to tree_mod_log_search_oldest() returns the smallest tree mod log element for the logical address of eb X, which has a sequence number of 2, an operation type of MOD_LOG_KEY_REMOVE_WHILE_FREEING and corresponds to the old slot N - 1 of eb X (eb X had N items in it before being freed); 14) We then break out of the while loop and return the tree mod log operation of type MOD_LOG_ROOT_REPLACE (eb Y), and not the one for slot N - 1 of eb X, to get_old_root(); 15) At get_old_root(), we process the MOD_LOG_ROOT_REPLACE operation and set "logical" to the logical address of eb X, which was the old root. We then call tree_mod_log_search() passing it the logical address of eb X and time_seq == 1; 16) Then before calling tree_mod_log_search(), task T adds a key to eb X, which results in adding a tree mod log operation of type MOD_LOG_KEY_ADD to the tree mod log - this is done at ctree.c:insert_ptr() - but after adding the tree mod log operation and before updating the number of items in eb X from 0 to 1... 17) The task at get_old_root() calls tree_mod_log_search() and gets the tree mod log operation of type MOD_LOG_KEY_ADD just added by task T. Then it enters the following if branch: if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { (...) } (...) Calls read_tree_block() for eb X, which gets a reference on eb X but does not lock it - task T has it locked. Then it clones eb X while it has nritems set to 0 in its header, before task T sets nritems to 1 in eb X's header. From hereupon we use the clone of eb X which no other task has access to; 18) Then we call __tree_mod_log_rewind(), passing it the MOD_LOG_KEY_ADD mod log operation we just got from tree_mod_log_search() in the previous step and the cloned version of eb X; 19) At __tree_mod_log_rewind(), we set the local variable "n" to the number of items set in eb X's clone, which is 0. Then we enter the while loop, and in its first iteration we process the MOD_LOG_KEY_ADD operation, which just decrements "n" from 0 to (u32)-1, since "n" is declared with a type of u32. At the end of this iteration we call rb_next() to find the next tree mod log operation for eb X, that gives us the mod log operation of type MOD_LOG_KEY_REMOVE_WHILE_FREEING, for slot 0, with a sequence number of N + 1 (steps 3 to 6); 20) Then we go back to the top of the while loop and trigger the following BUG_ON(): (...) switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); fallthrough; (...) Because "n" has a value of (u32)-1 (4294967295) and tm->slot is 0. Fix this by taking a read lock on the extent buffer before cloning it at ctree.c:get_old_root(). This should be done regardless of the extent buffer having been freed and reused, as a concurrent task might be modifying it (while holding a write lock on it). Reported-by: Zygo Blaxell Link: https://lore.kernel.org/linux-btrfs/20210227155037.GN28049@hungrycats.org/ Fixes: 834328a8493079 ("Btrfs: tree mod log's old roots could still be part of the tree") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d56730a678853..34b929bd5c1af 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) "failed to read tree block %llu from get_old_root", logical); } else { + btrfs_tree_read_lock(old); eb = btrfs_clone_extent_buffer(old); + btrfs_tree_read_unlock(old); free_extent_buffer(old); } } else if (old_root) { -- GitLab From 485df75554257e883d0ce39bb886e8212349748e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Mar 2021 14:31:06 +0000 Subject: [PATCH 741/839] btrfs: always pin deleted leaves when there are active tree mod log users When freeing a tree block we may end up adding its extent back to the free space cache/tree, as long as there are no more references for it, it was created in the current transaction and writeback for it never happened. This is generally fine, however when we have tree mod log operations it can result in inconsistent versions of a btree after unwinding extent buffers with the recorded tree mod log operations. This is because: * We only log operations for nodes (adding and removing key/pointers), for leaves we don't do anything; * This means that we can log a MOD_LOG_KEY_REMOVE_WHILE_FREEING operation for a node that points to a leaf that was deleted; * Before we apply the logged operation to unwind a node, we can have that leaf's extent allocated again, either as a node or as a leaf, and possibly for another btree. This is possible if the leaf was created in the current transaction and writeback for it never started, in which case btrfs_free_tree_block() returns its extent back to the free space cache/tree; * Then, before applying the tree mod log operation, some task allocates the metadata extent just freed before, and uses it either as a leaf or as a node for some btree (can be the same or another one, it does not matter); * After applying the MOD_LOG_KEY_REMOVE_WHILE_FREEING operation we now get the target node with an item pointing to the metadata extent that now has content different from what it had before the leaf was deleted. It might now belong to a different btree and be a node and not a leaf anymore. As a consequence, the results of searches after the unwinding can be unpredictable and produce unexpected results. So make sure we pin extent buffers corresponding to leaves when there are tree mod log users. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 78ad31a59e59e..36a3c973fda10 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (last_ref && btrfs_header_generation(buf) == trans->transid) { struct btrfs_block_group *cache; + bool must_pin = false; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); @@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, goto out; } - if (btrfs_is_zoned(fs_info)) { + /* + * If this is a leaf and there are tree mod log users, we may + * have recorded mod log operations that point to this leaf. + * So we must make sure no one reuses this leaf's extent before + * mod log operations are applied to a node, otherwise after + * rewinding a node using the mod log operations we get an + * inconsistent btree, as the leaf's extent may now be used as + * a node or leaf for another different btree. + * We are safe from races here because at this point no other + * node or root points to this extent buffer, so if after this + * check a new tree mod log user joins, it will not be able to + * find a node pointing to this leaf and record operations that + * point to this leaf. + */ + if (btrfs_header_level(buf) == 0) { + read_lock(&fs_info->tree_mod_log_lock); + must_pin = !list_empty(&fs_info->tree_mod_seq_list); + read_unlock(&fs_info->tree_mod_log_lock); + } + + if (must_pin || btrfs_is_zoned(fs_info)) { btrfs_redirty_list_add(trans->transaction, buf); pin_down_extent(trans, cache, buf->start, buf->len, 1); btrfs_put_block_group(cache); -- GitLab From d88d05a9e0b6d9356e97129d4ff9942d765f46ea Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 12 Mar 2021 05:21:37 -0800 Subject: [PATCH 742/839] perf/x86/intel: Fix a crash caused by zero PEBS status A repeatable crash can be triggered by the perf_fuzzer on some Haswell system. https://lore.kernel.org/lkml/7170d3b-c17f-1ded-52aa-cc6d9ae999f4@maine.edu/ For some old CPUs (HSW and earlier), the PEBS status in a PEBS record may be mistakenly set to 0. To minimize the impact of the defect, the commit was introduced to try to avoid dropping the PEBS record for some cases. It adds a check in the intel_pmu_drain_pebs_nhm(), and updates the local pebs_status accordingly. However, it doesn't correct the PEBS status in the PEBS record, which may trigger the crash, especially for the large PEBS. It's possible that all the PEBS records in a large PEBS have the PEBS status 0. If so, the first get_next_pebs_record_by_bit() in the __intel_pmu_pebs_event() returns NULL. The at = NULL. Since it's a large PEBS, the 'count' parameter must > 1. The second get_next_pebs_record_by_bit() will crash. Besides the local pebs_status, correct the PEBS status in the PEBS record as well. Fixes: 01330d7288e0 ("perf/x86: Allow zero PEBS status with only single active event") Reported-by: Vince Weaver Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1615555298-140216-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ebae18264033..d32b302719fe5 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2010,7 +2010,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); -- GitLab From 2dc0572f2cef87425147658698dce2600b799bd3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 12 Mar 2021 05:21:38 -0800 Subject: [PATCH 743/839] perf/x86/intel: Fix unchecked MSR access error caused by VLBR_EVENT On a Haswell machine, the perf_fuzzer managed to trigger this message: [117248.075892] unchecked MSR access error: WRMSR to 0x3f1 (tried to write 0x0400000000000000) at rIP: 0xffffffff8106e4f4 (native_write_msr+0x4/0x20) [117248.089957] Call Trace: [117248.092685] intel_pmu_pebs_enable_all+0x31/0x40 [117248.097737] intel_pmu_enable_all+0xa/0x10 [117248.102210] __perf_event_task_sched_in+0x2df/0x2f0 [117248.107511] finish_task_switch.isra.0+0x15f/0x280 [117248.112765] schedule_tail+0xc/0x40 [117248.116562] ret_from_fork+0x8/0x30 A fake event called VLBR_EVENT may use the bit 58 of the PEBS_ENABLE, if the precise_ip is set. The bit 58 is reserved by the HW. Accessing the bit causes the unchecked MSR access error. The fake event doesn't support PEBS. The case should be rejected. Fixes: 097e4311cda9 ("perf/x86: Add constraint to create guest LBR event without hw counter") Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1615555298-140216-2-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7bbb5bb98d8cd..37ce38403cb8d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3659,6 +3659,9 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & -- GitLab From 5abbe51a526253b9f003e9a0a195638dc882d660 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:46:41 +0100 Subject: [PATCH 744/839] kernel, fs: Introduce and use set_restart_fn() and arch_set_restart_data() Preparation for fixing get_nr_restart_syscall() on X86 for COMPAT. Add a new helper which sets restart_block->fn and calls a dummy arch_set_restart_data() helper. Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174641.GA17871@redhat.com --- fs/select.c | 10 ++++------ include/linux/thread_info.h | 13 +++++++++++++ kernel/futex.c | 3 +-- kernel/time/alarmtimer.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/posix-cpu-timers.c | 2 +- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/select.c b/fs/select.c index 37aaa8317f3ae..945896d0ac9e7 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block) ret = do_sys_poll(ufds, nfds, to); - if (ret == -ERESTARTNOHAND) { - restart_block->fn = do_restart_poll; - ret = -ERESTART_RESTARTBLOCK; - } + if (ret == -ERESTARTNOHAND) + ret = set_restart_fn(restart_block, do_restart_poll); + return ret; } @@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, struct restart_block *restart_block; restart_block = ¤t->restart_block; - restart_block->fn = do_restart_poll; restart_block->poll.ufds = ufds; restart_block->poll.nfds = nfds; @@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, } else restart_block->poll.has_timeout = 0; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart_block, do_restart_poll); } return ret; } diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9b2158c69275e..157762db9d4bf 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -59,6 +60,18 @@ enum syscall_work_bit { #ifdef __KERNEL__ +#ifndef arch_set_restart_data +#define arch_set_restart_data(restart) do { } while (0) +#endif + +static inline long set_restart_fn(struct restart_block *restart, + long (*fn)(struct restart_block *)) +{ + restart->fn = fn; + arch_set_restart_data(restart); + return -ERESTART_RESTARTBLOCK; +} + #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif diff --git a/kernel/futex.c b/kernel/futex.c index e68db77450392..00febd6dea9cc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2728,14 +2728,13 @@ retry: goto out; restart = ¤t->restart_block; - restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart, futex_wait_restart); out: if (to) { diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 98d7a15e8cf69..4d94e2b5499d8 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -854,9 +854,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (flags == TIMER_ABSTIME) return -ERESTARTNOHAND; - restart->fn = alarm_timer_nsleep_restart; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp; + set_restart_fn(restart, alarm_timer_nsleep_restart); return ret; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 788b9d137de4c..5c9d968187ae8 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1957,9 +1957,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, } restart = ¤t->restart_block; - restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); + set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); return ret; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a71758e34e456..9abe15255bc4e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; - restart_block->fn = posix_cpu_nsleep_restart; restart_block->nanosleep.clockid = which_clock; + set_restart_fn(restart_block, posix_cpu_nsleep_restart); } return error; } -- GitLab From 66c1b6d74cd7035e85c426f0af4aede19e805c8a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:46:49 +0100 Subject: [PATCH 745/839] x86: Move TS_COMPAT back to asm/thread_info.h Move TS_COMPAT back to asm/thread_info.h, close to TS_I386_REGS_POKED. It was moved to asm/processor.h by b9d989c7218a ("x86/asm: Move the thread_info::status field to thread_struct"), then later 37a8f7c38339 ("x86/asm: Move 'status' from thread_struct to thread_info") moved the 'status' field back but TS_COMPAT was forgotten. Preparatory patch to fix the COMPAT case for get_nr_restart_syscall() Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174649.GA17880@redhat.com --- arch/x86/include/asm/processor.h | 9 --------- arch/x86/include/asm/thread_info.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dc6d149bf851f..f1b9ed5efaa90 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,15 +551,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, *size = fpu_kernel_xstate_size; } -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ - static inline void native_load_sp0(unsigned long sp0) { diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0d751d5da702e..c2dc29e215eab 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -205,6 +205,15 @@ static inline int arch_within_stack_frames(const void * const stack, #endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ #endif -- GitLab From 8c150ba2fb5995c84a7a43848250d444a3329a7d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:47:09 +0100 Subject: [PATCH 746/839] x86: Introduce TS_COMPAT_RESTART to fix get_nr_restart_syscall() The comment in get_nr_restart_syscall() says: * The problem is that we can get here when ptrace pokes * syscall-like values into regs even if we're not in a syscall * at all. Yes, but if not in a syscall then the status & (TS_COMPAT|TS_I386_REGS_POKED) check below can't really help: - TS_COMPAT can't be set - TS_I386_REGS_POKED is only set if regs->orig_ax was changed by 32bit debugger; and even in this case get_nr_restart_syscall() is only correct if the tracee is 32bit too. Suppose that a 64bit debugger plays with a 32bit tracee and * Tracee calls sleep(2) // TS_COMPAT is set * User interrupts the tracee by CTRL-C after 1 sec and does "(gdb) call func()" * gdb saves the regs by PTRACE_GETREGS * does PTRACE_SETREGS to set %rip='func' and %orig_rax=-1 * PTRACE_CONT // TS_COMPAT is cleared * func() hits int3. * Debugger catches SIGTRAP. * Restore original regs by PTRACE_SETREGS. * PTRACE_CONT get_nr_restart_syscall() wrongly returns __NR_restart_syscall==219, the tracee calls ia32_sys_call_table[219] == sys_madvise. Add the sticky TS_COMPAT_RESTART flag which survives after return to user mode. It's going to be removed in the next step again by storing the information in the restart block. As a further cleanup it might be possible to remove also TS_I386_REGS_POKED with that. Test-case: $ cvs -d :pserver:anoncvs:anoncvs@sourceware.org:/cvs/systemtap co ptrace-tests $ gcc -o erestartsys-trap-debuggee ptrace-tests/tests/erestartsys-trap-debuggee.c --m32 $ gcc -o erestartsys-trap-debugger ptrace-tests/tests/erestartsys-trap-debugger.c -lutil $ ./erestartsys-trap-debugger Unexpected: retval 1, errno 22 erestartsys-trap-debugger: ptrace-tests/tests/erestartsys-trap-debugger.c:421 Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Reported-by: Jan Kratochvil Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174709.GA17895@redhat.com --- arch/x86/include/asm/thread_info.h | 14 +++++++++++++- arch/x86/kernel/signal.c | 24 +----------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c2dc29e215eab..30d1d187019f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -214,10 +214,22 @@ static inline int arch_within_stack_frames(const void * const stack, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#define TS_COMPAT_RESTART 0x0008 + +#define arch_set_restart_data arch_set_restart_data + +static inline void arch_set_restart_data(struct restart_block *restart) +{ + struct thread_info *ti = current_thread_info(); + if (ti->status & TS_COMPAT) + ti->status |= TS_COMPAT_RESTART; + else + ti->status &= ~TS_COMPAT_RESTART; +} #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ea794a083c44e..6c26d2c3a2e4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { - /* - * This function is fundamentally broken as currently - * implemented. - * - * The idea is that we want to trigger a call to the - * restart_block() syscall and that we want in_ia32_syscall(), - * in_x32_syscall(), etc. to match whatever they were in the - * syscall being restarted. We assume that the syscall - * instruction at (regs->ip - 2) matches whatever syscall - * instruction we used to enter in the first place. - * - * The problem is that we can get here when ptrace pokes - * syscall-like values into regs even if we're not in a syscall - * at all. - * - * For now, we maintain historical behavior and guess based on - * stored state. We could do better by saving the actual - * syscall arch in restart_block or (with caveats on x32) by - * checking if regs->ip points to 'int $0x80'. The current - * behavior is incorrect if a tracer has a different bitness - * than the tracee. - */ #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & TS_COMPAT_RESTART) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI -- GitLab From b2e9df850c58c2b36e915e7d3bed3f6107cccba6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:47:16 +0100 Subject: [PATCH 747/839] x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART Save the current_thread_info()->status of X86 in the new restart_block->arch_data field so TS_COMPAT_RESTART can be removed again. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210201174716.GA17898@redhat.com --- arch/x86/include/asm/thread_info.h | 12 ++---------- arch/x86/kernel/signal.c | 2 +- include/linux/restart_block.h | 1 + 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30d1d187019f8..06b740bae431d 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -217,18 +217,10 @@ static inline int arch_within_stack_frames(const void * const stack, #ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ -#define TS_COMPAT_RESTART 0x0008 -#define arch_set_restart_data arch_set_restart_data +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) -static inline void arch_set_restart_data(struct restart_block *restart) -{ - struct thread_info *ti = current_thread_info(); - if (ti->status & TS_COMPAT) - ti->status |= TS_COMPAT_RESTART; - else - ti->status &= ~TS_COMPAT_RESTART; -} #endif #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6c26d2c3a2e4c..f306e85a08a64 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -767,7 +767,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & TS_COMPAT_RESTART) + if (current->restart_block.arch_data & TS_COMPAT) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index bba2920e9c053..980a65594412d 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -23,6 +23,7 @@ enum timespec_type { * System call restart block. */ struct restart_block { + unsigned long arch_data; long (*fn)(struct restart_block *); union { /* For futex_wait and futex_wait_requeue_pi */ -- GitLab From 3f6c515d723480bc8afd456b0a52438fe79128a8 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 16 Mar 2021 15:45:15 +0000 Subject: [PATCH 748/839] MIPS: vmlinux.lds.S: Fix appended dtb not properly aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6654111c893f ("MIPS: vmlinux.lds.S: align raw appended dtb to 8 bytes") changed the alignment from STRUCT_ALIGNMENT bytes to 8 bytes. The commit's message makes it sound like it was actually done on purpose, but this is not the case. The commit was written when raw appended dtb were not aligned at all. The STRUCT_ALIGN() was added a few days before, in commit 7a05293af39f ("MIPS: boot/compressed: Copy DTB to aligned address"). The true purpose of the commit was not to align specifically to 8 bytes, but to make sure that the generated vmlinux' size was properly padded to the alignment required for DTBs. While the switch to 8-byte alignment worked for vmlinux-appended dtb blobs, it broke vmlinuz-appended dtb blobs, as the decompress routine moves the blob to a STRUCT_ALIGNMENT aligned address. Fix this by changing the raw appended dtb blob alignment from 8 bytes back to STRUCT_ALIGNMENT bytes in vmlinux.lds.S. Fixes: 6654111c893f ("MIPS: vmlinux.lds.S: align raw appended dtb to 8 bytes") Cc: Bjørn Mork Signed-off-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 1234834cc4c44..1f98947fe715d 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,7 +176,7 @@ SECTIONS .fill : { FILL(0); BYTE(0); - . = ALIGN(8); + STRUCT_ALIGN(); } __appended_dtb = .; /* leave space for appended DTB */ -- GitLab From 6980d29ce4da223ad7f0751c7f1d61d3c6b54ab3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 16 Mar 2021 20:30:26 +0800 Subject: [PATCH 749/839] zonefs: fix to update .i_wr_refcnt correctly in zonefs_open_zone() In zonefs_open_zone(), if opened zone count is larger than .s_max_open_zones threshold, we missed to recover .i_wr_refcnt, fix this. Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close") Cc: Signed-off-by: Chao Yu Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 3427c99abb4d5..049e36c69ed70 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1040,9 +1040,7 @@ static int zonefs_open_zone(struct inode *inode) mutex_lock(&zi->i_truncate_mutex); - zi->i_wr_refcnt++; - if (zi->i_wr_refcnt == 1) { - + if (!zi->i_wr_refcnt) { if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) { atomic_dec(&sbi->s_open_zones); ret = -EBUSY; @@ -1052,7 +1050,6 @@ static int zonefs_open_zone(struct inode *inode) if (i_size_read(inode) < zi->i_max_size) { ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); if (ret) { - zi->i_wr_refcnt--; atomic_dec(&sbi->s_open_zones); goto unlock; } @@ -1060,6 +1057,8 @@ static int zonefs_open_zone(struct inode *inode) } } + zi->i_wr_refcnt++; + unlock: mutex_unlock(&zi->i_truncate_mutex); -- GitLab From 2db4215f47557703dade2baccfa8da7b7e42a7e4 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 10 Mar 2021 18:48:06 +0900 Subject: [PATCH 750/839] scsi: sd_zbc: Update write pointer offset cache Recent changes changed the completion of SCSI commands from Soft-IRQ context to IRQ context. This triggers the following warning, when we're completing writes to zoned block devices that go through the zone append emulation: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 RIP: 0010:__local_bh_disable_ip+0x3f/0x50 RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 Call Trace: _raw_spin_lock_bh+0x18/0x40 sd_zbc_complete+0x43d/0x1150 sd_done+0x631/0x1040 ? mark_lock+0xe4/0x2fd0 ? provisioning_mode_store+0x3f0/0x3f0 scsi_finish_command+0x31b/0x5c0 _scsih_io_done+0x960/0x29e0 [mpt3sas] ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] ? __lock_acquire+0x166b/0x58b0 ? _get_st_from_smid+0x4a/0x80 [mpt3sas] _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] ? lock_is_held_type+0x98/0x110 ? find_held_lock+0x2c/0x110 ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] _base_interrupt+0x8d/0xd0 [mpt3sas] ? rcu_read_lock_sched_held+0x3f/0x70 __handle_irq_event_percpu+0x24d/0x600 handle_irq_event+0xef/0x240 ? handle_irq_event_percpu+0x110/0x110 handle_edge_irq+0x1f6/0xb60 __common_interrupt+0x75/0x160 common_interrupt+0x7b/0xa0 asm_common_interrupt+0x1e/0x40 Don't use spin_lock_bh() to protect the update of the write pointer offset cache, but use spin_lock_irqsave() for it. Link: https://lore.kernel.org/r/3cfebe48d09db73041b7849be71ffbcec7ee40b3.1615369586.git.johannes.thumshirn@wdc.com Fixes: 664f0dce2058 ("scsi: mpt3sas: Add support for shared host tagset for CPU hotplug") Reported-by: Shinichiro Kawasaki Tested-by: Shin'ichiro Kawasaki Reviewed-by: Damien Le Moal Signed-off-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index ee558675eab4a..994f1b8e3504d 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -280,27 +280,28 @@ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) { struct scsi_disk *sdkp; + unsigned long flags; unsigned int zno; int ret; sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); for (zno = 0; zno < sdkp->nr_zones; zno++) { if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) continue; - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf, SD_BUF_SIZE, zno * sdkp->zone_blocks, true); - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); if (!ret) sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64, zno, sd_zbc_update_wp_offset_cb, sdkp); } - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); scsi_device_put(sdkp->device); } @@ -324,6 +325,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); unsigned int wp_offset, zno = blk_rq_zone_no(rq); + unsigned long flags; blk_status_t ret; ret = sd_zbc_cmnd_checks(cmd); @@ -337,7 +339,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, if (!blk_req_zone_write_trylock(rq)) return BLK_STS_ZONE_RESOURCE; - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); wp_offset = sdkp->zones_wp_offset[zno]; switch (wp_offset) { case SD_ZBC_INVALID_WP_OFST: @@ -366,7 +368,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, *lba += wp_offset; } - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); if (ret) blk_req_zone_write_unlock(rq); return ret; @@ -445,6 +447,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); unsigned int zno = blk_rq_zone_no(rq); enum req_opf op = req_op(rq); + unsigned long flags; /* * If we got an error for a command that needs updating the write @@ -452,7 +455,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, * invalid to force an update from disk the next time a zone append * command is issued. */ - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); if (result && op != REQ_OP_ZONE_RESET_ALL) { if (op == REQ_OP_ZONE_APPEND) { @@ -496,7 +499,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, } unlock_wp_offset: - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); return good_bytes; } -- GitLab From 0fdc7d5d8f3719950478cca452cf7f0f1355be10 Mon Sep 17 00:00:00 2001 From: dongjian Date: Tue, 16 Mar 2021 20:15:15 +0800 Subject: [PATCH 751/839] scsi: ufs: ufs-mediatek: Correct operator & -> && The "lpm" and "->enabled" are all boolean. We should be using && rather than the bit operator. Link: https://lore.kernel.org/r/1615896915-148864-1-git-send-email-dj0227@163.com Fixes: 488edafb1120 ("scsi: ufs-mediatek: Introduce low-power mode for device power supply") Reviewed-by: Avri Altman Signed-off-by: dongjian Signed-off-by: Yue Hu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index c55202b92a43a..a981f261b3043 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -911,7 +911,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm) if (!hba->vreg_info.vccq2 || !hba->vreg_info.vcc) return; - if (lpm & !hba->vreg_info.vcc->enabled) + if (lpm && !hba->vreg_info.vcc->enabled) regulator_set_mode(hba->vreg_info.vccq2->reg, REGULATOR_MODE_IDLE); else if (!lpm) -- GitLab From cc7a0bb058b85ea03db87169c60c7cfdd5d34678 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 15 Mar 2021 15:48:21 -0600 Subject: [PATCH 752/839] PCI: rpadlpar: Fix potential drc_name corruption in store functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both add_slot_store() and remove_slot_store() try to fix up the drc_name copied from the store buffer by placing a NUL terminator at nbyte + 1 or in place of a '\n' if present. However, the static buffer that we copy the drc_name data into is not zeroed and can contain anything past the n-th byte. This is problematic if a '\n' byte appears in that buffer after nbytes and the string copied into the store buffer was not NUL terminated to start with as the strchr() search for a '\n' byte will mark this incorrectly as the end of the drc_name string resulting in a drc_name string that contains garbage data after the n-th byte. Additionally it will cause us to overwrite that '\n' byte on the stack with NUL, potentially corrupting data on the stack. The following debugging shows an example of the drmgr utility writing "PHB 4543" to the add_slot sysfs attribute, but add_slot_store() logging a corrupted string value. drmgr: drmgr: -c phb -a -s PHB 4543 -d 1 add_slot_store: drc_name = PHB 4543°|<82>!, rc = -19 Fix this by using strscpy() instead of memcpy() to ensure the string is NUL terminated when copied into the static drc_name buffer. Further, since the string is now NUL terminated the code only needs to change '\n' to '\0' when present. Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler [mpe: Reformat change log and add mention of possible stack corruption] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210315214821.452959-1-tyreld@linux.ibm.com --- drivers/pci/hotplug/rpadlpar_sysfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index cdbfa5df3a51f..dbfa0b55d31a5 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -34,12 +34,11 @@ static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_add_slot(drc_name); if (rc) @@ -65,12 +64,11 @@ static ssize_t remove_slot_store(struct kobject *kobj, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_remove_slot(drc_name); if (rc) -- GitLab From a50bd64616907ed126ffbdbaa06c5ce708c4a404 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 Feb 2021 10:39:51 +0100 Subject: [PATCH 753/839] scsi: mpt3sas: Do not use GFP_KERNEL in atomic context mpt3sas_get_port_by_id() can be called when a spinlock is held. Use GFP_ATOMIC instead of GFP_KERNEL when allocating memory. Issue spotted by call_kern.cocci: ./drivers/scsi/mpt3sas/mpt3sas_scsih.c:416:42-52: ERROR: function mpt3sas_get_port_by_id called on line 7125 inside lock on line 7123 but uses GFP_KERNEL ./drivers/scsi/mpt3sas/mpt3sas_scsih.c:416:42-52: ERROR: function mpt3sas_get_port_by_id called on line 6842 inside lock on line 6839 but uses GFP_KERNEL ./drivers/scsi/mpt3sas/mpt3sas_scsih.c:416:42-52: ERROR: function mpt3sas_get_port_by_id called on line 6854 inside lock on line 6851 but uses GFP_KERNEL ./drivers/scsi/mpt3sas/mpt3sas_scsih.c:416:42-52: ERROR: function mpt3sas_get_port_by_id called on line 7706 inside lock on line 7702 but uses GFP_KERNEL ./drivers/scsi/mpt3sas/mpt3sas_scsih.c:416:42-52: ERROR: function mpt3sas_get_port_by_id called on line 10260 inside lock on line 10256 but uses GFP_KERNEL Link: https://lore.kernel.org/r/20210220093951.905362-1-christophe.jaillet@wanadoo.fr Fixes: 324c122fc0a4 ("scsi: mpt3sas: Add module parameter multipath_on_hba") Signed-off-by: Christophe JAILLET Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index ffca030647977..6aa6de7291876 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -413,7 +413,7 @@ mpt3sas_get_port_by_id(struct MPT3SAS_ADAPTER *ioc, * And add this object to port_table_list. */ if (!ioc->multipath_on_hba) { - port = kzalloc(sizeof(struct hba_port), GFP_KERNEL); + port = kzalloc(sizeof(struct hba_port), GFP_ATOMIC); if (!port) return NULL; -- GitLab From 6e9070dc2e847ef77aa1c581252a1b97eb2225b9 Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Sun, 28 Feb 2021 12:10:22 +0100 Subject: [PATCH 754/839] riscv: fix bugon.cocci warnings Use BUG_ON instead of a if condition followed by BUG. Generated by: scripts/coccinelle/misc/bugon.cocci Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") CC: Guo Ren Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index a2ec18662fee0..7e2c78e2ca6b0 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -256,8 +256,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) * normal page fault. */ regs->epc = (unsigned long) cur->addr; - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); -- GitLab From bab1770a2ce00bf201c6ac5a013a7195db2e02b7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Mar 2021 09:40:22 +0000 Subject: [PATCH 755/839] ftrace: Fix spelling mistake "disabed" -> "disabled" There is a spelling mistake in a comment, fix it. Signed-off-by: Colin Ian King Signed-off-by: Palmer Dabbelt --- arch/csky/kernel/probes/ftrace.c | 2 +- arch/riscv/kernel/probes/ftrace.c | 2 +- arch/x86/kernel/kprobes/ftrace.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index ae2b1c7b3b5cb..ef2bb9bd9605f 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -9,7 +9,7 @@ int arch_check_ftrace_location(struct kprobe *p) return 0; } -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index 2dfb33fdac746..17ca5e923bb0d 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -2,7 +2,7 @@ #include -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 373e5fa3ce1f6..51c7f5271aee4 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -12,7 +12,7 @@ #include "common.h" -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { -- GitLab From fa59030bf8555a4eb83342fd23c32e30d4f2fe7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Mar 2021 19:24:24 +0900 Subject: [PATCH 756/839] riscv: Fix compilation error with Canaan SoC When CONFIG_SOC_CANAAN is selected, the K210 sysctl driver is always compiled. Since this driver early init function calls the function k210_clk_early_init() implemented by the K210 clk driver, this driver must also always be selected for compilation ot avoid build failures. Avoid such build failures by always selecting CONFIG_COMMON_CLK and CONFIG_COMMON_CLK_K210 when CONFIG_SOC_CANAAN is enabled. Reported-by: kernel test robot Signed-off-by: Damien Le Moal Fixes: c6ca7616f7d5 ("clk: Add RISC-V Canaan Kendryte K210 clock driver") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.socs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 7efcece8896cf..e1b2690b6e45d 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -31,6 +31,8 @@ config SOC_CANAAN select SIFIVE_PLIC select ARCH_HAS_RESET_CONTROLLER select PINCTRL + select COMMON_CLK + select COMMON_CLK_K210 help This enables support for Canaan Kendryte K210 SoC platform hardware. -- GitLab From ce989f1472ae350e844b10c880b22543168fbc92 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Mar 2021 16:46:34 +0100 Subject: [PATCH 757/839] RISC-V: Fix out-of-bounds accesses in init_resources() init_resources() allocates an array of resources, based on the current total number of memory regions and reserved memory regions. However, allocating this array using memblock_alloc() might increase the number of reserved memory regions. If that happens, populating the array later based on the new number of regions will cause out-of-bounds writes beyond the end of the allocated array. Fix this by allocating one more entry, which may or may not be used. Fixes: 797f0375dd2ef5cd ("RISC-V: Do not allocate memblock while iterating reserved memblocks") Signed-off-by: Geert Uytterhoeven Reviewed-by: Atish Patra Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e85bacff1b507..f8f15332caa20 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -147,7 +147,8 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res); mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); if (!mem_res) panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); -- GitLab From f3773dd031de7b283227f6104049688f77074a2d Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sat, 13 Mar 2021 03:45:04 -0500 Subject: [PATCH 758/839] riscv: Ensure page table writes are flushed when initializing KASAN vmalloc Make sure that writes to kernel page table during KASAN vmalloc initialization are made visible by adding a sfence.vma. Signed-off-by: Alexandre Ghiti Reviewed-by: Palmer Dabbelt Fixes: e178d670f251 ("riscv/kasan: add KASAN_VMALLOC support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 3fc18f469efbc..5c35e0f71e885 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -187,6 +187,8 @@ void __init kasan_shallow_populate(void *start, void *end) } vaddr += PAGE_SIZE; } + + local_flush_tlb_all(); } void __init kasan_init(void) -- GitLab From 78947bdfd75211cc9482cad01f95fe103a863110 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 16 Mar 2021 22:01:04 -0700 Subject: [PATCH 759/839] RISC-V: kasan: Declare kasan_shallow_populate() static Without this I get a missing prototype warning. Reported-by: kernel test robot Fixes: e178d670f251 ("riscv/kasan: add KASAN_VMALLOC support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 5c35e0f71e885..4f85c6d0ddf8b 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -155,7 +155,7 @@ static void __init kasan_populate(void *start, void *end) memset(start, KASAN_SHADOW_INIT, end - start); } -void __init kasan_shallow_populate(void *start, void *end) +static void __init kasan_shallow_populate(void *start, void *end) { unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); -- GitLab From a5406a7ff56e63376c210b06072aa0ef23473366 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 15 Mar 2021 20:03:07 +0800 Subject: [PATCH 760/839] riscv: Correct SPARSEMEM configuration There are two issues for RV32, 1) if use FLATMEM, it is useless to enable SPARSEMEM_STATIC. 2) if use SPARSMEM, both SPARSEMEM_VMEMMAP and SPARSEMEM_STATIC is enabled. Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Kefeng Wang Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 85d626b8ce5e0..87d7b52f278f0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -93,7 +93,6 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI - select SPARSEMEM_STATIC if 32BIT select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK @@ -154,7 +153,8 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y depends on MMU - select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_STATIC if 32BIT && SPARSMEM + select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE -- GitLab From d2547cf59793168b564372d75620897416cbaf87 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Mar 2021 15:37:33 +0100 Subject: [PATCH 761/839] KVM: x86: hyper-v: Limit guest to writing zero to HV_X64_MSR_TSC_EMULATION_STATUS HV_X64_MSR_TSC_EMULATION_STATUS indicates whether TSC accesses are emulated after migration (to accommodate for a different host TSC frequency when TSC scaling is not supported; we don't implement this in KVM). Guest can use the same MSR to stop TSC access emulation by writing zero. Writing anything else is forbidden. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210316143736.964151-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 58fa8c0298673..eefb85b86fe84 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1229,6 +1229,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, hv->hv_tsc_emulation_control = data; break; case HV_X64_MSR_TSC_EMULATION_STATUS: + if (data && !host) + return 1; + hv->hv_tsc_emulation_status = data; break; case HV_X64_MSR_TIME_REF_COUNT: -- GitLab From e880c6ea55b9805294ecc100ee95e0c9860ae90e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Mar 2021 15:37:34 +0100 Subject: [PATCH 762/839] KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs When KVM_REQ_MASTERCLOCK_UPDATE request is issued (e.g. after migration) we need to make sure no vCPU sees stale values in PV clock structures and thus all vCPUs are kicked with KVM_REQ_CLOCK_UPDATE. Hyper-V TSC page clocksource is global and kvm_guest_time_update() only updates in on vCPU0 but this is not entirely correct: nothing blocks some other vCPU from entering the guest before we finish the update on CPU0 and it can read stale values from the page. Invalidate TSC page in kvm_gen_update_masterclock() to switch all vCPUs to using MSR based clocksource (HV_X64_MSR_TIME_REF_COUNT). Signed-off-by: Vitaly Kuznetsov Message-Id: <20210316143736.964151-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 23 +++++++++++++++++++++++ arch/x86/kvm/hyperv.h | 1 + arch/x86/kvm/x86.c | 2 ++ 3 files changed, 26 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index eefb85b86fe84..a0e3c49233d4c 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1137,6 +1137,29 @@ out_unlock: mutex_unlock(&hv->hv_lock); } +void kvm_hv_invalidate_tsc_page(struct kvm *kvm) +{ + struct kvm_hv *hv = to_kvm_hv(kvm); + u64 gfn; + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + return; + + mutex_lock(&hv->hv_lock); + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + goto out_unlock; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + + hv->tsc_ref.tsc_sequence = 0; + kvm_write_guest(kvm, gfn_to_gpa(gfn), + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); + +out_unlock: + mutex_unlock(&hv->hv_lock); +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e951af1fcb2c5..60547d5cb6d72 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); +void kvm_hv_invalidate_tsc_page(struct kvm *kvm); void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47e021bdcc94a..a5c5b38735e19 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2551,6 +2551,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) struct kvm_vcpu *vcpu; struct kvm_arch *ka = &kvm->arch; + kvm_hv_invalidate_tsc_page(kvm); + spin_lock(&ka->pvclock_gtod_sync_lock); kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ -- GitLab From 483028edacab374060d93955382b4865a9e07cba Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 17 Mar 2021 14:36:06 +0800 Subject: [PATCH 763/839] efivars: respect EFI_UNSUPPORTED return from firmware As per UEFI spec 2.8B section 8.2, EFI_UNSUPPORTED may be returned by EFI variable runtime services if no variable storage is supported by firmware. In this case, there is no point for kernel to continue efivars initialization. That said, efivar_init() should fail by returning an error code, so that efivarfs will not be mounted on /sys/firmware/efi/efivars at all. Otherwise, user space like efibootmgr will be confused by the EFIVARFS_MAGIC seen there, while EFI variable calls cannot be made successfully. Cc: # v5.10+ Signed-off-by: Shawn Guo Acked-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/vars.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 41c1d00bf933c..abdc8a6a39631 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -484,6 +484,10 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } } + break; + case EFI_UNSUPPORTED: + err = -EOPNOTSUPP; + status = EFI_NOT_FOUND; break; case EFI_NOT_FOUND: break; -- GitLab From 2046a24ae121cd107929655a6aaf3b8c5beea01f Mon Sep 17 00:00:00 2001 From: Manaf Meethalavalappu Pallikunhi Date: Tue, 8 Dec 2020 00:23:01 +0530 Subject: [PATCH 764/839] thermal/core: Add NULL pointer check before using cooling device stats There is a possible chance that some cooling device stats buffer allocation fails due to very high cooling device max state value. Later cooling device update sysfs can try to access stats data for the same cooling device. It will lead to NULL pointer dereference issue. Add a NULL pointer check before accessing thermal cooling device stats data. It fixes the following bug [ 26.812833] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 [ 27.122960] Call trace: [ 27.122963] do_raw_spin_lock+0x18/0xe8 [ 27.122966] _raw_spin_lock+0x24/0x30 [ 27.128157] thermal_cooling_device_stats_update+0x24/0x98 [ 27.128162] cur_state_store+0x88/0xb8 [ 27.128166] dev_attr_store+0x40/0x58 [ 27.128169] sysfs_kf_write+0x50/0x68 [ 27.133358] kernfs_fop_write+0x12c/0x1c8 [ 27.133362] __vfs_write+0x54/0x160 [ 27.152297] vfs_write+0xcc/0x188 [ 27.157132] ksys_write+0x78/0x108 [ 27.162050] ksys_write+0xf8/0x108 [ 27.166968] __arm_smccc_hvc+0x158/0x4b0 [ 27.166973] __arm_smccc_hvc+0x9c/0x4b0 [ 27.186005] el0_svc+0x8/0xc Signed-off-by: Manaf Meethalavalappu Pallikunhi Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1607367181-24589-1-git-send-email-manafm@codeaurora.org --- drivers/thermal/thermal_sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 345917a58f2fd..1c4aac8464a70 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -674,6 +674,9 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, { struct cooling_dev_stats *stats = cdev->stats; + if (!stats) + return; + spin_lock(&stats->lock); if (stats->state == new_state) -- GitLab From 5de2055d31ea88fd9ae9709ac95c372a505a60fa Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Mar 2021 11:31:16 -0400 Subject: [PATCH 765/839] locking/ww_mutex: Simplify use_ww_ctx & ww_ctx handling The use_ww_ctx flag is passed to mutex_optimistic_spin(), but the function doesn't use it. The frequent use of the (use_ww_ctx && ww_ctx) combination is repetitive. In fact, ww_ctx should not be used at all if !use_ww_ctx. Simplify ww_mutex code by dropping use_ww_ctx from mutex_optimistic_spin() an clear ww_ctx if !use_ww_ctx. In this way, we can replace (use_ww_ctx && ww_ctx) by just (ww_ctx). Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20210316153119.13802-2-longman@redhat.com --- kernel/locking/mutex.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index adb9350907688..622ebdfcd083b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -626,7 +626,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) */ static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { if (!waiter) { /* @@ -702,7 +702,7 @@ fail: #else static __always_inline bool mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, - const bool use_ww_ctx, struct mutex_waiter *waiter) + struct mutex_waiter *waiter) { return false; } @@ -922,6 +922,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct ww_mutex *ww; int ret; + if (!use_ww_ctx) + ww_ctx = NULL; + might_sleep(); #ifdef CONFIG_DEBUG_MUTEXES @@ -929,7 +932,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, #endif ww = container_of(lock, struct ww_mutex, base); - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; @@ -946,10 +949,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); if (__mutex_trylock(lock) || - mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { + mutex_optimistic_spin(lock, ww_ctx, NULL)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) + if (ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; @@ -960,7 +963,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * After waiting to acquire the wait_lock, try again. */ if (__mutex_trylock(lock)) { - if (use_ww_ctx && ww_ctx) + if (ww_ctx) __ww_mutex_check_waiters(lock, ww_ctx); goto skip_wait; @@ -1013,7 +1016,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto err; } - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { ret = __ww_mutex_check_kill(lock, &waiter, ww_ctx); if (ret) goto err; @@ -1026,7 +1029,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * ww_mutex needs to always recheck its position since its waiter * list is not FIFO ordered. */ - if ((use_ww_ctx && ww_ctx) || !first) { + if (ww_ctx || !first) { first = __mutex_waiter_is_first(lock, &waiter); if (first) __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); @@ -1039,7 +1042,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * or we must see its unlock and acquire. */ if (__mutex_trylock(lock) || - (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter))) + (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) break; spin_lock(&lock->wait_lock); @@ -1048,7 +1051,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, acquired: __set_current_state(TASK_RUNNING); - if (use_ww_ctx && ww_ctx) { + if (ww_ctx) { /* * Wound-Wait; we stole the lock (!first_waiter), check the * waiters as anyone might want to wound us. @@ -1068,7 +1071,7 @@ skip_wait: /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx && ww_ctx) + if (ww_ctx) ww_mutex_lock_acquired(ww, ww_ctx); spin_unlock(&lock->wait_lock); -- GitLab From bee645788e07eea63055d261d2884ea45c2ba857 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Mar 2021 11:31:17 -0400 Subject: [PATCH 766/839] locking/ww_mutex: Fix acquire/release imbalance in ww_acquire_init()/ww_acquire_fini() In ww_acquire_init(), mutex_acquire() is gated by CONFIG_DEBUG_LOCK_ALLOC. The dep_map in the ww_acquire_ctx structure is also gated by the same config. However mutex_release() in ww_acquire_fini() is gated by CONFIG_DEBUG_MUTEXES. It is possible to set CONFIG_DEBUG_MUTEXES without setting CONFIG_DEBUG_LOCK_ALLOC though it is an unlikely configuration. That may cause a compilation error as dep_map isn't defined in this case. Fix this potential problem by enclosing mutex_release() inside CONFIG_DEBUG_LOCK_ALLOC. Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210316153119.13802-3-longman@redhat.com --- include/linux/ww_mutex.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 850424e5d0306..6ecf2a0220dbe 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -173,9 +173,10 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { -#ifdef CONFIG_DEBUG_MUTEXES +#ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->dep_map, _THIS_IP_); - +#endif +#ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* -- GitLab From 8e62438a1ee74ceeac77bb4c680ceaaf3f860488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 20 Feb 2021 12:33:03 +0200 Subject: [PATCH 767/839] drm/i915: Workaround async flip + VT-d corruption on HSW/BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW/BDW with VT-d active the first tile row scanned out after the first async flip of the frame often ends up corrupted. Whether the corruption happens or not depends on the scanline on which the async flip happens, but the behaviour seems very consistent. Ie. the same set of scanlines (which are most scanlines) always show the corruption. And another set of scanlines (far less of them) never shows the corruption. I discovered that disabling the fetch-stride stretching feature cures the corruption. This is some kind of TLB related prefetch thing AFAIK. We already disable it on SNB primary planes due to a documented workaround. The hardware folks indicated that disabling this should be fine, so let's go with that. And while we're here, let's document the relevant bits on all pre-skl platforms. Fixes: 2a636e240c77 ("drm/i915: Implement async flip for ivb/hsw") Fixes: cda195f13abd ("drm/i915: Implement async flips for bdw") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210220103303.3448-1-ville.syrjala@linux.intel.com Reviewed-by: Karthik B S (cherry picked from commit b7a7053ab2ec558b8ae4e55f62ea8f1f58e14f5c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 23 ++++++++++++++++++++++- drivers/gpu/drm/i915/intel_pm.c | 16 +++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7146cd0f32560..aaf1f0045b164 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3316,7 +3316,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ILK_DISPLAY_CHICKEN1 _MMIO(0x42000) #define ILK_FBCQ_DIS (1 << 22) -#define ILK_PABSTRETCH_DIS (1 << 21) +#define ILK_PABSTRETCH_DIS REG_BIT(21) +#define ILK_SABSTRETCH_DIS REG_BIT(20) +#define IVB_PRI_STRETCH_MAX_MASK REG_GENMASK(21, 20) +#define IVB_PRI_STRETCH_MAX_X8 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 0) +#define IVB_PRI_STRETCH_MAX_X4 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 1) +#define IVB_PRI_STRETCH_MAX_X2 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 2) +#define IVB_PRI_STRETCH_MAX_X1 REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 3) +#define IVB_SPR_STRETCH_MAX_MASK REG_GENMASK(19, 18) +#define IVB_SPR_STRETCH_MAX_X8 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 0) +#define IVB_SPR_STRETCH_MAX_X4 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 1) +#define IVB_SPR_STRETCH_MAX_X2 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 2) +#define IVB_SPR_STRETCH_MAX_X1 REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 3) /* @@ -8039,6 +8050,16 @@ enum { #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 +#define HSW_PRI_STRETCH_MAX_MASK REG_GENMASK(28, 27) +#define HSW_PRI_STRETCH_MAX_X8 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 0) +#define HSW_PRI_STRETCH_MAX_X4 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 1) +#define HSW_PRI_STRETCH_MAX_X2 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 2) +#define HSW_PRI_STRETCH_MAX_X1 REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 3) +#define HSW_SPR_STRETCH_MAX_MASK REG_GENMASK(26, 25) +#define HSW_SPR_STRETCH_MAX_X8 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 0) +#define HSW_SPR_STRETCH_MAX_X4 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 1) +#define HSW_SPR_STRETCH_MAX_X2 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 2) +#define HSW_SPR_STRETCH_MAX_X1 REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 3) #define HSW_FBCQ_DIS (1 << 22) #define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0c3e63f27c29d..97b57acc02e27 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7245,11 +7245,16 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); - /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ for_each_pipe(dev_priv, pipe) { + /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) | BDW_DPRS_MASK_VBLANK_SRD); + + /* Undocumented but fixes async flip + VT-d corruption */ + if (intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1); } /* WaVSRefCountFullforceMissDisable:bdw */ @@ -7285,11 +7290,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { + enum pipe pipe; + /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) | HSW_FBCQ_DIS); + for_each_pipe(dev_priv, pipe) { + /* Undocumented but fixes async flip + VT-d corruption */ + if (intel_vtd_active()) + intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe), + HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1); + } + /* This is required by WaCatErrorRejectionIssue:hsw */ intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | -- GitLab From 6a77c6bb7260bd5000f95df454d9f8cdb1af7132 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 5 Mar 2021 13:09:47 -0800 Subject: [PATCH 768/839] i915/perf: Start hrtimer only if sampling the OA buffer SAMPLE_OA parameter enables sampling of OA buffer and results in a call to init the OA buffer which initializes the OA unit head/tail pointers. The OA_EXPONENT parameter controls the periodicity of the OA reports in the OA buffer and results in starting a hrtimer. Before gen12, all use cases required the use of the OA buffer and i915 enforced this setting when vetting out the parameters passed. In these platforms the hrtimer was enabled if OA_EXPONENT was passed. This worked fine since it was implied that SAMPLE_OA is always passed. With gen12, this changed. Users can use perf without enabling the OA buffer as in OAR use cases. While an OAR use case should ideally not start the hrtimer, we see that passing an OA_EXPONENT parameter will start the hrtimer even though SAMPLE_OA is not specified. This results in an uninitialized OA buffer, so the head/tail pointers used to track the buffer are zero. This itself does not fail, but if we ran a use-case that SAMPLED the OA buffer previously, then the OA_TAIL register is still pointing to an old value. When the timer callback runs, it ends up calculating a wrong/large number of available reports. Since we do a spinlock_irq_save and start processing a large number of reports, NMI watchdog fires and causes a crash. Start the timer only if SAMPLE_OA is specified. v2: - Drop SAMPLE OA check when appending samples (Ashutosh) - Prevent read if OA buffer is not being sampled Fixes: 00a7f0d7155c ("drm/i915/tgl: Add perf support on TGL") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20210305210947.58751-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit be0bdd67fda9468156c733976688f6487d0c42f7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 112ba5f2ce90e..e62ad69606f6e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -603,7 +603,6 @@ static int append_oa_sample(struct i915_perf_stream *stream, { int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; - u32 sample_flags = stream->sample_flags; header.type = DRM_I915_PERF_RECORD_SAMPLE; header.pad = 0; @@ -617,10 +616,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); - if (sample_flags & SAMPLE_OA_REPORT) { - if (copy_to_user(buf, report, report_size)) - return -EFAULT; - } + if (copy_to_user(buf, report, report_size)) + return -EFAULT; (*offset) += header.size; @@ -2682,7 +2679,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) stream->perf->ops.oa_enable(stream); - if (stream->periodic) + if (stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_start(&stream->poll_check_timer, ns_to_ktime(stream->poll_oa_period), HRTIMER_MODE_REL_PINNED); @@ -2745,7 +2742,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { stream->perf->ops.oa_disable(stream); - if (stream->periodic) + if (stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_cancel(&stream->poll_check_timer); } @@ -3028,7 +3025,7 @@ static ssize_t i915_perf_read(struct file *file, * disabled stream as an error. In particular it might otherwise lead * to a deadlock for blocking file descriptors... */ - if (!stream->enabled) + if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) return -EIO; if (!(file->f_flags & O_NONBLOCK)) { -- GitLab From 6909115442759efef3d4bc5d9c54d7943f1afc14 Mon Sep 17 00:00:00 2001 From: Junlin Yang Date: Fri, 12 Mar 2021 15:14:45 +0800 Subject: [PATCH 769/839] drm/omap: dsi: fix unsigned expression compared with zero r is "u32" always >= 0,mipi_dsi_create_packet may return little than zero. so r < 0 condition is never accessible. Fixes coccicheck warnings: ./drivers/gpu/drm/omapdrm/dss/dsi.c:2155:5-6: WARNING: Unsigned expression compared with zero: r < 0 Signed-off-by: Junlin Yang Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20210312071445.1721-1-angkery@163.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 8e11612f5fe17..b31d750c425ac 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -2149,11 +2149,12 @@ static int dsi_vc_send_short(struct dsi_data *dsi, int vc, const struct mipi_dsi_msg *msg) { struct mipi_dsi_packet pkt; + int ret; u32 r; - r = mipi_dsi_create_packet(&pkt, msg); - if (r < 0) - return r; + ret = mipi_dsi_create_packet(&pkt, msg); + if (ret < 0) + return ret; WARN_ON(!dsi_bus_is_locked(dsi)); -- GitLab From 6417f03132a6952cd17ddd8eaddbac92b61b17e0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 17 Mar 2021 12:45:47 +0200 Subject: [PATCH 770/839] module: remove never implemented MODULE_SUPPORTED_DEVICE MODULE_SUPPORTED_DEVICE was added in pre-git era and never was implemented. We can safely remove it, because the kernel has grown to have many more reliable mechanisms to determine if device is supported or not. Signed-off-by: Leon Romanovsky Signed-off-by: Linus Torvalds --- arch/x86/platform/iris/iris.c | 1 - drivers/atm/fore200e.c | 2 -- drivers/block/floppy.c | 1 - drivers/bluetooth/btrsi.c | 1 - drivers/char/applicom.c | 3 -- drivers/char/toshiba.c | 1 - drivers/input/joydev.c | 1 - drivers/media/firewire/firedtv-fw.c | 1 - drivers/media/pci/cx18/cx18-alsa-main.c | 1 - drivers/media/pci/cx18/cx18-driver.c | 1 - drivers/media/pci/cx25821/cx25821-alsa.c | 1 - drivers/media/pci/cx88/cx88-alsa.c | 1 - drivers/media/pci/ivtv/ivtv-alsa-main.c | 1 - drivers/media/pci/ivtv/ivtv-driver.c | 3 -- drivers/media/pci/sta2x11/sta2x11_vip.c | 1 - drivers/media/platform/atmel/atmel-isi.c | 1 - .../media/platform/atmel/atmel-sama5d2-isc.c | 1 - .../media/platform/marvell-ccic/cafe-driver.c | 4 --- drivers/media/platform/stm32/stm32-dcmi.c | 1 - drivers/media/usb/cpia2/cpia2_v4l.c | 1 - drivers/media/usb/tm6000/tm6000-alsa.c | 1 - drivers/media/usb/tm6000/tm6000-dvb.c | 2 -- drivers/mtd/maps/sun_uflash.c | 1 - drivers/net/can/peak_canfd/peak_pciefd_main.c | 1 - drivers/net/can/sja1000/ems_pci.c | 1 - drivers/net/can/sja1000/ems_pcmcia.c | 1 - drivers/net/can/sja1000/kvaser_pci.c | 1 - drivers/net/can/sja1000/peak_pci.c | 2 -- drivers/net/can/sja1000/peak_pcmcia.c | 1 - drivers/net/can/sja1000/plx_pci.c | 12 ------- drivers/net/can/usb/peak_usb/pcan_usb.c | 2 -- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 3 -- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 2 -- drivers/net/hamradio/scc.c | 1 - drivers/net/wireless/admtek/adm8211.c | 1 - drivers/net/wireless/ath/ath5k/base.c | 1 - drivers/net/wireless/ath/ath9k/hw.c | 1 - drivers/net/wireless/ath/ath9k/init.c | 1 - drivers/net/wireless/atmel/atmel.c | 1 - drivers/net/wireless/atmel/atmel_cs.c | 1 - drivers/net/wireless/atmel/atmel_pci.c | 1 - .../broadcom/brcm80211/brcmsmac/mac80211_if.c | 1 - .../broadcom/brcm80211/brcmutil/utils.c | 1 - drivers/net/wireless/cisco/airo.c | 1 - drivers/net/wireless/cisco/airo_cs.c | 1 - .../net/wireless/intersil/hostap/hostap_cs.c | 1 - .../net/wireless/intersil/hostap/hostap_pci.c | 1 - .../net/wireless/intersil/hostap/hostap_plx.c | 1 - .../net/wireless/ralink/rt2x00/rt2400pci.c | 1 - .../net/wireless/ralink/rt2x00/rt2500pci.c | 1 - .../net/wireless/ralink/rt2x00/rt2500usb.c | 1 - .../net/wireless/ralink/rt2x00/rt2800pci.c | 1 - .../net/wireless/ralink/rt2x00/rt2800usb.c | 1 - drivers/net/wireless/ralink/rt2x00/rt61pci.c | 2 -- drivers/net/wireless/ralink/rt2x00/rt73usb.c | 1 - drivers/net/wireless/rsi/rsi_91x_main.c | 1 - drivers/net/wireless/rsi/rsi_91x_sdio.c | 1 - drivers/net/wireless/rsi/rsi_91x_usb.c | 1 - drivers/parport/parport_amiga.c | 1 - drivers/parport/parport_atari.c | 1 - drivers/parport/parport_gsc.c | 1 - drivers/parport/parport_mfc3.c | 1 - drivers/parport/parport_sunbpp.c | 1 - drivers/s390/block/dasd.c | 1 - drivers/sbus/char/display7seg.c | 1 - drivers/scsi/hpsa.c | 1 - drivers/scsi/pcmcia/nsp_cs.c | 1 - drivers/scsi/smartpqi/smartpqi_init.c | 1 - drivers/sh/maple/maple.c | 1 - drivers/staging/comedi/drivers/vmk80xx.c | 1 - drivers/tee/optee/core.c | 1 - drivers/tty/serial/icom.c | 2 -- drivers/tty/serial/jsm/jsm_driver.c | 1 - drivers/usb/misc/ldusb.c | 1 - drivers/watchdog/cpu5wdt.c | 1 - drivers/watchdog/cpwd.c | 1 - drivers/watchdog/riowd.c | 1 - include/linux/module.h | 3 -- net/batman-adv/main.c | 1 - sound/drivers/aloop.c | 1 - sound/drivers/dummy.c | 1 - sound/drivers/mtpav.c | 1 - sound/drivers/mts64.c | 1 - sound/drivers/pcsp/pcsp.c | 1 - sound/drivers/portman2x4.c | 1 - sound/drivers/serial-u16550.c | 1 - sound/drivers/virmidi.c | 1 - sound/isa/ad1816a/ad1816a.c | 7 ---- sound/isa/ad1848/ad1848.c | 3 -- sound/isa/als100.c | 11 ------ sound/isa/azt2320.c | 5 --- sound/isa/cmi8330.c | 1 - sound/isa/cs423x/cs4231.c | 1 - sound/isa/cs423x/cs4236.c | 34 ------------------- sound/isa/es1688/es1688.c | 5 --- sound/isa/es18xx.c | 10 +----- sound/isa/gus/gusclassic.c | 1 - sound/isa/gus/gusextreme.c | 1 - sound/isa/gus/gusmax.c | 1 - sound/isa/gus/interwave.c | 6 ---- sound/isa/opl3sa2.c | 5 --- sound/isa/opti9xx/miro.c | 3 -- sound/isa/opti9xx/opti92x-ad1848.c | 6 ---- sound/isa/sb/jazz16.c | 3 -- sound/isa/sb/sb16.c | 8 ----- sound/isa/sb/sb8.c | 1 - sound/isa/sc6000.c | 3 -- sound/isa/wavefront/wavefront.c | 1 - sound/mips/sgio2audio.c | 1 - sound/pci/ad1889.c | 1 - sound/pci/ali5451/ali5451.c | 1 - sound/pci/als300.c | 1 - sound/pci/als4000.c | 1 - sound/pci/atiixp.c | 1 - sound/pci/atiixp_modem.c | 1 - sound/pci/au88x0/au88x0.c | 2 -- sound/pci/azt3328.c | 1 - sound/pci/bt87x.c | 2 -- sound/pci/ca0106/ca0106_main.c | 1 - sound/pci/cmipci.c | 4 --- sound/pci/cs4281.c | 1 - sound/pci/cs46xx/cs46xx.c | 7 ---- sound/pci/cs5535audio/cs5535audio.c | 1 - sound/pci/ctxfi/xfi.c | 1 - sound/pci/echoaudio/echoaudio.c | 1 - sound/pci/emu10k1/emu10k1.c | 2 -- sound/pci/emu10k1/emu10k1x.c | 1 - sound/pci/ens1370.c | 8 ----- sound/pci/es1938.c | 4 --- sound/pci/es1968.c | 4 --- sound/pci/fm801.c | 2 -- sound/pci/hda/hda_intel.c | 34 ------------------- sound/pci/ice1712/ice1712.c | 6 ---- sound/pci/ice1712/ice1724.c | 19 ----------- sound/pci/intel8x0.c | 23 ------------- sound/pci/intel8x0m.c | 15 -------- sound/pci/korg1212/korg1212.c | 1 - sound/pci/lola/lola.c | 1 - sound/pci/lx6464es/lx6464es.c | 2 -- sound/pci/maestro3.c | 5 --- sound/pci/mixart/mixart.c | 1 - sound/pci/nm256/nm256.c | 2 -- sound/pci/oxygen/oxygen.c | 3 -- sound/pci/oxygen/se6x.c | 1 - sound/pci/oxygen/virtuoso.c | 1 - sound/pci/pcxhr/pcxhr.c | 1 - sound/pci/riptide/riptide.c | 1 - sound/pci/rme32.c | 1 - sound/pci/rme96.c | 5 --- sound/pci/rme9652/hdsp.c | 3 -- sound/pci/rme9652/hdspm.c | 1 - sound/pci/rme9652/rme9652.c | 2 -- sound/pci/sis7019.c | 1 - sound/pci/sonicvibes.c | 1 - sound/pci/trident/trident.c | 12 ------- sound/pci/via82xx.c | 1 - sound/pci/via82xx_modem.c | 1 - sound/pci/vx222/vx222.c | 1 - sound/pci/ymfpci/ymfpci.c | 6 ---- sound/pcmcia/pdaudiocf/pdaudiocf.c | 1 - sound/pcmcia/vx/vxpocket.c | 4 --- sound/ppc/powermac.c | 1 - sound/sh/aica.c | 1 - sound/sh/sh_dac_audio.c | 1 - sound/sparc/amd7930.c | 1 - sound/sparc/cs4231.c | 1 - sound/sparc/dbri.c | 1 - sound/usb/6fire/chip.c | 1 - sound/usb/caiaq/device.c | 14 -------- sound/usb/card.c | 2 -- sound/usb/hiface/chip.c | 17 ---------- sound/usb/misc/ua101.c | 1 - sound/usb/usx2y/usbusx2y.c | 1 - sound/x86/intel_hdmi_audio.c | 1 - sound/xen/xen_snd_front.c | 1 - 175 files changed, 1 insertion(+), 480 deletions(-) diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index 1ac8578258afb..b42bfdab01a99 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -27,7 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sébastien Hinderer "); MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); -MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); static bool force; diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 9a70bee841251..495fd0a1f0402 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -100,8 +100,6 @@ static LIST_HEAD(fore200e_boards); MODULE_AUTHOR("Christophe Lizzi - credits to Uwe Dannowski and Heikki Vatiainen"); MODULE_DESCRIPTION("FORE Systems 200E-series ATM driver - version " FORE200E_VERSION); -MODULE_SUPPORTED_DEVICE("PCA-200E, SBA-200E"); - static const int fore200e_rx_buf_nbr[ BUFFER_SCHEME_NBR ][ BUFFER_MAGN_NBR ] = { { BUFFER_S1_NBR, BUFFER_L1_NBR }, diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 0b71292d9d5ab..4aa9683ee0c1a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -5091,7 +5091,6 @@ module_param(floppy, charp, 0); module_param(FLOPPY_IRQ, int, 0); module_param(FLOPPY_DMA, int, 0); MODULE_AUTHOR("Alain L. Knaff"); -MODULE_SUPPORTED_DEVICE("fd"); MODULE_LICENSE("GPL"); /* This doesn't actually get used other than for module information */ diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c index 3951f7b238404..bea1595f6432d 100644 --- a/drivers/bluetooth/btrsi.c +++ b/drivers/bluetooth/btrsi.c @@ -194,5 +194,4 @@ module_init(rsi_91x_bt_module_init); module_exit(rsi_91x_bt_module_exit); MODULE_AUTHOR("Redpine Signals Inc"); MODULE_DESCRIPTION("RSI BT driver"); -MODULE_SUPPORTED_DEVICE("RSI-BT"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 14b2d8034c51d..45ac7ab003ce3 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -81,9 +81,6 @@ MODULE_DESCRIPTION("Driver for Applicom Profibus card"); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(AC_MINOR); -MODULE_SUPPORTED_DEVICE("ac"); - - static struct applicom_board { unsigned long PhysIO; void __iomem *RamIO; diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index aff0a8e44fffc..776abbfd85d66 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -64,7 +64,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jonathan Buzzard "); MODULE_DESCRIPTION("Toshiba laptop SMM driver"); -MODULE_SUPPORTED_DEVICE("toshiba"); static DEFINE_MUTEX(tosh_mutex); static int tosh_fn; diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 430dc69750048..da8963a9f044c 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -26,7 +26,6 @@ MODULE_AUTHOR("Vojtech Pavlik "); MODULE_DESCRIPTION("Joystick device interfaces"); -MODULE_SUPPORTED_DEVICE("input/js"); MODULE_LICENSE("GPL"); #define JOYDEV_MINOR_BASE 0 diff --git a/drivers/media/firewire/firedtv-fw.c b/drivers/media/firewire/firedtv-fw.c index 8a8585261bb80..5f6e97a8d1c06 100644 --- a/drivers/media/firewire/firedtv-fw.c +++ b/drivers/media/firewire/firedtv-fw.c @@ -430,4 +430,3 @@ MODULE_AUTHOR("Andreas Monitzer "); MODULE_AUTHOR("Ben Backx "); MODULE_DESCRIPTION("FireDTV DVB Driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("FireDTV DVB"); diff --git a/drivers/media/pci/cx18/cx18-alsa-main.c b/drivers/media/pci/cx18/cx18-alsa-main.c index 692b95a685d1a..9a82e68303b61 100644 --- a/drivers/media/pci/cx18/cx18-alsa-main.c +++ b/drivers/media/pci/cx18/cx18-alsa-main.c @@ -41,7 +41,6 @@ MODULE_PARM_DESC(debug, MODULE_AUTHOR("Andy Walls"); MODULE_DESCRIPTION("CX23418 ALSA Interface"); -MODULE_SUPPORTED_DEVICE("CX23418 MPEG2 encoder"); MODULE_LICENSE("GPL"); MODULE_VERSION(CX18_VERSION); diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index 95aed00f353bd..f2440eb38820b 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -232,7 +232,6 @@ MODULE_PARM_DESC(cx18_first_minor, MODULE_AUTHOR("Hans Verkuil"); MODULE_DESCRIPTION("CX23418 driver"); -MODULE_SUPPORTED_DEVICE("CX23418 MPEG2 encoder"); MODULE_LICENSE("GPL"); MODULE_VERSION(CX18_VERSION); diff --git a/drivers/media/pci/cx25821/cx25821-alsa.c b/drivers/media/pci/cx25821/cx25821-alsa.c index 608fbaf0f659a..8797d85a6b0a2 100644 --- a/drivers/media/pci/cx25821/cx25821-alsa.c +++ b/drivers/media/pci/cx25821/cx25821-alsa.c @@ -104,7 +104,6 @@ MODULE_PARM_DESC(index, "Index value for cx25821 capture interface(s)."); MODULE_DESCRIPTION("ALSA driver module for cx25821 based capture cards"); MODULE_AUTHOR("Hiep Huynh"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Conexant,25821}"); /* "{{Conexant,23881}," */ static unsigned int debug; module_param(debug, int, 0644); diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c index 95e0cbb1277dc..c83814c052d36 100644 --- a/drivers/media/pci/cx88/cx88-alsa.c +++ b/drivers/media/pci/cx88/cx88-alsa.c @@ -98,7 +98,6 @@ MODULE_AUTHOR("Mauro Carvalho Chehab "); MODULE_LICENSE("GPL v2"); MODULE_VERSION(CX88_VERSION); -MODULE_SUPPORTED_DEVICE("{{Conexant,23881},{{Conexant,23882},{{Conexant,23883}"); static unsigned int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "enable debug messages"); diff --git a/drivers/media/pci/ivtv/ivtv-alsa-main.c b/drivers/media/pci/ivtv/ivtv-alsa-main.c index 39029b8e12c9d..4cefdb2e4d403 100644 --- a/drivers/media/pci/ivtv/ivtv-alsa-main.c +++ b/drivers/media/pci/ivtv/ivtv-alsa-main.c @@ -38,7 +38,6 @@ MODULE_PARM_DESC(index, MODULE_AUTHOR("Andy Walls"); MODULE_DESCRIPTION("CX23415/CX23416 ALSA Interface"); -MODULE_SUPPORTED_DEVICE("CX23415/CX23416 MPEG2 encoder"); MODULE_LICENSE("GPL"); MODULE_VERSION(IVTV_VERSION); diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index 6e448cb3b51cb..942b8c266f504 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -275,9 +275,6 @@ MODULE_PARM_DESC(ivtv_first_minor, "Set device node number assigned to first car MODULE_AUTHOR("Kevin Thayer, Chris Kennedy, Hans Verkuil"); MODULE_DESCRIPTION("CX23415/CX23416 driver"); -MODULE_SUPPORTED_DEVICE - ("CX23415/CX23416 MPEG2 encoder (WinTV PVR-150/250/350/500,\n" - "\t\t\tYuan MPG series and similar)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IVTV_VERSION); diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index 336df65c8af11..524912f20d9f2 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -1269,6 +1269,5 @@ late_initcall_sync(sta2x11_vip_init_module); MODULE_DESCRIPTION("STA2X11 Video Input Port driver"); MODULE_AUTHOR("Wind River"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("sta2x11 video input"); MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, sta2x11_vip_pci_tbl); diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c index 0514be6153df4..e392b3efe3633 100644 --- a/drivers/media/platform/atmel/atmel-isi.c +++ b/drivers/media/platform/atmel/atmel-isi.c @@ -1363,4 +1363,3 @@ module_platform_driver(atmel_isi_driver); MODULE_AUTHOR("Josh Wu "); MODULE_DESCRIPTION("The V4L2 driver for Atmel Linux"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("video"); diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c index 0b78fecfd2a83..61d9885765f47 100644 --- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c +++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c @@ -330,4 +330,3 @@ module_platform_driver(atmel_isc_driver); MODULE_AUTHOR("Songjun Wu"); MODULE_DESCRIPTION("The V4L2 driver for Atmel-ISC"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("video"); diff --git a/drivers/media/platform/marvell-ccic/cafe-driver.c b/drivers/media/platform/marvell-ccic/cafe-driver.c index 9c94a8b58b7c3..baac86f3d1530 100644 --- a/drivers/media/platform/marvell-ccic/cafe-driver.c +++ b/drivers/media/platform/marvell-ccic/cafe-driver.c @@ -44,10 +44,6 @@ MODULE_AUTHOR("Jonathan Corbet "); MODULE_DESCRIPTION("Marvell 88ALP01 CMOS Camera Controller driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("Video"); - - - struct cafe_camera { int registered; /* Fully initialized? */ diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index bbcc2254fa2e6..d9b4ad0abf0c0 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -2149,4 +2149,3 @@ MODULE_AUTHOR("Yannick Fertre "); MODULE_AUTHOR("Hugues Fruchet "); MODULE_DESCRIPTION("STMicroelectronics STM32 Digital Camera Memory Interface driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("video"); diff --git a/drivers/media/usb/cpia2/cpia2_v4l.c b/drivers/media/usb/cpia2/cpia2_v4l.c index e488e7870f42c..69d5c628a7978 100644 --- a/drivers/media/usb/cpia2/cpia2_v4l.c +++ b/drivers/media/usb/cpia2/cpia2_v4l.c @@ -56,7 +56,6 @@ MODULE_PARM_DESC(flicker_mode, "Flicker frequency (0 (disabled), " __stringify(5 MODULE_AUTHOR("Steve Miller (STMicroelectronics) "); MODULE_DESCRIPTION("V4L-driver for STMicroelectronics CPiA2 based cameras"); -MODULE_SUPPORTED_DEVICE("video"); MODULE_LICENSE("GPL"); MODULE_VERSION(CPIA_VERSION); diff --git a/drivers/media/usb/tm6000/tm6000-alsa.c b/drivers/media/usb/tm6000/tm6000-alsa.c index 3a2df36ef1db1..a19a46770c2b1 100644 --- a/drivers/media/usb/tm6000/tm6000-alsa.c +++ b/drivers/media/usb/tm6000/tm6000-alsa.c @@ -51,7 +51,6 @@ MODULE_PARM_DESC(index, "Index value for tm6000x capture interface(s)."); MODULE_DESCRIPTION("ALSA driver module for tm5600/tm6000/tm6010 based TV cards"); MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{Trident,tm5600},{{Trident,tm6000},{{Trident,tm6010}"); static unsigned int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "enable debug messages"); diff --git a/drivers/media/usb/tm6000/tm6000-dvb.c b/drivers/media/usb/tm6000/tm6000-dvb.c index 293a460f4616c..4990fa886d7a7 100644 --- a/drivers/media/usb/tm6000/tm6000-dvb.c +++ b/drivers/media/usb/tm6000/tm6000-dvb.c @@ -23,8 +23,6 @@ MODULE_DESCRIPTION("DVB driver extension module for tm5600/6000/6010 based TV ca MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Trident, tm5600},{{Trident, tm6000},{{Trident, tm6010}"); - static int debug; module_param(debug, int, 0644); diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c index eb72582932564..f9cfb084c0291 100644 --- a/drivers/mtd/maps/sun_uflash.c +++ b/drivers/mtd/maps/sun_uflash.c @@ -32,7 +32,6 @@ MODULE_AUTHOR("Eric Brower "); MODULE_DESCRIPTION("User-programmable flash device on Sun Microsystems boardsets"); -MODULE_SUPPORTED_DEVICE(DRIVER_NAME); MODULE_LICENSE("GPL"); MODULE_VERSION("2.1"); diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index 0df1cdfa68357..1df3c4b54f03c 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -21,7 +21,6 @@ MODULE_AUTHOR("Stephane Grosjean "); MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards"); -MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe/M.2 FD CAN cards"); MODULE_LICENSE("GPL v2"); #define PCIEFD_DRV_NAME "peak_pciefd" diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c index 6f88c99329201..4ab91759a5c64 100644 --- a/drivers/net/can/sja1000/ems_pci.c +++ b/drivers/net/can/sja1000/ems_pci.c @@ -21,7 +21,6 @@ MODULE_AUTHOR("Sebastian Haas "); MODULE_DESCRIPTION("Socket-CAN driver for EMS CPC-PCI/PCIe/104P CAN cards"); -MODULE_SUPPORTED_DEVICE("EMS CPC-PCI/PCIe/104P CAN card"); MODULE_LICENSE("GPL v2"); #define EMS_PCI_V1_MAX_CHAN 2 diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index 770304eaef950..e21b169c14c01 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -21,7 +21,6 @@ MODULE_AUTHOR("Markus Plessing "); MODULE_DESCRIPTION("Socket-CAN driver for EMS CPC-CARD cards"); -MODULE_SUPPORTED_DEVICE("EMS CPC-CARD CAN card"); MODULE_LICENSE("GPL v2"); #define EMS_PCMCIA_MAX_CHAN 2 diff --git a/drivers/net/can/sja1000/kvaser_pci.c b/drivers/net/can/sja1000/kvaser_pci.c index 0ea6b711c07be..95fe9ee1ce326 100644 --- a/drivers/net/can/sja1000/kvaser_pci.c +++ b/drivers/net/can/sja1000/kvaser_pci.c @@ -33,7 +33,6 @@ MODULE_AUTHOR("Per Dalen "); MODULE_DESCRIPTION("Socket-CAN driver for KVASER PCAN PCI cards"); -MODULE_SUPPORTED_DEVICE("KVASER PCAN PCI CAN card"); MODULE_LICENSE("GPL v2"); #define MAX_NO_OF_CHANNELS 4 /* max no of channels on a single card */ diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 4713921bd511e..84eac8cb86869 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -24,8 +24,6 @@ MODULE_AUTHOR("Stephane Grosjean "); MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCI family cards"); -MODULE_SUPPORTED_DEVICE("PEAK PCAN PCI/PCIe/PCIeC miniPCI CAN cards"); -MODULE_SUPPORTED_DEVICE("PEAK PCAN miniPCIe/cPCI PC/104+ PCI/104e CAN Cards"); MODULE_LICENSE("GPL v2"); #define DRV_NAME "peak_pci" diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index cf951a7830781..131a084c35351 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -22,7 +22,6 @@ MODULE_AUTHOR("Stephane Grosjean "); MODULE_DESCRIPTION("CAN driver for PEAK-System PCAN-PC Cards"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("PEAK PCAN-PC Card"); /* PEAK-System PCMCIA driver name */ #define PCC_NAME "peak_pcmcia" diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index 85679588ef730..5de1ebb0c6f04 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -25,18 +25,6 @@ MODULE_AUTHOR("Pavel Cheblakov "); MODULE_DESCRIPTION("Socket-CAN driver for PLX90xx PCI-bridge cards with " "the SJA1000 chips"); -MODULE_SUPPORTED_DEVICE("Adlink PCI-7841/cPCI-7841, " - "Adlink PCI-7841/cPCI-7841 SE, " - "Marathon CAN-bus-PCI, " - "Marathon CAN-bus-PCIe, " - "TEWS TECHNOLOGIES TPMC810, " - "esd CAN-PCI/CPCI/PCI104/200, " - "esd CAN-PCI/PMC/266, " - "esd CAN-PCIe/2000, " - "Connect Tech Inc. CANpro/104-Plus Opto (CRG001), " - "IXXAT PC-I 04/PCI, " - "ELCUS CAN-200-PCI, " - "ASEM DUAL CAN-RAW") MODULE_LICENSE("GPL v2"); #define PLX_PCI_MAX_CHAN 2 diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index e6c1e5d33924e..e393e8457d773 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -18,8 +18,6 @@ #include "pcan_usb_core.h" -MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB adapter"); - /* PCAN-USB Endpoints */ #define PCAN_USB_EP_CMDOUT 1 #define PCAN_USB_EP_CMDIN (PCAN_USB_EP_CMDOUT | USB_DIR_IN) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index f347ecc79aef2..bae078579c0d6 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -16,9 +16,6 @@ #include "pcan_usb_core.h" #include "pcan_usb_pro.h" -MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB FD adapter"); -MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB Pro FD adapter"); - #define PCAN_USBPROFD_CHANNEL_COUNT 2 #define PCAN_USBFD_CHANNEL_COUNT 1 diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index 275087c396025..18fa180ecc81f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -17,8 +17,6 @@ #include "pcan_usb_core.h" #include "pcan_usb_pro.h" -MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB Pro adapter"); - #define PCAN_USBPRO_CHANNEL_COUNT 2 /* PCAN-USB Pro adapter internal clock (MHz) */ diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 36eeb80406f2b..4690c6a590548 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -2167,7 +2167,6 @@ static void __exit scc_cleanup_driver(void) MODULE_AUTHOR("Joerg Reuter "); MODULE_DESCRIPTION("AX.25 Device Driver for Z8530 based HDLC cards"); -MODULE_SUPPORTED_DEVICE("Z8530 based SCC cards for Amateur Radio"); MODULE_LICENSE("GPL"); module_init(scc_init_driver); module_exit(scc_cleanup_driver); diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c index c41e72508d3db..2db9c948c0fc3 100644 --- a/drivers/net/wireless/admtek/adm8211.c +++ b/drivers/net/wireless/admtek/adm8211.c @@ -28,7 +28,6 @@ MODULE_AUTHOR("Michael Wu "); MODULE_AUTHOR("Jouni Malinen "); MODULE_DESCRIPTION("Driver for IEEE 802.11b wireless cards based on ADMtek ADM8211"); -MODULE_SUPPORTED_DEVICE("ADM8211"); MODULE_LICENSE("GPL"); static unsigned int tx_ring_size __read_mostly = 16; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 4c6e57f9976de..cef17f33c69ea 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -90,7 +90,6 @@ MODULE_PARM_DESC(no_hw_rfkill_switch, "Ignore the GPIO RFKill switch state"); MODULE_AUTHOR("Jiri Slaby"); MODULE_AUTHOR("Nick Kossifidis"); MODULE_DESCRIPTION("Support for 5xxx series of Atheros 802.11 wireless LAN cards."); -MODULE_SUPPORTED_DEVICE("Atheros 5xxx WLAN cards"); MODULE_LICENSE("Dual BSD/GPL"); static int ath5k_init(struct ieee80211_hw *hw); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index b66eeb5772724..5abc2a5526ecf 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -34,7 +34,6 @@ static bool ath9k_hw_set_reset_reg(struct ath_hw *ah, u32 type); MODULE_AUTHOR("Atheros Communications"); MODULE_DESCRIPTION("Support for Atheros 802.11n wireless LAN cards."); -MODULE_SUPPORTED_DEVICE("Atheros 802.11n WLAN cards"); MODULE_LICENSE("Dual BSD/GPL"); static void ath9k_hw_set_clockrate(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 42a208787f5af..01f9c26f9bf37 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -37,7 +37,6 @@ static char *dev_info = "ath9k"; MODULE_AUTHOR("Atheros Communications"); MODULE_DESCRIPTION("Support for Atheros 802.11n wireless LAN cards."); -MODULE_SUPPORTED_DEVICE("Atheros 802.11n WLAN cards"); MODULE_LICENSE("Dual BSD/GPL"); static unsigned int ath9k_debug = ATH_DBG_DEFAULT; diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index 707fe66727f8d..febce4e8b3dd3 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -75,7 +75,6 @@ MODULE_AUTHOR("Simon Kelley"); MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards."); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("Atmel at76c50x wireless cards"); /* The name of the firmware file to be loaded over-rides any automatic selection */ diff --git a/drivers/net/wireless/atmel/atmel_cs.c b/drivers/net/wireless/atmel/atmel_cs.c index 368eebefa741e..453bb84cb3386 100644 --- a/drivers/net/wireless/atmel/atmel_cs.c +++ b/drivers/net/wireless/atmel/atmel_cs.c @@ -57,7 +57,6 @@ MODULE_AUTHOR("Simon Kelley"); MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards."); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("Atmel at76c50x PCMCIA cards"); /*====================================================================*/ diff --git a/drivers/net/wireless/atmel/atmel_pci.c b/drivers/net/wireless/atmel/atmel_pci.c index 47f7ccb32414e..f428dc79d9161 100644 --- a/drivers/net/wireless/atmel/atmel_pci.c +++ b/drivers/net/wireless/atmel/atmel_pci.c @@ -16,7 +16,6 @@ MODULE_AUTHOR("Simon Kelley"); MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards."); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("Atmel at76c506 PCI wireless cards"); static const struct pci_device_id card_ids[] = { { 0x1114, 0x0506, PCI_ANY_ID, PCI_ANY_ID }, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 818e523f6025d..39f3af2d0439b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -87,7 +87,6 @@ static int n_adapters_found; MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Broadcom 802.11n wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Broadcom 802.11n WLAN cards"); MODULE_LICENSE("Dual BSD/GPL"); /* This needs to be adjusted when brcms_firmwares changes */ MODULE_FIRMWARE("brcm/bcm43xx-0.fw"); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c index 4c84c3001c3fd..e87e68cc46e22 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c @@ -12,7 +12,6 @@ MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Broadcom 802.11n wireless LAN driver utilities."); -MODULE_SUPPORTED_DEVICE("Broadcom 802.11n WLAN cards"); MODULE_LICENSE("Dual BSD/GPL"); struct sk_buff *brcmu_pkt_buf_get_skb(uint len) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index e35e1380ae433..60db38c389606 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -251,7 +251,6 @@ MODULE_AUTHOR("Benjamin Reed"); MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet cards. " "Direct support for ISA/PCI/MPI cards and support for PCMCIA when used with airo_cs."); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340/350"); module_param_hw_array(io, int, ioport, NULL, 0); module_param_hw_array(irq, int, irq, NULL, 0); module_param_array(rates, int, NULL, 0); diff --git a/drivers/net/wireless/cisco/airo_cs.c b/drivers/net/wireless/cisco/airo_cs.c index 3718f958c0fca..fcfe4c6d62f04 100644 --- a/drivers/net/wireless/cisco/airo_cs.c +++ b/drivers/net/wireless/cisco/airo_cs.c @@ -47,7 +47,6 @@ MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet " "cards. This is the module that links the PCMCIA card " "with the airo module."); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340 PCMCIA cards"); /*====================================================================*/ diff --git a/drivers/net/wireless/intersil/hostap/hostap_cs.c b/drivers/net/wireless/intersil/hostap/hostap_cs.c index 1a748670835ac..ec7db2badc40e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_cs.c +++ b/drivers/net/wireless/intersil/hostap/hostap_cs.c @@ -26,7 +26,6 @@ static char *dev_info = "hostap_cs"; MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Support for Intersil Prism2-based 802.11 wireless LAN " "cards (PC Card)."); -MODULE_SUPPORTED_DEVICE("Intersil Prism2-based WLAN cards (PC Card)"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/intersil/hostap/hostap_pci.c b/drivers/net/wireless/intersil/hostap/hostap_pci.c index 101887e6bd0fb..52d77506effd2 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_pci.c +++ b/drivers/net/wireless/intersil/hostap/hostap_pci.c @@ -27,7 +27,6 @@ static char *dev_info = "hostap_pci"; MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Support for Intersil Prism2.5-based 802.11 wireless LAN " "PCI cards."); -MODULE_SUPPORTED_DEVICE("Intersil Prism2.5-based WLAN PCI cards"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/intersil/hostap/hostap_plx.c b/drivers/net/wireless/intersil/hostap/hostap_plx.c index 841cfc68ce840..58247290fcbc4 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_plx.c +++ b/drivers/net/wireless/intersil/hostap/hostap_plx.c @@ -30,7 +30,6 @@ static char *dev_info = "hostap_plx"; MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Support for Intersil Prism2-based 802.11 wireless LAN " "cards (PLX)."); -MODULE_SUPPORTED_DEVICE("Intersil Prism2-based WLAN cards (PLX)"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c index 8f860c14da58b..dec6ffdf07c42 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c @@ -1821,7 +1821,6 @@ static const struct pci_device_id rt2400pci_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT2400 PCI & PCMCIA Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2460 PCI & PCMCIA chipset based cards"); MODULE_DEVICE_TABLE(pci, rt2400pci_device_table); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c index e940443c52ad8..8faa0a80e73a6 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c @@ -2119,7 +2119,6 @@ static const struct pci_device_id rt2500pci_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT2500 PCI & PCMCIA Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2560 PCI & PCMCIA chipset based cards"); MODULE_DEVICE_TABLE(pci, rt2500pci_device_table); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c index fce05fc88aaf9..bb5ed66306458 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c @@ -1956,7 +1956,6 @@ static const struct usb_device_id rt2500usb_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT2500 USB Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2570 USB chipset based cards"); MODULE_DEVICE_TABLE(usb, rt2500usb_device_table); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c index 9a33baaa61843..1fde0e767ce39 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c @@ -439,7 +439,6 @@ static const struct pci_device_id rt2800pci_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT2800 PCI & PCMCIA Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2860 PCI & PCMCIA chipset based cards"); MODULE_FIRMWARE(FIRMWARE_RT2860); MODULE_DEVICE_TABLE(pci, rt2800pci_device_table); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c index 36ac18ca8082e..b5c67f656cfd5 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c @@ -1248,7 +1248,6 @@ static const struct usb_device_id rt2800usb_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT2800 USB Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2870 USB chipset based cards"); MODULE_DEVICE_TABLE(usb, rt2800usb_device_table); MODULE_FIRMWARE(FIRMWARE_RT2870); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c index 02da5dd37ddd7..82cfc2aadc2b3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c @@ -2993,8 +2993,6 @@ static const struct pci_device_id rt61pci_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT61 PCI & PCMCIA Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2561, RT2561s & RT2661 " - "PCI & PCMCIA chipset based cards"); MODULE_DEVICE_TABLE(pci, rt61pci_device_table); MODULE_FIRMWARE(FIRMWARE_RT2561); MODULE_FIRMWARE(FIRMWARE_RT2561s); diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c index e69793773d870..5ff2c740c3ea0 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c @@ -2513,7 +2513,6 @@ static const struct usb_device_id rt73usb_device_table[] = { MODULE_AUTHOR(DRV_PROJECT); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION("Ralink RT73 USB Wireless LAN driver."); -MODULE_SUPPORTED_DEVICE("Ralink RT2571W & RT2671 USB chipset based cards"); MODULE_DEVICE_TABLE(usb, rt73usb_device_table); MODULE_FIRMWARE(FIRMWARE_RT2571); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 9a3d2439a8e7a..d98483298555c 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -441,6 +441,5 @@ module_init(rsi_91x_hal_module_init); module_exit(rsi_91x_hal_module_exit); MODULE_AUTHOR("Redpine Signals Inc"); MODULE_DESCRIPTION("Station driver for RSI 91x devices"); -MODULE_SUPPORTED_DEVICE("RSI-91x"); MODULE_VERSION("0.1"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 592e9dadcb556..fe0287b22a254 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1571,7 +1571,6 @@ module_exit(rsi_module_exit); MODULE_AUTHOR("Redpine Signals Inc"); MODULE_DESCRIPTION("Common SDIO layer for RSI drivers"); -MODULE_SUPPORTED_DEVICE("RSI-91x"); MODULE_DEVICE_TABLE(sdio, rsi_dev_table); MODULE_FIRMWARE(FIRMWARE_RSI9113); MODULE_VERSION("0.1"); diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index a4a533c2a7838..3fbe2a3c14550 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -928,7 +928,6 @@ module_usb_driver(rsi_driver); MODULE_AUTHOR("Redpine Signals Inc"); MODULE_DESCRIPTION("Common USB layer for RSI drivers"); -MODULE_SUPPORTED_DEVICE("RSI-91x"); MODULE_DEVICE_TABLE(usb, rsi_dev_table); MODULE_FIRMWARE(FIRMWARE_RSI9113); MODULE_VERSION("0.1"); diff --git a/drivers/parport/parport_amiga.c b/drivers/parport/parport_amiga.c index 1e88bcfe0d7b1..84d5701d606ce 100644 --- a/drivers/parport/parport_amiga.c +++ b/drivers/parport/parport_amiga.c @@ -241,6 +241,5 @@ module_platform_driver_probe(amiga_parallel_driver, amiga_parallel_probe); MODULE_AUTHOR("Joerg Dorchain "); MODULE_DESCRIPTION("Parport Driver for Amiga builtin Port"); -MODULE_SUPPORTED_DEVICE("Amiga builtin Parallel Port"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:amiga-parallel"); diff --git a/drivers/parport/parport_atari.c b/drivers/parport/parport_atari.c index 2ff0fe053e6ee..1623f010cdcc6 100644 --- a/drivers/parport/parport_atari.c +++ b/drivers/parport/parport_atari.c @@ -218,7 +218,6 @@ static void __exit parport_atari_exit(void) MODULE_AUTHOR("Andreas Schwab"); MODULE_DESCRIPTION("Parport Driver for Atari builtin Port"); -MODULE_SUPPORTED_DEVICE("Atari builtin Parallel Port"); MODULE_LICENSE("GPL"); module_init(parport_atari_init) diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c index 9228e8f903092..1e43b3f399a83 100644 --- a/drivers/parport/parport_gsc.c +++ b/drivers/parport/parport_gsc.c @@ -41,7 +41,6 @@ MODULE_AUTHOR("Helge Deller "); MODULE_DESCRIPTION("HP-PARISC PC-style parallel port driver"); -MODULE_SUPPORTED_DEVICE("integrated PC-style parallel port"); MODULE_LICENSE("GPL"); diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c index d6bbe84463014..f4d0da741e856 100644 --- a/drivers/parport/parport_mfc3.c +++ b/drivers/parport/parport_mfc3.c @@ -359,7 +359,6 @@ static void __exit parport_mfc3_exit(void) MODULE_AUTHOR("Joerg Dorchain "); MODULE_DESCRIPTION("Parport Driver for Multiface 3 expansion cards Parallel Port"); -MODULE_SUPPORTED_DEVICE("Multiface 3 Parallel Port"); MODULE_LICENSE("GPL"); module_init(parport_mfc3_init) diff --git a/drivers/parport/parport_sunbpp.c b/drivers/parport/parport_sunbpp.c index e840c1b5ab908..865fc41dbb6c4 100644 --- a/drivers/parport/parport_sunbpp.c +++ b/drivers/parport/parport_sunbpp.c @@ -377,6 +377,5 @@ module_platform_driver(bpp_sbus_driver); MODULE_AUTHOR("Derrick J Brashear"); MODULE_DESCRIPTION("Parport Driver for Sparc bidirectional Port"); -MODULE_SUPPORTED_DEVICE("Sparc Bidirectional Parallel Port"); MODULE_VERSION("2.0"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ba9ce4e0d30a3..3a945abf268c1 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -63,7 +63,6 @@ void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); MODULE_AUTHOR("Holger Smolinski "); MODULE_DESCRIPTION("Linux on S/390 DASD device driver," " Copyright IBM Corp. 2000"); -MODULE_SUPPORTED_DEVICE("dasd"); MODULE_LICENSE("GPL"); /* diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index 00e72b97d0b6e..d93595b39afa9 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -50,7 +50,6 @@ MODULE_PARM_DESC(sol_compat, MODULE_AUTHOR("Eric Brower "); MODULE_DESCRIPTION("7-Segment Display driver for Sun Microsystems CP1400/1500"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("d7s"); struct d7s { void __iomem *regs; diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 38369766511c4..f135a10f582bc 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -80,7 +80,6 @@ MODULE_AUTHOR("Hewlett-Packard Company"); MODULE_DESCRIPTION("Driver for HP Smart Array Controller version " \ HPSA_DRIVER_VERSION); -MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); MODULE_VERSION(HPSA_DRIVER_VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("cciss"); diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index 5d5f50d6a02d7..ac89002646a3e 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -55,7 +55,6 @@ MODULE_AUTHOR("YOKOTA Hiroshi "); MODULE_DESCRIPTION("WorkBit NinjaSCSI-3 / NinjaSCSI-32Bi(16bit) PCMCIA SCSI host adapter module"); -MODULE_SUPPORTED_DEVICE("sd,sr,sg,st"); MODULE_LICENSE("GPL"); #include "nsp_io.h" diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index c53f456fbd094..a1dacb6e993e7 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -48,7 +48,6 @@ MODULE_AUTHOR("Microsemi"); MODULE_DESCRIPTION("Driver for Microsemi Smart Family Controller version " DRIVER_VERSION); -MODULE_SUPPORTED_DEVICE("Microsemi Smart Family Controllers"); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL"); diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index e5d7fb81ad665..bd0fbcdbdefe9 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -30,7 +30,6 @@ MODULE_AUTHOR("Adrian McMenamin "); MODULE_DESCRIPTION("Maple bus driver for Dreamcast"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}"); static void maple_dma_handler(struct work_struct *work); static void maple_vblank_handler(struct work_struct *work); diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 7956abcbae22b..9f920819cd742 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -877,5 +877,4 @@ module_comedi_usb_driver(vmk80xx_driver, vmk80xx_usb_driver); MODULE_AUTHOR("Manuel Gebele "); MODULE_DESCRIPTION("Velleman USB Board Low-Level Driver"); -MODULE_SUPPORTED_DEVICE("K8055/K8061 aka VM110/VM140"); MODULE_LICENSE("GPL"); diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index cf4718c6d35da..319a1e701163b 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -747,7 +747,6 @@ module_platform_driver(optee_driver); MODULE_AUTHOR("Linaro"); MODULE_DESCRIPTION("OP-TEE driver"); -MODULE_SUPPORTED_DEVICE(""); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:optee"); diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c index 9a872750581c9..94af7a5ea4975 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -1639,8 +1639,6 @@ module_exit(icom_exit); MODULE_AUTHOR("Michael Anderson "); MODULE_DESCRIPTION("IBM iSeries Serial IOA driver"); -MODULE_SUPPORTED_DEVICE - ("IBM iSeries 2745, 2771, 2772, 2742, 2793 and 2805 Communications adapters"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("icom_call_setup.bin"); MODULE_FIRMWARE("icom_res_dce.bin"); diff --git a/drivers/tty/serial/jsm/jsm_driver.c b/drivers/tty/serial/jsm/jsm_driver.c index cd30da0ef0834..0ea799bf8dbb1 100644 --- a/drivers/tty/serial/jsm/jsm_driver.c +++ b/drivers/tty/serial/jsm/jsm_driver.c @@ -19,7 +19,6 @@ MODULE_AUTHOR("Digi International, https://www.digi.com"); MODULE_DESCRIPTION("Driver for the Digi International Neo and Classic PCI based product line"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("jsm"); #define JSM_DRIVER_NAME "jsm" #define NR_PORTS 32 diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 670e4d91e9cae..dcc88df72df42 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -117,7 +117,6 @@ MODULE_DEVICE_TABLE(usb, ld_usb_table); MODULE_AUTHOR("Michael Hund "); MODULE_DESCRIPTION("LD USB Driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("LD USB Devices"); /* All interrupt in transfers are collected in a ring buffer to * avoid racing conditions and get better performance of the driver. diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c index 9867a3a936df2..688b112e712ba 100644 --- a/drivers/watchdog/cpu5wdt.c +++ b/drivers/watchdog/cpu5wdt.c @@ -273,7 +273,6 @@ module_exit(cpu5wdt_exit_module); MODULE_AUTHOR("Heiko Ronsdorf "); MODULE_DESCRIPTION("sma cpu5 watchdog driver"); -MODULE_SUPPORTED_DEVICE("sma cpu5 watchdog"); MODULE_LICENSE("GPL"); module_param_hw(port, int, ioport, 0); diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 808eeb4779e4d..1eafe0b4d71cb 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -172,7 +172,6 @@ MODULE_PARM_DESC(wd2_timeout, "Default watchdog2 timeout in 1/10secs"); MODULE_AUTHOR("Eric Brower "); MODULE_DESCRIPTION("Hardware watchdog driver for Sun Microsystems CP1400/1500"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("watchdog"); static void cpwd_writew(u16 val, void __iomem *addr) { diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index 7008596a575f9..747e346ed06c5 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -46,7 +46,6 @@ MODULE_AUTHOR("David S. Miller "); MODULE_DESCRIPTION("Hardware watchdog driver for Sun RIO"); -MODULE_SUPPORTED_DEVICE("watchdog"); MODULE_LICENSE("GPL"); #define DRIVER_NAME "riowd" diff --git a/include/linux/module.h b/include/linux/module.h index 59f094fa6f744..da4b6fbe8ebe9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -30,9 +30,6 @@ #include #include -/* Not Yet Implemented */ -#define MODULE_SUPPORTED_DEVICE(name) - #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct modversion_info { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index e48f7ac8a854c..3ddd66e4c29ef 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -702,7 +702,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); -MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); MODULE_VERSION(BATADV_SOURCE_VERSION); MODULE_ALIAS_RTNL_LINK("batadv"); MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME); diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 8a24e5ae7cef4..52637180af336 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -33,7 +33,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("A loopback soundcard"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ALSA,Loopback soundcard}}"); #define MAX_PCM_SUBSTREAMS 8 diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 316c9afadefee..01a3eab50d7b9 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -25,7 +25,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Dummy soundcard (/dev/null)"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ALSA,Dummy soundcard}}"); #define MAX_PCM_DEVICES 4 #define MAX_PCM_SUBSTREAMS 128 diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index ce5fd17bd7208..df4b7f9cd50f6 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -53,7 +53,6 @@ MODULE_AUTHOR("Michael T. Mayers"); MODULE_DESCRIPTION("MOTU MidiTimePiece AV multiport MIDI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{MOTU,MidiTimePiece AV multiport MIDI}}"); // io resources #define MTPAV_IOBASE 0x378 diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c index 9c708b693cb33..322d530ab07b8 100644 --- a/sound/drivers/mts64.c +++ b/sound/drivers/mts64.c @@ -37,7 +37,6 @@ MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard."); MODULE_AUTHOR("Matthias Koenig "); MODULE_DESCRIPTION("ESI Miditerminal 4140"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESI,Miditerminal 4140}}"); /********************************************************************* * Chip specific diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index fd79e57c85cae..7689fa2f95314 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -22,7 +22,6 @@ MODULE_AUTHOR("Stas Sergeev "); MODULE_DESCRIPTION("PC-Speaker driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{PC-Speaker, pcsp}}"); MODULE_ALIAS("platform:pcspkr"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ diff --git a/sound/drivers/portman2x4.c b/sound/drivers/portman2x4.c index c876cf9b50053..2f4514ed47c5e 100644 --- a/sound/drivers/portman2x4.c +++ b/sound/drivers/portman2x4.c @@ -57,7 +57,6 @@ MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard."); MODULE_AUTHOR("Levent Guendogdu, Tobias Gehrig, Matthias Koenig"); MODULE_DESCRIPTION("Midiman Portman2x4"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Midiman,Portman2x4}}"); /********************************************************************* * Chip specific diff --git a/sound/drivers/serial-u16550.c b/sound/drivers/serial-u16550.c index 3947f084dd6be..6d5d1ca59ecfe 100644 --- a/sound/drivers/serial-u16550.c +++ b/sound/drivers/serial-u16550.c @@ -34,7 +34,6 @@ MODULE_DESCRIPTION("MIDI serial u16550"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ALSA, MIDI serial u16550}}"); #define SNDRV_SERIAL_SOUNDCANVAS 0 /* Roland Soundcanvas; F5 NN selects part */ #define SNDRV_SERIAL_MS124T 1 /* Midiator MS-124T */ diff --git a/sound/drivers/virmidi.c b/sound/drivers/virmidi.c index f1fb68b15498f..4206d93ab47e4 100644 --- a/sound/drivers/virmidi.c +++ b/sound/drivers/virmidi.c @@ -43,7 +43,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("Dummy soundcard for virtual rawmidi devices"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ALSA,Virtual rawmidi device}}"); #define MAX_MIDI_DEVICES 4 diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index ca18fe3ff8a59..f11af983b3b6e 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -22,13 +22,6 @@ MODULE_AUTHOR("Massimo Piccioni "); MODULE_DESCRIPTION("AD1816A, AD1815"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Highscreen,Sound-Boostar 16 3D}," - "{Analog Devices,AD1815}," - "{Analog Devices,AD1816A}," - "{TerraTec,Base 64}," - "{TerraTec,AudioSystem EWS64S}," - "{Aztech/Newcom SC-16 3D}," - "{Shark Predator ISA}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 1-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index 6f221eed44e21..edafb49797e74 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -22,9 +22,6 @@ MODULE_DESCRIPTION(CRD_NAME); MODULE_AUTHOR("Tugrul Galatali , Jaroslav Kysela "); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Analog Devices,AD1848}," - "{Analog Devices,AD1847}," - "{Crystal Semiconductors,CS4248}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/als100.c b/sound/isa/als100.c index 1085f5b013180..bacb7a1b930c0 100644 --- a/sound/isa/als100.c +++ b/sound/isa/als100.c @@ -26,17 +26,6 @@ #define PFX "als100: " MODULE_DESCRIPTION("Avance Logic ALS007/ALS1X0"); -MODULE_SUPPORTED_DEVICE("{{Diamond Technologies DT-019X}," - "{Avance Logic ALS-007}}" - "{{Avance Logic,ALS100 - PRO16PNP}," - "{Avance Logic,ALS110}," - "{Avance Logic,ALS120}," - "{Avance Logic,ALS200}," - "{3D Melody,MF1000}," - "{Digimate,3D Sound}," - "{Avance Logic,ALS120}," - "{RTL,RTL3000}}"); - MODULE_AUTHOR("Massimo Piccioni "); MODULE_LICENSE("GPL"); diff --git a/sound/isa/azt2320.c b/sound/isa/azt2320.c index 4ed52094fc8d8..867e9ae8f65aa 100644 --- a/sound/isa/azt2320.c +++ b/sound/isa/azt2320.c @@ -35,11 +35,6 @@ MODULE_AUTHOR("Massimo Piccioni "); MODULE_DESCRIPTION("Aztech Systems AZT2320"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Aztech Systems,PRO16V}," - "{Aztech Systems,AZT2320}," - "{Aztech Systems,AZT3300}," - "{Aztech Systems,AZT2320}," - "{Aztech Systems,AZT3000}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c index 19e258527d690..bc112df10fc57 100644 --- a/sound/isa/cmi8330.c +++ b/sound/isa/cmi8330.c @@ -51,7 +51,6 @@ MODULE_AUTHOR("George Talusan "); MODULE_DESCRIPTION("C-Media CMI8330/CMI8329"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8330,isapnp:{CMI0001,@@@0001,@X@0001}}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index c56cbc072918a..ec054b929214e 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -23,7 +23,6 @@ MODULE_DESCRIPTION(CRD_NAME); MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Crystal Semiconductors,CS4231}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 63fb0cb754d07..186d7d4db45ec 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -18,40 +18,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cirrus Logic CS4232-9"); -MODULE_SUPPORTED_DEVICE("{{Turtle Beach,TBS-2000}," - "{Turtle Beach,Tropez Plus}," - "{SIC CrystalWave 32}," - "{Hewlett Packard,Omnibook 5500}," - "{TerraTec,Maestro 32/96}," - "{Philips,PCA70PS}}," - "{{Crystal Semiconductors,CS4235}," - "{Crystal Semiconductors,CS4236}," - "{Crystal Semiconductors,CS4237}," - "{Crystal Semiconductors,CS4238}," - "{Crystal Semiconductors,CS4239}," - "{Acer,AW37}," - "{Acer,AW35/Pro}," - "{Crystal,3D}," - "{Crystal Computer,TidalWave128}," - "{Dell,Optiplex GX1}," - "{Dell,Workstation 400 sound}," - "{EliteGroup,P5TX-LA sound}," - "{Gallant,SC-70P}," - "{Gateway,E1000 Onboard CS4236B}," - "{Genius,Sound Maker 3DJ}," - "{Hewlett Packard,HP6330 sound}," - "{IBM,PC 300PL sound}," - "{IBM,Aptiva 2137 E24}," - "{IBM,IntelliStation M Pro}," - "{Intel,Marlin Spike Mobo CS4235}," - "{Intel PR440FX Onboard}," - "{Guillemot,MaxiSound 16 PnP}," - "{NewClear,3D}," - "{TerraTec,AudioSystem EWS64L/XL}," - "{Typhoon Soundsystem,CS4236B}," - "{Turtle Beach,Malibu}," - "{Unknown,Digital PC 5000 Onboard}}"); - MODULE_ALIAS("snd_cs4232"); #define IDENT "CS4232+" diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index 4a1f61f1a3310..750d4995634ff 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -26,11 +26,6 @@ MODULE_DESCRIPTION(CRD_NAME); MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESS,ES688 PnP AudioDrive,pnp:ESS0100}," - "{ESS,ES1688 PnP AudioDrive,pnp:ESS0102}," - "{ESS,ES688 AudioDrive,pnp:ESS6881}," - "{ESS,ES1688 AudioDrive,pnp:ESS1681}}"); - MODULE_ALIAS("snd_es968"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index 9beef80791775..375a4a6a4717f 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -1929,17 +1929,9 @@ static int snd_es18xx_mixer(struct snd_card *card) /* Card level */ -MODULE_AUTHOR("Christian Fischbach , Abramo Bagnara "); +MODULE_AUTHOR("Christian Fischbach , Abramo Bagnara "); MODULE_DESCRIPTION("ESS ES18xx AudioDrive"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESS,ES1868 PnP AudioDrive}," - "{ESS,ES1869 PnP AudioDrive}," - "{ESS,ES1878 PnP AudioDrive}," - "{ESS,ES1879 PnP AudioDrive}," - "{ESS,ES1887 PnP AudioDrive}," - "{ESS,ES1888 PnP AudioDrive}," - "{ESS,ES1887 AudioDrive}," - "{ESS,ES1888 AudioDrive}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c index 015f88a113524..0fba5d8fe84f5 100644 --- a/sound/isa/gus/gusclassic.c +++ b/sound/isa/gus/gusclassic.c @@ -23,7 +23,6 @@ MODULE_DESCRIPTION(CRD_NAME); MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Classic}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c index c9f31b4fb8873..da2b2ca6b7211 100644 --- a/sound/isa/gus/gusextreme.c +++ b/sound/isa/gus/gusextreme.c @@ -27,7 +27,6 @@ MODULE_DESCRIPTION(CRD_NAME); MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Extreme}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c index dc09fbd6f88da..24b945f1768d0 100644 --- a/sound/isa/gus/gusmax.c +++ b/sound/isa/gus/gusmax.c @@ -21,7 +21,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Gravis UltraSound MAX"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound MAX}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c index e4d412e72b75a..99581fba4ca8a 100644 --- a/sound/isa/gus/interwave.c +++ b/sound/isa/gus/interwave.c @@ -28,14 +28,8 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); #ifndef SNDRV_STB MODULE_DESCRIPTION("AMD InterWave"); -MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Plug & Play}," - "{STB,SoundRage32}," - "{MED,MED3210}," - "{Dynasonix,Dynasonix Pro}," - "{Panasonic,PCA761AW}}"); #else MODULE_DESCRIPTION("AMD InterWave STB with TEA6330T"); -MODULE_SUPPORTED_DEVICE("{{AMD,InterWave STB with TEA6330T}}"); #endif static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index 7649a8a4128d1..9bde11d1cfe86 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -22,11 +22,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Yamaha OPL3SA2+"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Yamaha,YMF719E-S}," - "{Genius,Sound Maker 3DX}," - "{Yamaha,OPL3SA3}," - "{Intel,AL440LX sound}," - "{NeoMagic,MagicWave 3DX}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 20933342f5eb3..a510b201143cf 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -33,9 +33,6 @@ MODULE_AUTHOR("Martin Langer "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Miro miroSOUND PCM1 pro, PCM12, PCM20 Radio"); -MODULE_SUPPORTED_DEVICE("{{Miro,miroSOUND PCM1 pro}, " - "{Miro,miroSOUND PCM12}, " - "{Miro,miroSOUND PCM20 Radio}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 758f5b579138a..08e61d90057b6 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -36,17 +36,11 @@ MODULE_AUTHOR("Massimo Piccioni "); MODULE_LICENSE("GPL"); #ifdef OPTi93X MODULE_DESCRIPTION("OPTi93X"); -MODULE_SUPPORTED_DEVICE("{{OPTi,82C931/3}}"); #else /* OPTi93X */ #ifdef CS4231 MODULE_DESCRIPTION("OPTi92X - CS4231"); -MODULE_SUPPORTED_DEVICE("{{OPTi,82C924 (CS4231)}," - "{OPTi,82C925 (CS4231)}}"); #else /* CS4231 */ MODULE_DESCRIPTION("OPTi92X - AD1848"); -MODULE_SUPPORTED_DEVICE("{{OPTi,82C924 (AD1848)}," - "{OPTi,82C925 (AD1848)}," - "{OAK,Mozart}}"); #endif /* CS4231 */ #endif /* OPTi93X */ diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c index 0e2e0ab3b9e4c..7ba5dd1ec810d 100644 --- a/sound/isa/sb/jazz16.c +++ b/sound/isa/sb/jazz16.c @@ -28,9 +28,6 @@ #define PFX "jazz16: " MODULE_DESCRIPTION("Media Vision Jazz16"); -MODULE_SUPPORTED_DEVICE("{{Media Vision ??? }," - "{RTL,RTL3000}}"); - MODULE_AUTHOR("Krzysztof Helt "); MODULE_LICENSE("GPL"); diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c index db284b7b88a73..63ef960abd25f 100644 --- a/sound/isa/sb/sb16.c +++ b/sound/isa/sb/sb16.c @@ -31,16 +31,8 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_LICENSE("GPL"); #ifndef SNDRV_SBAWE MODULE_DESCRIPTION("Sound Blaster 16"); -MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB 16}," - "{Creative Labs,SB Vibra16S}," - "{Creative Labs,SB Vibra16C}," - "{Creative Labs,SB Vibra16CL}," - "{Creative Labs,SB Vibra16X}}"); #else MODULE_DESCRIPTION("Sound Blaster AWE"); -MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB AWE 32}," - "{Creative Labs,SB AWE 64}," - "{Creative Labs,SB AWE 64 Gold}}"); #endif #if 0 diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 8e3e67b9a3414..6c9d534ce8b61 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -17,7 +17,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Sound Blaster 1.0/2.0/Pro"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB 1.0/SB 2.0/SB Pro}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index def1375797171..3462663050bb5 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -29,9 +29,6 @@ MODULE_AUTHOR("Krzysztof Helt"); MODULE_DESCRIPTION("Gallant SC-6000"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Gallant, SC-6000}," - "{AudioExcel, Audio Excel DSP 16}," - "{Zoltrix, AV302}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c index b750a4fd40de7..a4437971df2fd 100644 --- a/sound/isa/wavefront/wavefront.c +++ b/sound/isa/wavefront/wavefront.c @@ -21,7 +21,6 @@ MODULE_AUTHOR("Paul Barton-Davis "); MODULE_DESCRIPTION("Turtle Beach Wavefront"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Turtle Beach,Maui/Tropez/Tropez+}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c index 5bf1ea150f264..989f656e2de7d 100644 --- a/sound/mips/sgio2audio.c +++ b/sound/mips/sgio2audio.c @@ -32,7 +32,6 @@ MODULE_AUTHOR("Vivien Chappelier "); MODULE_DESCRIPTION("SGI O2 Audio"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Silicon Graphics, O2 Audio}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c index 5d835d2af0549..4520022801d90 100644 --- a/sound/pci/ad1889.c +++ b/sound/pci/ad1889.c @@ -43,7 +43,6 @@ MODULE_AUTHOR("Kyle McMartin , Thibaut Varene "); MODULE_DESCRIPTION("Analog Devices AD1889 ALSA sound driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Analog Devices,AD1889}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; module_param_array(index, int, NULL, 0444); diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index 51f24796f03fa..0d66b92466d54 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -29,7 +29,6 @@ MODULE_AUTHOR("Matt Wu "); MODULE_DESCRIPTION("ALI M5451"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ALI,M5451,pci},{ALI,M5451}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/als300.c b/sound/pci/als300.c index 1dc8c4ed0592b..bd4fd09e982b6 100644 --- a/sound/pci/als300.c +++ b/sound/pci/als300.c @@ -86,7 +86,6 @@ enum {DEVICE_ALS300, DEVICE_ALS300_PLUS}; MODULE_AUTHOR("Ash Willis "); MODULE_DESCRIPTION("Avance Logic ALS300"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Avance Logic,ALS300},{Avance Logic,ALS300+}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c index 2edc7455285a4..139ac2a3a0efa 100644 --- a/sound/pci/als4000.c +++ b/sound/pci/als4000.c @@ -68,7 +68,6 @@ MODULE_AUTHOR("Bart Hartgers , Andreas Mohr"); MODULE_DESCRIPTION("Avance Logic ALS4000"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Avance Logic,ALS4000}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index a25d754558028..579425ccbb6a8 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -23,7 +23,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("ATI IXP AC97 controller"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ATI,IXP150/200/250/300/400/600}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index ae88217d685a9..45e75afec7a0c 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -23,7 +23,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("ATI IXP MC97 controller"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ATI,IXP150/200/250}}"); static int index = -2; /* Exclude the first card */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index 5dd98e6ff34b9..1b37b7225b1dc 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -41,8 +41,6 @@ MODULE_PARM_DESC(pcifix, "Enable VIA-workaround for " CARD_NAME " soundcard."); MODULE_DESCRIPTION("Aureal vortex"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Aureal Semiconductor Inc., Aureal Vortex Sound Processor}}"); - MODULE_DEVICE_TABLE(pci, snd_vortex_ids); static void vortex_fix_latency(struct pci_dev *vortex) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 2ac594dcf21c8..51dcf1bc4c0cf 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -196,7 +196,6 @@ MODULE_AUTHOR("Andreas Mohr "); MODULE_DESCRIPTION("Aztech AZF3328 (PCI168)"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Aztech,AZF3328}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_GAMEPORT 1 diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index cf9f8d80a0b61..91512b345d19b 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -23,8 +23,6 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("Brooktree Bt87x audio driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Brooktree,Bt878}," - "{Brooktree,Bt879}}"); static int index[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = -2}; /* Exclude the first card */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index ee20f9a1aae98..bee4710916c47 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -137,7 +137,6 @@ MODULE_AUTHOR("James Courtier-Dutton "); MODULE_DESCRIPTION("CA0106"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Creative,SB CA0106 chip}}"); // module parameters (see "Module Parameters") static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 7363d61eaec23..598446348da63 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -30,10 +30,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("C-Media CMI8x38 PCI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8738}," - "{C-Media,CMI8738B}," - "{C-Media,CMI8338A}," - "{C-Media,CMI8338B}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index 94d2a6a466a8c..bf3bb70ffaf92 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -25,7 +25,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Cirrus Logic CS4281"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Cirrus Logic,CS4281}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c index a6e0a44393323..1db7b41128402 100644 --- a/sound/pci/cs46xx/cs46xx.c +++ b/sound/pci/cs46xx/cs46xx.c @@ -21,13 +21,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Cirrus Logic Sound Fusion CS46XX"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Cirrus Logic,Sound Fusion (CS4280)}," - "{Cirrus Logic,Sound Fusion (CS4610)}," - "{Cirrus Logic,Sound Fusion (CS4612)}," - "{Cirrus Logic,Sound Fusion (CS4615)}," - "{Cirrus Logic,Sound Fusion (CS4622)}," - "{Cirrus Logic,Sound Fusion (CS4624)}," - "{Cirrus Logic,Sound Fusion (CS4630)}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 359bc6af86708..9b716b56d7392 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -393,4 +393,3 @@ module_pci_driver(cs5535audio_driver); MODULE_AUTHOR("Jaya Kumar"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CS5535 Audio"); -MODULE_SUPPORTED_DEVICE("CS5535 Audio"); diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c index 8c07c6463c246..713d36ea40cb5 100644 --- a/sound/pci/ctxfi/xfi.c +++ b/sound/pci/ctxfi/xfi.c @@ -18,7 +18,6 @@ MODULE_AUTHOR("Creative Technology Ltd"); MODULE_DESCRIPTION("X-Fi driver version 1.03"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}"); static unsigned int reference_rate = 48000; static unsigned int multiple = 2; diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index a20b2bb5c898d..9bd67ac336575 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -10,7 +10,6 @@ MODULE_AUTHOR("Giuliano Pochini "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Echoaudio " ECHOCARD_NAME " soundcards driver"); -MODULE_SUPPORTED_DEVICE("{{Echoaudio," ECHOCARD_NAME "}}"); MODULE_DEVICE_TABLE(pci, snd_echo_ids); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 353934c88cbd8..45833bc2a7e76 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -18,8 +18,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("EMU10K1"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB Live!/PCI512/E-mu APS}," - "{Creative Labs,SB Audigy}}"); #if IS_ENABLED(CONFIG_SND_SEQUENCER) #define ENABLE_SYNTH diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index 785ec0cf3933a..d9a12cd016478 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -31,7 +31,6 @@ MODULE_AUTHOR("Francisco Moraes "); MODULE_DESCRIPTION("EMU10K1X"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Dell Creative Labs,SB Live!}"); // module parameters (see "Module Parameters") static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 93c4fd3133115..3ccccdbc00298 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -52,17 +52,9 @@ MODULE_AUTHOR("Jaroslav Kysela , Thomas Sailer "); MODULE_DESCRIPTION("ESS Solo-1"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESS,ES1938}," - "{ESS,ES1946}," - "{ESS,ES1969}," - "{TerraTec,128i PCI}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index 747fa69bb1c90..5fa1861236f59 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -107,10 +107,6 @@ MODULE_DESCRIPTION("ESS Maestro"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESS,Maestro 2e}," - "{ESS,Maestro 2}," - "{ESS,Maestro 1}," - "{TerraTec,DMX}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index c6ad6235a6699..6279eb156e36d 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -26,8 +26,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("ForteMedia FM801"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ForteMedia,FM801}," - "{Genius,SoundMaker Live 5.1}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5eea130dcf0a5..b59b0f323d4e0 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -208,40 +208,6 @@ MODULE_PARM_DESC(snoop, "Enable/disable snooping"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Intel, ICH6}," - "{Intel, ICH6M}," - "{Intel, ICH7}," - "{Intel, ESB2}," - "{Intel, ICH8}," - "{Intel, ICH9}," - "{Intel, ICH10}," - "{Intel, PCH}," - "{Intel, CPT}," - "{Intel, PPT}," - "{Intel, LPT}," - "{Intel, LPT_LP}," - "{Intel, WPT_LP}," - "{Intel, SPT}," - "{Intel, SPT_LP}," - "{Intel, HPT}," - "{Intel, PBG}," - "{Intel, SCH}," - "{ATI, SB450}," - "{ATI, SB600}," - "{ATI, RS600}," - "{ATI, RS690}," - "{ATI, RS780}," - "{ATI, R600}," - "{ATI, RV630}," - "{ATI, RV610}," - "{ATI, RV670}," - "{ATI, RV635}," - "{ATI, RV620}," - "{ATI, RV770}," - "{VIA, VT8251}," - "{VIA, VT8237A}," - "{SiS, SIS966}," - "{ULI, M5461}}"); MODULE_DESCRIPTION("Intel HDA driver"); #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO) diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index f814dbbec2a43..d54cd5143e9fb 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -60,12 +60,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("ICEnsemble ICE1712 (Envy24)"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{" - HOONTECH_DEVICE_DESC - DELTA_DEVICE_DESC - EWS_DEVICE_DESC - "{ICEnsemble,Generic ICE1712}," - "{ICEnsemble,Generic Envy24}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index c0fca94c1dd27..ef2367d861484 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -44,25 +44,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("VIA ICEnsemble ICE1724/1720 (Envy24HT/PT)"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{" - REVO_DEVICE_DESC - AMP_AUDIO2000_DEVICE_DESC - AUREON_DEVICE_DESC - VT1720_MOBO_DEVICE_DESC - PONTIS_DEVICE_DESC - PRODIGY192_DEVICE_DESC - PRODIGY_HIFI_DEVICE_DESC - JULI_DEVICE_DESC - MAYA44_DEVICE_DESC - PHASE_DEVICE_DESC - WTM_DEVICE_DESC - SE_DEVICE_DESC - QTET_DEVICE_DESC - "{VIA,VT1720}," - "{VIA,VT1724}," - "{ICEnsemble,Generic ICE1724}," - "{ICEnsemble,Generic Envy24HT}" - "{ICEnsemble,Generic Envy24PT}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 3349e455a871a..35903d1a1cbd2 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -27,29 +27,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; SiS 7012; Ali 5455"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Intel,82801AA-ICH}," - "{Intel,82901AB-ICH0}," - "{Intel,82801BA-ICH2}," - "{Intel,82801CA-ICH3}," - "{Intel,82801DB-ICH4}," - "{Intel,ICH5}," - "{Intel,ICH6}," - "{Intel,ICH7}," - "{Intel,6300ESB}," - "{Intel,ESB2}," - "{Intel,MX440}," - "{SiS,SI7012}," - "{NVidia,nForce Audio}," - "{NVidia,nForce2 Audio}," - "{NVidia,nForce3 Audio}," - "{NVidia,MCP04}," - "{NVidia,MCP501}," - "{NVidia,CK804}," - "{NVidia,CK8}," - "{NVidia,CK8S}," - "{AMD,AMD768}," - "{AMD,AMD8111}," - "{ALI,M5455}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 19872cecc9d2e..13ef838b26c19 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -25,21 +25,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; " "SiS 7013; NVidia MCP/2/2S/3 modems"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Intel,82801AA-ICH}," - "{Intel,82901AB-ICH0}," - "{Intel,82801BA-ICH2}," - "{Intel,82801CA-ICH3}," - "{Intel,82801DB-ICH4}," - "{Intel,ICH5}," - "{Intel,ICH6}," - "{Intel,ICH7}," - "{Intel,MX440}," - "{SiS,7013}," - "{NVidia,NForce Modem}," - "{NVidia,NForce2 Modem}," - "{NVidia,NForce2s Modem}," - "{NVidia,NForce3 Modem}," - "{AMD,AMD768}}"); static int index = -2; /* Exclude the first card */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 2eddd9de9e6da..80ac3c6152ad8 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -388,7 +388,6 @@ struct snd_korg1212 { MODULE_DESCRIPTION("korg1212"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{KORG,korg1212}}"); MODULE_FIRMWARE("korg/k1212.dsp"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c index 491c90f83fbc0..03b4be44bb260 100644 --- a/sound/pci/lola/lola.c +++ b/sound/pci/lola/lola.c @@ -54,7 +54,6 @@ MODULE_PARM_DESC(sample_rate_min, "Minimal sample rate"); */ MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Digigram, Lola}}"); MODULE_DESCRIPTION("Digigram Lola driver"); MODULE_AUTHOR("Takashi Iwai "); diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index b92ea074ff2af..1be97c38bc718 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -21,8 +21,6 @@ MODULE_AUTHOR("Tim Blechmann"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("digigram lx6464es"); -MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}"); - static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c index d2c2cd6006f04..cdc4b61062526 100644 --- a/sound/pci/maestro3.c +++ b/sound/pci/maestro3.c @@ -39,11 +39,6 @@ MODULE_AUTHOR("Zach Brown , Takashi Iwai "); MODULE_DESCRIPTION("ESS Maestro3 PCI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{ESS,Maestro3 PCI}," - "{ESS,ES1988}," - "{ESS,Allegro PCI}," - "{ESS,Allegro-1 PCI}," - "{ESS,Canyon3D-2/LE PCI}}"); MODULE_FIRMWARE("ess/maestro3_assp_kernel.fw"); MODULE_FIRMWARE("ess/maestro3_assp_minisrc.fw"); diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c index efff220b26ea7..a0bbb386dc252 100644 --- a/sound/pci/mixart/mixart.c +++ b/sound/pci/mixart/mixart.c @@ -32,7 +32,6 @@ MODULE_AUTHOR("Digigram "); MODULE_DESCRIPTION("Digigram " CARD_NAME); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Digigram," CARD_NAME "}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c index 975994623c2ce..6cb689aa28c21 100644 --- a/sound/pci/nm256/nm256.c +++ b/sound/pci/nm256/nm256.c @@ -32,8 +32,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("NeoMagic NM256AV/ZX"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{NeoMagic,NM256AV}," - "{NeoMagic,NM256ZX}}"); /* * some compile conditions. diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index a751fcce7c8eb..e335c4b5b3816 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -56,9 +56,6 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("C-Media CMI8788 driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8786}" - ",{C-Media,CMI8787}" - ",{C-Media,CMI8788}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/oxygen/se6x.c b/sound/pci/oxygen/se6x.c index 78c35a0a54776..434f885f6f918 100644 --- a/sound/pci/oxygen/se6x.c +++ b/sound/pci/oxygen/se6x.c @@ -29,7 +29,6 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("Studio Evolution SE6X driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{Studio Evolution,SE6X}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 98ab163298274..baa3244d4dabe 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -16,7 +16,6 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("Asus Virtuoso driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{Asus,AV66},{Asus,AV100},{Asus,AV200}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c index c2e4831c3a13f..751f9744b089f 100644 --- a/sound/pci/pcxhr/pcxhr.c +++ b/sound/pci/pcxhr/pcxhr.c @@ -35,7 +35,6 @@ MODULE_AUTHOR("Markus Bollinger , " "Marc Titinger "); MODULE_DESCRIPTION("Digigram " DRIVER_NAME " " PCXHR_DRIVER_VERSION_STRING); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Digigram," DRIVER_NAME "}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index fcc2073c5025c..56827db97239f 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -103,7 +103,6 @@ MODULE_AUTHOR("Peter Gruber "); MODULE_DESCRIPTION("riptide"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Conexant,Riptide}}"); MODULE_FIRMWARE("riptide.hex"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index 4eabece4dcbaf..54f3e39f97f5c 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -88,7 +88,6 @@ MODULE_PARM_DESC(fullduplex, "Support full-duplex mode."); MODULE_AUTHOR("Martin Langer , Pilo Chambert "); MODULE_DESCRIPTION("RME Digi32, Digi32/8, Digi32 PRO"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{RME,Digi32}," "{RME,Digi32/8}," "{RME,Digi32 PRO}}"); /* Defines for RME Digi32 series */ #define RME32_SPDIF_NCHANNELS 2 diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 84eef6a3739f5..66082e9f526dc 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -31,11 +31,6 @@ MODULE_AUTHOR("Anders Torger "); MODULE_DESCRIPTION("RME Digi96, Digi96/8, Digi96/8 PRO, Digi96/8 PST, " "Digi96/8 PAD"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{RME,Digi96}," - "{RME,Digi96/8}," - "{RME,Digi96/8 PRO}," - "{RME,Digi96/8 PST}," - "{RME,Digi96/8 PAD}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 6d9029333a124..4cf879c42dc4c 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -44,9 +44,6 @@ MODULE_PARM_DESC(enable, "Enable/disable specific Hammerfall DSP soundcards."); MODULE_AUTHOR("Paul Davis , Marcus Andersson, Thomas Charbonnel "); MODULE_DESCRIPTION("RME Hammerfall DSP"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{RME Hammerfall-DSP}," - "{RME HDSP-9652}," - "{RME HDSP-9632}}"); MODULE_FIRMWARE("rpm_firmware.bin"); MODULE_FIRMWARE("multiface_firmware.bin"); MODULE_FIRMWARE("multiface_firmware_rev11.bin"); diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index b66711574b1a6..8d900c132f0f4 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -165,7 +165,6 @@ MODULE_AUTHOR ); MODULE_DESCRIPTION("RME HDSPM"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); /* --- Write registers. --- These are defined as byte-offsets from the iobase value. */ diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index 012fbec5e6a74..4df992e846f23 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -39,8 +39,6 @@ MODULE_PARM_DESC(precise_ptr, "Enable precise pointer (doesn't work reliably).") MODULE_AUTHOR("Paul Davis , Winfried Ritsch"); MODULE_DESCRIPTION("RME Digi9652/Digi9636"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{RME,Hammerfall}," - "{RME,Hammerfall-Light}}"); /* The Hammerfall has two sets of 24 ADAT + 2 S/PDIF channels, one for capture, one for playback. Both the ADAT and S/PDIF channels appear diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c index 8ffa2f53c0b58..00ab51c889da2 100644 --- a/sound/pci/sis7019.c +++ b/sound/pci/sis7019.c @@ -24,7 +24,6 @@ MODULE_AUTHOR("David Dillow "); MODULE_DESCRIPTION("SiS7019"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{SiS,SiS7019 Audio Accelerator}}"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c index 26fd1d08c179f..7de10997775f5 100644 --- a/sound/pci/sonicvibes.c +++ b/sound/pci/sonicvibes.c @@ -29,7 +29,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("S3 SonicVibes PCI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{S3,SonicVibes PCI}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/trident/trident.c b/sound/pci/trident/trident.c index 5bc79da6e35e1..a51041205f7c1 100644 --- a/sound/pci/trident/trident.c +++ b/sound/pci/trident/trident.c @@ -17,18 +17,6 @@ MODULE_AUTHOR("Jaroslav Kysela , "); MODULE_DESCRIPTION("Trident 4D-WaveDX/NX & SiS SI7018"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Trident,4DWave DX}," - "{Trident,4DWave NX}," - "{SiS,SI7018 PCI Audio}," - "{Best Union,Miss Melody 4DWave PCI}," - "{HIS,4DWave PCI}," - "{Warpspeed,ONSpeed 4DWave PCI}," - "{Aztech Systems,PCI 64-Q3D}," - "{Addonics,SV 750}," - "{CHIC,True Sound 4Dwave}," - "{Shark,Predator4D-PCI}," - "{Jaton,SonicWave 4D}," - "{Hoontech,SoundTrack Digital 4DWave NX}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 154d88ce88131..fd1f2f9cfbc32 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -56,7 +56,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("VIA VT82xx audio"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{VIA,VT82C686A/B/C,pci},{VIA,VT8233A/C,8235}}"); #if IS_REACHABLE(CONFIG_GAMEPORT) #define SUPPORT_JOYSTICK 1 diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c index addfa196df21d..30253306f67cc 100644 --- a/sound/pci/via82xx_modem.c +++ b/sound/pci/via82xx_modem.c @@ -38,7 +38,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("VIA VT82xx modem"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{VIA,VT82C686A/B/C modem,pci}}"); static int index = -2; /* Exclude the first card */ static char *id = SNDRV_DEFAULT_STR1; /* ID for this card */ diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c index f7800ed1b67ea..2a9e1a77a81a8 100644 --- a/sound/pci/vx222/vx222.c +++ b/sound/pci/vx222/vx222.c @@ -20,7 +20,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("Digigram VX222 V2/Mic"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Digigram," CARD_NAME "}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c index 9b0d18a7bf356..99be1490ef0eb 100644 --- a/sound/pci/ymfpci/ymfpci.c +++ b/sound/pci/ymfpci/ymfpci.c @@ -17,12 +17,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Yamaha DS-1 PCI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Yamaha,YMF724}," - "{Yamaha,YMF724F}," - "{Yamaha,YMF740}," - "{Yamaha,YMF740C}," - "{Yamaha,YMF744}," - "{Yamaha,YMF754}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 27d9da6d61e83..144582350a05b 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -22,7 +22,6 @@ MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Sound Core " CARD_NAME); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Sound Core," CARD_NAME "}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index afd30a90c807b..6363204898051 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -17,13 +17,9 @@ #include #include -/* - */ - MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("Digigram VXPocket"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Digigram,VXPocket},{Digigram,VXPocket440}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/ppc/powermac.c b/sound/ppc/powermac.c index 96ef55082bf9a..9fb51ebafde16 100644 --- a/sound/ppc/powermac.c +++ b/sound/ppc/powermac.c @@ -18,7 +18,6 @@ #define CHIP_NAME "PMac" MODULE_DESCRIPTION("PowerMac"); -MODULE_SUPPORTED_DEVICE("{{Apple,PowerMac}}"); MODULE_LICENSE("GPL"); static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */ diff --git a/sound/sh/aica.c b/sound/sh/aica.c index 8fa68432d3c1e..6e9d6bd67369a 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -32,7 +32,6 @@ MODULE_AUTHOR("Adrian McMenamin "); MODULE_DESCRIPTION("Dreamcast AICA sound (pcm) driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Yamaha/SEGA, AICA}}"); MODULE_FIRMWARE("aica_firmware.bin"); /* module parameters */ diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index feb28502940f1..8ebd972846acb 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -25,7 +25,6 @@ MODULE_AUTHOR("Rafael Ignacio Zurita "); MODULE_DESCRIPTION("SuperH DAC audio driver"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{SuperH DAC audio support}}"); /* Module Parameters */ static int index = SNDRV_DEFAULT_IDX1; diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c index 9d0da5fa1c70d..d24ae00878f5a 100644 --- a/sound/sparc/amd7930.c +++ b/sound/sparc/amd7930.c @@ -62,7 +62,6 @@ MODULE_PARM_DESC(enable, "Enable Sun AMD7930 soundcard."); MODULE_AUTHOR("Thomas K. Dyas and David S. Miller"); MODULE_DESCRIPTION("Sun AMD7930"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Sun,AMD7930}}"); /* Device register layout. */ diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index 0eed5f79a2bf9..35c17803a4309 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -52,7 +52,6 @@ MODULE_PARM_DESC(enable, "Enable Sun CS4231 soundcard."); MODULE_AUTHOR("Jaroslav Kysela, Derrick J. Brashear and David S. Miller"); MODULE_DESCRIPTION("Sun CS4231"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Sun,CS4231}}"); #ifdef SBUS_SUPPORT struct sbus_dma_info { diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 5a6fb66dd1187..b055f58395787 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -76,7 +76,6 @@ MODULE_AUTHOR("Rudolf Koenig, Brent Baccala and Martin Habets"); MODULE_DESCRIPTION("Sun DBRI"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Sun,DBRI}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/usb/6fire/chip.c b/sound/usb/6fire/chip.c index 08c6e6a52eb98..33e962178c936 100644 --- a/sound/usb/6fire/chip.c +++ b/sound/usb/6fire/chip.c @@ -26,7 +26,6 @@ MODULE_AUTHOR("Torsten Schenk "); MODULE_DESCRIPTION("TerraTec DMX 6Fire USB audio driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{TerraTec,DMX 6Fire USB}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for card */ diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index e03481caf7f62..49f63f878e6fe 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -26,20 +26,6 @@ MODULE_AUTHOR("Daniel Mack "); MODULE_DESCRIPTION("caiaq USB audio"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Native Instruments,RigKontrol2}," - "{Native Instruments,RigKontrol3}," - "{Native Instruments,Kore Controller}," - "{Native Instruments,Kore Controller 2}," - "{Native Instruments,Audio Kontrol 1}," - "{Native Instruments,Audio 2 DJ}," - "{Native Instruments,Audio 4 DJ}," - "{Native Instruments,Audio 8 DJ}," - "{Native Instruments,Traktor Audio 2}," - "{Native Instruments,Session I/O}," - "{Native Instruments,GuitarRig mobile}," - "{Native Instruments,Traktor Kontrol X1}," - "{Native Instruments,Traktor Kontrol S4}," - "{Native Instruments,Maschine Controller}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ diff --git a/sound/usb/card.c b/sound/usb/card.c index b6f4c0848e661..0826a437f8fca 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -58,8 +58,6 @@ MODULE_AUTHOR("Takashi Iwai "); MODULE_DESCRIPTION("USB Audio"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}"); - static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ diff --git a/sound/usb/hiface/chip.c b/sound/usb/hiface/chip.c index c2824188d1420..95385e90882c7 100644 --- a/sound/usb/hiface/chip.c +++ b/sound/usb/hiface/chip.c @@ -21,23 +21,6 @@ MODULE_AUTHOR("Michael Trimarchi "); MODULE_AUTHOR("Antonio Ospite "); MODULE_DESCRIPTION("M2Tech hiFace USB-SPDIF audio driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{M2Tech,Young}," - "{M2Tech,hiFace}," - "{M2Tech,North Star}," - "{M2Tech,W4S Young}," - "{M2Tech,Corrson}," - "{M2Tech,AUDIA}," - "{M2Tech,SL Audio}," - "{M2Tech,Empirical}," - "{M2Tech,Rockna}," - "{M2Tech,Pathos}," - "{M2Tech,Metronome}," - "{M2Tech,CAD}," - "{M2Tech,Audio Esclusive}," - "{M2Tech,Rotel}," - "{M2Tech,Eeaudio}," - "{The Chord Company,CHORD}," - "{AVA Group A/S,Vitus}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for card */ diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 6b30155964ec0..5834d1dc317ef 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -19,7 +19,6 @@ MODULE_DESCRIPTION("Edirol UA-101/1000 driver"); MODULE_AUTHOR("Clemens Ladisch "); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{{Edirol,UA-101},{Edirol,UA-1000}}"); /* * Should not be lower than the minimum scheduling delay of the host diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index c54158146917b..3cd28d24f0a73 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -137,7 +137,6 @@ MODULE_AUTHOR("Karsten Wiese "); MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2"); MODULE_LICENSE("GPL"); -MODULE_SUPPORTED_DEVICE("{{TASCAM(0x1604),"NAME_ALLCAPS"(0x8001)(0x8005)(0x8007)}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 1d66c3a4fb107..33b12aa67cf5b 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1887,4 +1887,3 @@ MODULE_AUTHOR("Vaibhav Agarwal "); MODULE_AUTHOR("Jerome Anand "); MODULE_DESCRIPTION("Intel HDMI Audio driver"); MODULE_LICENSE("GPL v2"); -MODULE_SUPPORTED_DEVICE("{Intel,Intel_HAD}"); diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c index 228d820312973..2cb0a19be2b85 100644 --- a/sound/xen/xen_snd_front.c +++ b/sound/xen/xen_snd_front.c @@ -391,4 +391,3 @@ module_exit(xen_drv_fini); MODULE_DESCRIPTION("Xen virtual sound device frontend"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xen:" XENSND_DRIVER_NAME); -MODULE_SUPPORTED_DEVICE("{{ALSA,Virtual soundcard}}"); -- GitLab From 9858af27e69247c5d04c3b093190a93ca365f33d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Mar 2021 10:44:45 +0000 Subject: [PATCH 771/839] usbip: Fix incorrect double assignment to udc->ud.tcp_rx Currently udc->ud.tcp_rx is being assigned twice, the second assignment is incorrect, it should be to udc->ud.tcp_tx instead of rx. Fix this. Fixes: 46613c9dfa96 ("usbip: fix vudc usbip_sockfd_store races leading to gpf") Acked-by: Shuah Khan Signed-off-by: Colin Ian King Cc: stable Addresses-Coverity: ("Unused value") Link: https://lore.kernel.org/r/20210311104445.7811-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index a3ec39fc61778..7383a543c6d12 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -174,7 +174,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, udc->ud.tcp_socket = socket; udc->ud.tcp_rx = tcp_rx; - udc->ud.tcp_rx = tcp_tx; + udc->ud.tcp_tx = tcp_tx; udc->ud.status = SDEV_ST_USED; spin_unlock_irq(&udc->ud.lock); -- GitLab From 98f153a10da403ddd5e9d98a3c8c2bb54bb5a0b6 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 11 Mar 2021 14:42:41 +0800 Subject: [PATCH 772/839] usb: gadget: configfs: Fix KASAN use-after-free When gadget is disconnected, running sequence is like this. . composite_disconnect . Call trace: usb_string_copy+0xd0/0x128 gadget_config_name_configuration_store+0x4 gadget_config_name_attr_store+0x40/0x50 configfs_write_file+0x198/0x1f4 vfs_write+0x100/0x220 SyS_write+0x58/0xa8 . configfs_composite_unbind . configfs_composite_bind In configfs_composite_bind, it has "cn->strings.s = cn->configuration;" When usb_string_copy is invoked. it would allocate memory, copy input string, release previous pointed memory space, and use new allocated memory. When gadget is connected, host sends down request to get information. Call trace: usb_gadget_get_string+0xec/0x168 lookup_string+0x64/0x98 composite_setup+0xa34/0x1ee8 If gadget is disconnected and connected quickly, in the failed case, cn->configuration memory has been released by usb_string_copy kfree but configfs_composite_bind hasn't been run in time to assign new allocated "cn->configuration" pointer to "cn->strings.s". When "strlen(s->s) of usb_gadget_get_string is being executed, the dangling memory is accessed, "BUG: KASAN: use-after-free" error occurs. Cc: stable@vger.kernel.org Signed-off-by: Jim Lin Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/1615444961-13376-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 0d56f33d63c29..15a607ccef8a2 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -97,6 +97,8 @@ struct gadget_config_name { struct list_head list; }; +#define USB_MAX_STRING_WITH_NULL_LEN (USB_MAX_STRING_LEN+1) + static int usb_string_copy(const char *s, char **s_copy) { int ret; @@ -106,12 +108,16 @@ static int usb_string_copy(const char *s, char **s_copy) if (ret > USB_MAX_STRING_LEN) return -EOVERFLOW; - str = kstrdup(s, GFP_KERNEL); - if (!str) - return -ENOMEM; + if (copy) { + str = copy; + } else { + str = kmalloc(USB_MAX_STRING_WITH_NULL_LEN, GFP_KERNEL); + if (!str) + return -ENOMEM; + } + strcpy(str, s); if (str[ret - 1] == '\n') str[ret - 1] = '\0'; - kfree(copy); *s_copy = str; return 0; } -- GitLab From 546aa0e4ea6ed81b6c51baeebc4364542fa3f3a7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 17 Mar 2021 15:06:54 -0400 Subject: [PATCH 773/839] usb-storage: Add quirk to defeat Kindle's automatic unload Matthias reports that the Amazon Kindle automatically removes its emulated media if it doesn't receive another SCSI command within about one second after a SYNCHRONIZE CACHE. It does so even when the host has sent a PREVENT MEDIUM REMOVAL command. The reason for this behavior isn't clear, although it's not hard to make some guesses. At any rate, the results can be unexpected for anyone who tries to access the Kindle in an unusual fashion, and in theory they can lead to data loss (for example, if one file is closed and synchronized while other files are still in the middle of being written). To avoid such problems, this patch creates a new usb-storage quirks flag telling the driver always to issue a REQUEST SENSE following a SYNCHRONIZE CACHE command, and adds an unusual_devs entry for the Kindle with the flag set. This is sufficient to prevent the Kindle from doing its automatic unload, without interfering with proper operation. Another possible way to deal with this would be to increase the frequency of TEST UNIT READY polling that the kernel normally carries out for removable-media storage devices. However that would increase the overall load on the system and it is not as reliable, because the user can override the polling interval. Changing the driver's behavior is safer and has minimal overhead. CC: Reported-and-tested-by: Matthias Schwarzott Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210317190654.GA497856@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 7 +++++++ drivers/usb/storage/unusual_devs.h | 12 ++++++++++++ include/linux/usb_usual.h | 2 ++ 3 files changed, 21 insertions(+) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 5eb895b19c558..f4304ce69350e 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -656,6 +656,13 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) need_auto_sense = 1; } + /* Some devices (Kindle) require another command after SYNC CACHE */ + if ((us->fflags & US_FL_SENSE_AFTER_SYNC) && + srb->cmnd[0] == SYNCHRONIZE_CACHE) { + usb_stor_dbg(us, "-- sense after SYNC CACHE\n"); + need_auto_sense = 1; + } + /* * If we have a failure, we're going to do a REQUEST_SENSE * automatically. Note that we differentiate between a command diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5732e9691f08f..efa972be2ee34 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2211,6 +2211,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* + * Reported by Matthias Schwarzott + * The Amazon Kindle treats SYNCHRONIZE CACHE as an indication that + * the host may be finished with it, and automatically ejects its + * emulated media unless it receives another command within one second. + */ +UNUSUAL_DEV( 0x1949, 0x0004, 0x0000, 0x9999, + "Amazon", + "Kindle", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SENSE_AFTER_SYNC ), + /* * Reported by Oliver Neukum * This device morphes spontaneously into another device if the access diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 6b03fdd69d274..712363c7a2e8e 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -86,6 +86,8 @@ /* lies about caching, so always sync */ \ US_FLAG(NO_SAME, 0x40000000) \ /* Cannot handle WRITE_SAME */ \ + US_FLAG(SENSE_AFTER_SYNC, 0x80000000) \ + /* Do REQUEST_SENSE after SYNCHRONIZE_CACHE */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- GitLab From 3cac9104bea41099cf622091f0c0538bcb19050d Mon Sep 17 00:00:00 2001 From: Elias Rudberg Date: Thu, 11 Mar 2021 13:47:10 +0100 Subject: [PATCH 774/839] usb: typec: Remove vdo[3] part of tps6598x_rx_identity_reg struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused "u32 vdo[3]" part in the tps6598x_rx_identity_reg struct. This helps avoid "failed to register partner" errors which happen when tps6598x_read_partner_identity() fails because the amount of data read is 12 bytes smaller than the struct size. Note that vdo[3] is already in usb_pd_identity and hence shouldn't be added to tps6598x_rx_identity_reg as well. Fixes: f6c56ca91b92 ("usb: typec: Add the Product Type VDOs to struct usb_pd_identity") Reviewed-by: Heikki Krogerus Reviewed-by: Guido Günther Signed-off-by: Elias Rudberg Cc: stable Link: https://lore.kernel.org/r/20210311124710.6563-1-mail@eliasrudberg.se Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tps6598x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index 6e6ef63175237..29bd1c5a283cd 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -64,7 +64,6 @@ enum { struct tps6598x_rx_identity_reg { u8 status; struct usb_pd_identity identity; - u32 vdo[3]; } __packed; /* Standard Task return codes */ -- GitLab From 86629e098a077922438efa98dc80917604dfd317 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Mar 2021 11:12:48 -0700 Subject: [PATCH 775/839] usb: typec: tcpm: Invoke power_supply_changed for tcpm-source-psy- tcpm-source-psy- does not invoke power_supply_changed API when one of the published power supply properties is changed. power_supply_changed needs to be called to notify userspace clients(uevents) and kernel clients. Fixes: f2a8aa053c176 ("typec: tcpm: Represent source supply through power_supply") Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Cc: stable Link: https://lore.kernel.org/r/20210317181249.1062995-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index be0b6469dd3df..92093ea12cff2 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -942,6 +942,7 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv) port->supply_voltage = mv; port->current_limit = max_ma; + power_supply_changed(port->psy); if (port->tcpc->set_current_limit) ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv); @@ -2928,6 +2929,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = false; port->usb_type = POWER_SUPPLY_USB_TYPE_PD; + power_supply_changed(port->psy); /* * Select the source PDO providing the most power which has a @@ -2952,6 +2954,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = true; port->usb_type = POWER_SUPPLY_USB_TYPE_PD_PPS; + power_supply_changed(port->psy); } continue; default: @@ -3109,6 +3112,7 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port) port->pps_data.out_volt)); port->pps_data.op_curr = min(port->pps_data.max_curr, port->pps_data.op_curr); + power_supply_changed(port->psy); } return src_pdo; @@ -3344,6 +3348,7 @@ static int tcpm_set_charge(struct tcpm_port *port, bool charge) return ret; } port->vbus_charge = charge; + power_supply_changed(port->psy); return 0; } @@ -3523,6 +3528,7 @@ static void tcpm_reset_port(struct tcpm_port *port) port->try_src_count = 0; port->try_snk_count = 0; port->usb_type = POWER_SUPPLY_USB_TYPE_C; + power_supply_changed(port->psy); port->nr_sink_caps = 0; port->sink_cap_done = false; if (port->tcpc->enable_frs) @@ -5905,7 +5911,7 @@ static int tcpm_psy_set_prop(struct power_supply *psy, ret = -EINVAL; break; } - + power_supply_changed(port->psy); return ret; } @@ -6058,6 +6064,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) err = devm_tcpm_psy_register(port); if (err) goto out_role_sw_put; + power_supply_changed(port->psy); port->typec_port = typec_register_port(port->dev, &port->typec_caps); if (IS_ERR(port->typec_port)) { -- GitLab From f09ddcfcb8c569675066337adac2ac205113471f Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 11 Mar 2021 15:59:02 -0800 Subject: [PATCH 776/839] usb: dwc3: gadget: Prevent EP queuing while stopping transfers In the situations where the DWC3 gadget stops active transfers, once calling the dwc3_gadget_giveback(), there is a chance where a function driver can queue a new USB request in between the time where the dwc3 lock has been released and re-aquired. This occurs after we've already issued an ENDXFER command. When the stop active transfers continues to remove USB requests from all dep lists, the newly added request will also be removed, while controller still has an active TRB for it. This can lead to the controller accessing an unmapped memory address. Fix this by ensuring parameters to prevent EP queuing are set before calling the stop active transfers API. Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller") Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1615507142-23097-1-git-send-email-wcheng@codeaurora.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index aebcf8ec07163..4a337f3486510 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -783,8 +783,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) trace_dwc3_gadget_ep_disable(dep); - dwc3_remove_requests(dwc, dep); - /* make sure HW endpoint isn't stalled */ if (dep->flags & DWC3_EP_STALL) __dwc3_gadget_ep_set_halt(dep, 0, false); @@ -803,6 +801,8 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } + dwc3_remove_requests(dwc, dep); + return 0; } @@ -1617,7 +1617,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) { struct dwc3 *dwc = dep->dwc; - if (!dep->endpoint.desc || !dwc->pullups_connected) { + if (!dep->endpoint.desc || !dwc->pullups_connected || !dwc->connected) { dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n", dep->name); return -ESHUTDOWN; @@ -2247,6 +2247,7 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) if (!is_on) { u32 count; + dwc->connected = false; /* * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a * Section 4.1.8 Table 4-7, it states that for a device-initiated @@ -2271,7 +2272,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) % dwc->ev_buf->length; } - dwc->connected = false; } else { __dwc3_gadget_start(dwc); } @@ -3321,8 +3321,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) { u32 reg; - dwc->connected = true; - /* * WORKAROUND: DWC3 revisions <1.88a have an issue which * would cause a missing Disconnect Event if there's a @@ -3362,6 +3360,7 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) * transfers." */ dwc3_stop_active_transfers(dwc); + dwc->connected = true; reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg &= ~DWC3_DCTL_TSTCTRL_MASK; -- GitLab From 34fa493a565cc6fcee6919787c11e264f55603c6 Mon Sep 17 00:00:00 2001 From: Calvin Hou Date: Tue, 2 Mar 2021 10:48:26 -0500 Subject: [PATCH 777/839] drm/amd/display: Correct algorithm for reversed gamma [Why] DCN30 needs to correctly program reversed gamma curve, which DCN20 already has. Also needs to fix a bug that 252-255 values are clipped. [How] Apply two fixes into DCN30. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1513 Tested-by: Daniel Wheeler Signed-off-by: Calvin Hou Reviewed-by: Jun Lei Reviewed-by: Krunoslav Kovac Acked-by: Solomon Chiu Acked-by: Vladimir Stempen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/display/dc/dcn30/dcn30_cm_common.c | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 41a1d0e9b7e20..e0df9b0065f9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -113,6 +113,7 @@ bool cm3_helper_translate_curve_to_hw_format( struct pwl_result_data *rgb_resulted; struct pwl_result_data *rgb; struct pwl_result_data *rgb_plus_1; + struct pwl_result_data *rgb_minus_1; struct fixed31_32 end_value; int32_t region_start, region_end; @@ -140,7 +141,7 @@ bool cm3_helper_translate_curve_to_hw_format( region_start = -MAX_LOW_POINT; region_end = NUMBER_REGIONS - MAX_LOW_POINT; } else { - /* 10 segments + /* 11 segments * segment is from 2^-10 to 2^0 * There are less than 256 points, for optimization */ @@ -154,9 +155,10 @@ bool cm3_helper_translate_curve_to_hw_format( seg_distr[7] = 4; seg_distr[8] = 4; seg_distr[9] = 4; + seg_distr[10] = 1; region_start = -10; - region_end = 0; + region_end = 1; } for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) @@ -189,6 +191,10 @@ bool cm3_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; + rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green; + rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; + // All 3 color channels have same x corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); @@ -259,15 +265,18 @@ bool cm3_helper_translate_curve_to_hw_format( rgb = rgb_resulted; rgb_plus_1 = rgb_resulted + 1; + rgb_minus_1 = rgb; i = 1; while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; + if (i >= hw_points - 1) { + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) + rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red); + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) + rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green); + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) + rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue); + } rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); @@ -283,6 +292,7 @@ bool cm3_helper_translate_curve_to_hw_format( } ++rgb_plus_1; + rgb_minus_1 = rgb; ++rgb; ++i; } -- GitLab From beb6b2f97e0a02164c7f0df6e08c49219cfc2b80 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 4 Mar 2021 17:39:10 -0500 Subject: [PATCH 778/839] drm/amd/display: Remove MPC gamut remap logic for DCN30 [Why?] Should only reroute gamut remap to mpc unless 3D LUT is not used and all planes are using the same src->dest. [How?] Remove DCN30 specific logic for rerouting gamut remap to mpc. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1513 Tested-by: Daniel Wheeler Signed-off-by: Dillon Varone Reviewed-by: Krunoslav Kovac Acked-by: Aric Cyr Acked-by: Solomon Chiu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5342c309b78c7..aece1103331dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1507,38 +1507,8 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || pipe_ctx->stream->update_flags.bits.gamut_remap || pipe_ctx->stream->update_flags.bits.out_csc) { - struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - - if (mpc->funcs->set_gamut_remap) { - int i; - int mpcc_id = hubp->inst; - struct mpc_grph_gamut_adjustment adjust; - bool enable_remap_dpp = false; - - memset(&adjust, 0, sizeof(adjust)); - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - - /* save the enablement of gamut remap for dpp */ - enable_remap_dpp = pipe_ctx->stream->gamut_remap_matrix.enable_remap; - - /* force bypass gamut remap for dpp/cm */ - pipe_ctx->stream->gamut_remap_matrix.enable_remap = false; - dc->hwss.program_gamut_remap(pipe_ctx); - - /* restore gamut remap flag and use this remap into mpc */ - pipe_ctx->stream->gamut_remap_matrix.enable_remap = enable_remap_dpp; - - /* build remap matrix for top plane if enabled */ - if (enable_remap_dpp && pipe_ctx->top_pipe == NULL) { - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) - adjust.temperature_matrix[i] = - pipe_ctx->stream->gamut_remap_matrix.matrix[i]; - } - mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust); - } else - /* dpp/cm gamut remap*/ - dc->hwss.program_gamut_remap(pipe_ctx); + /* dpp/cm gamut remap*/ + dc->hwss.program_gamut_remap(pipe_ctx); /*call the dcn2 method which uses mpc csc*/ dc->hwss.program_output_csc(dc, -- GitLab From ed01fee283a067c72b2d6500046080dbc1bb9dae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2021 13:28:22 +0100 Subject: [PATCH 779/839] nvme-fabrics: only reserve a single tag Fabrics drivers currently reserve two tags on the admin queue. But given that the connect command is only run on a freshly created queue or after all commands have been force aborted we only need to reserve a single tag. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner --- drivers/nvme/host/fabrics.h | 7 +++++++ drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/host/rdma.c | 4 ++-- drivers/nvme/host/tcp.c | 4 ++-- drivers/nvme/target/loop.c | 4 ++-- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 733010d2eafd8..888b108d87a40 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -18,6 +18,13 @@ /* default is -1: the fail fast mechanism is disabled */ #define NVMF_DEF_FAIL_FAST_TMO -1 +/* + * Reserved one command for internal usage. This command is used for sending + * the connect command, as well as for the keep alive command on the admin + * queue once live. + */ +#define NVMF_RESERVED_TAGS 1 + /* * Define a host as seen by the target. We allocate one at boot, but also * allow the override it when creating controllers. This is both to provide diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 73d0737483891..6ffa8de2a0d77 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2863,7 +2863,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_fc_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = @@ -3485,7 +3485,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 53ac4d7442ba9..7855103e05d25 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -798,7 +798,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, memset(set, 0, sizeof(*set)); set->ops = &nvme_rdma_admin_mq_ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = 2; /* connect + keep-alive */ + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + NVME_RDMA_DATA_SGL_SIZE; @@ -811,7 +811,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, memset(set, 0, sizeof(*set)); set->ops = &nvme_rdma_mq_ops; set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = 1; /* fabric connect */ + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 69f59d2c5799b..c535836e7065b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1575,7 +1575,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, memset(set, 0, sizeof(*set)); set->ops = &nvme_tcp_admin_mq_ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = 2; /* connect + keep-alive */ + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); @@ -1587,7 +1587,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, memset(set, 0, sizeof(*set)); set->ops = &nvme_tcp_mq_ops; set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = 1; /* fabric connect */ + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index cb6f86572b24a..3e189e753bcf5 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -349,7 +349,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ + ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); @@ -520,7 +520,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + -- GitLab From 06c3c3365b4bae5ef0f0525d3683b73cbae1e69c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2021 13:46:06 +0100 Subject: [PATCH 780/839] nvme: merge nvme_keep_alive into nvme_keep_alive_work Merge nvme_keep_alive into its only caller to prepare for additional changes to this code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner --- drivers/nvme/host/core.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a5653892d7739..45d510a12c6be 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1226,28 +1226,12 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } -static int nvme_keep_alive(struct nvme_ctrl *ctrl) -{ - struct request *rq; - - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - rq->timeout = ctrl->kato * HZ; - rq->end_io_data = ctrl; - - blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io); - - return 0; -} - static void nvme_keep_alive_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, ka_work); bool comp_seen = ctrl->comp_seen; + struct request *rq; if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { dev_dbg(ctrl->device, @@ -1257,12 +1241,18 @@ static void nvme_keep_alive_work(struct work_struct *work) return; } - if (nvme_keep_alive(ctrl)) { + rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, + BLK_MQ_REQ_RESERVED); + if (IS_ERR(rq)) { /* allocation failure, reset the controller */ dev_err(ctrl->device, "keep-alive failed\n"); nvme_reset_ctrl(ctrl); return; } + + rq->timeout = ctrl->kato * HZ; + rq->end_io_data = ctrl; + blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io); } static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) -- GitLab From 985c5a329dfe5ecb782551cddef48912961b83f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2021 13:51:47 +0100 Subject: [PATCH 781/839] nvme: allocate the keep alive request using BLK_MQ_REQ_NOWAIT To avoid an error recovery deadlock where the keep alive work is waiting for a request and thus can't be flushed to make progress for tearing down the controller. Also print the error code returned from blk_mq_alloc_request to help debugging any future issues in this code. Based on an earlier patch from Hannes Reinecke . Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 45d510a12c6be..83d6740f8d4e3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1242,10 +1242,10 @@ static void nvme_keep_alive_work(struct work_struct *work) } rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (IS_ERR(rq)) { /* allocation failure, reset the controller */ - dev_err(ctrl->device, "keep-alive failed\n"); + dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); nvme_reset_ctrl(ctrl); return; } -- GitLab From b94e8cd2e6a94fc7563529ddc82726a7e77e04de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Mar 2021 10:32:07 +0100 Subject: [PATCH 782/839] nvme: fix Write Zeroes limitations We voluntarily limit the Write Zeroes sizes to the MDTS value provided by the hardware, but currently get the units wrong, so fix that. Fixes: 6e02318eaea5 ("nvme: add support for the Write Zeroes command") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Tested-by: Klaus Jensen Reviewed-by: Klaus Jensen Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani --- drivers/nvme/host/core.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 83d6740f8d4e3..0896e21642beb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1954,30 +1954,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) +/* + * Even though NVMe spec explicitly states that MDTS is not applicable to the + * write-zeroes, we are cautious and limit the size to the controllers + * max_hw_sectors value, which is based on the MDTS field and possibly other + * limiting factors. + */ +static void nvme_config_write_zeroes(struct request_queue *q, + struct nvme_ctrl *ctrl) { - u64 max_blocks; - - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) - return; - /* - * Even though NVMe spec explicitly states that MDTS is not - * applicable to the write-zeroes:- "The restriction does not apply to - * commands that do not transfer data between the host and the - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". - * In order to be more cautious use controller's max_hw_sectors value - * to configure the maximum sectors for the write-zeroes which is - * configured based on the controller's MDTS field in the - * nvme_init_identify() if available. - */ - if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_blocks = (u64)USHRT_MAX + 1; - else - max_blocks = ns->ctrl->max_hw_sectors + 1; - - blk_queue_max_write_zeroes_sectors(disk->queue, - nvme_lba_to_sect(ns, max_blocks)); + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors); } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -2149,7 +2137,7 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(disk, ns); + nvme_config_write_zeroes(disk->queue, ns->ctrl); set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || test_bit(NVME_NS_FORCE_RO, &ns->flags)); -- GitLab From fd0823f405090f9f410fc3e3ff7efb52e7b486fa Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 15 Mar 2021 14:08:11 -0700 Subject: [PATCH 783/839] nvme-tcp: fix a NULL deref when receiving a 0-length r2t PDU When the controller sends us a 0-length r2t PDU we should not attempt to try to set up a h2cdata PDU but rather conclude that this is a buggy controller (forward progress is not possible) and simply fail it immediately. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Reported-by: Belanger, Martin Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c535836e7065b..5b23e1d52cb33 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -568,6 +568,13 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, req->pdu_len = le32_to_cpu(pdu->r2t_length); req->pdu_sent = 0; + if (unlikely(!req->pdu_len)) { + dev_err(queue->ctrl->ctrl.device, + "req %d r2t len is %u, probably a bug...\n", + rq->tag, req->pdu_len); + return -EPROTO; + } + if (unlikely(req->data_sent + req->pdu_len > req->data_len)) { dev_err(queue->ctrl->ctrl.device, "req %d r2t len %u exceeded data len %u (%zu sent)\n", -- GitLab From bb83337058a7000644cdeffc67361d2473534756 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 15 Mar 2021 13:53:47 -0700 Subject: [PATCH 784/839] nvme-tcp: fix misuse of __smp_processor_id with preemption enabled For our pure advisory use-case, we only rely on this call as a hint, so fix the warning complaints of using the smp_processor_id variants with preemption enabled. Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Fixes: ada831772188 ("nvme-tcp: Fix warning with CONFIG_DEBUG_PREEMPT") Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 5b23e1d52cb33..35a78682f8ded 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -287,7 +287,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. */ - if (queue->io_cpu == __smp_processor_id() && + if (queue->io_cpu == raw_smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; nvme_tcp_send_all(queue); -- GitLab From 72f572428b83d0bc7028e7c4326d1a5f45205e44 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 15 Mar 2021 14:04:26 -0700 Subject: [PATCH 785/839] nvme-tcp: fix possible hang when failing to set io queues We only setup io queues for nvme controllers, and it makes absolutely no sense to allow a controller (re)connect without any I/O queues. If we happen to fail setting the queue count for any reason, we should not allow this to be a successful reconnect as I/O has no chance in going through. Instead just fail and schedule another reconnect. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 35a78682f8ded..a0f00cb8f9f3c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1752,8 +1752,11 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) return ret; ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; + if (ctrl->queue_count < 2) { + dev_err(ctrl->device, + "unable to set any I/O queues\n"); + return -ENOMEM; + } dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); -- GitLab From c4c6df5fc84659690d4391d1fba155cd94185295 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 15 Mar 2021 14:04:27 -0700 Subject: [PATCH 786/839] nvme-rdma: fix possible hang when failing to set io queues We only setup io queues for nvme controllers, and it makes absolutely no sense to allow a controller (re)connect without any I/O queues. If we happen to fail setting the queue count for any reason, we should not allow this to be a successful reconnect as I/O has no chance in going through. Instead just fail and schedule another reconnect. Reported-by: Chao Leng Fixes: 711023071960 ("nvme-rdma: add a NVMe over Fabrics RDMA host driver") Signed-off-by: Sagi Grimberg Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7855103e05d25..be905d4fdb47f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -736,8 +736,11 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) return ret; ctrl->ctrl.queue_count = nr_io_queues + 1; - if (ctrl->ctrl.queue_count < 2) - return 0; + if (ctrl->ctrl.queue_count < 2) { + dev_err(ctrl->ctrl.device, + "unable to set any I/O queues\n"); + return -ENOMEM; + } dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); -- GitLab From d218a8a3003e84ab136e69a4e30dd4ec7dab2d22 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 15 Mar 2021 15:34:51 -0700 Subject: [PATCH 787/839] nvmet: don't check iosqes,iocqes for discovery controllers From the base spec, Figure 78: "Controller Configuration, these fields are defined as parameters to configure an "I/O Controller (IOC)" and not to configure a "Discovery Controller (DC). ... If the controller does not support I/O queues, then this field shall be read-only with a value of 0h Just perform this check for I/O controllers. Fixes: a07b4970f464 ("nvmet: add a generic NVMe target") Reported-by: Belanger, Martin Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index be6fcdaf51a7b..a027433b8be84 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1118,9 +1118,20 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) { lockdep_assert_held(&ctrl->lock); - if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || - nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES || - nvmet_cc_mps(ctrl->cc) != 0 || + /* + * Only I/O controllers should verify iosqes,iocqes. + * Strictly speaking, the spec says a discovery controller + * should verify iosqes,iocqes are zeroed, however that + * would break backwards compatibility, so don't enforce it. + */ + if (ctrl->subsys->type != NVME_NQN_DISC && + (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || + nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { + ctrl->csts = NVME_CSTS_CFS; + return; + } + + if (nvmet_cc_mps(ctrl->cc) != 0 || nvmet_cc_ams(ctrl->cc) != 0 || nvmet_cc_css(ctrl->cc) != 0) { ctrl->csts = NVME_CSTS_CFS; -- GitLab From bac04454ef9fada009f0572576837548b190bf94 Mon Sep 17 00:00:00 2001 From: Elad Grupi Date: Tue, 16 Mar 2021 17:44:25 +0200 Subject: [PATCH 788/839] nvmet-tcp: fix kmap leak when data digest in use When data digest is enabled we should unmap pdu iovec before handling the data digest pdu. Signed-off-by: Elad Grupi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 8b0485ada315b..d658c6e8263af 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1098,11 +1098,11 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) cmd->rbytes_done += ret; } + nvmet_tcp_unmap_pdu_iovec(cmd); if (queue->data_digest) { nvmet_tcp_prep_recv_ddgst(cmd); return 0; } - nvmet_tcp_unmap_pdu_iovec(cmd); if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && cmd->rbytes_done == cmd->req.transfer_len) { -- GitLab From 2b8c956ea6ba896ec18ae36c2684ecfa04c1f479 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Mar 2021 23:48:05 -0700 Subject: [PATCH 789/839] usb: typec: tcpm: Skip sink_cap query only when VDM sm is busy When port partner responds "Not supported" to the DiscIdentity command, VDM state machine can remain in NVDM_STATE_ERR_TMOUT and this causes querying sink cap to be skipped indefinitely. Hence check for vdm_sm_running instead of checking for VDM_STATE_DONE. Fixes: 8dc4bd073663f ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)") Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Cc: stable Link: https://lore.kernel.org/r/20210318064805.3747831-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 92093ea12cff2..ce7af398c7c1c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5173,7 +5173,7 @@ static void tcpm_enable_frs_work(struct kthread_work *work) goto unlock; /* Send when the state machine is idle */ - if (port->state != SNK_READY || port->vdm_state != VDM_STATE_DONE || port->send_discover) + if (port->state != SNK_READY || port->vdm_sm_running || port->send_discover) goto resched; port->upcoming_state = GET_SINK_CAP; -- GitLab From 2cafd46a714af1e55354bc6dcea9dcc13f9475b5 Mon Sep 17 00:00:00 2001 From: Edmundo Carmona Antoranz Date: Tue, 16 Mar 2021 12:17:35 -0600 Subject: [PATCH 790/839] staging: vt665x: fix alignment constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing 2 instances of alignment warnings drivers/staging/vt6655/rxtx.h:153:1: warning: alignment 1 of ‘struct vnt_cts’ is less than 2 [-Wpacked-not-aligned] drivers/staging/vt6655/rxtx.h:163:1: warning: alignment 1 of ‘struct vnt_cts_fb’ is less than 2 [-Wpacked-not-aligned] The root cause seems to be that _because_ struct ieee80211_cts is marked as __aligned(2), this requires any encapsulating struct to also have an alignment of 2. Fixes: 2faf12c57efe ("staging: vt665x: fix alignment constraints") Reviewed-by: Arnd Bergmann Signed-off-by: Edmundo Carmona Antoranz Link: https://lore.kernel.org/r/20210316181736.2553318-1-eantoranz@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/rxtx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/rxtx.h b/drivers/staging/vt6655/rxtx.h index e7061d3833062..c3c2c15668828 100644 --- a/drivers/staging/vt6655/rxtx.h +++ b/drivers/staging/vt6655/rxtx.h @@ -150,7 +150,7 @@ struct vnt_cts { u16 reserved; struct ieee80211_cts data; u16 reserved2; -} __packed; +} __packed __aligned(2); struct vnt_cts_fb { struct vnt_phy_field b; @@ -160,7 +160,7 @@ struct vnt_cts_fb { __le16 cts_duration_ba_f1; struct ieee80211_cts data; u16 reserved2; -} __packed; +} __packed __aligned(2); struct vnt_tx_fifo_head { u8 tx_key[WLAN_KEY_LEN_CCMP]; -- GitLab From 072a03e0a0b1bc22eb5970727877264657c61fd3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Mar 2021 10:10:35 +0100 Subject: [PATCH 791/839] iommu/amd: Move Stoney Ridge check to detect_ivrs() The AMD IOMMU will not be enabled on AMD Stoney Ridge systems. Bail out even earlier and refuse to even detect the IOMMU there. References: https://bugzilla.kernel.org/show_bug.cgi?id=212133 References: https://bugzilla.suse.com/show_bug.cgi?id=1183132 Cc: stable@vger.kernel.org # v5.11 Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20210317091037.31374-2-joro@8bytes.org Acked-by: Huang Rui --- drivers/iommu/amd/init.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9126efcbaf2c7..3280e6f5b720e 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2714,7 +2714,6 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; int i, remap_cache_sz, ret; acpi_status status; - u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2804,16 +2803,6 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; - /* Disable IOMMU if there's Stoney Ridge graphics */ - for (i = 0; i < 32; i++) { - pci_id = read_pci_config(0, i, 0, 0); - if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { - pr_info("Disable IOMMU on Stoney Ridge\n"); - amd_iommu_disabled = true; - break; - } - } - /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2880,6 +2869,7 @@ static bool detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; + int i; status = acpi_get_table("IVRS", 0, &ivrs_base); if (status == AE_NOT_FOUND) @@ -2892,6 +2882,17 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + /* Don't use IOMMU if there is Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + u32 pci_id; + + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + return false; + } + } + /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); -- GitLab From 9f81ca8d1fd68f5697c201f26632ed622e9e462f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Mar 2021 10:10:36 +0100 Subject: [PATCH 792/839] iommu/amd: Don't call early_amd_iommu_init() when AMD IOMMU is disabled Don't even try to initialize the AMD IOMMU hardware when amd_iommu=off has been passed on the kernel command line. References: https://bugzilla.kernel.org/show_bug.cgi?id=212133 References: https://bugzilla.suse.com/show_bug.cgi?id=1183132 Cc: stable@vger.kernel.org # v5.11 Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20210317091037.31374-3-joro@8bytes.org Acked-by: Huang Rui --- drivers/iommu/amd/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 3280e6f5b720e..61dae1800b7fa 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2919,12 +2919,12 @@ static int __init state_next(void) } break; case IOMMU_IVRS_DETECTED: - ret = early_amd_iommu_init(); - init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; - if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled\n"); + if (amd_iommu_disabled) { init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; + } else { + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; } break; case IOMMU_ACPI_FINISHED: -- GitLab From 4b8ef157ca832f812b3302b1800548bd92c207de Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Mar 2021 10:10:37 +0100 Subject: [PATCH 793/839] iommu/amd: Keep track of amd_iommu_irq_remap state The amd_iommu_irq_remap variable is set to true in amd_iommu_prepare(). But if initialization fails it is not set to false. Fix that and correctly keep track of whether irq remapping is enabled or not. References: https://bugzilla.kernel.org/show_bug.cgi?id=212133 References: https://bugzilla.suse.com/show_bug.cgi?id=1183132 Fixes: b34f10c2dc59 ("iommu/amd: Stop irq_remapping_select() matching when remapping is disabled") Cc: stable@vger.kernel.org # v5.11 Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20210317091037.31374-4-joro@8bytes.org Acked-by: Huang Rui --- drivers/iommu/amd/init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 61dae1800b7fa..321f5906e6ed3 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3002,8 +3002,11 @@ int __init amd_iommu_prepare(void) amd_iommu_irq_remap = true; ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); - if (ret) + if (ret) { + amd_iommu_irq_remap = false; return ret; + } + return amd_iommu_irq_remap ? 0 : -ENODEV; } -- GitLab From 8dfd0fa6ecdc5e2099a57d485b7ce237abc6c7a0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 12 Mar 2021 18:54:39 +0300 Subject: [PATCH 794/839] iommu/tegra-smmu: Make tegra_smmu_probe_device() to handle all IOMMU phandles The tegra_smmu_probe_device() handles only the first IOMMU device-tree phandle, skipping the rest. Devices like 3D module on Tegra30 have multiple IOMMU phandles, one for each h/w block, and thus, only one IOMMU phandle is added to fwspec for the 3D module, breaking GPU. Previously this problem was masked by tegra_smmu_attach_dev() which didn't use the fwspec, but parsed the DT by itself. The previous commit to tegra-smmu driver partially reverted changes that caused problems for T124 and now we have tegra_smmu_attach_dev() that uses the fwspec and the old-buggy variant of tegra_smmu_probe_device() which skips secondary IOMMUs. Make tegra_smmu_probe_device() not to skip the secondary IOMMUs. This fixes a partially attached IOMMU of the 3D module on Tegra30 and now GPU works properly once again. Fixes: 765a9d1d02b2 ("iommu/tegra-smmu: Fix mc errors on tegra124-nyan") Signed-off-by: Dmitry Osipenko Tested-by: Nicolin Chen Link: https://lore.kernel.org/r/20210312155439.18477-1-digetx@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 97eb62f667d22..602aab98c0794 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -849,12 +849,11 @@ static struct iommu_device *tegra_smmu_probe_device(struct device *dev) smmu = tegra_smmu_find(args.np); if (smmu) { err = tegra_smmu_configure(smmu, dev, &args); - of_node_put(args.np); - if (err < 0) + if (err < 0) { + of_node_put(args.np); return ERR_PTR(err); - - break; + } } of_node_put(args.np); -- GitLab From cc9cfddb0433961107bb156fa769fdd7eb6718de Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Mar 2021 15:37:35 +0100 Subject: [PATCH 795/839] KVM: x86: hyper-v: Track Hyper-V TSC page status Create an infrastructure for tracking Hyper-V TSC page status, i.e. if it was updated from guest/host side or if we've failed to set it up (because e.g. guest wrote some garbage to HV_X64_MSR_REFERENCE_TSC) and there's no need to retry. Also, in a hypothetical situation when we are in 'always catchup' mode for TSC we can now avoid contending 'hv->hv_lock' on every guest enter by setting the state to HV_TSC_PAGE_BROKEN after compute_tsc_page_parameters() returns false. Check for HV_TSC_PAGE_SET state instead of '!hv->tsc_ref.tsc_sequence' in get_time_ref_counter() to properly handle the situation when we failed to write the updated TSC page values to the guest. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210316143736.964151-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 17 ++++++++++++ arch/x86/kvm/hyperv.c | 49 +++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9bc091ecaaeb9..e1b6e2edc8282 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -884,12 +884,29 @@ struct kvm_hv_syndbg { u64 options; }; +/* Current state of Hyper-V TSC page clocksource */ +enum hv_tsc_page_status { + /* TSC page was not set up or disabled */ + HV_TSC_PAGE_UNSET = 0, + /* TSC page MSR was written by the guest, update pending */ + HV_TSC_PAGE_GUEST_CHANGED, + /* TSC page MSR was written by KVM userspace, update pending */ + HV_TSC_PAGE_HOST_CHANGED, + /* TSC page was properly set up and is currently active */ + HV_TSC_PAGE_SET, + /* TSC page is currently being updated and therefore is inactive */ + HV_TSC_PAGE_UPDATING, + /* TSC page was set up with an inaccessible GPA */ + HV_TSC_PAGE_BROKEN, +}; + /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; u64 hv_guest_os_id; u64 hv_hypercall; u64 hv_tsc_page; + enum hv_tsc_page_status hv_tsc_page_status; /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a0e3c49233d4c..5c0f10a2b3abe 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm) u64 tsc; /* - * The guest has not set up the TSC page or the clock isn't - * stable, fall back to get_kvmclock_ns. + * Fall back to get_kvmclock_ns() when TSC page hasn't been set up, + * is broken, disabled or being updated. */ - if (!hv->tsc_ref.tsc_sequence) + if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET) return div_u64(get_kvmclock_ns(kvm), 100); vcpu = kvm_get_vcpu(kvm, 0); @@ -1087,7 +1087,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) return; mutex_lock(&hv->hv_lock); @@ -1101,7 +1102,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, */ if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), &tsc_seq, sizeof(tsc_seq)))) - goto out_unlock; + goto out_err; /* * While we're computing and writing the parameters, force the @@ -1110,15 +1111,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, hv->tsc_ref.tsc_sequence = 0; if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) - goto out_unlock; + goto out_err; if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) - goto out_unlock; + goto out_err; /* Ensure sequence is zero before writing the rest of the struct. */ smp_wmb(); if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) - goto out_unlock; + goto out_err; /* * Now switch to the TSC page mechanism by writing the sequence. @@ -1131,8 +1132,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, smp_wmb(); hv->tsc_ref.tsc_sequence = tsc_seq; - kvm_write_guest(kvm, gfn_to_gpa(gfn), - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) + goto out_err; + + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; + goto out_unlock; + +out_err: + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; out_unlock: mutex_unlock(&hv->hv_lock); } @@ -1142,7 +1150,8 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) struct kvm_hv *hv = to_kvm_hv(kvm); u64 gfn; - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) return; mutex_lock(&hv->hv_lock); @@ -1150,11 +1159,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) goto out_unlock; + /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) + hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; hv->tsc_ref.tsc_sequence = 0; - kvm_write_guest(kvm, gfn_to_gpa(gfn), - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; out_unlock: mutex_unlock(&hv->hv_lock); @@ -1216,8 +1230,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, } case HV_X64_MSR_REFERENCE_TSC: hv->hv_tsc_page = data; - if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + if (!host) + hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED; + else + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); + } else { + hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET; + } break; case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_set_crash_data(kvm, -- GitLab From 0469f2f7ab4c6a6cae4b74c4f981c4da6d909411 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Mar 2021 15:37:36 +0100 Subject: [PATCH 796/839] KVM: x86: hyper-v: Don't touch TSC page values when guest opted for re-enlightenment When guest opts for re-enlightenment notifications upon migration, it is in its right to assume that TSC page values never change (as they're only supposed to change upon migration and the host has to keep things as they are before it receives confirmation from the guest). This is mostly true until the guest is migrated somewhere. KVM userspace (e.g. QEMU) will trigger masterclock update by writing to HV_X64_MSR_REFERENCE_TSC, by calling KVM_SET_CLOCK,... and as TSC value and kvmclock reading drift apart (even slightly), the update causes TSC page values to change. The issue at hand is that when Hyper-V is migrated, it uses stale (cached) TSC page values to compute the difference between its own clocksource (provided by KVM) and its guests' TSC pages to program synthetic timers and in some cases, when TSC page is updated, this puts all stimer expirations in the past. This, in its turn, causes an interrupt storm and L2 guests not making much forward progress. Note, KVM doesn't fully implement re-enlightenment notification. Basically, the support for reenlightenment MSRs is just a stub and userspace is only expected to expose the feature when TSC scaling on the expected destination hosts is available. With TSC scaling, no real re-enlightenment is needed as TSC frequency doesn't change. With TSC scaling becoming ubiquitous, it likely makes little sense to fully implement re-enlightenment in KVM. Prevent TSC page from being updated after migration. In case it's not the guest who's initiating the change and when TSC page is already enabled, just keep it as it is: TSC value is supposed to be preserved across migration and TSC frequency can't change with re-enlightenment enabled. The guest is doomed anyway if any of this is not true. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210316143736.964151-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5c0f10a2b3abe..f98370a399361 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, return true; } +/* + * Don't touch TSC page values if the guest has opted for TSC emulation after + * migration. KVM doesn't fully support reenlightenment notifications and TSC + * access emulation and Hyper-V is known to expect the values in TSC page to + * stay constant before TSC access emulation is disabled from guest side + * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC + * frequency and guest visible TSC value across migration (and prevent it when + * TSC scaling is unsupported). + */ +static inline bool tsc_page_update_unsafe(struct kvm_hv *hv) +{ + return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) && + hv->hv_tsc_emulation_control; +} + void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock) { @@ -1104,6 +1119,14 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, &tsc_seq, sizeof(tsc_seq)))) goto out_err; + if (tsc_seq && tsc_page_update_unsafe(hv)) { + if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) + goto out_err; + + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; + goto out_unlock; + } + /* * While we're computing and writing the parameters, force the * guest to use the time reference count MSR. @@ -1151,7 +1174,8 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) u64 gfn; if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || + tsc_page_update_unsafe(hv)) return; mutex_lock(&hv->hv_lock); -- GitLab From 50b1affc891cbc103a2334ce909a026e25f4c84d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Mar 2021 13:20:08 +0000 Subject: [PATCH 797/839] ALSA: usb-audio: Fix unintentional sign extension issue The shifting of the u8 integer device by 24 bits to the left will be promoted to a 32 bit signed int and then sign-extended to a 64 bit unsigned long. In the event that the top bit of device is set then all then all the upper 32 bits of the unsigned long will end up as also being set because of the sign-extension. Fix this by casting device to an unsigned long before the shift. Addresses-Coverity: ("Unintended sign extension") Fixes: a07df82c7990 ("ALSA: usb-audio: Add DJM750 to Pioneer mixer quirk") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210318132008.15266-1-colin.king@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 08873d2afe4d6..ffd922327ae4a 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2883,7 +2883,7 @@ static int snd_djm_controls_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_v u8 group = (private_value & SND_DJM_GROUP_MASK) >> SND_DJM_GROUP_SHIFT; u16 value = elem->value.enumerated.item[0]; - kctl->private_value = ((device << SND_DJM_DEVICE_SHIFT) | + kctl->private_value = (((unsigned long)device << SND_DJM_DEVICE_SHIFT) | (group << SND_DJM_GROUP_SHIFT) | value); @@ -2921,7 +2921,7 @@ static int snd_djm_controls_create(struct usb_mixer_interface *mixer, value = device->controls[i].default_value; knew.name = device->controls[i].name; knew.private_value = ( - (device_idx << SND_DJM_DEVICE_SHIFT) | + ((unsigned long)device_idx << SND_DJM_DEVICE_SHIFT) | (i << SND_DJM_GROUP_SHIFT) | value); err = snd_djm_controls_update(mixer, device_idx, i, value); -- GitLab From 2c7f76b4c42bd5d953bc821e151644434865f999 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 18 Mar 2021 15:09:49 +0100 Subject: [PATCH 798/839] selftests: kvm: Add basic Hyper-V clocksources tests Introduce a new selftest for Hyper-V clocksources (MSR-based reference TSC and TSC page). As a starting point, test the following: 1) Reference TSC is 1Ghz clock. 2) Reference TSC and TSC page give the same reading. 3) TSC page gets updated upon KVM_SET_CLOCK call. 4) TSC page does not get updated when guest opted for reenlightenment. 5) Disabled TSC page doesn't get updated. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210318140949.1065740-1-vkuznets@redhat.com> [Add a host-side test using TSC + KVM_GET_MSR too. - Paolo] Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/x86_64/hyperv_clock.c | 260 ++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_clock.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 32b87cc77c8e0..22be05c55f136 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -9,6 +9,7 @@ /x86_64/evmcs_test /x86_64/get_cpuid_test /x86_64/kvm_pv_test +/x86_64/hyperv_clock /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f884..c3672e9087d3f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_ha TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c new file mode 100644 index 0000000000000..ffbc4555c6e28 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. + * + * Tests for Hyper-V clocksources + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; +} __packed; + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 + +/* Simplified mul_u64_u64_shr() */ +static inline u64 mul_u64_u64_shr64(u64 a, u64 b) +{ + union { + u64 ll; + struct { + u32 low, high; + } l; + } rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + rh.l.low = c = rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + return rh.ll; +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +static inline void check_tsc_msr_rdtsc(void) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); + GUEST_ASSERT(tsc_freq > 0); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + GUEST_ASSERT(r2 > r1 && t2 > t1); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100); +} + +static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) +{ + u64 r1, r2, t1, t2; + + /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */ + t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; + r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + /* 10 ms tolerance */ + GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000); + nop_loop(); + + t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; + r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); +} + +static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) +{ + u64 tsc_scale, tsc_offset; + + /* Set Guest OS id to enable Hyper-V emulation */ + GUEST_SYNC(1); + wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + GUEST_SYNC(2); + + check_tsc_msr_rdtsc(); + + GUEST_SYNC(3); + + /* Set up TSC page is disabled state, check that it's clean */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + + GUEST_SYNC(4); + + /* Set up TSC page is enabled state */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1); + GUEST_ASSERT(tsc_page->tsc_sequence != 0); + + GUEST_SYNC(5); + + check_tsc_msr_tsc_page(tsc_page); + + GUEST_SYNC(6); + + tsc_offset = tsc_page->tsc_offset; + /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */ + GUEST_SYNC(7); + GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset); + + nop_loop(); + + /* + * Enable Re-enlightenment and check that TSC page stays constant across + * KVM_SET_CLOCK. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); + tsc_offset = tsc_page->tsc_offset; + tsc_scale = tsc_page->tsc_scale; + GUEST_SYNC(8); + GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); + GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); + + GUEST_SYNC(9); + + check_tsc_msr_tsc_page(tsc_page); + + /* + * Disable re-enlightenment and TSC page, check that KVM doesn't update + * it anymore. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); + memset(tsc_page, 0, sizeof(*tsc_page)); + + GUEST_SYNC(10); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + + GUEST_DONE(); +} + +#define VCPU_ID 0 + +static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, + "Elapsed time does not match (MSR=%ld, TSC=%ld)", + (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + vm_vaddr_t tsc_page_gva; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + run = vcpu_state(vm, VCPU_ID); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, + "TSC page has to be page aligned\n"); + vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); + + host_check_tsc_msr_rdtsc(vm); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + /* Keep in sync with guest_main() */ + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + stage); + goto out; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, + "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + /* Reset kvmclock triggering TSC page update */ + if (stage == 7 || stage == 8 || stage == 10) { + struct kvm_clock_data clock = {0}; + + vm_ioctl(vm, KVM_SET_CLOCK, &clock); + } + } + +out: + kvm_vm_free(vm); +} -- GitLab From 76cd979f4f38a27df22efb5773a0d567181a9392 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 16 Mar 2021 16:33:27 +0100 Subject: [PATCH 799/839] io_uring: imply MSG_NOSIGNAL for send[msg]()/recv[msg]() calls We never want to generate any SIGPIPE, -EPIPE only is much better. Signed-off-by: Stefan Metzmacher Link: https://lore.kernel.org/r/38961085c3ec49fd21550c7788f214d1ff02d2d4.1615908477.git.metze@samba.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f4e312111ea7..a81f7a30ea709 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4384,7 +4384,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) kmsg = &iomsg; } - flags = req->sr_msg.msg_flags; + flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; if (flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; else if (issue_flags & IO_URING_F_NONBLOCK) @@ -4428,7 +4428,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) msg.msg_controllen = 0; msg.msg_namelen = 0; - flags = req->sr_msg.msg_flags; + flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; if (flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; else if (issue_flags & IO_URING_F_NONBLOCK) @@ -4618,7 +4618,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 1, req->sr_msg.len); } - flags = req->sr_msg.msg_flags; + flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; if (flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; else if (force_nonblock) @@ -4677,7 +4677,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) msg.msg_iocb = NULL; msg.msg_flags = 0; - flags = req->sr_msg.msg_flags; + flags = req->sr_msg.msg_flags | MSG_NOSIGNAL; if (flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; else if (force_nonblock) -- GitLab From 53e043b2b432ef2294efec04dd8a88d96c024624 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Mar 2021 12:56:56 +0100 Subject: [PATCH 800/839] io_uring: remove structures from include/linux/io_uring.h Link: https://lore.kernel.org/r/8c1d14f3748105f4caeda01716d47af2fa41d11c.1615809009.git.metze@samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io-wq.h | 10 +++++++++- fs/io_uring.c | 16 ++++++++++++++++ include/linux/io_uring.h | 25 ------------------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index 1ac2f3248088e..80d590564ff93 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -2,7 +2,6 @@ #define INTERNAL_IO_WQ_H #include -#include struct io_wq; @@ -21,6 +20,15 @@ enum io_wq_cancel { IO_WQ_CANCEL_NOTFOUND, /* work not found */ }; +struct io_wq_work_node { + struct io_wq_work_node *next; +}; + +struct io_wq_work_list { + struct io_wq_work_node *first; + struct io_wq_work_node *last; +}; + static inline void wq_list_add_after(struct io_wq_work_node *node, struct io_wq_work_node *pos, struct io_wq_work_list *list) diff --git a/fs/io_uring.c b/fs/io_uring.c index a81f7a30ea709..52ba8d7f3eb86 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -456,6 +456,22 @@ struct io_ring_ctx { struct list_head tctx_list; }; +struct io_uring_task { + /* submission side */ + struct xarray xa; + struct wait_queue_head wait; + void *last; + void *io_wq; + struct percpu_counter inflight; + atomic_t in_idle; + bool sqpoll; + + spinlock_t task_lock; + struct io_wq_work_list task_list; + unsigned long task_state; + struct callback_head task_work; +}; + /* * First field must be the file pointer in all the * iocb unions! See also 'struct kiocb' in diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 9761a0ec9f95c..79cde9906be04 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -5,31 +5,6 @@ #include #include -struct io_wq_work_node { - struct io_wq_work_node *next; -}; - -struct io_wq_work_list { - struct io_wq_work_node *first; - struct io_wq_work_node *last; -}; - -struct io_uring_task { - /* submission side */ - struct xarray xa; - struct wait_queue_head wait; - void *last; - void *io_wq; - struct percpu_counter inflight; - atomic_t in_idle; - bool sqpoll; - - spinlock_t task_lock; - struct io_wq_work_list task_list; - unsigned long task_state; - struct callback_head task_work; -}; - #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); void __io_uring_task_cancel(void); -- GitLab From ee53fb2b197b72b126ca0387ae636da75d969428 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Mar 2021 12:56:57 +0100 Subject: [PATCH 801/839] io_uring: use typesafe pointers in io_uring_task Signed-off-by: Stefan Metzmacher Link: https://lore.kernel.org/r/ce2a598e66e48347bb04afbaf2acc67c0cc7971a.1615809009.git.metze@samba.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 52ba8d7f3eb86..6750739667602 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -460,8 +460,8 @@ struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; - void *last; - void *io_wq; + const struct io_ring_ctx *last; + struct io_wq *io_wq; struct percpu_counter inflight; atomic_t in_idle; bool sqpoll; -- GitLab From de75a3d3f5a14c9ab3c4883de3471d3c92a8ee78 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 18 Mar 2021 11:54:35 +0000 Subject: [PATCH 802/839] io_uring: don't leak creds on SQO attach error Attaching to already dead/dying SQPOLL task is disallowed in io_sq_offload_create(), but cleanup is hand coded by calling io_put_sq_data()/etc., that miss to put ctx->sq_creds. Defer everything to error-path io_sq_thread_finish(), adding ctx->sqd_list in the error case as well as finish will handle it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6750739667602..ea2d3e120555e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7910,22 +7910,17 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, ret = 0; io_sq_thread_park(sqd); + list_add(&ctx->sqd_list, &sqd->ctx_list); + io_sqd_update_thread_idle(sqd); /* don't attach to a dying SQPOLL thread, would be racy */ - if (attached && !sqd->thread) { + if (attached && !sqd->thread) ret = -ENXIO; - } else { - list_add(&ctx->sqd_list, &sqd->ctx_list); - io_sqd_update_thread_idle(sqd); - } io_sq_thread_unpark(sqd); - if (ret < 0) { - io_put_sq_data(sqd); - ctx->sq_data = NULL; - return ret; - } else if (attached) { + if (ret < 0) + goto err; + if (attached) return 0; - } if (p->flags & IORING_SETUP_SQ_AFF) { int cpu = p->sq_thread_cpu; -- GitLab From 9d3fcb28f9b9750b474811a2964ce022df56336e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 16 Mar 2021 22:17:48 -0400 Subject: [PATCH 803/839] Revert "PM: ACPI: reboot: Use S5 for reboot" This reverts commit d60cd06331a3566d3305b3c7b566e79edf4e2095. This patch causes a panic when rebooting my Dell Poweredge r440. I do not have the full panic log as it's lost at that stage of the reboot and I do not have a serial console. Reverting this patch makes my system able to reboot again. Signed-off-by: Josef Bacik Signed-off-by: Rafael J. Wysocki --- kernel/reboot.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index eb1b158507616..a6ad5eb2fa733 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -244,8 +244,6 @@ void migrate_to_reboot_cpu(void) void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); - if (pm_power_off_prepare) - pm_power_off_prepare(); migrate_to_reboot_cpu(); syscore_shutdown(); if (!cmd) -- GitLab From 83b62687a05205847d627f29126a8fee3c644335 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 18 Mar 2021 11:44:08 -0400 Subject: [PATCH 804/839] workqueue/tracing: Copy workqueue name to buffer in trace event The trace event "workqueue_queue_work" references an unsafe string in dereferencing the name of the workqueue. As the name is allocated, it could later be freed, and the pointer to that string could stay on the tracing buffer. If the trace buffer is read after the string is freed, it will reference an unsafe pointer. I added a new verifier to make sure that all strings referenced in the output of the trace buffer is safe to read and this triggered on the workqueue_queue_work trace event: workqueue_queue_work: work struct=00000000b2b235c7 function=gc_worker workqueue=(0xffff888100051160:events_power_efficient)[UNSAFE-MEMORY] req_cpu=256 cpu=1 workqueue_queue_work: work struct=00000000c344caec function=flush_to_ldisc workqueue=(0xffff888100054d60:events_unbound)[UNSAFE-MEMORY] req_cpu=256 cpu=4294967295 workqueue_queue_work: work struct=00000000b2b235c7 function=gc_worker workqueue=(0xffff888100051160:events_power_efficient)[UNSAFE-MEMORY] req_cpu=256 cpu=1 workqueue_queue_work: work struct=000000000b238b3f function=vmstat_update workqueue=(0xffff8881000c3760:mm_percpu_wq)[UNSAFE-MEMORY] req_cpu=1 cpu=1 Also, if this event is read via a user space application like perf or trace-cmd, the name would only be an address and useless information: workqueue_queue_work: work struct=0xffff953f80b4b918 function=disk_events_workfn workqueue=ffff953f8005d378 req_cpu=8192 cpu=5 Cc: Zqiang Cc: Tejun Heo Fixes: 7bf9c4a88e3e3 ("workqueue: tracing the name of the workqueue instead of it's address") Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/workqueue.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 970cc2ea28500..6154a2e72bce6 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -30,7 +30,7 @@ TRACE_EVENT(workqueue_queue_work, TP_STRUCT__entry( __field( void *, work ) __field( void *, function) - __field( const char *, workqueue) + __string( workqueue, pwq->wq->name) __field( unsigned int, req_cpu ) __field( unsigned int, cpu ) ), @@ -38,13 +38,13 @@ TRACE_EVENT(workqueue_queue_work, TP_fast_assign( __entry->work = work; __entry->function = work->func; - __entry->workqueue = pwq->wq->name; + __assign_str(workqueue, pwq->wq->name); __entry->req_cpu = req_cpu; __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u", - __entry->work, __entry->function, __entry->workqueue, + __entry->work, __entry->function, __get_str(workqueue), __entry->req_cpu, __entry->cpu) ); -- GitLab From 77a3aa26a00fe55325ae2a51d80a56836d1edce8 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 18 Mar 2021 15:56:29 +0100 Subject: [PATCH 805/839] selftests: kvm: add get_msr_index_features Test the KVM_GET_MSR_FEATURE_INDEX_LIST and KVM_GET_MSR_INDEX_LIST ioctls. Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20210318145629.486450-1-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/lib/kvm_util_internal.h | 2 - .../kvm/x86_64/get_msr_index_features.c | 134 ++++++++++++++++++ 5 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/get_msr_index_features.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 22be05c55f136..5873d54b19a0c 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -8,6 +8,7 @@ /x86_64/debug_regs /x86_64/evmcs_test /x86_64/get_cpuid_test +/x86_64/get_msr_index_features /x86_64/kvm_pv_test /x86_64/hyperv_clock /x86_64/hyperv_cpuid diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c3672e9087d3f..ce34c8160fa3e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -39,6 +39,7 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2d7eb6989e834..81af4bcc9ab93 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 /* diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 34465dc562d8c..91ce1b5d480b2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -10,8 +10,6 @@ #include "sparsebit.h" -#define KVM_DEV_PATH "/dev/kvm" - struct userspace_mem_region { struct kvm_userspace_memory_region region; struct sparsebit *unused_phy_pages; diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c new file mode 100644 index 0000000000000..cb953df4d7d0a --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_GET_MSR_INDEX_LIST and + * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static int kvm_num_index_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +static void test_get_msr_index(void) +{ + int old_res, res, kvm_fd, r; + struct kvm_msr_list *list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_index_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_index_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0])); + list->nmsrs = old_res; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical"); + free(list); + + close(kvm_fd); +} + +static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + + return list; +} + +static void test_get_msr_feature(void) +{ + int res, old_res, i, kvm_fd; + struct kvm_msr_list *feature_list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_feature_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_feature_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); + TEST_ASSERT(old_res == feature_list->nmsrs, + "Unmatching number of msr indexes"); + + for (i = 0; i < feature_list->nmsrs; i++) + kvm_get_feature_msr(feature_list->indices[i]); + + free(feature_list); + close(kvm_fd); +} + +int main(int argc, char *argv[]) +{ + if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) + test_get_msr_feature(); + + test_get_msr_index(); +} -- GitLab From e2c12909ae5f5181d9e0b0c536e26c6877daec48 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 18 Mar 2021 16:16:23 +0100 Subject: [PATCH 806/839] selftests: kvm: add _vm_ioctl As in kvm_ioctl and _kvm_ioctl, add the respective _vm_ioctl for vm_ioctl. _vm_ioctl invokes an ioctl using the vm fd, leaving the caller to test the result. Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20210318151624.490861-1-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 81af4bcc9ab93..0f4258eaa629e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -134,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e5fbf16f725b5..b8849a1aca792 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) { int ret; - ret = ioctl(vm->fd, cmd, arg); + ret = _vm_ioctl(vm, cmd, arg); TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", cmd, ret, errno, strerror(errno)); } +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->fd, cmd, arg); +} + /* * KVM system ioctl * -- GitLab From 3df2252436c08028a549e27ed7f097974e21d17b Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 18 Mar 2021 16:16:24 +0100 Subject: [PATCH 807/839] selftests: kvm: add set_boot_cpu_id test Test for the KVM_SET_BOOT_CPU_ID ioctl. Check that it correctly allows to change the BSP vcpu. Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20210318151624.490861-2-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/x86_64/set_boot_cpu_id.c | 166 ++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5873d54b19a0c..7bd7e776c266a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -14,6 +14,7 @@ /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test +/x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ce34c8160fa3e..67eebb53235fd 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -47,6 +47,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c new file mode 100644 index 0000000000000..12c558fc8074a --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_SET_BOOT_CPU_ID works as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE /* for program_invocation_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define N_VCPU 2 +#define VCPU_ID0 0 +#define VCPU_ID1 1 + +static uint32_t get_bsp_flag(void) +{ + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; +} + +static void guest_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() != 0); + + GUEST_DONE(); +} + +static void guest_not_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() == 0); + + GUEST_DONE(); +} + +static void test_set_boot_busy(struct kvm_vm *vm) +{ + int res; + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0); + TEST_ASSERT(res == -1 && errno == EBUSY, + "KVM_SET_BOOT_CPU_ID set while running vm"); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct ucall uc; + int stage; + + for (stage = 0; stage < 2; stage++) { + + vcpu_run(vm, vcpuid); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + test_set_boot_busy(vm); + break; + case UCALL_DONE: + TEST_ASSERT(stage == 1, + "Expected GUEST_DONE in stage 2, got stage %d", + stage); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], __FILE__, + uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } + } +} + +static struct kvm_vm *create_vm(void) +{ + struct kvm_vm *vm; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; + uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; + + pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); + vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + return vm; +} + +static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) +{ + if (bsp_code) + vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); + else + vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); + + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +} + +static void run_vm_bsp(uint32_t bsp_vcpu) +{ + struct kvm_vm *vm; + bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1; + + vm = create_vm(); + + if (is_bsp_vcpu1) + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + + add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1); + add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + kvm_vm_free(vm); +} + +static void check_set_bsp_busy(void) +{ + struct kvm_vm *vm; + int res; + + vm = create_vm(); + + add_x86_vcpu(vm, VCPU_ID0, true); + add_x86_vcpu(vm, VCPU_ID1, false); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu"); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + print_skip("set_boot_cpu_id not available"); + return 0; + } + + run_vm_bsp(VCPU_ID0); + run_vm_bsp(VCPU_ID1); + run_vm_bsp(VCPU_ID0); + + check_set_bsp_busy(); +} -- GitLab From b318e8decf6b9ef1bcf4ca06fae6d6a2cb5d5c5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Mar 2021 11:44:33 -0700 Subject: [PATCH 808/839] KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish Fix a plethora of issues with MSR filtering by installing the resulting filter as an atomic bundle instead of updating the live filter one range at a time. The KVM_X86_SET_MSR_FILTER ioctl() isn't truly atomic, as the hardware MSR bitmaps won't be updated until the next VM-Enter, but the relevant software struct is atomically updated, which is what KVM really needs. Similar to the approach used for modifying memslots, make arch.msr_filter a SRCU-protected pointer, do all the work configuring the new filter outside of kvm->lock, and then acquire kvm->lock only when the new filter has been vetted and created. That way vCPU readers either see the old filter or the new filter in their entirety, not some half-baked state. Yuan Yao pointed out a use-after-free in ksm_msr_allowed() due to a TOCTOU bug, but that's just the tip of the iceberg... - Nothing is __rcu annotated, making it nigh impossible to audit the code for correctness. - kvm_add_msr_filter() has an unpaired smp_wmb(). Violation of kernel coding style aside, the lack of a smb_rmb() anywhere casts all code into doubt. - kvm_clear_msr_filter() has a double free TOCTOU bug, as it grabs count before taking the lock. - kvm_clear_msr_filter() also has memory leak due to the same TOCTOU bug. The entire approach of updating the live filter is also flawed. While installing a new filter is inherently racy if vCPUs are running, fixing the above issues also makes it trivial to ensure certain behavior is deterministic, e.g. KVM can provide deterministic behavior for MSRs with identical settings in the old and new filters. An atomic update of the filter also prevents KVM from getting into a half-baked state, e.g. if installing a filter fails, the existing approach would leave the filter in a half-baked state, having already committed whatever bits of the filter were already processed. [*] https://lkml.kernel.org/r/20210312083157.25403-1-yaoyuan0329os@gmail.com Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering") Cc: stable@vger.kernel.org Cc: Alexander Graf Reported-by: Yuan Yao Signed-off-by: Sean Christopherson Message-Id: <20210316184436.2544875-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 6 +- arch/x86/include/asm/kvm_host.h | 17 ++--- arch/x86/kvm/x86.c | 109 +++++++++++++++++++------------- 3 files changed, 78 insertions(+), 54 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 38e327d4b4790..2898d3e86b08b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4806,8 +4806,10 @@ If an MSR access is not permitted through the filtering, it generates a allows user space to deflect and potentially handle various MSR accesses into user space. -If a vCPU is in running state while this ioctl is invoked, the vCPU may -experience inconsistent filtering behavior on MSR accesses. +Note, invoking this ioctl with a vCPU is running is inherently racy. However, +KVM does guarantee that vCPUs will see either the previous filter or the new +filter, e.g. MSRs with identical settings in both the old and new filter will +have deterministic behavior. 4.127 KVM_XEN_HVM_SET_ATTR -------------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e1b6e2edc8282..3768819693e5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -948,6 +948,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + #define APICV_INHIBIT_REASON_DISABLE 0 #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 @@ -1042,16 +1048,11 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + bool bus_lock_detection_enabled; + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; - - struct { - u8 count; - bool default_allow:1; - struct msr_bitmap_range ranges[16]; - } msr_filter; - - bool bus_lock_detection_enabled; + struct kvm_x86_msr_filter __rcu *msr_filter; struct kvm_pmu_event_filter __rcu *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a5c5b38735e19..a04e78b896370 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1526,35 +1526,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { + struct kvm_x86_msr_filter *msr_filter; + struct msr_bitmap_range *ranges; struct kvm *kvm = vcpu->kvm; - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; - u32 count = kvm->arch.msr_filter.count; - u32 i; - bool r = kvm->arch.msr_filter.default_allow; + bool allowed; int idx; + u32 i; - /* MSR filtering not set up or x2APIC enabled, allow everything */ - if (!count || (index >= 0x800 && index <= 0x8ff)) + /* x2APIC MSRs do not support filtering. */ + if (index >= 0x800 && index <= 0x8ff) return true; - /* Prevent collision with set_msr_filter */ idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < count; i++) { + msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu); + if (!msr_filter) { + allowed = true; + goto out; + } + + allowed = msr_filter->default_allow; + ranges = msr_filter->ranges; + + for (i = 0; i < msr_filter->count; i++) { u32 start = ranges[i].base; u32 end = start + ranges[i].nmsrs; u32 flags = ranges[i].flags; unsigned long *bitmap = ranges[i].bitmap; if ((index >= start) && (index < end) && (flags & type)) { - r = !!test_bit(index - start, bitmap); + allowed = !!test_bit(index - start, bitmap); break; } } +out: srcu_read_unlock(&kvm->srcu, idx); - return r; + return allowed; } EXPORT_SYMBOL_GPL(kvm_msr_allowed); @@ -5354,25 +5363,34 @@ split_irqchip_unlock: return r; } -static void kvm_clear_msr_filter(struct kvm *kvm) +static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow) +{ + struct kvm_x86_msr_filter *msr_filter; + + msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT); + if (!msr_filter) + return NULL; + + msr_filter->default_allow = default_allow; + return msr_filter; +} + +static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) { u32 i; - u32 count = kvm->arch.msr_filter.count; - struct msr_bitmap_range ranges[16]; - mutex_lock(&kvm->lock); - kvm->arch.msr_filter.count = 0; - memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0])); - mutex_unlock(&kvm->lock); - synchronize_srcu(&kvm->srcu); + if (!msr_filter) + return; + + for (i = 0; i < msr_filter->count; i++) + kfree(msr_filter->ranges[i].bitmap); - for (i = 0; i < count; i++) - kfree(ranges[i].bitmap); + kfree(msr_filter); } -static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range) +static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, + struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; @@ -5406,11 +5424,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user goto err; } - /* Everything ok, add this range identifier to our global pool */ - ranges[kvm->arch.msr_filter.count] = range; - /* Make sure we filled the array before we tell anyone to walk it */ - smp_wmb(); - kvm->arch.msr_filter.count++; + /* Everything ok, add this range identifier. */ + msr_filter->ranges[msr_filter->count] = range; + msr_filter->count++; return 0; err: @@ -5421,10 +5437,11 @@ err: static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) { struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_x86_msr_filter *new_filter, *old_filter; struct kvm_msr_filter filter; bool default_allow; - int r = 0; bool empty = true; + int r = 0; u32 i; if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) @@ -5437,25 +5454,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (empty && !default_allow) return -EINVAL; - kvm_clear_msr_filter(kvm); - - kvm->arch.msr_filter.default_allow = default_allow; + new_filter = kvm_alloc_msr_filter(default_allow); + if (!new_filter) + return -ENOMEM; - /* - * Protect from concurrent calls to this function that could trigger - * a TOCTOU violation on kvm->arch.msr_filter.count. - */ - mutex_lock(&kvm->lock); for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { - r = kvm_add_msr_filter(kvm, &filter.ranges[i]); - if (r) - break; + r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); + if (r) { + kvm_free_msr_filter(new_filter); + return r; + } } + mutex_lock(&kvm->lock); + + /* The per-VM filter is protected by kvm->lock... */ + old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1); + + rcu_assign_pointer(kvm->arch.msr_filter, new_filter); + synchronize_srcu(&kvm->srcu); + + kvm_free_msr_filter(old_filter); + kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); mutex_unlock(&kvm->lock); - return r; + return 0; } long kvm_arch_vm_ioctl(struct file *filp, @@ -10636,8 +10660,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - u32 i; - if (current->mm == kvm->mm) { /* * Free memory regions allocated on behalf of userspace, @@ -10653,8 +10675,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } static_call_cond(kvm_x86_vm_destroy)(kvm); - for (i = 0; i < kvm->arch.msr_filter.count; i++) - kfree(kvm->arch.msr_filter.ranges[i].bitmap); + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); -- GitLab From c2162e13d6e2f43e5001a356196871642de070ba Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 12 Mar 2021 10:45:51 +0800 Subject: [PATCH 809/839] KVM: X86: Fix missing local pCPU when executing wbinvd on all dirty pCPUs In order to deal with noncoherent DMA, we should execute wbinvd on all dirty pCPUs when guest wbinvd exits to maintain data consistency. smp_call_function_many() does not execute the provided function on the local core, therefore replace it by on_each_cpu_mask(). Reported-by: Nadav Amit Cc: Nadav Amit Signed-off-by: Wanpeng Li Message-Id: <1615517151-7465-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a04e78b896370..fe806e8942128 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6629,7 +6629,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) int cpu = get_cpu(); cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); - smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, + on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask, wbinvd_ipi, NULL, 1); put_cpu(); cpumask_clear(vcpu->arch.wbinvd_dirty_mask); -- GitLab From f4e61f0c9add3b00bd5f2df3c814d688849b8707 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 15 Mar 2021 14:55:28 +0800 Subject: [PATCH 810/839] x86/kvm: Fix broken irq restoration in kvm_wait After commit 997acaf6b4b59c (lockdep: report broken irq restoration), the guest splatting below during boot: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 1 PID: 169 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x26/0x30 Modules linked in: hid_generic usbhid hid CPU: 1 PID: 169 Comm: systemd-udevd Not tainted 5.11.0+ #25 RIP: 0010:warn_bogus_irq_restore+0x26/0x30 Call Trace: kvm_wait+0x76/0x90 __pv_queued_spin_lock_slowpath+0x285/0x2e0 do_raw_spin_lock+0xc9/0xd0 _raw_spin_lock+0x59/0x70 lockref_get_not_dead+0xf/0x50 __legitimize_path+0x31/0x60 legitimize_root+0x37/0x50 try_to_unlazy_next+0x7f/0x1d0 lookup_fast+0xb0/0x170 path_openat+0x165/0x9b0 do_filp_open+0x99/0x110 do_sys_openat2+0x1f1/0x2e0 do_sys_open+0x5c/0x80 __x64_sys_open+0x21/0x30 do_syscall_64+0x32/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xae The new consistency checking, expects local_irq_save() and local_irq_restore() to be paired and sanely nested, and therefore expects local_irq_restore() to be called with irqs disabled. The irqflags handling in kvm_wait() which ends up doing: local_irq_save(flags); safe_halt(); local_irq_restore(flags); instead triggers it. This patch fixes it by using local_irq_disable()/enable() directly. Cc: Thomas Gleixner Reported-by: Dmitry Vyukov Signed-off-by: Wanpeng Li Message-Id: <1615791328-2735-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5e78e01ca3b46..78bb0fae39826 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu) static void kvm_wait(u8 *ptr, u8 val) { - unsigned long flags; - if (in_nmi()) return; - local_irq_save(flags); - - if (READ_ONCE(*ptr) != val) - goto out; - /* * halt until it's our turn and kicked. Note that we do safe halt * for irq enabled case to avoid hang when lock info is overwritten * in irq spinlock slowpath and no spurious interrupt occur to save us. */ - if (arch_irqs_disabled_flags(flags)) - halt(); - else - safe_halt(); + if (irqs_disabled()) { + if (READ_ONCE(*ptr) == val) + halt(); + } else { + local_irq_disable(); -out: - local_irq_restore(flags); + if (READ_ONCE(*ptr) == val) + safe_halt(); + + local_irq_enable(); + } } #ifdef CONFIG_X86_32 -- GitLab From e94c55b8e0a0bbe9a026250cf31e2fa45957d776 Mon Sep 17 00:00:00 2001 From: Tobias Klausmann Date: Sat, 13 Mar 2021 23:21:59 +0100 Subject: [PATCH 811/839] nouveau: Skip unvailable ttm page entries Starting with commit f295c8cfec833c2707ff1512da10d65386dde7af ("drm/nouveau: fix dma syncing warning with debugging on.") the following oops occures: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 6 PID: 1013 Comm: Xorg.bin Tainted: G E 5.11.0-desktop-rc0+ #2 Hardware name: Acer Aspire VN7-593G/Pluto_KLS, BIOS V1.11 08/01/2018 RIP: 0010:nouveau_bo_sync_for_device+0x40/0xb0 [nouveau] Call Trace: nouveau_bo_validate+0x5d/0x80 [nouveau] nouveau_gem_ioctl_pushbuf+0x662/0x1120 [nouveau] ? nouveau_gem_ioctl_new+0xf0/0xf0 [nouveau] drm_ioctl_kernel+0xa6/0xf0 [drm] drm_ioctl+0x1f4/0x3a0 [drm] ? nouveau_gem_ioctl_new+0xf0/0xf0 [nouveau] nouveau_drm_ioctl+0x50/0xa0 [nouveau] __x64_sys_ioctl+0x7e/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae ---[ end trace ccfb1e7f4064374f ]--- RIP: 0010:nouveau_bo_sync_for_device+0x40/0xb0 [nouveau] The underlying problem is not introduced by the commit, yet it uncovered the underlying issue. The cited commit relies on valid pages. This is not given for due to some bugs. For now, just warn and work around the issue by just ignoring the bad ttm objects. Below is some debug info gathered while debugging this issue: nouveau 0000:01:00.0: DRM: ttm_dma->num_pages: 2048 nouveau 0000:01:00.0: DRM: ttm_dma->pages is NULL nouveau 0000:01:00.0: DRM: ttm_dma: 00000000e96058e7 nouveau 0000:01:00.0: DRM: ttm_dma->page_flags: nouveau 0000:01:00.0: DRM: ttm_dma: Populated: 1 nouveau 0000:01:00.0: DRM: ttm_dma: No Retry: 0 nouveau 0000:01:00.0: DRM: ttm_dma: SG: 256 nouveau 0000:01:00.0: DRM: ttm_dma: Zero Alloc: 0 nouveau 0000:01:00.0: DRM: ttm_dma: Swapped: 0 Signed-off-by: Tobias Klausmann Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210313222159.3346-1-tobias.klausmann@freenet.de --- drivers/gpu/drm/nouveau/nouveau_bo.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index fabb314a0b2f8..f2720a0061993 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -551,6 +551,10 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) if (!ttm_dma) return; + if (!ttm_dma->pages) { + NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma); + return; + } /* Don't waste time looping if the object is coherent */ if (nvbo->force_coherent) @@ -583,6 +587,10 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) if (!ttm_dma) return; + if (!ttm_dma->pages) { + NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma); + return; + } /* Don't waste time looping if the object is coherent */ if (nvbo->force_coherent) -- GitLab From 403dba003d17b3f0c1627b355cec2d74041cf648 Mon Sep 17 00:00:00 2001 From: Liu xuzhi Date: Thu, 18 Mar 2021 17:46:57 -0700 Subject: [PATCH 812/839] fs/cifs/: fix misspellings using codespell tool A typo is found out by codespell tool in 251th lines of cifs_swn.c: $ codespell ./fs/cifs/ ./cifs_swn.c:251: funciton ==> function Fix a typo found by codespell. Signed-off-by: Liu xuzhi Signed-off-by: Steve French --- fs/cifs/cifs_swn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index f2d730fffccb3..d829b8bf833e3 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -248,7 +248,7 @@ nlmsg_fail: /* * Try to find a matching registration for the tcon's server name and share name. - * Calls to this funciton must be protected by cifs_swnreg_idr_mutex. + * Calls to this function must be protected by cifs_swnreg_idr_mutex. * TODO Try to avoid memory allocations */ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon) -- GitLab From af3ef3b1031634724a3763606695ebcd113d782b Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 18 Mar 2021 19:17:10 +0100 Subject: [PATCH 813/839] cifs: warn and fail if trying to use rootfs without the config option If CONFIG_CIFS_ROOT is not set, rootfs mount option is invalid Signed-off-by: Aurelien Aptel CC: # v5.11 Signed-off-by: Steve French --- fs/cifs/fs_context.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 892f51a21278b..78889024a7ed0 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -1196,9 +1196,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, pr_warn_once("Witness protocol support is experimental\n"); break; case Opt_rootfs: -#ifdef CONFIG_CIFS_ROOT - ctx->rootfs = true; +#ifndef CONFIG_CIFS_ROOT + cifs_dbg(VFS, "rootfs support requires CONFIG_CIFS_ROOT config option\n"); + goto cifs_parse_mount_err; #endif + ctx->rootfs = true; break; case Opt_posixpaths: if (result.negated) -- GitLab From 9ceee7d0841a8f7d7644021ba7d4cc1fbc7966e3 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Wed, 10 Mar 2021 00:31:27 -0800 Subject: [PATCH 814/839] firmware/efi: Fix a use after bug in efi_mem_reserve_persistent In the for loop in efi_mem_reserve_persistent(), prsv = rsv->next use the unmapped rsv. Use the unmapped pages will cause segment fault. Fixes: 18df7577adae6 ("efi/memreserve: deal with memreserve entries in unmapped memory") Signed-off-by: Lv Yunlong Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index df3f9bcab581c..4b7ee3fa9224f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -927,7 +927,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) } /* first try to find a slot in an existing linked list entry */ - for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + for (prsv = efi_memreserve_root->next; prsv; ) { rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { @@ -937,6 +937,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) memunmap(rsv); return efi_mem_reserve_iomem(addr, size); } + prsv = rsv->next; memunmap(rsv); } -- GitLab From fb98cc0b3af2ba4d87301dff2b381b12eee35d7d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 08:33:19 +0100 Subject: [PATCH 815/839] efi: use 32-bit alignment for efi_guid_t literals Commit 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") updated the type definition of efi_guid_t to ensure that it always appears sufficiently aligned (the UEFI spec is ambiguous about this, but given the fact that its EFI_GUID type is defined in terms of a struct carrying a uint32_t, the natural alignment is definitely >= 32 bits). However, we missed the EFI_GUID() macro which is used to instantiate efi_guid_t literals: that macro is still based on the guid_t type, which does not have a minimum alignment at all. This results in warnings such as In file included from drivers/firmware/efi/mokvar-table.c:35: include/linux/efi.h:1093:34: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, ^ include/linux/efi.h:1101:24: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); The distinction only matters on CPUs that do not support misaligned loads fully, but 32-bit ARM's load-multiple instructions fall into that category, and these are likely to be emitted by the compiler that built the firmware for loading word-aligned 128-bit GUIDs from memory So re-implement the initializer in terms of our own efi_guid_t type, so that the alignment becomes a property of the literal's type. Fixes: 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") Reported-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1327 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 8710f5710c1d1..6b5d36babfcc4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -72,8 +72,10 @@ typedef void *efi_handle_t; */ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); -#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ - GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } /* * Generic EFI table header -- GitLab From 9ce3746d64132a561bceab6421715e7c04e85074 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Fri, 19 Mar 2021 10:16:50 +0100 Subject: [PATCH 816/839] documentation/kvm: additional explanations on KVM_SET_BOOT_CPU_ID The ioctl KVM_SET_BOOT_CPU_ID fails when called after vcpu creation. Add this explanation in the documentation. Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20210319091650.11967-1-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 2898d3e86b08b..307f2fcf1b021 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1495,7 +1495,8 @@ Fails if any VCPU has already been created. Define which vcpu is the Bootstrap Processor (BSP). Values are the same as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default -is vcpu 0. +is vcpu 0. This ioctl has to be called before vcpu creation, +otherwise it will return EBUSY error. 4.42 KVM_GET_XSAVE -- GitLab From a501b048a95b79e1e34f03cac3c87ff1e9f229ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Mar 2021 20:26:47 +0100 Subject: [PATCH 817/839] x86/ioapic: Ignore IRQ2 again Vitaly ran into an issue with hotplugging CPU0 on an Amazon instance where the matrix allocator claimed to be out of vectors. He analyzed it down to the point that IRQ2, the PIC cascade interrupt, which is supposed to be not ever routed to the IO/APIC ended up having an interrupt vector assigned which got moved during unplug of CPU0. The underlying issue is that IRQ2 for various reasons (see commit af174783b925 ("x86: I/O APIC: Never configure IRQ2" for details) is treated as a reserved system vector by the vector core code and is not accounted as a regular vector. The Amazon BIOS has an routing entry of pin2 to IRQ2 which causes the IO/APIC setup to claim that interrupt which is granted by the vector domain because there is no sanity check. As a consequence the allocation counter of CPU0 underflows which causes a subsequent unplug to fail with: [ ... ] CPU 0 has 4294967295 vectors, 589 available. Cannot disable CPU There is another sanity check missing in the matrix allocator, but the underlying root cause is that the IO/APIC code lost the IRQ2 ignore logic during the conversion to irqdomains. For almost 6 years nobody complained about this wreckage, which might indicate that this requirement could be lifted, but for any system which actually has a PIC IRQ2 is unusable by design so any routing entry has no effect and the interrupt cannot be connected to a device anyway. Due to that and due to history biased paranoia reasons restore the IRQ2 ignore logic and treat it as non existent despite a routing entry claiming otherwise. Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") Reported-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Tested-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210318192819.636943062@linutronix.de --- arch/x86/kernel/apic/io_apic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c3b60c37c7280..73ff4dd426a83 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); -- GitLab From 68b1eddd421d2b16c6655eceb48918a1e896bbbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:27:19 +0100 Subject: [PATCH 818/839] static_call: Fix static_call_set_init() It turns out that static_call_set_init() does not preserve the other flags; IOW. it clears TAIL if it was set. Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.519406371@infradead.org --- kernel/static_call.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index ae825295cf685..080c8a9ddfafb 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -35,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site) return (void *)((long)site->addr + (long)&site->addr); } +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} static inline struct static_call_key *static_call_key(const struct static_call_site *site) { - return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; + return __static_call_key(site) & STATIC_CALL_SITE_INIT; } static inline bool static_call_is_tail(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -190,7 +193,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) } arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); + static_call_is_tail(site)); } } @@ -349,7 +352,7 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long s_key = (long)site->key + (long)&site->key; + unsigned long s_key = __static_call_key(site); unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key; -- GitLab From 698bacefe993ad2922c9d3b1380591ad489355e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:29:56 +0100 Subject: [PATCH 819/839] static_call: Align static_call_is_init() patching condition The intent is to avoid writing init code after init (because the text might have been freed). The code is needlessly different between jump_label and static_call and not obviously correct. The existing code relies on the fact that the module loader clears the init layout, such that within_module_init() always fails, while jump_label relies on the module state which is more obvious and matches the kernel logic. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.636651340@infradead.org --- kernel/static_call.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 080c8a9ddfafb..fc2259047be21 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -149,6 +149,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) }; for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; struct module *mod = site_mod->mod; if (!site_mod->sites) { @@ -168,6 +169,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) if (mod) { stop = mod->static_call_sites + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; } #endif @@ -175,16 +177,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) site < stop && static_call_key(site) == key; site++) { void *site_addr = static_call_addr(site); - if (static_call_is_init(site)) { - /* - * Don't write to call sites which were in - * initmem and have since been freed. - */ - if (!mod && system_state >= SYSTEM_RUNNING) - continue; - if (mod && !within_module_init((unsigned long)site_addr, mod)) - continue; - } + if (!init && static_call_is_init(site)) + continue; if (!kernel_text_address((unsigned long)site_addr)) { WARN_ONCE(1, "can't patch static call site at %pS", -- GitLab From 38c93587375053c5b9ef093f4a5ea754538cba32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Mar 2021 11:31:51 +0100 Subject: [PATCH 820/839] static_call: Fix static_call_update() sanity check Sites that match init_section_contains() get marked as INIT. For built-in code init_sections contains both __init and __exit text. OTOH kernel_text_address() only explicitly includes __init text (and there are no __exit text markers). Match what jump_label already does and ignore the warning for INIT sites. Also see the excellent changelog for commit: 8f35eaa5f2de ("jump_label: Don't warn on __exit jump entries") Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.739542434@infradead.org --- kernel/jump_label.c | 8 ++++++++ kernel/static_call.c | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index c6a39d662935e..ba39fbb1f8e73 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return false; if (!kernel_text_address(jump_entry_code(entry))) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); diff --git a/kernel/static_call.c b/kernel/static_call.c index fc2259047be21..2c5950b0b90ef 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -181,7 +181,16 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; if (!kernel_text_address((unsigned long)site_addr)) { - WARN_ONCE(1, "can't patch static call site at %pS", + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", site_addr); continue; } -- GitLab From 0cab893f409c53634d0d818fa414641cbcdb0dab Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 19 Mar 2021 15:47:25 +0100 Subject: [PATCH 821/839] Revert "PM: runtime: Update device status before letting suppliers suspend" Revert commit 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend") that introduced a race condition into __rpm_callback() which allowed a concurrent rpm_resume() to run and resume the device prematurely after its status had been changed to RPM_SUSPENDED by __rpm_callback(). Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend") Link: https://lore.kernel.org/linux-pm/24dfb6fc-5d54-6ee2-9195-26428b7ecf8a@intel.com/ Reported-by: Adrian Hunter Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/runtime.c | 62 +++++++++++++++--------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 18b82427d0cb5..a46a7e30881b1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev) static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { - bool use_links = dev->power.links_count > 0; - bool get = false; int retval, idx; - bool put; + bool use_links = dev->power.links_count > 0; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); - } else if (!use_links) { - spin_unlock_irq(&dev->power.lock); } else { - get = dev->power.runtime_status == RPM_RESUMING; - spin_unlock_irq(&dev->power.lock); - /* Resume suppliers if necessary. */ - if (get) { + /* + * Resume suppliers if necessary. + * + * The device's runtime PM status cannot change until this + * routine returns, so it is safe to read the status outside of + * the lock. + */ + if (use_links && dev->power.runtime_status == RPM_RESUMING) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); @@ -355,36 +355,24 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) if (dev->power.irq_safe) { spin_lock(&dev->power.lock); - return retval; - } - - spin_lock_irq(&dev->power.lock); - - if (!use_links) - return retval; - - /* - * If the device is suspending and the callback has returned success, - * drop the usage counters of the suppliers that have been reference - * counted on its resume. - * - * Do that if the resume fails too. - */ - put = dev->power.runtime_status == RPM_SUSPENDING && !retval; - if (put) - __update_runtime_status(dev, RPM_SUSPENDED); - else - put = get && retval; - - if (put) { - spin_unlock_irq(&dev->power.lock); - - idx = device_links_read_lock(); + } else { + /* + * If the device is suspending and the callback has returned + * success, drop the usage counters of the suppliers that have + * been reference counted on its resume. + * + * Do that if resume fails too. + */ + if (use_links + && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) + || (dev->power.runtime_status == RPM_RESUMING && retval))) { + idx = device_links_read_lock(); -fail: - rpm_put_suppliers(dev); + fail: + rpm_put_suppliers(dev); - device_links_read_unlock(idx); + device_links_read_unlock(idx); + } spin_lock_irq(&dev->power.lock); } -- GitLab From 65af8f0166f4d15e61c63db498ec7981acdd897f Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 19 Mar 2021 00:05:48 -0500 Subject: [PATCH 822/839] cifs: fix allocation size on newly created files Applications that create and extend and write to a file do not expect to see 0 allocation size. When file is extended, set its allocation size to a plausible value until we have a chance to query the server for it. When the file is cached this will prevent showing an impossible number of allocated blocks (like 0). This fixes e.g. xfstests 614 which does 1) create a file and set its size to 64K 2) mmap write 64K to the file 3) stat -c %b for the file (to query the number of allocated blocks) It was failing because we returned 0 blocks. Even though we would return the correct cached file size, we returned an impossible allocation size. Signed-off-by: Steve French CC: Reviewed-by: Aurelien Aptel --- fs/cifs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7c61bc9573c02..f2df4422e54ae 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2395,7 +2395,7 @@ int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path, * We need to be sure that all dirty pages are written and the server * has actual ctime, mtime and file length. */ - if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE)) && + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE | STATX_BLOCKS)) && !CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); @@ -2585,6 +2585,14 @@ set_size_out: if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); + /* + * i_blocks is not related to (i_size / i_blksize), but instead + * 512 byte (2**9) size is required for calculating num blocks. + * Until we can query the server for actual allocation size, + * this is best estimate we have for blocks allocated for a file + * Number of blocks must be rounded up so size 1 is not 0 blocks + */ + inode->i_blocks = (512 - 1 + attrs->ia_size) >> 9; /* * The man page of truncate says if the size changed, -- GitLab From dd926880da8dbbe409e709c1d3c1620729a94732 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 12 Mar 2021 10:20:33 +0100 Subject: [PATCH 823/839] x86/apic/of: Fix CPU devicetree-node lookups Architectures that describe the CPU topology in devicetree and do not have an identity mapping between physical and logical CPU ids must override the default implementation of arch_match_cpu_phys_id(). Failing to do so breaks CPU devicetree-node lookups using of_get_cpu_node() and of_cpu_device_node_get() which several drivers rely on. It also causes the CPU struct devices exported through sysfs to point to the wrong devicetree nodes. On x86, CPUs are described in devicetree using their APIC ids and those do not generally coincide with the logical ids, even if CPU0 typically uses APIC id 0. Add the missing implementation of arch_match_cpu_phys_id() so that CPU-node lookups work also with SMP. Apart from fixing the broken sysfs devicetree-node links this likely does not affect current users of mainline kernels on x86. Fixes: 4e07db9c8db8 ("x86/devicetree: Use CPU description from Device Tree") Signed-off-by: Johan Hovold Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210312092033.26317-1-johan@kernel.org --- arch/x86/kernel/apic/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index bda4f2a36868d..4f26700f314d9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2342,6 +2342,11 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_apicid[cpu]; +} + #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread -- GitLab From 81e2073c175b887398e5bca6c004efa89983f58d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Mar 2021 15:38:52 +0100 Subject: [PATCH 824/839] genirq: Disable interrupts for force threaded handlers With interrupt force threading all device interrupt handlers are invoked from kernel threads. Contrary to hard interrupt context the invocation only disables bottom halfs, but not interrupts. This was an oversight back then because any code like this will have an issue: thread(irq_A) irq_handler(A) spin_lock(&foo->lock); interrupt(irq_B) irq_handler(B) spin_lock(&foo->lock); This has been triggered with networking (NAPI vs. hrtimers) and console drivers where printk() happens from an interrupt which interrupted the force threaded handler. Now people noticed and started to change the spin_lock() in the handler to spin_lock_irqsave() which affects performance or add IRQF_NOTHREAD to the interrupt request which in turn breaks RT. Fix the root cause and not the symptom and disable interrupts before invoking the force threaded handler which preserves the regular semantics and the usefulness of the interrupt force threading as a general debugging tool. For not RT this is not changing much, except that during the execution of the threaded handler interrupts are delayed until the handler returns. Vs. scheduling and softirq processing there is no difference. For RT kernels there is no issue. Fixes: 8d32a307e4fa ("genirq: Provide forced interrupt threading") Reported-by: Johan Hovold Signed-off-by: Thomas Gleixner Reviewed-by: Johan Hovold Acked-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210317143859.513307808@linutronix.de --- kernel/irq/manage.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dec3f73e8db92..21ea370fccda7 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1142,11 +1142,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) irqreturn_t ret; local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); ret = action->thread_fn(action->irq, action->dev_id); if (ret == IRQ_HANDLED) atomic_inc(&desc->threads_handled); irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } -- GitLab From b7ff91fd030dc9d72ed91b1aab36e445a003af4f Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 3 Mar 2021 21:17:02 +0800 Subject: [PATCH 825/839] ext4: find old entry again if failed to rename whiteout If we failed to add new entry on rename whiteout, we cannot reset the old->de entry directly, because the old->de could have moved from under us during make indexed dir. So find the old entry again before reset is needed, otherwise it may corrupt the filesystem as below. /dev/sda: Entry '00000001' in ??? (12) has deleted/unused inode 15. CLEARED. /dev/sda: Unattached inode 75 /dev/sda: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. Fixes: 6b4b8e6b4ad ("ext4: fix bug for rename with RENAME_WHITEOUT") Cc: stable@vger.kernel.org Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210303131703.330415-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 686bf982c84e9..4aae59d552887 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3613,6 +3613,31 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval; } +static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, + unsigned ino, unsigned file_type) +{ + struct ext4_renament old = *ent; + int retval = 0; + + /* + * old->de could have moved from under us during make indexed dir, + * so the old->de may no longer valid and need to find it again + * before reset old inode info. + */ + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + if (IS_ERR(old.bh)) + retval = PTR_ERR(old.bh); + if (!old.bh) + retval = -ENOENT; + if (retval) { + ext4_std_error(old.dir->i_sb, retval); + return; + } + + ext4_setent(handle, &old, ino, file_type); + brelse(old.bh); +} + static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, const struct qstr *d_name) { @@ -3937,8 +3962,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, end_rename: if (whiteout) { if (retval) { - ext4_setent(handle, &old, - old.inode->i_ino, old_file_type); + ext4_resetent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); } unlock_new_inode(whiteout); -- GitLab From 5dccdc5a1916d4266edd251f20bbbb113a5c495f Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 3 Mar 2021 21:17:03 +0800 Subject: [PATCH 826/839] ext4: do not iput inode under running transaction in ext4_rename() In ext4_rename(), when RENAME_WHITEOUT failed to add new entry into directory, it ends up dropping new created whiteout inode under the running transaction. After commit <9b88f9fb0d2> ("ext4: Do not iput inode under running transaction"), we follow the assumptions that evict() does not get called from a transaction context but in ext4_rename() it breaks this suggestion. Although it's not a real problem, better to obey it, so this patch add inode to orphan list and stop transaction before final iput(). Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210303131703.330415-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4aae59d552887..1fe8370dbf0b9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3799,14 +3799,14 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, */ retval = -ENOENT; if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) - goto end_rename; + goto release_bh; new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); new.bh = NULL; - goto end_rename; + goto release_bh; } if (new.bh) { if (!new.inode) { @@ -3823,15 +3823,13 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - handle = NULL; - goto end_rename; + goto release_bh; } } else { whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle); if (IS_ERR(whiteout)) { retval = PTR_ERR(whiteout); - whiteout = NULL; - goto end_rename; + goto release_bh; } } @@ -3965,16 +3963,18 @@ end_rename: ext4_resetent(handle, &old, old.inode->i_ino, old_file_type); drop_nlink(whiteout); + ext4_orphan_add(handle, whiteout); } unlock_new_inode(whiteout); + ext4_journal_stop(handle); iput(whiteout); - + } else { + ext4_journal_stop(handle); } +release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); - if (handle) - ext4_journal_stop(handle); return retval; } -- GitLab From 6b22489911b726eebbf169caee52fea52013fbdd Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Fri, 5 Mar 2021 20:05:08 +0800 Subject: [PATCH 827/839] ext4: do not try to set xattr into ea_inode if value is empty Syzbot report a warning that ext4 may create an empty ea_inode if set an empty extent attribute to a file on the file system which is no free blocks left. WARNING: CPU: 6 PID: 10667 at fs/ext4/xattr.c:1640 ext4_xattr_set_entry+0x10f8/0x1114 fs/ext4/xattr.c:1640 ... Call trace: ext4_xattr_set_entry+0x10f8/0x1114 fs/ext4/xattr.c:1640 ext4_xattr_block_set+0x1d0/0x1b1c fs/ext4/xattr.c:1942 ext4_xattr_set_handle+0x8a0/0xf1c fs/ext4/xattr.c:2390 ext4_xattr_set+0x120/0x1f0 fs/ext4/xattr.c:2491 ext4_xattr_trusted_set+0x48/0x5c fs/ext4/xattr_trusted.c:37 __vfs_setxattr+0x208/0x23c fs/xattr.c:177 ... Now, ext4 try to store extent attribute into an external inode if ext4_xattr_block_set() return -ENOSPC, but for the case of store an empty extent attribute, store the extent entry into the extent attribute block is enough. A simple reproduce below. fallocate test.img -l 1M mkfs.ext4 -F -b 2048 -O ea_inode test.img mount test.img /mnt dd if=/dev/zero of=/mnt/foo bs=2048 count=500 setfattr -n "user.test" /mnt/foo Reported-by: syzbot+98b881fdd8ebf45ab4ae@syzkaller.appspotmail.com Fixes: 9c6e7853c531 ("ext4: reserve space for xattr entries/names") Cc: stable@kernel.org Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20210305120508.298465-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 083c95126781d..6c1018223c54a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2404,7 +2404,7 @@ retry_inode: * external inode if possible. */ if (ext4_has_feature_ea_inode(inode->i_sb) && - !i.in_inode) { + i.value_len && !i.in_inode) { i.in_inode = 1; goto retry_inode; } -- GitLab From 7d8bd3c76da1d94b85e6c9b7007e20e980bfcfe6 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Fri, 12 Mar 2021 01:50:51 -0500 Subject: [PATCH 828/839] ext4: fix potential error in ext4_do_update_inode If set_large_file = 1 and errors occur in ext4_handle_dirty_metadata(), the error code will be overridden, go to out_brelse to avoid this situation. Signed-off-by: Shijie Luo Link: https://lore.kernel.org/r/20210312065051.36314-1-luoshijie1@huawei.com Cc: stable@kernel.org Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a79a9ea58c568..927e47db7f00b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5026,7 +5026,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; struct super_block *sb = inode->i_sb; - int err = 0, rc, block; + int err = 0, block; int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; @@ -5138,9 +5138,9 @@ static int ext4_do_update_inode(handle_t *handle, bh->b_data); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - rc = ext4_handle_dirty_metadata(handle, NULL, bh); - if (!err) - err = rc; + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_brelse; ext4_clear_inode_state(inode, EXT4_STATE_NEW); if (set_large_file) { BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); -- GitLab From 2a4ae3bcdf05b8639406eaa09a2939f3c6dd8e75 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 15 Mar 2021 17:59:06 +0100 Subject: [PATCH 829/839] ext4: fix timer use-after-free on failed mount When filesystem mount fails because of corrupted filesystem we first cancel the s_err_report timer reminding fs errors every day and only then we flush s_error_work. However s_error_work may report another fs error and re-arm timer thus resulting in timer use-after-free. Fix the problem by first flushing the work and only after that canceling the s_err_report timer. Reported-by: syzbot+628472a2aac693ab0fcd@syzkaller.appspotmail.com Fixes: 2d01ddc86606 ("ext4: save error info to sb through journal if available") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210315165906.2175-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a0a2568596622..b9693680463aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5154,8 +5154,8 @@ failed_mount_wq: failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: - del_timer_sync(&sbi->s_err_report); flush_work(&sbi->s_error_work); + del_timer_sync(&sbi->s_err_report); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: -- GitLab From 8210bb29c1b66200cff7b25febcf6e39baf49fbf Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Tue, 16 Mar 2021 15:19:21 -0700 Subject: [PATCH 830/839] ext4: fix rename whiteout with fast commit This patch adds rename whiteout support in fast commits. Note that the whiteout object that gets created is actually char device. Which imples, the function ext4_inode_journal_mode(struct inode *inode) would return "JOURNAL_DATA" for this inode. This has a consequence in fast commit code that it will make creation of the whiteout object a fast-commit ineligible behavior and thus will fall back to full commits. With this patch, this can be observed by running fast commits with rename whiteout and seeing the stats generated by ext4_fc_stats tracepoint as follows: ext4_fc_stats: dev 254:32 fc ineligible reasons: XATTR:0, CROSS_RENAME:0, JOURNAL_FLAG_CHANGE:0, NO_MEM:0, SWAP_BOOT:0, RESIZE:0, RENAME_DIR:0, FALLOC_RANGE:0, INODE_JOURNAL_DATA:16; num_commits:6, ineligible: 6, numblks: 3 So in short, this patch guarantees that in case of rename whiteout, we fall back to full commits. Amir mentioned that instead of creating a new whiteout object for every rename, we can create a static whiteout object with irrelevant nlink. That will make fast commits to not fall back to full commit. But until this happens, this patch will ensure correctness by falling back to full commits. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Cc: stable@kernel.org Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20210316221921.1124955-1-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/fast_commit.c | 9 +++++++-- fs/ext4/namei.c | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ea3b41579f389..826a56e3bbd28 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2794,6 +2794,8 @@ void __ext4_fc_track_link(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry); void ext4_fc_track_link(handle_t *handle, struct dentry *dentry); +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6c4f19b0a5563..7541d0b5d7069 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -513,10 +513,10 @@ void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) __ext4_fc_track_link(handle, d_inode(dentry), dentry); } -void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry) { struct __track_dentry_update_args args; - struct inode *inode = d_inode(dentry); int ret; args.dentry = dentry; @@ -527,6 +527,11 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) trace_ext4_fc_track_create(inode, dentry, ret); } +void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +{ + __ext4_fc_track_create(handle, d_inode(dentry), dentry); +} + /* __track_fn for inode tracking */ static int __track_inode(struct inode *inode, void *arg, bool update) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1fe8370dbf0b9..883e2a7cd4ab5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3873,6 +3873,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, retval = ext4_mark_inode_dirty(handle, whiteout); if (unlikely(retval)) goto end_rename; + } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); @@ -3946,6 +3947,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ext4_fc_track_unlink(handle, new.dentry); __ext4_fc_track_link(handle, old.inode, new.dentry); __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, old.dentry); } if (new.inode) { -- GitLab From 512c15ef05d73a04f1aef18a3bc61a8bb516f323 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 17 Jan 2021 00:57:32 -0800 Subject: [PATCH 831/839] ext4: stop inode update before return The inode update should be stopped before returing the error code. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210117085732.93788-1-bianpan2016@163.com Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Cc: stable@kernel.org Reviewed-by: Harshad Shirwadkar Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 927e47db7f00b..0948a43f1b3df 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5387,8 +5387,10 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, inode->i_gid = attr->ia_gid; error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - if (unlikely(error)) + if (unlikely(error)) { + ext4_fc_stop_update(inode); return error; + } } if (attr->ia_valid & ATTR_SIZE) { -- GitLab From 64395d950bc476106b39341e42ebfd4d2eb71d2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 21 Mar 2021 00:45:37 -0400 Subject: [PATCH 832/839] ext4: initialize ret to suppress smatch warning Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 77c7c8a54da73..77c84d6f1af6b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4382,7 +4382,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, { struct inode *inode = file_inode(file); handle_t *handle; - int ret, ret2 = 0, ret3 = 0; + int ret = 0, ret2 = 0, ret3 = 0; int retries = 0; int depth = 0; struct ext4_map_blocks map; -- GitLab From 5be28c8f85ce99ed2d329d2ad8bdd18ea19473a5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Mar 2021 19:25:13 -0600 Subject: [PATCH 833/839] signal: don't allow sending any signals to PF_IO_WORKER threads They don't take signals individually, and even if they share signals with the parent task, don't allow them to be delivered through the worker thread. Linux does allow this kind of behavior for regular threads, but it's really a compatability thing that we need not care about for the IO threads. Reported-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- kernel/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index ba4d1ef39a9ea..11cabcf20e7a6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -833,6 +833,9 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info, if (!valid_signal(sig)) return -EINVAL; + /* PF_IO_WORKER threads don't take any signals */ + if (t->flags & PF_IO_WORKER) + return -ESRCH; if (!si_fromuser(info)) return 0; -- GitLab From 4db4b1a0d1779dc159f7b87feb97030ec0b12597 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 21 Mar 2021 09:37:48 -0600 Subject: [PATCH 834/839] signal: don't allow STOP on PF_IO_WORKER threads Just like we don't allow normal signals to IO threads, don't deliver a STOP to a task that has PF_IO_WORKER set. The IO threads don't take signals in general, and have no means of flushing out a stop either. Longer term, we may want to look into allowing stop of these threads, as it relates to eg process freezing. For now, this prevents a spin issue if a SIGSTOP is delivered to the parent task. Reported-by: Stefan Metzmacher Signed-off-by: Jens Axboe Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 11cabcf20e7a6..f2a1b898da29f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -288,7 +288,8 @@ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); - if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) + if (unlikely(fatal_signal_pending(task) || + (task->flags & (PF_EXITING | PF_IO_WORKER)))) return false; if (mask & JOBCTL_STOP_SIGMASK) -- GitLab From 00ddff431a458bbf143ea7c4c42d022676da1b17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 21 Mar 2021 07:06:56 -0600 Subject: [PATCH 835/839] io-wq: ensure task is running before processing task_work Mark the current task as running if we need to run task_work from the io-wq threads as part of work handling. If that is the case, then return as such so that the caller can appropriately loop back and reset if it was part of a going-to-sleep flush. Fixes: 3bfe6106693b ("io-wq: fork worker threads from original task") Signed-off-by: Jens Axboe --- fs/io-wq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index e05f996d088f1..3dc10bfd8c3ba 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -386,13 +386,16 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) return NULL; } -static void io_flush_signals(void) +static bool io_flush_signals(void) { if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) { + __set_current_state(TASK_RUNNING); if (current->task_works) task_work_run(); clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL); + return true; } + return false; } static void io_assign_current_work(struct io_worker *worker, @@ -499,7 +502,8 @@ loop: } __io_worker_idle(wqe, worker); raw_spin_unlock_irq(&wqe->lock); - io_flush_signals(); + if (io_flush_signals()) + continue; ret = schedule_timeout(WORKER_IDLE_TIMEOUT); if (try_to_freeze() || ret) continue; -- GitLab From 0031275d119efe16711cd93519b595e6f9b4b330 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Mar 2021 20:33:36 +0100 Subject: [PATCH 836/839] io_uring: call req_set_fail_links() on short send[msg]()/recv[msg]() with MSG_WAITALL Without that it's not safe to use them in a linked combination with others. Now combinations like IORING_OP_SENDMSG followed by IORING_OP_SPLICE should be possible. We already handle short reads and writes for the following opcodes: - IORING_OP_READV - IORING_OP_READ_FIXED - IORING_OP_READ - IORING_OP_WRITEV - IORING_OP_WRITE_FIXED - IORING_OP_WRITE - IORING_OP_SPLICE - IORING_OP_TEE Now we have it for these as well: - IORING_OP_SENDMSG - IORING_OP_SEND - IORING_OP_RECVMSG - IORING_OP_RECV For IORING_OP_RECVMSG we also check for the MSG_TRUNC and MSG_CTRUNC flags in order to call req_set_fail_links(). There might be applications arround depending on the behavior that even short send[msg]()/recv[msg]() retuns continue an IOSQE_IO_LINK chain. It's very unlikely that such applications pass in MSG_WAITALL, which is only defined in 'man 2 recvmsg', but not in 'man 2 sendmsg'. It's expected that the low level sock_sendmsg() call just ignores MSG_WAITALL, as MSG_ZEROCOPY is also ignored without explicitly set SO_ZEROCOPY. We also expect the caller to know about the implicit truncation to MAX_RW_COUNT, which we don't detect. cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/c4e1a4cc0d905314f4d5dc567e65a7b09621aab3.1615908477.git.metze@samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ea2d3e120555e..543551d70327f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4386,6 +4386,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) struct io_async_msghdr iomsg, *kmsg; struct socket *sock; unsigned flags; + int min_ret = 0; int ret; sock = sock_from_file(req->file); @@ -4406,6 +4407,9 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) else if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) return io_setup_async_msg(req, kmsg); @@ -4416,7 +4420,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) + if (ret < min_ret) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, 0); return 0; @@ -4429,6 +4433,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) struct iovec iov; struct socket *sock; unsigned flags; + int min_ret = 0; int ret; sock = sock_from_file(req->file); @@ -4450,6 +4455,9 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) else if (issue_flags & IO_URING_F_NONBLOCK) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + msg.msg_flags = flags; ret = sock_sendmsg(sock, &msg); if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) @@ -4457,7 +4465,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) if (ret == -ERESTARTSYS) ret = -EINTR; - if (ret < 0) + if (ret < min_ret) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, 0); return 0; @@ -4609,6 +4617,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) struct socket *sock; struct io_buffer *kbuf; unsigned flags; + int min_ret = 0; int ret, cflags = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; @@ -4640,6 +4649,9 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) else if (force_nonblock) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, kmsg->uaddr, flags); if (force_nonblock && ret == -EAGAIN) @@ -4653,7 +4665,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->free_iov) kfree(kmsg->free_iov); req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) + if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; @@ -4668,6 +4680,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) struct socket *sock; struct iovec iov; unsigned flags; + int min_ret = 0; int ret, cflags = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; @@ -4699,6 +4712,9 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) else if (force_nonblock) flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + ret = sock_recvmsg(sock, &msg, flags); if (force_nonblock && ret == -EAGAIN) return -EAGAIN; @@ -4707,7 +4723,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) out_free: if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_recv_kbuf(req); - if (ret < 0) + if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) req_set_fail_links(req); __io_req_complete(req, issue_flags, ret, cflags); return 0; -- GitLab From 0d02ec6b3136c73c09e7859f0d0e4e2c4c07b49b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Mar 2021 14:56:43 -0700 Subject: [PATCH 837/839] Linux 5.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a28bb374663d2..d4784d1811231 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Frozen Wasteland # *DOCUMENTATION* -- GitLab From e3baacf54275647a018ee35bff3bc775a8a2a01a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 15:57:33 +0300 Subject: [PATCH 838/839] regulator: helpers: Export helper voltage listing Some drivers need to translate voltage values to selectors prior regulator registration. Currently a regulator_desc based list_voltages helper is only exported for regulators using the linear_ranges. Export similar helper also for regulators using simple linear mapping. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/1200ef7a50c84327ada019b85f6527b4fc9b5ce1.1617020713.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 36 +++++++++++++++++++++++++------- include/linux/regulator/driver.h | 2 ++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index f42b394a0c46b..3e19ecbf72673 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -508,6 +508,33 @@ int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev, } EXPORT_SYMBOL_GPL(regulator_map_voltage_pickable_linear_range); +/** + * regulator_desc_list_voltage_linear - List voltages with simple calculation + * + * @desc: Regulator desc for regulator which volatges are to be listed + * @selector: Selector to convert into a voltage + * + * Regulators with a simple linear mapping between voltages and + * selectors can set min_uV and uV_step in the regulator descriptor + * and then use this function prior regulator registration to list + * the voltages. This is useful when voltages need to be listed during + * device-tree parsing. + */ +int regulator_desc_list_voltage_linear(const struct regulator_desc *desc, + unsigned int selector) +{ + if (selector >= desc->n_voltages) + return -EINVAL; + + if (selector < desc->linear_min_sel) + return 0; + + selector -= desc->linear_min_sel; + + return desc->min_uV + (desc->uV_step * selector); +} +EXPORT_SYMBOL_GPL(regulator_desc_list_voltage_linear); + /** * regulator_list_voltage_linear - List voltages with simple calculation * @@ -521,14 +548,7 @@ EXPORT_SYMBOL_GPL(regulator_map_voltage_pickable_linear_range); int regulator_list_voltage_linear(struct regulator_dev *rdev, unsigned int selector) { - if (selector >= rdev->desc->n_voltages) - return -EINVAL; - if (selector < rdev->desc->linear_min_sel) - return 0; - - selector -= rdev->desc->linear_min_sel; - - return rdev->desc->min_uV + (rdev->desc->uV_step * selector); + return regulator_desc_list_voltage_linear(rdev->desc, selector); } EXPORT_SYMBOL_GPL(regulator_list_voltage_linear); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d7c77ee370f37..39a5401116450 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -543,4 +543,6 @@ void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc, unsigned int selector); +int regulator_desc_list_voltage_linear(const struct regulator_desc *desc, + unsigned int selector); #endif -- GitLab From fb8fee9efdcf084d9e31ba14cc4734d97e5dd972 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 15:59:04 +0300 Subject: [PATCH 839/839] regulator: Add regmap helper for ramp-delay setting Quite a few regulator ICs do support setting ramp-delay by writing a value matching the delay to a ramp-delay register. Provide a simple helper for table-based delay setting. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/f101f1db564cf32cb58719c77af0b00d7236bb89.1617020713.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 65 ++++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 5 +++ 2 files changed, 70 insertions(+) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index 3e19ecbf72673..0e16e31c968f1 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -901,3 +901,68 @@ bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2) return reg1->rdev == reg2->rdev; } EXPORT_SYMBOL_GPL(regulator_is_equal); + +static int find_closest_bigger(unsigned int target, const unsigned int *table, + unsigned int num_sel, unsigned int *sel) +{ + unsigned int s, tmp, max, maxsel = 0; + bool found = false; + + max = table[0]; + + for (s = 0; s < num_sel; s++) { + if (table[s] > max) { + max = table[s]; + maxsel = s; + } + if (table[s] >= target) { + if (!found || table[s] - target < tmp - target) { + tmp = table[s]; + *sel = s; + found = true; + if (tmp == target) + break; + } + } + } + + if (!found) { + *sel = maxsel; + return -EINVAL; + } + + return 0; +} + +/** + * regulator_set_ramp_delay_regmap - set_ramp_delay() helper + * + * @rdev: regulator to operate on + * + * Regulators that use regmap for their register I/O can set the ramp_reg + * and ramp_mask fields in their descriptor and then use this as their + * set_ramp_delay operation, saving some code. + */ +int regulator_set_ramp_delay_regmap(struct regulator_dev *rdev, int ramp_delay) +{ + int ret; + unsigned int sel; + + if (!rdev->desc->n_ramp_values) + return -EINVAL; + + ret = find_closest_bigger(ramp_delay, rdev->desc->ramp_delay_table, + rdev->desc->n_ramp_values, &sel); + + if (ret) { + dev_warn(rdev_get_dev(rdev), + "Can't set ramp-delay %u, setting %u\n", ramp_delay, + rdev->desc->ramp_delay_table[sel]); + } + + sel <<= ffs(rdev->desc->ramp_mask) - 1; + + return regmap_update_bits(rdev->regmap, rdev->desc->ramp_reg, + rdev->desc->ramp_mask, sel); +} +EXPORT_SYMBOL_GPL(regulator_set_ramp_delay_regmap); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 39a5401116450..597ed117086f9 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -373,6 +373,10 @@ struct regulator_desc { unsigned int pull_down_reg; unsigned int pull_down_mask; unsigned int pull_down_val_on; + unsigned int ramp_reg; + unsigned int ramp_mask; + const unsigned int *ramp_delay_table; + unsigned int n_ramp_values; unsigned int enable_time; @@ -535,6 +539,7 @@ int regulator_set_current_limit_regmap(struct regulator_dev *rdev, int min_uA, int max_uA); int regulator_get_current_limit_regmap(struct regulator_dev *rdev); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); +int regulator_set_ramp_delay_regmap(struct regulator_dev *rdev, int ramp_delay); /* * Helper functions intended to be used by regulator drivers prior registering -- GitLab