Loading arch/s390/include/asm/xor.h +20 −1 Original line number Diff line number Diff line #include <asm-generic/xor.h> /* * Optimited xor routines * * Copyright IBM Corp. 2016 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ #ifndef _ASM_S390_XOR_H #define _ASM_S390_XOR_H extern struct xor_block_template xor_block_xc; #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ do { \ xor_speed(&xor_block_xc); \ } while (0) #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) #endif /* _ASM_S390_XOR_H */ arch/s390/lib/Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o obj-y += mem.o obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o arch/s390/lib/xor.c 0 → 100644 +134 −0 Original line number Diff line number Diff line /* * Optimized xor_block operation for RAID4/5 * * Copyright IBM Corp. 2016 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ #include <linux/types.h> #include <linux/module.h> #include <linux/raid/xor.h> static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) : "0", "1", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) : : "0", "1", "cc", "memory"); } static void 
xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " xc 0(256,%1),0(%4)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " ex %0,12(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" " xc 0(1,%1),0(%4)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) : : "0", "1", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { /* Get around a gcc oddity */ register unsigned long *reg7 asm ("7") = p5; asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " xc 0(256,%1),0(%4)\n" " xc 0(256,%1),0(%5)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " ex %0,12(1)\n" " ex %0,18(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" " xc 0(1,%1),0(%4)\n" " xc 0(1,%1),0(%5)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (reg7) : : "0", "1", "cc", "memory"); } struct xor_block_template xor_block_xc = { .name = "xc", .do_2 = xor_xc_2, .do_3 = xor_xc_3, .do_4 = xor_xc_4, .do_5 = xor_xc_5, }; EXPORT_SYMBOL(xor_block_xc); Loading
arch/s390/include/asm/xor.h +20 −1 Original line number Diff line number Diff line
#include <asm-generic/xor.h>
/*
 * Optimized xor routines
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
#ifndef _ASM_S390_XOR_H
#define _ASM_S390_XOR_H

/* XC-based template; the definition lives in arch/s390/lib/xor.c. */
extern struct xor_block_template xor_block_xc;

/* Replace any earlier candidate list: only the XC template is handed to xor_speed(). */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
	xor_speed(&xor_block_xc); \
} while (0)

/* FASTEST is ignored; the XC template is always selected. */
#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)

#endif /* _ASM_S390_XOR_H */
arch/s390/lib/Makefile +1 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o obj-y += mem.o obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o
arch/s390/lib/xor.c 0 → 100644 +134 −0 Original line number Diff line number Diff line /* * Optimized xor_block operation for RAID4/5 * * Copyright IBM Corp. 2016 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ #include <linux/types.h> #include <linux/module.h> #include <linux/raid/xor.h> static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) : "0", "1", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) : : "0", "1", "cc", "memory"); } static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " xc 0(256,%1),0(%4)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " ex %0,12(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" " xc 0(1,%1),0(%4)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) : : "0", "1", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { /* Get around a gcc oddity */ register unsigned long *reg7 
asm ("7") = p5; asm volatile( " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" "0: xc 0(256,%1),0(%2)\n" " xc 0(256,%1),0(%3)\n" " xc 0(256,%1),0(%4)\n" " xc 0(256,%1),0(%5)\n" " la %1,256(%1)\n" " la %2,256(%2)\n" " la %3,256(%3)\n" " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" "1: ex %0,0(1)\n" " ex %0,6(1)\n" " ex %0,12(1)\n" " ex %0,18(1)\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" " xc 0(1,%1),0(%3)\n" " xc 0(1,%1),0(%4)\n" " xc 0(1,%1),0(%5)\n" "3:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (reg7) : : "0", "1", "cc", "memory"); } struct xor_block_template xor_block_xc = { .name = "xc", .do_2 = xor_xc_2, .do_3 = xor_xc_3, .do_4 = xor_xc_4, .do_5 = xor_xc_5, }; EXPORT_SYMBOL(xor_block_xc);