Commit 8318f7c2 authored by Guo Ren
Browse files

csky: optimize memcpy_{from,to}io() and memset_io()



Optimize memcpy_{from,to}io() and memset_io() by transferring in
32 bit as much as possible with minimized barrier usage.  This
simple optimization brings faster throughput compared to the current
byte-by-byte read and write with a barrier in the loop. The code's
skeleton is taken from the powerpc & arm64 implementations.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
parent e4df2d5e
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -31,6 +31,17 @@
#define writel(v,c)		({ wmb(); writel_relaxed((v),(c)); mb(); })
#endif

/*
 * Bulk ("string") accessors for I/O memory.
 *
 * The __-prefixed functions are declared here and reached through the
 * memset_io(), memcpy_fromio() and memcpy_toio() macros below, which
 * forward their three arguments unchanged.
 */
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count);
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count);
void __memset_io(volatile void __iomem *dst, int c, size_t count);

#define memset_io(dst, c, count)	__memset_io((dst), (c), (count))
#define memcpy_fromio(to, from, count)	__memcpy_fromio((to), (from), (count))
#define memcpy_toio(to, from, count)	__memcpy_toio((to), (from), (count))

/*
 * I/O memory mapping functions.
 */
+1 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
extra-y := head.o vmlinux.lds

obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
obj-y += power.o syscall.o syscall_table.o setup.o
obj-y += power.o syscall.o syscall_table.o setup.o io.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/

arch/csky/kernel/io.c

0 → 100644
+91 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/types.h>
#include <linux/io.h>

/*
 * Copy @count bytes from I/O memory at @from into ordinary memory at @to.
 *
 * The transfer runs in three phases, keyed on the alignment of the I/O
 * address only: single-byte reads until @from is 4-byte aligned, 32-bit
 * reads while at least four bytes remain, then single-byte reads for the
 * tail. All device accesses go through the __raw_read*() accessors.
 */
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
	const volatile u8 __iomem *src = from;
	u8 *dst = to;

	/* Head: byte reads until the I/O address reaches a 4-byte boundary. */
	for (; count && !IS_ALIGNED((unsigned long)src, 4); count--) {
		*dst = __raw_readb(src);
		src++;
		dst++;
	}

	/* Body: one 32-bit I/O read per iteration. */
	for (; count >= 4; count -= 4) {
		*(u32 *)dst = __raw_readl(src);
		src += 4;
		dst += 4;
	}

	/* Tail: fewer than four bytes are left. */
	for (; count; count--) {
		*dst = __raw_readb(src);
		src++;
		dst++;
	}
}
EXPORT_SYMBOL(__memcpy_fromio);

/*
 * Copy @count bytes from ordinary memory at @from into I/O memory at @to.
 *
 * The transfer runs in three phases, keyed on the alignment of the I/O
 * address only: single-byte writes until @to is 4-byte aligned, 32-bit
 * writes while at least four bytes remain, then single-byte writes for
 * the tail. All device accesses go through the __raw_write*() accessors.
 */
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
	volatile u8 __iomem *dst = to;
	const u8 *src = from;

	/* Head: byte writes until the I/O address reaches a 4-byte boundary. */
	for (; count && !IS_ALIGNED((unsigned long)dst, 4); count--) {
		__raw_writeb(*src, dst);
		src++;
		dst++;
	}

	/* Body: one 32-bit I/O write per iteration. */
	for (; count >= 4; count -= 4) {
		__raw_writel(*(const u32 *)src, dst);
		src += 4;
		dst += 4;
	}

	/* Tail: fewer than four bytes are left. */
	for (; count; count--) {
		__raw_writeb(*src, dst);
		src++;
		dst++;
	}
}
EXPORT_SYMBOL(__memcpy_toio);

/*
 * Fill @count bytes of I/O memory starting at @dst with the byte value @c.
 *
 * Single-byte writes are issued until @dst is 4-byte aligned, then 32-bit
 * writes of the byte replicated into every lane while at least four bytes
 * remain, then single-byte writes for the tail. All device accesses go
 * through the __raw_write*() accessors.
 */
void __memset_io(volatile void __iomem *dst, int c, size_t count)
{
	volatile u8 __iomem *cursor = dst;
	/* Low byte of @c copied into all four byte lanes of a 32-bit word. */
	u32 pattern = (u8)c * 0x01010101U;

	/* Head: byte writes until the I/O address reaches a 4-byte boundary. */
	for (; count && !IS_ALIGNED((unsigned long)cursor, 4); count--) {
		__raw_writeb(c, cursor);
		cursor++;
	}

	/* Body: one 32-bit I/O write per iteration. */
	for (; count >= 4; count -= 4) {
		__raw_writel(pattern, cursor);
		cursor += 4;
	}

	/* Tail: fewer than four bytes are left. */
	for (; count; count--) {
		__raw_writeb(c, cursor);
		cursor++;
	}
}
EXPORT_SYMBOL(__memset_io);