crypto: arm64/crc-t10dif - move NEON yield to C code (fc754c02) · Commits · EulixOS / Software / Kernel

arch/arm64/crypto/crct10dif-ce-core.S

+11 −32

Original line number	Diff line number	Diff line
		@@ -68,10 +68,10 @@
		.text
		.arch armv8-a+crypto

		init_crc .req w19
		buf .req x20
		len .req x21
		fold_consts_ptr .req x22
		init_crc .req w0
		buf .req x1
		len .req x2
		fold_consts_ptr .req x3

		fold_consts .req v10

		@@ -257,12 +257,6 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
		.endm

		.macro crc_t10dif_pmull, p
		frame_push 4, 128

		mov init_crc, w0
		mov buf, x1
		mov len, x2

		__pmull_init_\p

		// For sizes less than 256 bytes, we can't fold 128 bytes at a time.
		@@ -317,26 +311,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
		fold_32_bytes \p, v6, v7

		subs len, len, #128
		b.lt .Lfold_128_bytes_loop_done_\@

		if_will_cond_yield_neon
		stp q0, q1, [sp, #.Lframe_local_offset]
		stp q2, q3, [sp, #.Lframe_local_offset + 32]
		stp q4, q5, [sp, #.Lframe_local_offset + 64]
		stp q6, q7, [sp, #.Lframe_local_offset + 96]
		do_cond_yield_neon
		ldp q0, q1, [sp, #.Lframe_local_offset]
		ldp q2, q3, [sp, #.Lframe_local_offset + 32]
		ldp q4, q5, [sp, #.Lframe_local_offset + 64]
		ldp q6, q7, [sp, #.Lframe_local_offset + 96]
		ld1 {fold_consts.2d}, [fold_consts_ptr]
		__pmull_init_\p
		__pmull_pre_\p fold_consts
		endif_yield_neon

		b .Lfold_128_bytes_loop_\@

		.Lfold_128_bytes_loop_done_\@:
		b.ge .Lfold_128_bytes_loop_\@

		// Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.

		@@ -453,7 +428,9 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
		// Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.

		umov w0, v0.h[0]
		frame_pop
		.ifc \p, p8
		ldp x29, x30, [sp], #16
		.endif
		ret

		.Lless_than_256_bytes_\@:
		@@ -489,6 +466,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
		// Assumes len >= 16.
		//
		// Entry point for the p8 variant: instantiates crc_t10dif_pmull with \p = p8.
		SYM_FUNC_START(crc_t10dif_pmull_p8)
		// Push x29/x30 and point x29 at the new frame record. There is no
		// matching pop in this function body: crc_t10dif_pmull emits
		// `ldp x29, x30, [sp], #16` itself under `.ifc \p, p8`, right before
		// its `ret`.
		// NOTE(review): presumably x30 has to be preserved because the p8
		// path makes a subroutine call that is not visible in this excerpt
		// -- confirm against the full file.
		stp x29, x30, [sp, #-16]!
		mov x29, sp
		// Expands to the whole function body, including the final `ret`.
		crc_t10dif_pmull p8
		SYM_FUNC_END(crc_t10dif_pmull_p8)

arch/arm64/crypto/crct10dif-ce-glue.c

+24 −6

Original line number	Diff line number	Diff line
		@@ -37,9 +37,18 @@ static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
		u16 *crc = shash_desc_ctx(desc);

		if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
		do {
		unsigned int chunk = length;

		if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
		chunk = SZ_4K;

		kernel_neon_begin();
		*crc = crc_t10dif_pmull_p8(*crc, data, length);
		*crc = crc_t10dif_pmull_p8(*crc, data, chunk);
		kernel_neon_end();
		data += chunk;
		length -= chunk;
		} while (length);
		} else {
		*crc = crc_t10dif_generic(*crc, data, length);
		}
		@@ -53,9 +62,18 @@ static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
		u16 *crc = shash_desc_ctx(desc);

		if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
		do {
		unsigned int chunk = length;

		if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
		chunk = SZ_4K;

		kernel_neon_begin();
		*crc = crc_t10dif_pmull_p64(*crc, data, length);
		*crc = crc_t10dif_pmull_p64(*crc, data, chunk);
		kernel_neon_end();
		data += chunk;
		length -= chunk;
		} while (length);
		} else {
		*crc = crc_t10dif_generic(*crc, data, length);
		}