Commit f0070f4a authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu
Browse files

crypto: arm64/aes-ce-mac - simplify NEON yield

parent f5943ef4
Loading
Loading
Loading
Loading
+14 −7
Original line number Diff line number Diff line
@@ -105,7 +105,7 @@ asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
				      int rounds, int blocks, u8 iv[],
				      u32 const rk2[]);

asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
asmlinkage int aes_mac_update(u8 const in[], u32 const rk[], int rounds,
			      int blocks, u8 dg[], int enc_before,
			      int enc_after);

@@ -856,10 +856,17 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
	int rounds = 6 + ctx->key_length / 4;

	if (crypto_simd_usable()) {
		int rem;

		do {
			kernel_neon_begin();
		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
			       enc_after);
			rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
					     dg, enc_before, enc_after);
			kernel_neon_end();
			in += (blocks - rem) * AES_BLOCK_SIZE;
			blocks = rem;
			enc_before = 0;
		} while (blocks);
	} else {
		if (enc_before)
			aes_encrypt(ctx, dg, dg);
+19 −33
Original line number Diff line number Diff line
@@ -678,61 +678,47 @@ AES_FUNC_END(aes_xts_decrypt)
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_FUNC_START(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	subs		w3, w3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cmp		w3, wzr
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4
	add		w3, w3, #4
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	cbz		w3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq
	subs		w3, w3, #1
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	st1		{v0.16b}, [x4]			/* return dg */
	mov		w0, w3
	ret

.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_FUNC_END(aes_mac_update)