crypto: p10-aes-gcm - Revert implementation (596f674d) · Commits · EulixOS / Software / Kernel

arch/powerpc/crypto/Kconfig

+0 −11

Original line number	Diff line number	Diff line
		@@ -94,15 +94,4 @@ config CRYPTO_AES_PPC_SPE
		architecture specific assembler implementations that work on 1KB
		tables or 256 bytes S-boxes.

		# Stitched AES/GCM driver for Power10 and later. Restricted to PPC64,
		# built as a module by default, and selects the generic AES library,
		# the algorithm API and the AEAD core.
		config CRYPTO_P10_AES_GCM
		tristate "Stitched AES/GCM acceleration support on P10+ CPU (PPC)"
		depends on PPC64
		select CRYPTO_LIB_AES
		select CRYPTO_ALGAPI
		select CRYPTO_AEAD
		default m
		help
		Support for cryptographic acceleration instructions on Power10+ CPU.
		This module supports stitched acceleration for AES/GCM in hardware.

		endmenu

arch/powerpc/crypto/Makefile

+0 −10

Original line number	Diff line number	Diff line
		@@ -13,7 +13,6 @@ obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
		obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
		obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
		obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
		obj-$(CONFIG_CRYPTO_P10_AES_GCM) += p10-aes-gcm-crypto.o

		aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
		md5-ppc-y := md5-asm.o md5-glue.o
		@@ -22,12 +21,3 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
		sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
		crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
		crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
		p10-aes-gcm-crypto-y := p10-aes-gcm-glue.o p10_aes_gcm.o ghashp8-ppc.o aesp8-ppc.o

		# Run a perlasm script to produce an assembly source file. The script
		# gets one argument, the flavour, chosen from CONFIG_CPU_LITTLE_ENDIAN;
		# its standard output is redirected into the target.
		quiet_cmd_perl = PERL $@
		cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@

		# Register the generated files so if_changed can track their commands.
		targets += aesp8-ppc.S ghashp8-ppc.S

		# Static pattern rule: each listed .S is generated from the .pl of the
		# same name in the source directory.
		$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
		$(call if_changed,perl)

arch/powerpc/crypto/aesp8-ppc.pl

deleted100644 → 0

+0 −3846

File deleted.

Preview size limit exceeded, changes collapsed.

arch/powerpc/crypto/ghashp8-ppc.pl

deleted100644 → 0

+0 −370

Original line number	Diff line number	Diff line
		#!/usr/bin/env perl
		# SPDX-License-Identifier: GPL-2.0

		# This code is taken from the OpenSSL project but the author (Andy Polyakov)
		# has relicensed it under the GPLv2. Therefore this program is free software;
		# you can redistribute it and/or modify it under the terms of the GNU General
		# Public License version 2 as published by the Free Software Foundation.
		#
		# The original headers, including the original license headers, are
		# included below for completeness.

		# ====================================================================
		# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
		# project. The module is, however, dual licensed under OpenSSL and
		# CRYPTOGAMS licenses depending on where you obtain it. For further
		# details see https://www.openssl.org/~appro/cryptogams/.
		# ====================================================================
		#
		# GHASH for PowerISA v2.07.
		#
		# July 2014
		#
		# Accurate performance measurements are problematic, because it's
		# always virtualized setup with possibly throttled processor.
		# Relative comparison is therefore more informative. This initial
		# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
		# faster than "4-bit" integer-only compiler-generated 64-bit code.
		# "Initial version" means that there is room for further improvement.

		# Command line: <flavour> [output]. The flavour string selects the word
		# size here and the endianness further down; both arguments are passed
		# on unchanged to ppc-xlate.pl.
		$flavour=shift;
		$output =shift;

		# ABI parameters derived from the flavour: pointer size, link-register
		# save offset and the load/store mnemonics of matching width.
		if ($flavour =~ /64/) {
			$SIZE_T=8;
			$LRSAVE=2*$SIZE_T;
			$STU="stdu";
			$POP="ld";
			$PUSH="std";
		} elsif ($flavour =~ /32/) {
			$SIZE_T=4;
			$LRSAVE=$SIZE_T;
			$STU="stwu";
			$POP="lwz";
			$PUSH="stw";
		} else { die "nonsense $flavour"; }

		# Directory part of the script path (with trailing separator). When the
		# script is invoked without a directory component the match fails, so
		# fall back to "" explicitly instead of reading a stale $1.
		$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

		# Look for the translator next to this script first, then in the
		# perlasm directory two levels up.
		( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
		( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
		die "can't locate ppc-xlate.pl";

		# Everything printed from here on is piped through ppc-xlate.pl.
		# Low-precedence "or" is required: with "||" the die would attach to
		# the (always true) command string and a failed open would go unnoticed.
		open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

		# Symbolic register names interpolated into the assembly template.
		# General-purpose registers r3..r6 carry the function arguments.
		my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block

		# Vector registers: v0..v3 hold the product accumulators and the input
		# block, v4..v12 the constants, temporaries and hash key parts, and
		# v13..v19 a second set used when two products are computed at once.
		my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
		my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
		my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
		# GPR used to save the value read from SPR 256 around each routine.
		my $vrsave="r12";
		# Extra temporaries that alias the registers of $Hl, $H and $Hh.
		my ($t4,$t5,$t6) = ($Hl,$H,$Hh);

		$code=<<___;
		.machine "any"

		.text

		.globl .gcm_init_p8
		lis r0,0xfff0
		li r8,0x10
		mfspr $vrsave,256
		li r9,0x20
		mtspr 256,r0
		li r10,0x30
		lvx_u $H,0,r4 # load H
		le?xor r7,r7,r7
		le?addi r7,r7,0x8 # need a vperm start with 08
		le?lvsr 5,0,r7
		le?vspltisb 6,0x0f
		le?vxor 5,5,6 # set a b-endian mask
		le?vperm $H,$H,$H,5

		vspltisb $xC2,-16 # 0xf0
		vspltisb $t0,1 # one
		vaddubm $xC2,$xC2,$xC2 # 0xe0
		vxor $zero,$zero,$zero
		vor $xC2,$xC2,$t0 # 0xe1
		vsldoi $xC2,$xC2,$zero,15 # 0xe1...
		vsldoi $t1,$zero,$t0,1 # ...1
		vaddubm $xC2,$xC2,$xC2 # 0xc2...
		vspltisb $t2,7
		vor $xC2,$xC2,$t1 # 0xc2....01
		vspltb $t1,$H,0 # most significant byte
		vsl $H,$H,$t0 # H<<=1
		vsrab $t1,$t1,$t2 # broadcast carry bit
		vand $t1,$t1,$xC2
		vxor $H,$H,$t1 # twisted H

		vsldoi $H,$H,$H,8 # twist even more ...
		vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
		vsldoi $Hl,$zero,$H,8 # ... and split
		vsldoi $Hh,$H,$zero,8

		stvx_u $xC2,0,r3 # save pre-computed table
		stvx_u $Hl,r8,r3
		stvx_u $H, r9,r3
		stvx_u $Hh,r10,r3

		mtspr 256,$vrsave
		blr
		.long 0
		.byte 0,12,0x14,0,0,0,2,0
		.long 0
		.size .gcm_init_p8,.-.gcm_init_p8

		.globl .gcm_init_htable
		lis r0,0xfff0
		li r8,0x10
		mfspr $vrsave,256
		li r9,0x20
		mtspr 256,r0
		li r10,0x30
		lvx_u $H,0,r4 # load H

		vspltisb $xC2,-16 # 0xf0
		vspltisb $t0,1 # one
		vaddubm $xC2,$xC2,$xC2 # 0xe0
		vxor $zero,$zero,$zero
		vor $xC2,$xC2,$t0 # 0xe1
		vsldoi $xC2,$xC2,$zero,15 # 0xe1...
		vsldoi $t1,$zero,$t0,1 # ...1
		vaddubm $xC2,$xC2,$xC2 # 0xc2...
		vspltisb $t2,7
		vor $xC2,$xC2,$t1 # 0xc2....01
		vspltb $t1,$H,0 # most significant byte
		vsl $H,$H,$t0 # H<<=1
		vsrab $t1,$t1,$t2 # broadcast carry bit
		vand $t1,$t1,$xC2
		vxor $IN,$H,$t1 # twisted H

		vsldoi $H,$IN,$IN,8 # twist even more ...
		vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
		vsldoi $Hl,$zero,$H,8 # ... and split
		vsldoi $Hh,$H,$zero,8

		stvx_u $xC2,0,r3 # save pre-computed table
		stvx_u $Hl,r8,r3
		li r8,0x40
		stvx_u $H, r9,r3
		li r9,0x50
		stvx_u $Hh,r10,r3
		li r10,0x60

		vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
		vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
		vpmsumd $Xh,$IN,$Hh # H.hi·H.hi

		vpmsumd $t2,$Xl,$xC2 # 1st reduction phase

		vsldoi $t0,$Xm,$zero,8
		vsldoi $t1,$zero,$Xm,8
		vxor $Xl,$Xl,$t0
		vxor $Xh,$Xh,$t1

		vsldoi $Xl,$Xl,$Xl,8
		vxor $Xl,$Xl,$t2

		vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
		vpmsumd $Xl,$Xl,$xC2
		vxor $t1,$t1,$Xh
		vxor $IN1,$Xl,$t1

		vsldoi $H2,$IN1,$IN1,8
		vsldoi $H2l,$zero,$H2,8
		vsldoi $H2h,$H2,$zero,8

		stvx_u $H2l,r8,r3 # save H^2
		li r8,0x70
		stvx_u $H2,r9,r3
		li r9,0x80
		stvx_u $H2h,r10,r3
		li r10,0x90

		vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
		vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
		vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
		vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
		vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
		vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi

		vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
		vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase

		vsldoi $t0,$Xm,$zero,8
		vsldoi $t1,$zero,$Xm,8
		vsldoi $t4,$Xm1,$zero,8
		vsldoi $t5,$zero,$Xm1,8
		vxor $Xl,$Xl,$t0
		vxor $Xh,$Xh,$t1
		vxor $Xl1,$Xl1,$t4
		vxor $Xh1,$Xh1,$t5

		vsldoi $Xl,$Xl,$Xl,8
		vsldoi $Xl1,$Xl1,$Xl1,8
		vxor $Xl,$Xl,$t2
		vxor $Xl1,$Xl1,$t6

		vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
		vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
		vpmsumd $Xl,$Xl,$xC2
		vpmsumd $Xl1,$Xl1,$xC2
		vxor $t1,$t1,$Xh
		vxor $t5,$t5,$Xh1
		vxor $Xl,$Xl,$t1
		vxor $Xl1,$Xl1,$t5

		vsldoi $H,$Xl,$Xl,8
		vsldoi $H2,$Xl1,$Xl1,8
		vsldoi $Hl,$zero,$H,8
		vsldoi $Hh,$H,$zero,8
		vsldoi $H2l,$zero,$H2,8
		vsldoi $H2h,$H2,$zero,8

		stvx_u $Hl,r8,r3 # save H^3
		li r8,0xa0
		stvx_u $H,r9,r3
		li r9,0xb0
		stvx_u $Hh,r10,r3
		li r10,0xc0
		stvx_u $H2l,r8,r3 # save H^4
		stvx_u $H2,r9,r3
		stvx_u $H2h,r10,r3

		mtspr 256,$vrsave
		blr
		.long 0
		.byte 0,12,0x14,0,0,0,2,0
		.long 0
		.size .gcm_init_htable,.-.gcm_init_htable

		.globl .gcm_gmult_p8
		lis r0,0xfff8
		li r8,0x10
		mfspr $vrsave,256
		li r9,0x20
		mtspr 256,r0
		li r10,0x30
		lvx_u $IN,0,$Xip # load Xi

		lvx_u $Hl,r8,$Htbl # load pre-computed table
		le?lvsl $lemask,r0,r0
		lvx_u $H, r9,$Htbl
		le?vspltisb $t0,0x07
		lvx_u $Hh,r10,$Htbl
		le?vxor $lemask,$lemask,$t0
		lvx_u $xC2,0,$Htbl
		le?vperm $IN,$IN,$IN,$lemask
		vxor $zero,$zero,$zero

		vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
		vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
		vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi

		vpmsumd $t2,$Xl,$xC2 # 1st phase

		vsldoi $t0,$Xm,$zero,8
		vsldoi $t1,$zero,$Xm,8
		vxor $Xl,$Xl,$t0
		vxor $Xh,$Xh,$t1

		vsldoi $Xl,$Xl,$Xl,8
		vxor $Xl,$Xl,$t2

		vsldoi $t1,$Xl,$Xl,8 # 2nd phase
		vpmsumd $Xl,$Xl,$xC2
		vxor $t1,$t1,$Xh
		vxor $Xl,$Xl,$t1

		le?vperm $Xl,$Xl,$Xl,$lemask
		stvx_u $Xl,0,$Xip # write out Xi

		mtspr 256,$vrsave
		blr
		.long 0
		.byte 0,12,0x14,0,0,0,2,0
		.long 0
		.size .gcm_gmult_p8,.-.gcm_gmult_p8

		.globl .gcm_ghash_p8
		lis r0,0xfff8
		li r8,0x10
		mfspr $vrsave,256
		li r9,0x20
		mtspr 256,r0
		li r10,0x30
		lvx_u $Xl,0,$Xip # load Xi

		lvx_u $Hl,r8,$Htbl # load pre-computed table
		le?lvsl $lemask,r0,r0
		lvx_u $H, r9,$Htbl
		le?vspltisb $t0,0x07
		lvx_u $Hh,r10,$Htbl
		le?vxor $lemask,$lemask,$t0
		lvx_u $xC2,0,$Htbl
		le?vperm $Xl,$Xl,$Xl,$lemask
		vxor $zero,$zero,$zero

		lvx_u $IN,0,$inp
		addi $inp,$inp,16
		subi $len,$len,16
		le?vperm $IN,$IN,$IN,$lemask
		vxor $IN,$IN,$Xl
		b Loop

		.align 5
		Loop:
		subic $len,$len,16
		vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
		subfe. r0,r0,r0 # borrow?-1:0
		vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
		and r0,r0,$len
		vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
		add $inp,$inp,r0

		vpmsumd $t2,$Xl,$xC2 # 1st phase

		vsldoi $t0,$Xm,$zero,8
		vsldoi $t1,$zero,$Xm,8
		vxor $Xl,$Xl,$t0
		vxor $Xh,$Xh,$t1

		vsldoi $Xl,$Xl,$Xl,8
		vxor $Xl,$Xl,$t2
		lvx_u $IN,0,$inp
		addi $inp,$inp,16

		vsldoi $t1,$Xl,$Xl,8 # 2nd phase
		vpmsumd $Xl,$Xl,$xC2
		le?vperm $IN,$IN,$IN,$lemask
		vxor $t1,$t1,$Xh
		vxor $IN,$IN,$t1
		vxor $IN,$IN,$Xl
		beq Loop # did $len-=16 borrow?

		vxor $Xl,$Xl,$t1
		le?vperm $Xl,$Xl,$Xl,$lemask
		stvx_u $Xl,0,$Xip # write out Xi

		mtspr 256,$vrsave
		blr
		.long 0
		.byte 0,12,0x14,0,0,0,4,0
		.long 0
		.size .gcm_ghash_p8,.-.gcm_ghash_p8

		.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
		.align 2
		___

		# Resolve the endian-conditional "le?" / "be?" prefixes line by line:
		# the prefix matching the target endianness is stripped so the
		# instruction is emitted, the other one is turned into a comment marker.
		my $little_endian = ($flavour =~ /le$/);
		foreach (split("\n",$code)) {
			if ($little_endian) {	# little-endian
				s/le\?// or
				s/be\?/#be#/;
			} else {
				s/le\?/#le#/ or
				s/be\?//;
			}
			print $_,"\n";
		}

		# STDOUT is a pipe to ppc-xlate.pl. Closing it flushes the output and
		# waits for the child; a false return means the translator failed, so
		# report it instead of exiting successfully with a broken .S file.
		close STDOUT or die "error closing STDOUT: $!";

arch/powerpc/crypto/p10-aes-gcm-glue.c

deleted100644 → 0

+0 −345

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: GPL-2.0-or-later
		/*
		* Glue code for accelerated AES-GCM stitched implementation for ppc64le.
		*
		* Copyright 2022- IBM Inc. All rights reserved
		*/

		#include <asm/unaligned.h>
		#include <asm/simd.h>
		#include <asm/switch_to.h>
		#include <crypto/algapi.h>
		#include <crypto/aes.h>
		#include <crypto/algapi.h>
		#include <crypto/b128ops.h>
		#include <crypto/gf128mul.h>
		#include <crypto/internal/simd.h>
		#include <crypto/internal/aead.h>
		#include <crypto/internal/hash.h>
		#include <crypto/internal/skcipher.h>
		#include <crypto/scatterwalk.h>
		#include <linux/cpufeature.h>
		#include <linux/crypto.h>
		#include <linux/module.h>
		#include <linux/types.h>

		/*
		 * CPU feature index handed to module_cpu_feature_match().
		 * NOTE(review): the "32 +" offset presumably selects the second hwcap
		 * word in which PPC_FEATURE2_* bits live - confirm against cpufeature.h.
		 */
		#define PPC_MODULE_FEATURE_P10 (32 + ilog2(PPC_FEATURE2_ARCH_3_1))
		/* Alignment, in bytes, applied to the on-stack work buffers. */
		#define PPC_ALIGN 16
		/* Number of IV bytes supplied by the caller; the last 4 hold the counter. */
		#define GCM_IV_SIZE 12

		MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
		MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
		/*
		 * NOTE(review): the SPDX tag at the top of the file says
		 * GPL-2.0-or-later while this string is "GPL v2" - confirm which one
		 * is intended.
		 */
		MODULE_LICENSE("GPL v2");
		/*
		 * NOTE(review): the driver registers "gcm(aes)" but the alias is
		 * "aes" - confirm this is the intended autoload trigger.
		 */
		MODULE_ALIAS_CRYPTO("aes");

		/*
		 * Entry points implemented outside this file. gcm_init_htable() and
		 * gcm_ghash_p8() come from the ghashp8-ppc perlasm output; the AES
		 * routines are presumably provided by the other objects linked into
		 * this module (aesp8-ppc, p10_aes_gcm) - verify against the Makefile.
		 *
		 * Buffers are passed by pointer; the declarators below restore the
		 * pointer types the callers in this file rely on.
		 */
		asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
						      void *key);
		asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
		asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
						    void *rkey, u8 *iv, void *Xi);
		asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
						    void *rkey, u8 *iv, void *Xi);
		asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
		asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
					     unsigned char *aad, unsigned int alen);

		/*
		 * Expanded AES key as filled in by aes_p8_set_encrypt_key().
		 * NOTE(review): the layout is presumably fixed by the assembly
		 * routines - confirm before reordering fields.
		 */
		struct aes_key {
		u8 key[AES_MAX_KEYLENGTH];
		u64 rounds;
		};

		/*
		 * Per-request GCM state. The assembly code addresses fields by fixed
		 * offset (see the Plen comment), so the field order must not change.
		 */
		struct gcm_ctx {
		u8 iv[16];		/* counter block handed to the bulk routines */
		u8 ivtag[16];		/* encrypted initial counter block, XORed into the tag */
		u8 aad_hash[16];	/* GHASH state after the associated data */
		u64 aadLen;		/* associated data length in bytes */
		u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */
		};
		/*
		 * GHASH key material. The first 16 bytes of Htable double as the
		 * running hash value Xi; the precomputed table written by
		 * gcm_init_htable() starts at byte offset 32.
		 */
		struct Hash_ctx {
		u8 H[16]; /* subkey */
		u8 Htable[256]; /* Xi, Hash table(offset 32) */
		};

		/* Per-transform context: only the expanded encryption key is stored. */
		struct p10_aes_gcm_ctx {
		struct aes_key enc_key;
		};

		/*
		 * Open a section that may use VSX registers: preemption is disabled
		 * first, then kernel VSX use is enabled. Must be paired with vsx_end().
		 */
		static void vsx_begin(void)
		{
		preempt_disable();
		enable_kernel_vsx();
		}

		/*
		 * Close a section opened by vsx_begin(), undoing its two steps in
		 * reverse order.
		 */
		static void vsx_end(void)
		{
		disable_kernel_vsx();
		preempt_enable();
		}

		/*
		 * Convert the 16-byte hash subkey in place: each 8-byte half is read
		 * as a big-endian 64-bit value and stored back in CPU byte order.
		 *
		 * @hash: 16-byte buffer holding the subkey.
		 */
		static void set_subkey(unsigned char *hash)
		{
			/* Store through a u64 pointer; a bare cast is not assignable. */
			*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
			*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
		}

		/*
		* Compute aad if any.
		* - Hash aad and copy to Xi.
		*/
		static void set_aad(struct gcm_ctx gctx, struct Hash_ctx hash,
		unsigned char *aad, int alen)
		{
		int i;
		u8 nXi[16] = {0, };

		gctx->aadLen = alen;
		i = alen & ~0xf;
		if (i) {
		gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
		aad += i;
		alen -= i;
		}
		if (alen) {
		for (i = 0; i < alen; i++)
		nXi[i] ^= aad[i];

		memset(gctx->aad_hash, 0, 16);
		gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
		} else {
		memcpy(gctx->aad_hash, nXi, 16);
		}

		memcpy(hash->Htable, gctx->aad_hash, 16);
		}

		/*
		 * Prepare the per-request GCM state.
		 *
		 * @gctx:     request state to initialise.
		 * @iv:       16-byte buffer whose first 12 bytes hold the caller's IV;
		 *            the last 4 bytes are overwritten with the block counter.
		 * @rdkey:    expanded AES encryption key.
		 * @hash:     hash context; H must be zeroed by the caller, it is
		 *            encrypted in place to form the subkey.
		 * @assoc:    linear associated data.
		 * @assoclen: length of @assoc in bytes.
		 */
		static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
					struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
		{
			__be32 counter = cpu_to_be32(1);

			/* Derive the hash subkey and build the multiplication table. */
			aes_p8_encrypt(hash->H, hash->H, rdkey);
			set_subkey(hash->H);
			gcm_init_htable(hash->Htable+32, hash->H);

			*((__be32 *)(iv+12)) = counter;

			gctx->Plen = 0;

			/*
			 * Encrypt counter vector as iv tag and increment counter.
			 */
			aes_p8_encrypt(iv, gctx->ivtag, rdkey);

			counter = cpu_to_be32(2);
			*((__be32 *)(iv+12)) = counter;
			memcpy(gctx->iv, iv, 16);

			gctx->aadLen = assoclen;
			memset(gctx->aad_hash, 0, 16);
			if (assoclen)
				set_aad(gctx, hash, assoc, assoclen);
		}

		static void finish_tag(struct gcm_ctx gctx, struct Hash_ctx hash, int len)
		{
		int i;
		unsigned char len_ac[16 + PPC_ALIGN];
		unsigned char aclen = PTR_ALIGN((void )len_ac, PPC_ALIGN);
		__be64 clen = cpu_to_be64(len << 3);
		__be64 alen = cpu_to_be64(gctx->aadLen << 3);

		if (len == 0 && gctx->aadLen == 0) {
		memcpy(hash->Htable, gctx->ivtag, 16);
		return;
		}

		/*
		* Len is in bits.
		*/
		((__be64 )(aclen)) = alen;
		((__be64 )(aclen+8)) = clen;

		/*
		* hash (AAD len and len)
		*/
		gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);

		for (i = 0; i < 16; i++)
		hash->Htable[i] ^= gctx->ivtag[i];
		}

		/*
		 * Validate a requested authentication tag length.
		 *
		 * Accepts 4, 8 and every value from 12 through 16 bytes.
		 * Returns 0 when the size is accepted, -EINVAL otherwise. @tfm is
		 * not used.
		 */
		static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
		{
			if (authsize == 4 || authsize == 8 ||
			    (authsize >= 12 && authsize <= 16))
				return 0;

			return -EINVAL;
		}

		/*
		 * Expand and store the AES key for this transform.
		 *
		 * @aead:   AEAD transform whose context receives the key schedule.
		 * @key:    raw key bytes.
		 * @keylen: key length in bytes.
		 *
		 * Returns 0 on success, -EINVAL when the key expansion routine
		 * reports any failure.
		 */
		static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
					      unsigned int keylen)
		{
			struct crypto_tfm *tfm = crypto_aead_tfm(aead);
			struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
			int ret;

			/* The key expansion routine takes the key size in bits. */
			vsx_begin();
			ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
			vsx_end();

			return ret ? -EINVAL : 0;
		}

		/*
		 * Common encrypt/decrypt path.
		 *
		 * @req: AEAD request.
		 * @enc: non-zero to encrypt, zero to decrypt.
		 *
		 * Returns 0 on success, -ENOMEM if the associated data could not be
		 * linearised, -EBADMSG if the tag does not match on decryption, or
		 * the error reported by the scatterlist walk.
		 */
		static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
		{
			struct crypto_tfm *tfm = req->base.tfm;
			struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
			/*
			 * Work areas are over-allocated by PPC_ALIGN bytes and then
			 * accessed through pointers aligned to that boundary.
			 */
			u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
			struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
			u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
			struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
			struct scatter_walk assoc_sg_walk;
			struct skcipher_walk walk;
			u8 *assocmem = NULL;
			u8 *assoc;
			unsigned int assoclen = req->assoclen;
			unsigned int cryptlen = req->cryptlen;
			unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
			unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
			int ret;
			unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
			u8 otag[16];
			int total_processed = 0;

			memset(databuf, 0, sizeof(databuf));
			memset(hashbuf, 0, sizeof(hashbuf));
			memset(ivbuf, 0, sizeof(ivbuf));
			memcpy(iv, req->iv, GCM_IV_SIZE);

			/* Linearize assoc, if not already linear */
			if (req->src->length >= assoclen && req->src->length) {
				scatterwalk_start(&assoc_sg_walk, req->src);
				assoc = scatterwalk_map(&assoc_sg_walk);
			} else {
				gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
					      GFP_KERNEL : GFP_ATOMIC;

				/* assoc can be any length, so must be on heap */
				assocmem = kmalloc(assoclen, flags);
				if (unlikely(!assocmem))
					return -ENOMEM;
				assoc = assocmem;

				scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
			}

			vsx_begin();
			gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
			vsx_end();

			/* Release whichever form of linear assoc buffer was used. */
			if (!assocmem)
				scatterwalk_unmap(assoc);
			else
				kfree(assocmem);

			if (enc)
				ret = skcipher_walk_aead_encrypt(&walk, req, false);
			else
				ret = skcipher_walk_aead_decrypt(&walk, req, false);
			if (ret)
				return ret;

			/* Process each chunk handed out by the walk in full. */
			while (walk.nbytes > 0 && ret == 0) {

				vsx_begin();
				if (enc)
					aes_p10_gcm_encrypt(walk.src.virt.addr,
							    walk.dst.virt.addr,
							    walk.nbytes,
							    &ctx->enc_key, gctx->iv, hash->Htable);
				else
					aes_p10_gcm_decrypt(walk.src.virt.addr,
							    walk.dst.virt.addr,
							    walk.nbytes,
							    &ctx->enc_key, gctx->iv, hash->Htable);
				vsx_end();

				total_processed += walk.nbytes;
				ret = skcipher_walk_done(&walk, 0);
			}

			if (ret)
				return ret;

			/* Finalize hash */
			vsx_begin();
			finish_tag(gctx, hash, total_processed);
			vsx_end();

			/* copy Xi to end of dst */
			if (enc)
				scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
							 auth_tag_len, 1);
			else {
				/* Fetch the received tag and compare it with the computed one. */
				scatterwalk_map_and_copy(otag, req->src,
							 req->assoclen + cryptlen - auth_tag_len,
							 auth_tag_len, 0);

				if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
					memzero_explicit(hash->Htable, 16);
					return -EBADMSG;
				}
			}

			return 0;
		}

		/* AEAD .encrypt hook: runs the common path with enc set to 1. */
		static int p10_aes_gcm_encrypt(struct aead_request *req)
		{
		return p10_aes_gcm_crypt(req, 1);
		}

		/* AEAD .decrypt hook: runs the common path with enc set to 0. */
		static int p10_aes_gcm_decrypt(struct aead_request *req)
		{
		return p10_aes_gcm_crypt(req, 0);
		}

		static struct aead_alg gcm_aes_alg = {
		.ivsize = GCM_IV_SIZE,
		.maxauthsize = 16,

		.setauthsize = set_authsize,
		.setkey = p10_aes_gcm_setkey,
		.encrypt = p10_aes_gcm_encrypt,
		.decrypt = p10_aes_gcm_decrypt,

		.base.cra_name = "gcm(aes)",
		.base.cra_driver_name = "p10_aes_gcm",
		.base.cra_priority = 2100,
		.base.cra_blocksize = 1,
		.base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx),
		.base.cra_module = THIS_MODULE,
		};

		/*
		 * Module init: register the AEAD algorithm and return the result of
		 * the registration call.
		 */
		static int __init p10_init(void)
		{
		return crypto_register_aead(&gcm_aes_alg);
		}

		/* Module exit: unregister the AEAD algorithm registered by p10_init(). */
		static void __exit p10_exit(void)
		{
		crypto_unregister_aead(&gcm_aes_alg);
		}

		/*
		 * p10_init is installed through the CPU-feature matching helper keyed
		 * on PPC_MODULE_FEATURE_P10; p10_exit is the regular module exit hook.
		 */
		module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
		module_exit(p10_exit);