#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because the setup
# is always virtualized, possibly with a throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.

# Command line: <flavour> [output]
#   flavour - target tag handed on to ppc-xlate.pl; it must contain "64" or
#             "32", and a trailing "le" selects the little-endian code paths.
#   output  - handed on to ppc-xlate.pl unchanged.
$flavour = shift;
$output  = shift;

# Fail with a usable message instead of the opaque "nonsense " when the
# flavour argument is missing altogether.
defined $flavour or die "usage: $0 <flavour> [output]\n";

# Word-size dependent frame parameters and load/store mnemonics.
if ($flavour =~ /64/) {
	$SIZE_T = 8;
	$LRSAVE = 2*$SIZE_T;
	$STU    = "stdu";
	$POP    = "ld";
	$PUSH   = "std";
} elsif ($flavour =~ /32/) {
	$SIZE_T = 4;
	$LRSAVE = $SIZE_T;
	$STU    = "stwu";
	$POP    = "lwz";
	$PUSH   = "stw";
} else {
	die "nonsense $flavour";
}

# Directory part of this script's path (with trailing separator), or the
# empty string when the script was invoked without a directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in perlasm/.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through ppc-xlate.pl.
# Low-precedence "or" is required: with "||" the die would bind to the
# (always true) command string and a failed open would go unnoticed.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Symbolic register names interpolated into the assembly template below.
my ($Xip,$Htbl,$inp,$len) = map { "r$_" } 3..6;	# argument block

# Vector registers: v0-v3 hold the product parts and the input block,
# v4-v12 hold zero, temporaries, the reduction constant, the key forms and
# the little-endian permute mask, v13-v19 are the second set used by
# gcm_init_htable for the higher powers of H.
my ($Xl,$Xm,$Xh,$IN) = map { "v$_" } 0..3;
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask) = map { "v$_" } 4..12;
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l) = map { "v$_" } 13..19;
my $vrsave = "r12";	# keeps the caller's SPR 256 value across each routine

# Extra temporaries for gcm_init_htable; they alias the H registers, which
# that routine has already stored by the time the aliases are written.
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);

# perlasm template holding the four GHASH entry points.  Perl interpolates
# the register variables declared above; the "le?"/"be?" line prefixes are
# resolved by the output loop at the end of this script before the text is
# piped to ppc-xlate.pl.
$code=<<___;
.machine	"any"

.text

# gcm_init_p10: r3 = table to fill, r4 = hash key H.
# Stores the 0xc2 reduction constant at offset 0x00 and the three split
# forms of the twisted key at offsets 0x10, 0x20 and 0x30.
# SPR 256 is kept in r12 for the duration and restored before returning.
.globl	.gcm_init_p10
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p10,.-.gcm_init_p10

# gcm_init_htable: r3 = table to fill, r4 = hash key H.
# Offsets 0x00-0x30 get the reduction constant and the split forms of the
# twisted key, followed by the split forms of H^2 (0x40-0x60),
# H^3 (0x70-0x90) and H^4 (0xa0-0xc0).
# Unlike gcm_init_p10 this routine applies no byte permute to H after
# loading it, on either endianness.
.globl	.gcm_init_htable
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$IN,$H,$t1		# twisted H

	vsldoi		$H,$IN,$IN,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	li		r8,0x40
	stvx_u		$H, r9,r3
	li		r9,0x50
	stvx_u		$Hh,r10,r3
	li		r10,0x60

	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$IN1,$Xl,$t1

	vsldoi		$H2,$IN1,$IN1,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$H2l,r8,r3		# save H^2
	li		r8,0x70
	stvx_u		$H2,r9,r3
	li		r9,0x80
	stvx_u		$H2h,r10,r3
	li		r10,0x90

	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
	 vpmsumd	$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
	 vpmsumd	$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
	 vpmsumd	$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
	 vpmsumd	$t6,$Xl1,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	 vsldoi		$t4,$Xm1,$zero,8
	 vsldoi		$t5,$zero,$Xm1,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1
	 vxor		$Xl1,$Xl1,$t4
	 vxor		$Xh1,$Xh1,$t5

	vsldoi		$Xl,$Xl,$Xl,8
	 vsldoi		$Xl1,$Xl1,$Xl1,8
	vxor		$Xl,$Xl,$t2
	 vxor		$Xl1,$Xl1,$t6

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	 vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	 vpmsumd	$Xl1,$Xl1,$xC2
	vxor		$t1,$t1,$Xh
	 vxor		$t5,$t5,$Xh1
	vxor		$Xl,$Xl,$t1
	 vxor		$Xl1,$Xl1,$t5

	vsldoi		$H,$Xl,$Xl,8
	 vsldoi		$H2,$Xl1,$Xl1,8
	vsldoi		$Hl,$zero,$H,8
	vsldoi		$Hh,$H,$zero,8
	 vsldoi		$H2l,$zero,$H2,8
	 vsldoi		$H2h,$H2,$zero,8

	stvx_u		$Hl,r8,r3		# save H^3
	li		r8,0xa0
	stvx_u		$H,r9,r3
	li		r9,0xb0
	stvx_u		$Hh,r10,r3
	li		r10,0xc0
	 stvx_u		$H2l,r8,r3		# save H^4
	 stvx_u		$H2,r9,r3
	 stvx_u		$H2h,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_htable,.-.gcm_init_htable

# gcm_gmult_p10: r3 = Xi (16 bytes, updated in place), r4 = table.
# Loads the reduction constant and the three key forms from table offsets
# 0x00-0x30, multiplies Xi by them with vpmsumd, reduces the product in
# two phases and writes the result back over Xi.
# On little-endian Xi is permuted on load and again before the store.
.globl	.gcm_gmult_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p10,.-.gcm_gmult_p10

# gcm_ghash_p10: r3 = Xi (updated in place), r4 = table, r5 = input,
# r6 = input length in bytes.
# The first block is loaded and xor-ed into Xi before the loop; each pass
# of Loop multiplies and reduces the current value while loading the next
# 16-byte block.  The length is decremented by 16 per pass; once that
# subtraction borrows, r0 becomes -1, the now negative length is added to
# the input pointer ahead of the look-ahead load, and the loop exits.
# NOTE(review): appears to assume a non-zero length that is a multiple of
# 16 - confirm against the callers.
.globl	.gcm_ghash_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	 subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	 subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	 and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	 add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	 lvx_u		$IN,0,$inp
	 addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p10,.-.gcm_ghash_p10

.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# Resolve the endianness markers and feed the template to ppc-xlate.pl.
# A line tagged "le?" is kept only for little-endian flavours and a line
# tagged "be?" only for the others; the non-matching kind is turned into a
# comment ("#le#" / "#be#").  At most one substitution is applied per line.
my $little_endian = ($flavour =~ /le$/);

foreach my $line (split("\n",$code)) {
	if ($little_endian) {
	    $line =~ s/le\?//		or
	    $line =~ s/be\?/#be#/;
	} else {
	    $line =~ s/le\?/#le#/	or
	    $line =~ s/be\?//;
	}
	print $line,"\n";
}

# STDOUT is the pipe to ppc-xlate.pl: close() flushes it, waits for the
# child and returns false when the translator failed, so check it rather
# than letting a broken translation pass as success.
close STDOUT or die "error closing STDOUT: $!";