#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.
use strict;
use warnings;

# Perlasm generator.  Emits PowerPC assembly for four GHASH routines
# (gcm_init_p10, gcm_init_htable, gcm_gmult_p10, gcm_ghash_p10) and pipes the
# text through ppc-xlate.pl, which receives the same flavour/output arguments.
#
# Usage: <this script> <flavour> [<output>]
#   flavour  must contain "64" or "32".  A trailing "le" selects the
#            little-endian variant: lines tagged "le?" are enabled and lines
#            tagged "be?" are commented out (and the other way round for
#            every other flavour).
#   output   handed verbatim to ppc-xlate.pl; may be omitted.
#
# Dies when the flavour is not recognised, when ppc-xlate.pl cannot be found
# or started, and when the pipe to it does not close cleanly.

my $flavour = shift;
my $output  = shift;

# Missing arguments become empty strings: the checks and the command line
# below then behave exactly as they would with undef, without the warnings.
$flavour = "" unless defined $flavour;
$output  = "" unless defined $output;

# Only the validation is needed here; nothing in the generated code depends
# on the word size, so no per-flavour settings are kept.
die "nonsense $flavour" unless $flavour =~ /64/ || $flavour =~ /32/;

# Look for ppc-xlate.pl next to this script first, then in ../../perlasm.
# When $0 has no directory component the lookup is relative to the current
# directory.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
my ($xlate) = grep { -f $_ }
    ("${dir}ppc-xlate.pl", "${dir}../../perlasm/ppc-xlate.pl");
defined $xlate or die "can't locate ppc-xlate.pl";

# "or", not "||": "||" would bind to the (always true) command string and the
# die could never fire.
open STDOUT, "| $^X $xlate $flavour $output"
    or die "can't call $xlate: $!";

# Register assignment.
# gcm_gmult_p10 / gcm_ghash_p10 take their arguments in r3..r6.  The two init
# routines address r3 (table destination) and r4 (H source) directly.
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
# General-purpose register holding the caller's SPR 256 value while a routine
# runs; it is written back right before each blr.
my $vrsave="r12";
# Extra temporaries for gcm_init_htable.  They alias $Hl/$H/$Hh and are only
# written after those three values have been stored to the table.
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);

# Table layout produced by the init routines (byte offsets from r3):
#   0x00        reduction constant ($xC2)
#   0x10-0x30   H split as lo / full / hi        (both init routines)
#   0x40-0x60   H^2 split the same way           (gcm_init_htable only)
#   0x70-0x90   H^3                              (gcm_init_htable only)
#   0xa0-0xc0   H^4                              (gcm_init_htable only)
# gcm_gmult_p10 and gcm_ghash_p10 read offsets 0x00-0x30 only.
#
# NOTE(review): gcm_init_p10 byte-swaps H under "le?" after loading it,
# gcm_init_htable does not -- confirm that this difference is intended.
my $code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p10
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p10,.-.gcm_init_p10

.globl	.gcm_init_htable
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$IN,$H,$t1		# twisted H

	vsldoi		$H,$IN,$IN,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	li		r8,0x40
	stvx_u		$H, r9,r3
	li		r9,0x50
	stvx_u		$Hh,r10,r3
	li		r10,0x60

	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$IN1,$Xl,$t1

	vsldoi		$H2,$IN1,$IN1,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$H2l,r8,r3		# save H^2
	li		r8,0x70
	stvx_u		$H2,r9,r3
	li		r9,0x80
	stvx_u		$H2h,r10,r3
	li		r10,0x90

	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
	vpmsumd		$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
	vpmsumd		$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
	vpmsumd		$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
	vpmsumd		$t6,$Xl1,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vsldoi		$t4,$Xm1,$zero,8
	vsldoi		$t5,$zero,$Xm1,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1
	vxor		$Xl1,$Xl1,$t4
	vxor		$Xh1,$Xh1,$t5

	vsldoi		$Xl,$Xl,$Xl,8
	vsldoi		$Xl1,$Xl1,$Xl1,8
	vxor		$Xl,$Xl,$t2
	vxor		$Xl1,$Xl1,$t6

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vpmsumd		$Xl1,$Xl1,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$t5,$t5,$Xh1
	vxor		$Xl,$Xl,$t1
	vxor		$Xl1,$Xl1,$t5

	vsldoi		$H,$Xl,$Xl,8
	vsldoi		$H2,$Xl1,$Xl1,8
	vsldoi		$Hl,$zero,$H,8
	vsldoi		$Hh,$H,$zero,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$Hl,r8,r3		# save H^3
	li		r8,0xa0
	stvx_u		$H,r9,r3
	li		r9,0xb0
	stvx_u		$Hh,r10,r3
	li		r10,0xc0
	stvx_u		$H2l,r8,r3		# save H^4
	stvx_u		$H2,r9,r3
	stvx_u		$H2h,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_htable,.-.gcm_init_htable

.globl	.gcm_gmult_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl		$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	le?vxor		$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p10,.-.gcm_gmult_p10

.globl	.gcm_ghash_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl		$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	le?vxor		$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p10,.-.gcm_ghash_p10

.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# Resolve the endianness tags line by line.  At most one tag is rewritten per
# line: the "be?" substitution is tried only when no "le?" tag was found.
my $little_endian = $flavour =~ /le$/;
for my $line (split /\n/, $code) {
    if ($little_endian) {
        $line =~ s/le\?// or $line =~ s/be\?/#be#/;
    } else {
        $line =~ s/le\?/#le#/ or $line =~ s/be\?//;
    }
    print $line, "\n";
}

# Closing the pipe flushes the output and waits for ppc-xlate.pl.  close()
# returns false both on a write error ($! set) and when the child exits with
# a non-zero status ($! is 0, the status is in $?), so report whichever it was.
close STDOUT
    or die "error closing pipe to $xlate: " . ($! ? $! : "exit status $?");