162306a36Sopenharmony_ci#! /usr/bin/env perl
262306a36Sopenharmony_ci# SPDX-License-Identifier: GPL-2.0
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci# This code is taken from CRYPTOGAMs[1] and is included here using the option
562306a36Sopenharmony_ci# in the license to distribute the code under the GPL. Therefore this program
662306a36Sopenharmony_ci# is free software; you can redistribute it and/or modify it under the terms of
762306a36Sopenharmony_ci# the GNU General Public License version 2 as published by the Free Software
862306a36Sopenharmony_ci# Foundation.
962306a36Sopenharmony_ci#
1062306a36Sopenharmony_ci# [1] https://www.openssl.org/~appro/cryptogams/
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
1362306a36Sopenharmony_ci# All rights reserved.
1462306a36Sopenharmony_ci#
1562306a36Sopenharmony_ci# Redistribution and use in source and binary forms, with or without
1662306a36Sopenharmony_ci# modification, are permitted provided that the following conditions
1762306a36Sopenharmony_ci# are met:
1862306a36Sopenharmony_ci#
1962306a36Sopenharmony_ci#       * Redistributions of source code must retain copyright notices,
2062306a36Sopenharmony_ci#         this list of conditions and the following disclaimer.
2162306a36Sopenharmony_ci#
2262306a36Sopenharmony_ci#       * Redistributions in binary form must reproduce the above
2362306a36Sopenharmony_ci#         copyright notice, this list of conditions and the following
2462306a36Sopenharmony_ci#         disclaimer in the documentation and/or other materials
2562306a36Sopenharmony_ci#         provided with the distribution.
2662306a36Sopenharmony_ci#
2762306a36Sopenharmony_ci#       * Neither the name of the CRYPTOGAMS nor the names of its
2862306a36Sopenharmony_ci#         copyright holder and contributors may be used to endorse or
2962306a36Sopenharmony_ci#         promote products derived from this software without specific
3062306a36Sopenharmony_ci#         prior written permission.
3162306a36Sopenharmony_ci#
3262306a36Sopenharmony_ci# ALTERNATIVELY, provided that this notice is retained in full, this
3362306a36Sopenharmony_ci# product may be distributed under the terms of the GNU General Public
3462306a36Sopenharmony_ci# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
3562306a36Sopenharmony_ci# those given above.
3662306a36Sopenharmony_ci#
3762306a36Sopenharmony_ci# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
3862306a36Sopenharmony_ci# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
3962306a36Sopenharmony_ci# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
4062306a36Sopenharmony_ci# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
4162306a36Sopenharmony_ci# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4262306a36Sopenharmony_ci# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
4362306a36Sopenharmony_ci# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
4462306a36Sopenharmony_ci# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
4562306a36Sopenharmony_ci# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
4662306a36Sopenharmony_ci# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
4762306a36Sopenharmony_ci# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci# ====================================================================
5062306a36Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5162306a36Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
5262306a36Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
5362306a36Sopenharmony_ci# details see https://www.openssl.org/~appro/cryptogams/.
5462306a36Sopenharmony_ci# ====================================================================
5562306a36Sopenharmony_ci#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in the pure AltiVec/VMX way [where data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
6962306a36Sopenharmony_ci#
7062306a36Sopenharmony_ci# May 2016
7162306a36Sopenharmony_ci#
# Add XTS subroutine; improvements of 9x on little-endian and 12x on
# big-endian systems were measured.
7462306a36Sopenharmony_ci#
7562306a36Sopenharmony_ci######################################################################
7662306a36Sopenharmony_ci# Current large-block performance in cycles per byte processed with
7762306a36Sopenharmony_ci# 128-bit key (less is better).
7862306a36Sopenharmony_ci#
7962306a36Sopenharmony_ci#		CBC en-/decrypt	CTR	XTS
8062306a36Sopenharmony_ci# POWER8[le]	3.96/0.72	0.74	1.1
8162306a36Sopenharmony_ci# POWER8[be]	3.75/0.65	0.66	1.0
8262306a36Sopenharmony_ci
# Target flavour, taken from the first command-line argument.  It must
# contain "64" or "32"; a trailing "le" selects little-endian output.
$flavour = shift;

# Word size, link-register save offset and the matching store-with-update,
# load, store, unsigned-compare and shift-left mnemonics.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Directory part of this script's path (empty when $0 carries none); the
# translator is looked for next to the script, then under ../../perlasm/.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator.  The
# optional second command-line argument is passed on to it unchanged.
# "or" is required here: "||" would bind to the command string, which is
# always true, so a failed open would never reach die.
my $output = shift;
$output = "" unless defined $output;
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# stack frame size used by the generated code
$prefix="aes_p10";	# prefix of every exported symbol

$sp="r1";
$vrsave="r12";
11862306a36Sopenharmony_ci#########################################################################
{{{	# Key setup procedures						#
# Appends to $code the assembly text for:
#   rcon / Lconsts             - a constants table and a helper that
#                                leaves the table's address in $ptr;
#   ${prefix}_set_encrypt_key  - key expansion for 128-, 192- and 256-bit
#                                keys; r3 is -1 when $inp or $out is
#                                zero, -2 when $bits is below 128, above
#                                256 or not a multiple of 64, and 0 on
#                                success; $rounds (10, 12 or 14) is
#                                stored with stw at 0($out);
#   ${prefix}_set_decrypt_key  - calls Lset_encrypt_key and, when that
#                                returned 0, swaps the 16-byte round keys
#                                end for end ($rounds/2 iterations).
#
# NOTE: the heredoc below is output text, not Perl.  The loop at the end
# of this file rewrites "?"-, "le?"- and "be?"-prefixed items per
# endianness and evaluates back-quoted spans, so do not reword it.
#
# General-purpose registers r3..r8.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers v0..v6.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers v7..v11, used by the vperm/vsel/stvx sequences that
# store each round key (presumably to cope with an unaligned $out).
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
45462306a36Sopenharmony_ci#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir)
#
# Appends to $code the assembly text of ${prefix}_${dir}crypt, a routine
# that processes one 16-byte block: it loads the block from $inp (r3),
# reads the round count with lwz from 240($key) (r5), runs the rounds
# and stores the result at $out (r4).
#
#   $dir - "en" emits vcipher/vcipherlast, "de" emits the vncipher forms.
#
# Returns nothing useful; the only effect is the text added to $code.
# The sub is declared without a prototype because it takes one argument
# (the former empty prototype forced callers to use the & sigil).
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";	# selects v${n}cipher mnemonics
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

# NOTE: the heredoc is output text; "?"- and "le?"-prefixed items are
# rewritten per endianness by the loop at the end of this file.
$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
53262306a36Sopenharmony_ci
# Post-process the generated assembly line by line and print it to STDOUT
# (the pipe to the translator): evaluate back-quoted expressions, convert
# the leading constants table to .byte directives, and resolve the
# endian-specific "?", "le?" and "be?" markers.
my $consts=1;		# true until the Lconsts: label ends the table
foreach (split("\n",$code)) {
	# replace each `expression` with the result of evaluating it
	s/\`([^\`]*)\`/eval($1)/ge;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/) {
	    # copy the captures before the nested matches below reset them
	    my ($directive,$operands,$conv)=($1,$2,$3);
	    # a leading 0 means octal/hex notation, anything else is decimal
	    my @values=map(/^0/ ? oct($_) : int($_),split(/,\s*/,$operands));
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($directive eq "long") {
		# each 32-bit word becomes four bytes, most significant first
		foreach my $word (@values) {
		    push @bytes,($word>>24)&0xff,($word>>16)&0xff,
				($word>>8)&0xff,$word&0xff;
		}
	    } else {
		@bytes=@values;
	    }

	    # little-endian conversion; rows tagged with anything else
	    # (e.g. ?asis) are emitted unchanged
	    if ($flavour =~ /le$/) {
		if    ($conv =~ /\?inv/) { @bytes=map($_^0xf,@bytes); }
		elsif ($conv =~ /\?rev/) { @bytes=reverse(@bytes);    }
	    }

	    #emit
	    print ".byte\t",join(',',map(sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...  Only the first substitution that
	# matches is applied to a line.
	if ($flavour =~ /le$/) {	# little-endian
	    s/le\?//		or
	    s/be\?/#be#/	or
	    s/\?lvsr/lvsl/	or
	    s/\?lvsl/lvsr/	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/ or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/ or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/;
	} else {			# big-endian
	    s/le\?/#le#/	or
	    s/be\?//		or
	    s/\?([a-z]+)/$1/;
	}

	print $_,"\n";
}

# STDOUT is a pipe to the translator: close() fails on unflushed write
# errors and when the child exits with a non-zero status, so it must be
# checked or a broken translation would pass as success.
close STDOUT or die "error closing STDOUT: $!";
586