#! /usr/bin/env perl
# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


######################################################################
## Constant-time SSSE3 AES core implementation.
## version 0.1
##
## By Mike Hamburg (Stanford University), 2009
## Public domain.
##
## For details see http://shiftleft.org/papers/vector_aes/ and
## http://crypto.stanford.edu/vpaes/.

# CBC encrypt/decrypt performance in cycles per byte processed with
# 128-bit key.
#
#		aes-ppc.pl		this
# PPC74x0/G4e	35.5/52.1/(23.8)	11.9(*)/15.4
# PPC970/G5	37.9/55.0/(28.5)	22.2/28.5
# POWER6	42.7/54.3/(28.2)	63.0/92.8(**)
# POWER7	32.3/42.9/(18.4)	18.5/23.3
#
# (*)	This is ~10% worse than reported in paper. The reason is
#	twofold. This module doesn't make any assumption about
#	key schedule (or data for that matter) alignment and handles
#	it in-line. Secondly it, being transliterated from
#	vpaes-x86_64.pl, relies on "nested inversion" better suited
#	for Intel CPUs.
# (**)	Inadequate POWER6 performance is due to astronomic AltiVec
#	latency, 9 cycles per simple logical operation.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Pick the pointer size and the matching store-with-update, load, store
# and unsigned-compare mnemonics from the flavour string.  A flavour that
# mentions neither 64 nor 32 is rejected.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
} else { die "nonsense $flavour"; }

$sp="r1";
$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload

# Look for ppc-xlate.pl next to this script first, then in
# ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  The check uses low-precedence "or": with "||" the die
# would attach to the (always true) command string and a failed open
# would pass silently.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

# Constant table emitted below.  Every .long row is 16 bytes, so the
# byte offsets from _vpaes_consts (used as literal displacements by the
# preheat routines) are:
#
#   Lk_mc_forward 0x000   Lk_mc_backward 0x040   Lk_sr     0x080
#   Lk_inv  0x0c0   Lk_ipt  0x0e0   Lk_sbo  0x100   Lk_sb1  0x120
#   Lk_sb2  0x140   Lk_dipt 0x160   Lk_dsbo 0x180   Lk_dsb9 0x1a0
#   Lk_dsbd 0x1c0   Lk_dsbb 0x1e0   Lk_dsbe 0x200   Lk_dksd 0x220
#   Lk_dksb 0x240   Lk_dkse 0x260   Lk_dks9 0x280   Lk_rcon 0x2a0
#   Lk_s63  0x2b0   Lk_opt  0x2c0   Lk_deskew 0x2e0
#
# NOTE(review): the trailing ?inv / ?rev / ?asis tags are not assembler
# syntax; presumably a post-processing pass later in this file rewrites
# these rows per endianness -- confirm before changing the row layout
# ("value, value, ... <whitespace>?tag" with nothing after the tag).
$code.=<<___;
.machine	"any"

.text

.align	7	# totally strategic alignment
_vpaes_consts:
Lk_mc_forward:	# mc_forward
	.long	0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c	?inv
	.long	0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300	?inv
	.long	0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704	?inv
	.long	0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08	?inv
Lk_mc_backward:	# mc_backward
	.long	0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e	?inv
	.long	0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a	?inv
	.long	0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506	?inv
	.long	0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102	?inv
Lk_sr:		# sr
	.long	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f	?inv
	.long	0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b	?inv
	.long	0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07	?inv
	.long	0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603	?inv

##
## "Hot" constants
##
Lk_inv:		# inv, inva
	.long	0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704	?rev
	.long	0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03	?rev
Lk_ipt:		# input transform (lo, hi)
	.long	0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca	?rev
	.long	0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd	?rev
Lk_sbo:		# sbou, sbot
	.long	0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15	?rev
	.long	0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e	?rev
Lk_sb1:		# sb1u, sb1t
	.long	0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b	?rev
	.long	0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5	?rev
Lk_sb2:		# sb2u, sb2t
	.long	0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2	?rev
	.long	0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e	?rev

##
##  Decryption stuff
##
Lk_dipt:	# decryption input transform
	.long	0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15	?rev
	.long	0x00650560, 0xe683e386, 0x94f191f4, 0x72177712	?rev
Lk_dsbo:	# decryption sbox final output
	.long	0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7	?rev
	.long	0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca	?rev
Lk_dsb9:	# decryption sbox output *9*u, *9*t
	.long	0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca	?rev
	.long	0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72	?rev
Lk_dsbd:	# decryption sbox output *D*u, *D*t
	.long	0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5	?rev
	.long	0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129	?rev
Lk_dsbb:	# decryption sbox output *B*u, *B*t
	.long	0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660	?rev
	.long	0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3	?rev
Lk_dsbe:	# decryption sbox output *E*u, *E*t
	.long	0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222	?rev
	.long	0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794	?rev

##
##  Key schedule constants
##
Lk_dksd:	# decryption key schedule: invskew x*D
	.long	0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007	?rev
	.long	0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f	?rev
Lk_dksb:	# decryption key schedule: invskew x*B
	.long	0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603	?rev
	.long	0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9	?rev
Lk_dkse:	# decryption key schedule: invskew x*E + 0x63
	.long	0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553	?rev
	.long	0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd	?rev
Lk_dks9:	# decryption key schedule: invskew x*9
	.long	0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a	?rev
	.long	0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b	?rev

Lk_rcon:	# rcon
	.long	0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70	?asis
Lk_s63:
	.long	0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b	?asis

Lk_opt:		# output transform
	.long	0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7	?rev
	.long	0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1	?rev
Lk_deskew:	# deskew tables: inverts the sbox's "skew"
# Lk_deskew rows (the label itself sits on the line above)
	.long	0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d	?rev
	.long	0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128	?rev
.align	5
Lconsts:
	# Returns the address of _vpaes_consts in r12, computed PC-relative.
	# bcl 20,31 puts the address of the following mflr r12 into LR; that
	# instruction sits 0x300 bytes of table (48 rows of 16 bytes) plus
	# two 4-byte instructions past _vpaes_consts, hence -0x308.
	# The caller's LR is parked in r0 and restored before returning.
	mflr	r0
	bcl	20,31,\$+4
	mflr	r12	#vvvvv distance between "." and _vpaes_consts
	addi	r12,r12,-0x308
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz  "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
.align	6
___

# Vector registers used for unaligned input/output/key handling.
my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
{
# Argument registers: r3 = input, r4 = output, r5 = key schedule.
my ($inp,$out,$key) = map("r$_",(3..5));

# Table registers filled by the preheat routines.  v16-v23 carry two sets
# of names: the encryption names (sb1*, sb2*) alias the first four of the
# decryption names (sb9*, sbd*, sbb*, sbe*).
my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));

$code.=<<___;
##
##  _aes_preheat
##
##  Fills register %r10 -> .aes_consts (so you can -fPIC)
##  and %xmm9-%xmm15 as specified below.
##
##  PPC port: the names above are from the x86_64 original.  Here r12
##  receives the table address from Lconsts, v7/v8/v9 are set to the
##  byte splats 0x00/0x04/0x0f, and v10-v19 are loaded from the table
##  rows at offsets 0xc0-0x150.  The caller's LR is held in r8 across
##  the call and r8-r11 are then reused as offset registers.
##
.align	4
_vpaes_encrypt_preheat:
	mflr	r8
	bl	Lconsts
	mtlr	r8
	li	r11, 0xc0		# Lk_inv
	li	r10, 0xd0
	li	r9,  0xe0		# Lk_ipt
	li	r8,  0xf0
	vxor	v7, v7, v7		# 0x00..00
	vspltisb	v8,4		# 0x04..04
	vspltisb	v9,0x0f		# 0x0f..0f
	lvx	$invlo, r12, r11
	li	r11, 0x100
	lvx	$invhi, r12, r10
	li	r10, 0x110
	lvx	$iptlo, r12, r9
	li	r9,  0x120
	lvx	$ipthi, r12, r8
	li	r8,  0x130
	lvx	$sbou, r12, r11
	li	r11, 0x140
	lvx	$sbot, r12, r10
	li	r10, 0x150
	lvx	$sb1u, r12, r9
	lvx	$sb1t, r12, r8
	lvx	$sb2u, r12, r11
	lvx	$sb2t, r12, r10
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  _aes_encrypt_core
##
##  AES-encrypt %xmm0.
##
##  Inputs:
##     %xmm0 = input
##     %xmm9-%xmm15 as in _vpaes_preheat
##    (%rdx) = scheduled keys
##
##  Output in %xmm0
##  Clobbers  %xmm1-%xmm6, %r9, %r10, %r11, %rax
##
##
##  PPC port: register names above and in the trailing comments are from
##  the x86_64 original.  Here the block is in v0 on entry and on exit,
##  the key schedule pointer is r5 with the round count read from byte
##  offset 240, r12 must point at _vpaes_consts, and v7-v19 must hold
##  what _vpaes_encrypt_preheat loads.  Round keys are fetched with lvx
##  and realigned through the key permute vector, so the schedule may be
##  unaligned.  Uses r8-r11, CTR and v1-v6 as scratch.
##
.align 5
_vpaes_encrypt_core:
	lwz	r8, 240($key)		# pull rounds
	li	r9, 16
	lvx	v5, 0, $key		# vmovdqu (%r9), %xmm5 # round0 key
	li	r11, 0x10
	lvx	v6, r9, $key
	addi	r9, r9, 16
	?vperm	v5, v5, v6, $keyperm	# align round key
	addi	r10, r11, 0x40
	vsrb	v1, v0, v8		# vpsrlb \$4, %xmm0, %xmm0
	vperm	v0, $iptlo, $iptlo, v0	# vpshufb %xmm1, %xmm2, %xmm1
	vperm	v1, $ipthi, $ipthi, v1	# vpshufb %xmm0, %xmm3, %xmm2
	vxor	v0, v0, v5		# vpxor %xmm5, %xmm1, %xmm0
	vxor	v0, v0, v1		# vpxor %xmm2, %xmm0, %xmm0
	mtctr	r8
	b	Lenc_entry

.align 4
Lenc_loop:
	# middle of middle round
	vperm	v4, $sb1t, v7, v2	# vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
	lvx	v1, r12, r11		# vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
	addi	r11, r11, 16
	vperm	v0, $sb1u, v7, v3	# vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
	vxor	v4, v4, v5		# vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
	andi.	r11, r11, 0x30		# and \$0x30, %r11 # ... mod 4
	vperm	v5, $sb2t, v7, v2	# vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u
	vxor	v0, v0, v4		# vpxor %xmm4, %xmm0, %xmm0 # 0 = A
	vperm	v2, $sb2u, v7, v3	# vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
	lvx	v4, r12, r10		# vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
	addi	r10, r11, 0x40
	vperm	v3, v0, v7, v1		# vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
	vxor	v2, v2, v5		# vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
	vperm	v0, v0, v7, v4		# vpshufb %xmm4, %xmm0, %xmm0 # 3 = D
	vxor	v3, v3, v2		# vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B
	vperm	v4, v3, v7, v1		# vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C
	vxor	v0, v0, v3		# vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D
	vxor	v0, v0, v4		# vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D

Lenc_entry:
	# top of round
	vsrb	v1, v0, v8		# vpsrlb \$4, %xmm0, %xmm0 # 1 = i
	vperm	v5, $invhi, $invhi, v0	# vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k
	vxor	v0, v0, v1		# vpxor %xmm0, %xmm1, %xmm1 # 0 = j
	vperm	v3, $invlo, $invlo, v1	# vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
	vperm	v4, $invlo, $invlo, v0	# vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
	vand	v0, v0, v9
	vxor	v3, v3, v5		# vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
	vxor	v4, v4, v5		# vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
	vperm	v2, $invlo, v7, v3	# vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
	vmr	v5, v6
	lvx	v6, r9, $key		# vmovdqu (%r9), %xmm5
	vperm	v3, $invlo, v7, v4	# vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
	addi	r9, r9, 16
	vxor	v2, v2, v0		# vpxor %xmm1, %xmm2, %xmm2 # 2 = io
	?vperm	v5, v5, v6, $keyperm	# align round key
	vxor	v3, v3, v1		# vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
	bdnz	Lenc_loop

	# middle of last round
	addi	r10, r11, 0x80
					# vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
					# vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
	vperm	v4, $sbou, v7, v2	# vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
	lvx	v1, r12, r10		# vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
	vperm	v0, $sbot, v7, v3	# vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
	vxor	v4, v4, v5		# vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
	vxor	v0, v0, v4		# vpxor %xmm4, %xmm0, %xmm0 # 0 = A
	vperm	v0, v0, v7, v1		# vpshufb %xmm1, %xmm0, %xmm0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  .vpaes_encrypt: encrypt the 16-byte block at r3 into r4 using the
##  key schedule at r5.  Builds a stack frame, saves v20-v31 and VRSAVE,
##  loads the tables, reads the (possibly unaligned) input, runs the
##  core, then stores the result with one stvx when the output pointer
##  is 16-byte aligned and byte by byte with stvebx otherwise.  LR is
##  held in r6 and VRSAVE in r7 for the duration of the call.
##
.globl	.vpaes_encrypt
.align	5
.vpaes_encrypt:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r6
	mfspr	r7, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r7,`$FRAME-4`($sp)	# save vrsave
	li	r0, -1
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
	mtspr	256, r0			# preserve all AltiVec registers

	bl	_vpaes_encrypt_preheat

	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
	lvx	v0, 0, $inp
	addi	$inp, $inp, 15		# 15 is not a typo
	 ?lvsr	$outperm, 0, $out
	?lvsl	$keyperm, 0, $key	# prepare for unaligned access
	lvx	$inptail, 0, $inp	# redundant in aligned case
	?vperm	v0, v0, $inptail, $inpperm

	bl	_vpaes_encrypt_core

	andi.	r8, $out, 15
	li	r9, 16
	beq	Lenc_out_aligned

	vperm	v0, v0, v0, $outperm	# rotate right/left
	mtctr	r9
Lenc_out_unaligned:
	stvebx	v0, 0, $out
	addi	$out, $out, 1
	bdnz	Lenc_out_unaligned
	b	Lenc_done

.align	4
Lenc_out_aligned:
	stvx	v0, 0, $out
Lenc_done:

	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtlr	r6
	mtspr	256, r7			# restore vrsave
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,0,3,0
# closing words of .vpaes_encrypt, continued from the line above
	.long	0
.size	.vpaes_encrypt,.-.vpaes_encrypt

##
##  _vpaes_decrypt_preheat: decryption counterpart of the encryption
##  preheat.  Gets the table address into r12 via Lconsts, sets v7/v8/v9
##  to the byte splats 0x00/0x04/0x0f, and loads v10-v23 from the table
##  rows at offsets 0xc0-0xd0 (Lk_inv) and 0x160-0x210 (decryption
##  tables).  The caller's LR is held in r8 across the call and r8-r11
##  are then reused as offset registers.
##
.align	4
_vpaes_decrypt_preheat:
	mflr	r8
	bl	Lconsts
	mtlr	r8
	li	r11, 0xc0		# Lk_inv
	li	r10, 0xd0
	li	r9,  0x160		# Ldipt
	li	r8,  0x170
	vxor	v7, v7, v7		# 0x00..00
	vspltisb	v8,4		# 0x04..04
	vspltisb	v9,0x0f		# 0x0f..0f
	lvx	$invlo, r12, r11
	li	r11, 0x180
	lvx	$invhi, r12, r10
	li	r10, 0x190
	lvx	$iptlo, r12, r9
	li	r9,  0x1a0
	lvx	$ipthi, r12, r8
	li	r8,  0x1b0
	lvx	$sbou, r12, r11
	li	r11, 0x1c0
	lvx	$sbot, r12, r10
	li	r10, 0x1d0
	lvx	$sb9u, r12, r9
	li	r9,  0x1e0
	lvx	$sb9t, r12, r8
	li	r8,  0x1f0
	lvx	$sbdu, r12, r11
	li	r11, 0x200
	lvx	$sbdt, r12, r10
	li	r10, 0x210
	lvx	$sbbu, r12, r9
	lvx	$sbbt, r12, r8
	lvx	$sbeu, r12, r11
	lvx	$sbet, r12, r10
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
##  Decryption core
##
##  Same API as encryption core.
##
##  PPC port: block in v0, key schedule pointer in r5 with the round
##  count at byte offset 240, table base in r12, tables in v7-v23 as
##  loaded by _vpaes_decrypt_preheat.  x86 register names in the
##  trailing comments refer to the original this was transliterated from.
##
.align	4
_vpaes_decrypt_core:
	lwz	r8, 240($key)		# pull rounds
	li	r9, 16
	lvx	v5, 0, $key		# vmovdqu (%r9), %xmm4 # round0 key
	li	r11, 0x30
	lvx	v6, r9, $key
	addi	r9, r9, 16
	?vperm	v5, v5, v6, $keyperm	# align round key
	vsrb	v1, v0, v8		# vpsrlb \$4, %xmm0, %xmm0
	vperm	v0, $iptlo, $iptlo, v0	# vpshufb %xmm1, %xmm2, %xmm2
	vperm	v1, $ipthi, $ipthi, v1	# vpshufb %xmm0, %xmm1, %xmm0
	vxor	v0, v0, v5		# vpxor %xmm4, %xmm2, %xmm2
	vxor	v0, v0, v1		# vpxor %xmm2, %xmm0, %xmm0
	mtctr	r8
	b	Ldec_entry

.align 4
Ldec_loop:
#
# Inverse mix columns
#
	lvx	v0, r12, r11		# v5 and v0 are flipped
					# vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
					# vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
	vperm	v4, $sb9u, v7, v2	# vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u
	subi	r11, r11, 16
	vperm	v1, $sb9t, v7, v3	# vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t
	andi.	r11, r11, 0x30
	vxor	v5, v5, v4		# vpxor %xmm4, %xmm0, %xmm0
					# vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu
	vxor	v5, v5, v1		# vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
					# vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt

	vperm	v4, $sbdu, v7, v2	# vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu
	vperm	v5, v5, v7, v0		# vpshufb %xmm5, %xmm0, %xmm0 # MC ch
	vperm	v1, $sbdt, v7, v3	# vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt
	vxor	v5, v5, v4		# vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
					# vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu
	vxor	v5, v5, v1		# vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
					# vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt

	vperm	v4, $sbbu, v7, v2	# vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu
	vperm	v5, v5, v7, v0		# vpshufb %xmm5, %xmm0, %xmm0 # MC ch
	vperm	v1, $sbbt, v7, v3	# vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt
	vxor	v5, v5, v4		# vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
					# vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu
	vxor	v5, v5, v1		# vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
					# vmovdqa 0x50(%r10), %xmm1 # 0 : sbet

	vperm	v4, $sbeu, v7, v2	# vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu
	vperm	v5, v5, v7, v0		# vpshufb %xmm5, %xmm0, %xmm0 # MC ch
	vperm	v1, $sbet, v7, v3	# vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet
	vxor	v0, v5, v4		# vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
	vxor	v0, v0, v1		# vpxor %xmm1, %xmm0, %xmm0 # 0 = ch

Ldec_entry:
509e1051a39Sopenharmony_ci # top of round 510e1051a39Sopenharmony_ci vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i 511e1051a39Sopenharmony_ci vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k 512e1051a39Sopenharmony_ci vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j 513e1051a39Sopenharmony_ci vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i 514e1051a39Sopenharmony_ci vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j 515e1051a39Sopenharmony_ci vand v0, v0, v9 516e1051a39Sopenharmony_ci vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k 517e1051a39Sopenharmony_ci vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k 518e1051a39Sopenharmony_ci vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak 519e1051a39Sopenharmony_ci vmr v5, v6 520e1051a39Sopenharmony_ci lvx v6, r9, $key # vmovdqu (%r9), %xmm0 521e1051a39Sopenharmony_ci vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak 522e1051a39Sopenharmony_ci addi r9, r9, 16 523e1051a39Sopenharmony_ci vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io 524e1051a39Sopenharmony_ci ?vperm v5, v5, v6, $keyperm # align round key 525e1051a39Sopenharmony_ci vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo 526e1051a39Sopenharmony_ci bdnz Ldec_loop 527e1051a39Sopenharmony_ci 528e1051a39Sopenharmony_ci # middle of last round 529e1051a39Sopenharmony_ci addi r10, r11, 0x80 530e1051a39Sopenharmony_ci # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou 531e1051a39Sopenharmony_ci vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou 532e1051a39Sopenharmony_ci # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot 533e1051a39Sopenharmony_ci lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 534e1051a39Sopenharmony_ci vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t 535e1051a39Sopenharmony_ci vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k 536e1051a39Sopenharmony_ci vxor v0, v1, v4 
# vpxor %xmm4, %xmm1, %xmm0 # 0 = A
	vperm	v0, v0, v7, v2		# vpshufb %xmm2, %xmm0, %xmm0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
## .vpaes_decrypt
##
## Single-block decryption entry point.  Allocates a stack frame,
## spills v20-v31 and vrsave, loads one 16-byte block from the
## (possibly unaligned) input pointer, runs _vpaes_decrypt_core and
## stores the result.  When the output pointer is not 16-byte aligned
## the block is rotated and written one byte at a time.
##
.globl	.vpaes_decrypt
.align	5
.vpaes_decrypt:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r6
	mfspr	r7, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r7,`$FRAME-4`($sp)	# save vrsave
	li	r0, -1
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
	mtspr	256, r0			# preserve all AltiVec registers

	bl	_vpaes_decrypt_preheat

	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
	lvx	v0, 0, $inp
	addi	$inp, $inp, 15		# 15 is not a typo
	?lvsr	$outperm, 0, $out
	?lvsl	$keyperm, 0, $key
	lvx	$inptail, 0, $inp	# redundant in aligned case
	?vperm	v0, v0, $inptail, $inpperm

	bl	_vpaes_decrypt_core

	andi.	r8, $out, 15
	li	r9, 16
	beq	Ldec_out_aligned

	vperm	v0, v0, v0, $outperm	# rotate right/left
	mtctr	r9
Ldec_out_unaligned:
	stvebx	v0, 0, $out
	addi	$out, $out, 1
	bdnz	Ldec_out_unaligned
	b	Ldec_done

.align	4
Ldec_out_aligned:
	stvx	v0, 0, $out
Ldec_done:

	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtlr	r6
	mtspr	256, r7			# restore vrsave
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_decrypt,.-.vpaes_decrypt

##
## .vpaes_cbc_encrypt
##
## CBC entry point.  On entry r5 holds the length, r6 the key pointer,
## r7 the iv pointer and r8 the direction flag (zero selects the
## decrypt path).  Lengths below 16 return immediately; otherwise the
## length is rounded down to a multiple of 16 and kept in r30.  The
## alignment of the output pointer is parked in cr1 so that the
## unaligned head and tail bytes can be written one at a time.  The
## final iv is written back through the iv pointer before returning.
##
.globl	.vpaes_cbc_encrypt
.align	5
.vpaes_cbc_encrypt:
	${UCMP}i r5,16
	bltlr-

	$STU	$sp,-`($FRAME+2*$SIZE_T)`($sp)
	mflr	r0
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mfspr	r12, 256
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r12,`$FRAME-4`($sp)	# save vrsave
	$PUSH	r30,`$FRAME+$SIZE_T*0`($sp)
	$PUSH	r31,`$FRAME+$SIZE_T*1`($sp)
	li	r9, -16
	$PUSH	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)

	and	r30, r5, r9		# copy length&-16
	andi.	r9, $out, 15		# is $out aligned?
	mr	r5, r6			# copy pointer to key
	mr	r31, r7			# copy pointer to iv
	li	r6, -1
	mcrf	cr1, cr0		# put aside $out alignment flag
	mr	r7, r12			# copy vrsave
	mtspr	256, r6			# preserve all AltiVec registers

	lvx	v24, 0, r31		# load [potentially unaligned] iv
	li	r9, 15
	?lvsl	$inpperm, 0, r31
	lvx	v25, r9, r31
	?vperm	v24, v24, v25, $inpperm

	cmpwi	r8, 0			# test direction
	neg	r8, $inp		# prepare for unaligned access
	vxor	v7, v7, v7
	?lvsl	$keyperm, 0, $key
	?lvsr	$outperm, 0, $out
	?lvsr	$inpperm, 0, r8		# -$inp
	vnor	$outmask, v7, v7	# 0xff..ff
	lvx	$inptail, 0, $inp
	?vperm	$outmask, v7, $outmask, $outperm
	addi	$inp, $inp, 15		# 15 is not a typo

	beq	Lcbc_decrypt

	bl	_vpaes_encrypt_preheat
	li	r0, 16

	beq	cr1, Lcbc_enc_loop	# $out is aligned

	vmr	v0, $inptail
	lvx	$inptail, 0, $inp
	addi	$inp, $inp, 16
	?vperm	v0, v0, $inptail, $inpperm
	vxor	v0, v0, v24		# ^= iv

	bl	_vpaes_encrypt_core

	andi.	r8, $out, 15
	vmr	v24, v0			# put aside iv
	sub	r9, $out, r8
	vperm	$outhead, v0, v0, $outperm	# rotate right/left

Lcbc_enc_head:
	stvebx	$outhead, r8, r9
	cmpwi	r8, 15
	addi	r8, r8, 1
	bne	Lcbc_enc_head

	sub.	r30, r30, r0		# len -= 16
	addi	$out, $out, 16
	beq	Lcbc_unaligned_done

Lcbc_enc_loop:
	vmr	v0, $inptail
	lvx	$inptail, 0, $inp
	addi	$inp, $inp, 16
	?vperm	v0, v0, $inptail, $inpperm
	vxor	v0, v0, v24		# ^= iv

	bl	_vpaes_encrypt_core

	vmr	v24, v0			# put aside iv
	sub.	r30, r30, r0		# len -= 16
	vperm	v0, v0, v0, $outperm	# rotate right/left
	vsel	v1, $outhead, v0, $outmask
	vmr	$outhead, v0
	stvx	v1, 0, $out
	addi	$out, $out, 16
	bne	Lcbc_enc_loop

	b	Lcbc_done

.align	5
Lcbc_decrypt:
	bl	_vpaes_decrypt_preheat
	li	r0, 16

	beq	cr1, Lcbc_dec_loop	# $out is aligned

	vmr	v0, $inptail
	lvx	$inptail, 0, $inp
	addi	$inp, $inp, 16
	?vperm	v0, v0, $inptail, $inpperm
	vmr	v25, v0			# put aside input

	bl	_vpaes_decrypt_core

	andi.	r8, $out, 15
	vxor	v0, v0, v24		# ^= iv
	vmr	v24, v25
	sub	r9, $out, r8
	vperm	$outhead, v0, v0, $outperm	# rotate right/left

Lcbc_dec_head:
	stvebx	$outhead, r8, r9
	cmpwi	r8, 15
	addi	r8, r8, 1
	bne	Lcbc_dec_head

	sub.	r30, r30, r0		# len -= 16
	addi	$out, $out, 16
	beq	Lcbc_unaligned_done

Lcbc_dec_loop:
	vmr	v0, $inptail
	lvx	$inptail, 0, $inp
	addi	$inp, $inp, 16
	?vperm	v0, v0, $inptail, $inpperm
	vmr	v25, v0			# put aside input

	bl	_vpaes_decrypt_core

	vxor	v0, v0, v24		# ^= iv
	vmr	v24, v25
	sub.	r30, r30, r0		# len -= 16
	vperm	v0, v0, v0, $outperm	# rotate right/left
	vsel	v1, $outhead, v0, $outmask
	vmr	$outhead, v0
	stvx	v1, 0, $out
	addi	$out, $out, 16
	bne	Lcbc_dec_loop

Lcbc_done:
	beq	cr1, Lcbc_write_iv	# $out is aligned

Lcbc_unaligned_done:
	andi.	r8, $out, 15
	sub	$out, $out, r8
	li	r9, 0
Lcbc_tail:
	stvebx	$outhead, r9, $out
	addi	r9, r9, 1
	cmpw	r9, r8
	bne	Lcbc_tail

Lcbc_write_iv:
	neg	r8, r31			# write [potentially unaligned] iv
	li	r10, 4
	?lvsl	$outperm, 0, r8
	li	r11, 8
	li	r12, 12
	vperm	v24, v24, v24, $outperm	# rotate right/left
	stvewx	v24, 0, r31		# ivp is at least 32-bit aligned
	stvewx	v24, r10, r31
	stvewx	v24, r11, r31
	stvewx	v24, r12, r31

	mtspr	256, r7			# restore vrsave
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
Lcbc_abort:
	$POP	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
	$POP	r30,`$FRAME+$SIZE_T*0`($sp)
	$POP	r31,`$FRAME+$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,`$FRAME+$SIZE_T*2`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,2,6,0
	.long	0
.size	.vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
___
}
{
# Register map for the key-schedule routines emitted below.
my ($inp,$bits,$out)=map("r$_",(3..5));
my $dir="cr1";
my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));

$code.=<<___;
########################################################
##                                                    ##
##                  AES key schedule                  ##
##                                                    ##
########################################################
##
## _vpaes_key_preheat
##
## Loads the key-schedule lookup tables into v10-v26 using fixed
## offsets from r12, and sets v8 to 0x04 in every byte and v9 to
## zero.  r12 is presumably left pointing at the constant table by
## Lconsts, which is defined elsewhere in this file.  The link
## register is preserved across that call through r8.
##
.align	4
_vpaes_key_preheat:
	mflr	r8
	bl	Lconsts
	mtlr	r8
	li	r11, 0xc0		# Lk_inv
	li	r10, 0xd0
	li	r9, 0xe0		# L_ipt
	li	r8, 0xf0

	vspltisb	v8,4		# 0x04..04
	vxor	v9,v9,v9		# 0x00..00
	lvx	$invlo, r12, r11	# Lk_inv
	li	r11, 0x120
	lvx	$invhi, r12, r10
	li	r10, 0x130
	lvx	$iptlo, r12, r9		# Lk_ipt
	li	r9, 0x220
	lvx	$ipthi, r12, r8
	li	r8, 0x230

	lvx	v14, r12, r11		# Lk_sb1
	li	r11, 0x240
	lvx	v15, r12, r10
	li	r10, 0x250

	lvx	v16, r12, r9		# Lk_dksd
	li	r9, 0x260
	lvx	v17, r12, r8
	li	r8, 0x270
	lvx	v18, r12, r11		# Lk_dksb
	li	r11, 0x280
	lvx	v19, r12, r10
	li	r10, 0x290
	lvx	v20, r12, r9		# Lk_dkse
	li	r9, 0x2a0
	lvx	v21, r12, r8
	li	r8, 0x2b0
	lvx	v22, r12, r11		# Lk_dks9
	lvx	v23, r12, r10

	lvx	v24, r12, r9		# Lk_rcon
	lvx	v25, 0, r12		# Lk_mc_forward[0]
	lvx	v26, r12, r8		# Lks63
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
## _vpaes_schedule_core
##
## Key-schedule driver.  Per the register map above, r3 is the key
## input pointer, r4 the key size in bits and r5 the output pointer;
## cr1 carries the direction (not-equal selects the decrypting path).
## The return address is held in r7 for the whole routine.  After
## the zeroth round key is written, control dispatches on the bit
## count to the 128-, 192- or 256-bit schedule, each of which ends in
## Lschedule_mangle_last.  v0-v7 are cleared before returning.
##
.align	4
_vpaes_schedule_core:
	mflr	r7

	bl	_vpaes_key_preheat	# load the tables

	#lvx	v0, 0, $inp		# vmovdqu (%rdi), %xmm0 # load key (unaligned)
	neg	r8, $inp		# prepare for unaligned access
	lvx	v0, 0, $inp
	addi	$inp, $inp, 15		# 15 is not typo
	?lvsr	$inpperm, 0, r8		# -$inp
	lvx	v6, 0, $inp		# v6 serves as inptail
	addi	$inp, $inp, 8
	?vperm	v0, v0, v6, $inpperm

	# input transform
	vmr	v3, v0			# vmovdqa %xmm0, %xmm3
	bl	_vpaes_schedule_transform
	vmr	v7, v0			# vmovdqa %xmm0, %xmm7

	bne	$dir, Lschedule_am_decrypting

	# encrypting, output zeroth round key after transform
	li	r8, 0x30		# mov \$0x30,%r8d
	li	r9, 4
	li	r10, 8
	li	r11, 12

	?lvsr	$outperm, 0, $out	# prepare for unaligned access
	vnor	$outmask, v9, v9	# 0xff..ff
	?vperm	$outmask, v9, $outmask, $outperm

	#stvx	v0, 0, $out		# vmovdqu %xmm0, (%rdx)
	vperm	$outhead, v0, v0, $outperm	# rotate right/left
	stvewx	$outhead, 0, $out	# some are superfluous
	stvewx	$outhead, r9, $out
	stvewx	$outhead, r10, $out
	addi	r10, r12, 0x80		# lea .Lk_sr(%rip),%r10
	stvewx	$outhead, r11, $out
	b	Lschedule_go

Lschedule_am_decrypting:
	srwi	r8, $bits, 1		# shr \$1,%r8d
	andi.	r8, r8, 32		# and \$32,%r8d
	xori	r8, r8, 32		# xor \$32,%r8d # nbits==192?0:32
	addi	r10, r12, 0x80		# lea .Lk_sr(%rip),%r10
	# decrypting, output zeroth round key after shiftrows
	lvx	v1, r8, r10		# vmovdqa (%r8,%r10), %xmm1
	li	r9, 4
	li	r10, 8
	li	r11, 12
	vperm	v4, v3, v3, v1		# vpshufb %xmm1, %xmm3, %xmm3

	neg	r0, $out		# prepare for unaligned access
	?lvsl	$outperm, 0, r0
	vnor	$outmask, v9, v9	# 0xff..ff
	?vperm	$outmask, $outmask, v9, $outperm

	#stvx	v4, 0, $out		# vmovdqu %xmm3, (%rdx)
	vperm	$outhead, v4, v4, $outperm	# rotate right/left
	stvewx	$outhead, 0, $out	# some are superfluous
	stvewx	$outhead, r9, $out
	stvewx	$outhead, r10, $out
	addi	r10, r12, 0x80		# lea .Lk_sr(%rip),%r10
	stvewx	$outhead, r11, $out
	addi	$out, $out, 15		# 15 is not typo
	xori	r8, r8, 0x30		# xor \$0x30, %r8

Lschedule_go:
	cmplwi	$bits, 192		# cmp \$192, %esi
	bgt	Lschedule_256
	beq	Lschedule_192
	# 128: fall through

##
## .schedule_128
##
## 128-bit specific part of key schedule.
##
## This schedule is really simple, because all its parts
## are accomplished by the subroutines.
##
Lschedule_128:
	li	r0, 10			# mov \$10, %esi
	mtctr	r0

Loop_schedule_128:
	bl	_vpaes_schedule_round
	bdz	Lschedule_mangle_last	# dec %esi
	bl	_vpaes_schedule_mangle	# write output
	b	Loop_schedule_128

##
## .aes_schedule_192
##
## 192-bit specific part of key schedule.
##
## The main body of this schedule is the same as the 128-bit
## schedule, but with more smearing. The long, high side is
## stored in %xmm7 as before, and the short, low side is in
## the high bits of %xmm6.
##
## This schedule is somewhat nastier, however, because each
## round produces 192 bits of key material, or 1.5 round keys.
## Therefore, on each cycle we do 2 rounds and produce 3 round
## keys.
##
.align	4
Lschedule_192:
	li	r0, 4			# mov \$4, %esi
	lvx	v0, 0, $inp
	?vperm	v0, v6, v0, $inpperm
	?vsldoi	v0, v3, v0, 8		# vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
	bl	_vpaes_schedule_transform	# input transform
	?vsldoi	v6, v0, v9, 8
	?vsldoi	v6, v9, v6, 8		# clobber "low" side with zeros
	mtctr	r0

Loop_schedule_192:
	bl	_vpaes_schedule_round
	?vsldoi	v0, v6, v0, 8		# vpalignr \$8,%xmm6,%xmm0,%xmm0
	bl	_vpaes_schedule_mangle	# save key n
	bl	_vpaes_schedule_192_smear
	bl	_vpaes_schedule_mangle	# save key n+1
	bl	_vpaes_schedule_round
	bdz	Lschedule_mangle_last	# dec %esi
	bl	_vpaes_schedule_mangle	# save key n+2
	bl	_vpaes_schedule_192_smear
	b	Loop_schedule_192

##
## .aes_schedule_256
##
## 256-bit specific part of key schedule.
##
## The structure here is very similar to the 128-bit
## schedule, but with an additional "low side" in
## %xmm6. The low side's rounds are the same as the
## high side's, except no rcon and no rotation.
##
.align	4
Lschedule_256:
	li	r0, 7			# mov \$7, %esi
	addi	$inp, $inp, 8
	lvx	v0, 0, $inp		# vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
	?vperm	v0, v6, v0, $inpperm
	bl	_vpaes_schedule_transform	# input transform
	mtctr	r0

Loop_schedule_256:
	bl	_vpaes_schedule_mangle	# output low result
	vmr	v6, v0			# vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6

	# high round
	bl	_vpaes_schedule_round
	bdz	Lschedule_mangle_last	# dec %esi
	bl	_vpaes_schedule_mangle

	# low round. swap xmm7 and xmm6
	?vspltw	v0, v0, 3		# vpshufd \$0xFF, %xmm0, %xmm0
	vmr	v5, v7			# vmovdqa %xmm7, %xmm5
	vmr	v7, v6			# vmovdqa %xmm6, %xmm7
	bl	_vpaes_schedule_low_round
	vmr	v7, v5			# vmovdqa %xmm5, %xmm7

	b	Loop_schedule_256
##
## .aes_schedule_mangle_last
##
## Mangler for last round of key schedule
## Mangles %xmm0
## when encrypting, outputs out(%xmm0) ^ 63
## when decrypting, outputs unskew(%xmm0)
##
## Always called right before return... jumps to cleanup and exits
##
.align	4
Lschedule_mangle_last:
	# schedule last round key from xmm0
	li	r11, 0x2e0		# lea .Lk_deskew(%rip),%r11
	li	r9, 0x2f0
	bne	$dir, Lschedule_mangle_last_dec

	# encrypting
	lvx	v1, r8, r10		# vmovdqa (%r8,%r10),%xmm1
	li	r11, 0x2c0		# lea .Lk_opt(%rip), %r11 # prepare to output transform
	li	r9, 0x2d0		# prepare to output transform
	vperm	v0, v0, v0, v1		# vpshufb %xmm1, %xmm0, %xmm0 # output permute

	lvx	$iptlo, r11, r12	# reload $ipt
	lvx	$ipthi, r9, r12
	addi	$out, $out, 16		# add \$16, %rdx
	vxor	v0, v0, v26		# vpxor .Lk_s63(%rip), %xmm0, %xmm0
	bl	_vpaes_schedule_transform	# output transform

	#stvx	v0, r0, $out		# vmovdqu %xmm0, (%rdx) # save last key
	vperm	v0, v0, v0, $outperm	# rotate right/left
	li	r10, 4
	vsel	v2, $outhead, v0, $outmask
	li	r11, 8
	stvx	v2, 0, $out
	li	r12, 12
	stvewx	v0, 0, $out		# some (or all) are redundant
	stvewx	v0, r10, $out
	stvewx	v0, r11, $out
	stvewx	v0, r12, $out
	b	Lschedule_mangle_done

.align	4
Lschedule_mangle_last_dec:
	lvx	$iptlo, r11, r12	# reload $ipt
	lvx	$ipthi, r9, r12
	addi	$out, $out, -16		# add \$-16, %rdx
	vxor	v0, v0, v26		# vpxor .Lk_s63(%rip), %xmm0, %xmm0
	bl	_vpaes_schedule_transform	# output transform

	#stvx	v0, r0, $out		# vmovdqu %xmm0, (%rdx) # save last key
	addi	r9, $out, -15		# -15 is not typo
	vperm	v0, v0, v0, $outperm	# rotate right/left
	li	r10, 4
	vsel	v2, $outhead, v0, $outmask
	li	r11, 8
	stvx	v2, 0, $out
	li	r12, 12
	stvewx	v0, 0, r9		# some (or all) are redundant
	stvewx	v0, r10, r9
	stvewx	v0, r11, r9
	stvewx	v0, r12, r9


Lschedule_mangle_done:
	mtlr	r7
	# cleanup
	vxor	v0, v0, v0		# vpxor %xmm0, %xmm0, %xmm0
	vxor	v1, v1, v1		# vpxor %xmm1, %xmm1, %xmm1
	vxor	v2, v2, v2		# vpxor %xmm2, %xmm2, %xmm2
	vxor	v3, v3, v3		# vpxor %xmm3, %xmm3, %xmm3
	vxor	v4, v4, v4		# vpxor %xmm4, %xmm4, %xmm4
	vxor	v5, v5, v5		# vpxor %xmm5, %xmm5, %xmm5
	vxor	v6, v6, v6		# vpxor %xmm6, %xmm6, %xmm6
	vxor	v7, v7, v7		# vpxor %xmm7, %xmm7, %xmm7

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
## .aes_schedule_192_smear
##
## Smear the short, low side in the 192-bit key schedule.
##
## Inputs:
## %xmm7: high side, b a x y
## %xmm6: low side, d c 0 0
## %xmm13: 0
##
## Outputs:
## %xmm6: b+c+d b+c 0 0
## %xmm0: b+c+d b+c b a
##
## (Register names above follow the x86_64 original.  The code below
## works on v7, v6 and v0, uses v1 as scratch and takes its zero
## bytes from v9.)
##
.align	4
_vpaes_schedule_192_smear:
	?vspltw	v0, v7, 3
	?vsldoi	v1, v9, v6, 12		# vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
	?vsldoi	v0, v7, v0, 8		# vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
	vxor	v6, v6, v1		# vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
	vxor	v6, v6, v0		# vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
	vmr	v0, v6
	?vsldoi	v6, v6, v9, 8
	?vsldoi	v6, v9, v6, 8		# clobber low side with zeros
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
## .aes_schedule_round
##
## Runs one main round of the key schedule on %xmm0, %xmm7
##
## Specifically, runs subbytes on the high dword of %xmm0
## then rotates it by one byte and xors into the low dword of
## %xmm7.
##
## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
## next rcon.
1211e1051a39Sopenharmony_ci## 1212e1051a39Sopenharmony_ci## Smears the dwords of %xmm7 by xoring the low into the 1213e1051a39Sopenharmony_ci## second low, result into third, result into highest. 1214e1051a39Sopenharmony_ci## 1215e1051a39Sopenharmony_ci## Returns results in %xmm7 = %xmm0. 1216e1051a39Sopenharmony_ci## Clobbers %xmm1-%xmm4, %r11. 1217e1051a39Sopenharmony_ci## 1218e1051a39Sopenharmony_ci.align 4 1219e1051a39Sopenharmony_ci_vpaes_schedule_round: 1220e1051a39Sopenharmony_ci # extract rcon from xmm8 1221e1051a39Sopenharmony_ci #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 1222e1051a39Sopenharmony_ci ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1 1223e1051a39Sopenharmony_ci ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8 1224e1051a39Sopenharmony_ci vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 1225e1051a39Sopenharmony_ci 1226e1051a39Sopenharmony_ci # rotate 1227e1051a39Sopenharmony_ci ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 1228e1051a39Sopenharmony_ci ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0 1229e1051a39Sopenharmony_ci 1230e1051a39Sopenharmony_ci # fall through... 1231e1051a39Sopenharmony_ci 1232e1051a39Sopenharmony_ci # low round: same as high round, but no rotation and no rcon. 
1233e1051a39Sopenharmony_ci_vpaes_schedule_low_round: 1234e1051a39Sopenharmony_ci # smear xmm7 1235e1051a39Sopenharmony_ci ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1 1236e1051a39Sopenharmony_ci vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 1237e1051a39Sopenharmony_ci vspltisb v1, 0x0f # 0x0f..0f 1238e1051a39Sopenharmony_ci ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4 1239e1051a39Sopenharmony_ci 1240e1051a39Sopenharmony_ci # subbytes 1241e1051a39Sopenharmony_ci vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k 1242e1051a39Sopenharmony_ci vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i 1243e1051a39Sopenharmony_ci vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7 1244e1051a39Sopenharmony_ci vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k 1245e1051a39Sopenharmony_ci vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j 1246e1051a39Sopenharmony_ci vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i 1247e1051a39Sopenharmony_ci vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k 1248e1051a39Sopenharmony_ci vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j 1249e1051a39Sopenharmony_ci vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7 1250e1051a39Sopenharmony_ci vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak 1251e1051a39Sopenharmony_ci vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k 1252e1051a39Sopenharmony_ci vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak 1253e1051a39Sopenharmony_ci vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io 1254e1051a39Sopenharmony_ci vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo 1255e1051a39Sopenharmony_ci vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou 1256e1051a39Sopenharmony_ci vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t 1257e1051a39Sopenharmony_ci vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output 1258e1051a39Sopenharmony_ci 1259e1051a39Sopenharmony_ci # add 
in smeared stuff 1260e1051a39Sopenharmony_ci vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0 1261e1051a39Sopenharmony_ci vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7 1262e1051a39Sopenharmony_ci blr 1263e1051a39Sopenharmony_ci .long 0 1264e1051a39Sopenharmony_ci .byte 0,12,0x14,0,0,0,0,0 1265e1051a39Sopenharmony_ci 1266e1051a39Sopenharmony_ci## 1267e1051a39Sopenharmony_ci## .aes_schedule_transform 1268e1051a39Sopenharmony_ci## 1269e1051a39Sopenharmony_ci## Linear-transform %xmm0 according to tables at (%r11) 1270e1051a39Sopenharmony_ci## 1271e1051a39Sopenharmony_ci## Requires that %xmm9 = 0x0F0F... as in preheat 1272e1051a39Sopenharmony_ci## Output in %xmm0 1273e1051a39Sopenharmony_ci## Clobbers %xmm2 1274e1051a39Sopenharmony_ci## 1275e1051a39Sopenharmony_ci.align 4 1276e1051a39Sopenharmony_ci_vpaes_schedule_transform: 1277e1051a39Sopenharmony_ci #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1 1278e1051a39Sopenharmony_ci vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 1279e1051a39Sopenharmony_ci # vmovdqa (%r11), %xmm2 # lo 1280e1051a39Sopenharmony_ci vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 1281e1051a39Sopenharmony_ci # vmovdqa 16(%r11), %xmm1 # hi 1282e1051a39Sopenharmony_ci vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0 1283e1051a39Sopenharmony_ci vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0 1284e1051a39Sopenharmony_ci blr 1285e1051a39Sopenharmony_ci .long 0 1286e1051a39Sopenharmony_ci .byte 0,12,0x14,0,0,0,0,0 1287e1051a39Sopenharmony_ci 1288e1051a39Sopenharmony_ci## 1289e1051a39Sopenharmony_ci## .aes_schedule_mangle 1290e1051a39Sopenharmony_ci## 1291e1051a39Sopenharmony_ci## Mangle xmm0 from (basis-transformed) standard version 1292e1051a39Sopenharmony_ci## to our version. 
##
## On encrypt,
##   xor with 0x63
##   multiply by circulant 0,1,1,1
##   apply shiftrows transform
##
## On decrypt,
##   xor with 0x63
##   multiply by "inverse mixcolumns" circulant E,B,D,9
##   deskew
##   apply shiftrows transform
##
##
## Writes out to (%rdx), and increments or decrements it
## Keeps track of round number mod 4 in %r8
## Preserves xmm0
## Clobbers xmm1-xmm5
##
## In this PPC version the direction comes from the condition register
## field tested by the bne at entry: .vpaes_set_encrypt_key compares bits
## with itself (equal, so the encrypt path is taken) and
## .vpaes_set_decrypt_key compares bits with 0 (not equal for any
## non-zero bits, so Lschedule_mangle_dec is taken).
## The out pointer is stepped by +16 (encrypt) or -16 (decrypt) and r8 is
## stepped by -16 and masked with 0x30. r8 indexes the table based at r10
## from which the final permutation vector is loaded.
## The result is not stored directly: it is rotated with outperm, merged
## with the previous outhead under outmask and written with stvx, and the
## rotated value becomes the new outhead.
## Registers written here: v1-v4, outhead, out and r8. v0 is only read.
##
.align	4
_vpaes_schedule_mangle:
	#vmr	v4, v0			# vmovdqa %xmm0, %xmm4 # save xmm0 for later
					# vmovdqa .Lk_mc_forward(%rip),%xmm5
	bne	$dir, Lschedule_mangle_dec

	# encrypting
	vxor	v4, v0, v26		# vpxor .Lk_s63(%rip), %xmm0, %xmm4
	addi	$out, $out, 16		# add \$16, %rdx
	vperm	v4, v4, v4, v25		# vpshufb %xmm5, %xmm4, %xmm4
	vperm	v1, v4, v4, v25		# vpshufb %xmm5, %xmm4, %xmm1
	vperm	v3, v1, v1, v25		# vpshufb %xmm5, %xmm1, %xmm3
	vxor	v4, v4, v1		# vpxor %xmm1, %xmm4, %xmm4
	lvx	v1, r8, r10		# vmovdqa (%r8,%r10), %xmm1
	vxor	v3, v3, v4		# vpxor %xmm4, %xmm3, %xmm3

	vperm	v3, v3, v3, v1		# vpshufb %xmm1, %xmm3, %xmm3
	addi	r8, r8, -16		# add \$-16, %r8
	andi.	r8, r8, 0x30		# and \$0x30, %r8

	#stvx	v3, 0, $out		# vmovdqu %xmm3, (%rdx)
	vperm	v1, v3, v3, $outperm	# rotate right/left
	vsel	v2, $outhead, v1, $outmask
	vmr	$outhead, v1
	stvx	v2, 0, $out
	blr

.align	4
Lschedule_mangle_dec:
	# inverse mix columns
	# (v16-v23 stand in for the eight table rows the x86 code loads from
	# .Lk_dksd, v0 and v1 are used as the selectors for them)
					# lea .Lk_dksd(%rip),%r11
	vsrb	v1, v0, v8		# vpsrlb \$4, %xmm4, %xmm1 # 1 = hi
	#and	v4, v0, v9		# vpand %xmm9, %xmm4, %xmm4 # 4 = lo

					# vmovdqa 0x00(%r11), %xmm2
	vperm	v2, v16, v16, v0	# vpshufb %xmm4, %xmm2, %xmm2
					# vmovdqa 0x10(%r11), %xmm3
	vperm	v3, v17, v17, v1	# vpshufb %xmm1, %xmm3, %xmm3
	vxor	v3, v3, v2		# vpxor %xmm2, %xmm3, %xmm3
	vperm	v3, v3, v9, v25		# vpshufb %xmm5, %xmm3, %xmm3

					# vmovdqa 0x20(%r11), %xmm2
	vperm	v2, v18, v18, v0	# vpshufb %xmm4, %xmm2, %xmm2
	vxor	v2, v2, v3		# vpxor %xmm3, %xmm2, %xmm2
					# vmovdqa 0x30(%r11), %xmm3
	vperm	v3, v19, v19, v1	# vpshufb %xmm1, %xmm3, %xmm3
	vxor	v3, v3, v2		# vpxor %xmm2, %xmm3, %xmm3
	vperm	v3, v3, v9, v25		# vpshufb %xmm5, %xmm3, %xmm3

					# vmovdqa 0x40(%r11), %xmm2
	vperm	v2, v20, v20, v0	# vpshufb %xmm4, %xmm2, %xmm2
	vxor	v2, v2, v3		# vpxor %xmm3, %xmm2, %xmm2
					# vmovdqa 0x50(%r11), %xmm3
	vperm	v3, v21, v21, v1	# vpshufb %xmm1, %xmm3, %xmm3
	vxor	v3, v3, v2		# vpxor %xmm2, %xmm3, %xmm3

					# vmovdqa 0x60(%r11), %xmm2
	vperm	v2, v22, v22, v0	# vpshufb %xmm4, %xmm2, %xmm2
	vperm	v3, v3, v9, v25		# vpshufb %xmm5, %xmm3, %xmm3
					# vmovdqa 0x70(%r11), %xmm4
	vperm	v4, v23, v23, v1	# vpshufb %xmm1, %xmm4, %xmm4
	lvx	v1, r8, r10		# vmovdqa (%r8,%r10), %xmm1
	vxor	v2, v2, v3		# vpxor %xmm3, %xmm2, %xmm2
	vxor	v3, v4, v2		# vpxor %xmm2, %xmm4, %xmm3

	addi	$out, $out, -16		# add \$-16, %rdx

	vperm	v3, v3, v3, v1		# vpshufb %xmm1, %xmm3, %xmm3
	addi	r8, r8, -16		# add \$-16, %r8
	andi.	r8, r8, 0x30		# and \$0x30, %r8

	#stvx	v3, 0, $out		# vmovdqu %xmm3, (%rdx)
	vperm	v1, v3, v3, $outperm	# rotate right/left
	vsel	v2, $outhead, v1, $outmask
	vmr	$outhead, v1
	stvx	v2, 0, $out
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

##
## .vpaes_set_encrypt_key
##
## Public entry point. Allocates a stack frame, saves v20-v31 and vrsave,
## stores the round count at offset 240 of the output key structure,
## selects the encrypt direction and calls _vpaes_schedule_core (defined
## outside this excerpt). On return it restores the saved state, pops the
## frame and leaves 0 in r3.
##
## NOTE: the transliterated x86 comments below say +5, but the
## instruction adds 6, so the value stored is bits/32+6.
##
.globl	.vpaes_set_encrypt_key
.align	5
.vpaes_set_encrypt_key:
	$STU	$sp,-$FRAME($sp)
	# r10 and r11 alternate as offsets into the vector save area, each
	# stepping by 32 so that together they cover consecutive 16-byte slots
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr \$5,%eax
	addi	r9, r9, 6		# add \$5,%eax
	stw	r9, 240($out)		# mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;

	cmplw	$dir, $bits, $bits	# set encrypt direction
	li	r8, 0x30		# mov \$0x30,%r8d
	bl	_vpaes_schedule_core

	$POP	r0, `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	# vrsave is restored from r6, not from the stack copy, so r6 is
	# expected to survive the call above
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3		# r3 = 0
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key

##
## .vpaes_set_decrypt_key
##
## Public entry point, same frame handling as .vpaes_set_encrypt_key.
## After storing the round count it advances the output pointer by
## 16 times that count, selects the decrypt direction and starts r8 at
## 0 for 192-bit keys and 32 otherwise before calling
## _vpaes_schedule_core. Leaves 0 in r3.
##
## NOTE: as in the encrypt entry point, the x86 comments say +5 while the
## instruction adds 6.
##
.globl	.vpaes_set_decrypt_key
.align	4
.vpaes_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr \$5,%eax
	addi	r9, r9, 6		# add \$5,%eax
	stw	r9, 240($out)		# mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;

	slwi	r9, r9, 4		# shl \$4,%eax
	add	$out, $out, r9		# lea (%rdx,%rax),%rdx

	cmplwi	$dir, $bits, 0		# set decrypt direction
	srwi	r8, $bits, 1		# shr \$1,%r8d
	andi.	r8, r8, 32		# and \$32,%r8d
	xori	r8, r8, 32		# xor \$32,%r8d # nbits==192?0:32
	bl	_vpaes_schedule_core

	$POP	r0, `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	# vrsave is restored from r6, not from the stack copy, so r6 is
	# expected to survive the call above
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3		# r3 = 0
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
## remaining epilogue data of .vpaes_set_decrypt_key, then end of assembly text
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
___
}

# Post-processing pass over the accumulated assembly text in $code.
# Each line has its back-quoted arithmetic evaluated, then is either
# re-emitted as a .byte row (constants table) or has its '?'-prefixed,
# endian-specific instruction adjusted for $flavour before being printed.
my $consts=1;				# true until the Lconsts: label is seen
my $little_endian=($flavour =~ /le$/o);	# does not change during the pass

foreach my $line (split("\n",$code)) {
	# evaluate `...` expressions such as frame offsets
	$line =~ s/\`([^\`]*)\`/eval $1/geo;

	# constants table endian-specific conversion
	if ($consts && $line =~ m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
	    my ($words,$conv)=($1,$2);
	    my @bytes=();

	    # convert to endian-agnostic format: four bytes per word,
	    # most significant byte first
	    foreach my $word (split(/,\s+/,$words)) {
		my $value = ($word =~ /^0/) ? oct($word) : int($word);
		push @bytes, map { ($value>>$_)&0xff } 24, 16, 8, 0;
	    }

	    # little-endian conversion; ?inv takes precedence over ?rev
	    if ($little_endian) {
		if ($conv =~ /\?inv/) {
		    @bytes=map($_^0xf,@bytes);
		}
		elsif ($conv =~ /\?rev/) {
		    @bytes=reverse(@bytes);
		}
	    }

	    # emit the row and move on to the next input line
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if ($line =~ m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly; only the first rule that matches
	# is applied to a given line
	if ($little_endian) {		# little-endian
	    $line =~ s/\?lvsr/lvsl/o or
	    $line =~ s/\?lvsl/lvsr/o or
	    $line =~ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    $line =~ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    $line =~ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    $line =~ s/\?([a-z]+)/$1/o;	# just drop the marker
	}

	print $line,"\n";
}

close STDOUT or die "error closing STDOUT: $!";