1e1051a39Sopenharmony_ci#! /usr/bin/env perl 2e1051a39Sopenharmony_ci# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci# 4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci# this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci# ==================================================================== 11e1051a39Sopenharmony_ci# Written by David S. Miller and Andy Polyakov. 12e1051a39Sopenharmony_ci# The module is licensed under 2-clause BSD license. October 2012. 13e1051a39Sopenharmony_ci# All rights reserved. 14e1051a39Sopenharmony_ci# ==================================================================== 15e1051a39Sopenharmony_ci 16e1051a39Sopenharmony_ci###################################################################### 17e1051a39Sopenharmony_ci# AES for SPARC T4. 18e1051a39Sopenharmony_ci# 19e1051a39Sopenharmony_ci# AES round instructions complete in 3 cycles and can be issued every 20e1051a39Sopenharmony_ci# cycle. It means that round calculations should take 4*rounds cycles, 21e1051a39Sopenharmony_ci# because any given round instruction depends on result of *both* 22e1051a39Sopenharmony_ci# previous instructions: 23e1051a39Sopenharmony_ci# 24e1051a39Sopenharmony_ci# |0 |1 |2 |3 |4 25e1051a39Sopenharmony_ci# |01|01|01| 26e1051a39Sopenharmony_ci# |23|23|23| 27e1051a39Sopenharmony_ci# |01|01|... 28e1051a39Sopenharmony_ci# |23|... 
29e1051a39Sopenharmony_ci# 30e1051a39Sopenharmony_ci# Provided that fxor [with IV] takes 3 cycles to complete, critical 31e1051a39Sopenharmony_ci# path length for CBC encrypt would be 3+4*rounds, or in other words 32e1051a39Sopenharmony_ci# it should process one byte in at least (3+4*rounds)/16 cycles. This 33e1051a39Sopenharmony_ci# estimate doesn't account for "collateral" instructions, such as 34e1051a39Sopenharmony_ci# fetching input from memory, xor-ing it with zero-round key and 35e1051a39Sopenharmony_ci# storing the result. Yet, *measured* performance [for data aligned 36e1051a39Sopenharmony_ci# at 64-bit boundary!] deviates from this equation by less than 0.5%: 37e1051a39Sopenharmony_ci# 38e1051a39Sopenharmony_ci# 128-bit key 192- 256- 39e1051a39Sopenharmony_ci# CBC encrypt 2.70/2.90(*) 3.20/3.40 3.70/3.90 40e1051a39Sopenharmony_ci# (*) numbers after slash are for 41e1051a39Sopenharmony_ci# misaligned data. 42e1051a39Sopenharmony_ci# 43e1051a39Sopenharmony_ci# Out-of-order execution logic managed to fully overlap "collateral" 44e1051a39Sopenharmony_ci# instructions with those on critical path. Amazing! 45e1051a39Sopenharmony_ci# 46e1051a39Sopenharmony_ci# As with Intel AES-NI, question is if it's possible to improve 47e1051a39Sopenharmony_ci# performance of parallelizable modes by interleaving round 48e1051a39Sopenharmony_ci# instructions. Provided round instruction latency and throughput 49e1051a39Sopenharmony_ci# optimal interleave factor is 2. But can we expect 2x performance 50e1051a39Sopenharmony_ci# improvement? Well, as round instructions can be issued one per 51e1051a39Sopenharmony_ci# cycle, they don't saturate the 2-way issue pipeline and therefore 52e1051a39Sopenharmony_ci# there is room for "collateral" calculations... 
# Yet, 2x speed-up over CBC encrypt remains unattainable:
#
#		128-bit key	192-		256-
# CBC decrypt	1.64/2.11	1.89/2.37	2.23/2.61
# CTR		1.64/2.08(*)	1.89/2.33	2.23/2.61
#			(*) numbers after slash are for
#			    misaligned data.
#
# Estimates based on amount of instructions under assumption that
# round instructions are not pairable with any other instruction
# suggest that latter is the actual case and pipeline runs
# underutilized. It should be noted that T4 out-of-order execution
# logic is so capable that performance gain from 2x interleave is
# not even impressive, ~7-13% over non-interleaved code, largest
# for 256-bit keys.

# To anchor to something else, software implementation processes
# one byte in 29 cycles with 128-bit key on same processor. Intel
# Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts
# in 0.93, naturally with AES-NI.

# Make the directory holding this script, and ../../perlasm relative to it,
# searchable, then pull in sparcv9_modes.pl (presumably the provider of the
# alg_*_implement generators called further down -- not visible here).
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, if any, names the output file.  Use the
# three-argument form of open so the name is never parsed for mode
# characters, and fail loudly: an unchecked failure would silently drop
# all generated assembly.  Without an argument STDOUT is left untouched.
$output = pop;
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}

$::evp=1;	# if $evp is set to 0, script generates module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
# points. These however are not fully compatible with openssl/aes.h,
# because they expect AES_KEY to be aligned at 64-bit boundary. When
# used through EVP, alignment is arranged at EVP layer. Second thing
# that is arranged by EVP is at least 32-bit alignment of IV.

######################################################################
# single-round subroutines
#
{
# Register roles: $inp, $out and $key (%o0-%o2) are read as incoming
# arguments; $rounds, $tmp and $mask (%o3-%o5) are scratch.  Note that
# %o3-%o5 are also referenced by their literal names while a misaligned
# input block is being assembled, i.e. before they are reused under the
# $rounds/$tmp/$mask aliases.
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

# aes_t4_encrypt: process one 16-byte block.
#  - input is fetched from the 8-byte-aligned-down address; if it was
#    misaligned a third word is loaded and the block is rebuilt with
#    shifts and ors;
#  - the block is xor-ed with the first 16 bytes at $key, the round count
#    is read from [$key + 240], and .Lenc runs rounds/2-1 times with two
#    eround01/eround23 pairs per pass; the remaining two pairs (the last
#    one using the *_l variants) follow the loop;
#  - aligned output is written with two std; otherwise the result is
#    re-aligned with faligndata and the edges go out as partial stores.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef	__arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.text

.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Lenc:
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_eround01	%f16, %f4, %f2, %f0
	aes_eround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Lenc
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	aes_eround01_l	%f16, %f4, %f2, %f0
	aes_eround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt

___

# aes_t4_decrypt: same structure and register usage as aes_t4_encrypt,
# built from the aes_dround* instructions instead of aes_eround*.
$code.=<<___;
.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Ldec:
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_dround01	%f16, %f4, %f2, %f0
	aes_dround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Ldec
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	aes_dround01_l	%f16, %f4, %f2, %f0
	aes_dround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
# Register roles: $inp (user key), $bits and $out (key schedule) are read
# as incoming arguments in %o0-%o2; $tmp (%o3) is scratch.  The map yields
# six names, the surplus two are simply discarded.
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));

# aes_t4_set_encrypt_key: $bits is compared with 192; smaller goes to
# .L128, equal goes to .L192, anything else falls through to the 256-bit
# path.  A misaligned user key is re-aligned with alignaddr/faligndata.
# Every path stores its round count (14, 12 or 10) at [$out + 240] and
# returns 0 in %o0.
#
# The expansion loops below are unrolled at generation time.  Each
# heredoc interpolates the current $i, and the heredoc that follows a
# loop deliberately relies on $i having been left at the loop bound
# (6, 7 or 10).  The back-quoted offsets are emitted as text here and are
# presumably evaluated by the script's final output pass, which is not
# visible in this chunk.
$code.=<<___;
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
	and		$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp		$bits, 192
	ldd		[$inp + 0], %f0
	bl,pt		%icc,.L128
	ldd		[$inp + 8], %f2

	be,pt		%icc,.L192
	ldd		[$inp + 16], %f4
	brz,pt		$tmp, .L256aligned
	ldd		[$inp + 24], %f6

	ldd		[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	faligndata	%f6, %f8, %f6
.L256aligned:
___
for ($i=0; $i<6; $i++) {
    $code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	aes_kexpand0	%f4, %f2, %f4
	std		%f6, [$out + `32*$i+24`]
	aes_kexpand2	%f6, %f4, %f6
___
}
$code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	std		%f6, [$out + `32*$i+24`]
	std		%f0, [$out + `32*$i+32`]
	std		%f2, [$out + `32*$i+40`]

	mov		14, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L192:
	brz,pt		$tmp, .L192aligned
	nop

	ldd		[$inp + 24], %f6
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
.L192aligned:
___
for ($i=0; $i<7; $i++) {
    $code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	aes_kexpand2	%f4, %f2, %f4
___
}
$code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	std		%f0, [$out + `24*$i+24`]
	std		%f2, [$out + `24*$i+32`]

	mov		12, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L128:
	brz,pt		$tmp, .L128aligned
	nop

	ldd		[$inp + 16], %f4
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
.L128aligned:
___
for ($i=0; $i<10; $i++) {
    $code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	aes_kexpand1	%f0, %f2, $i, %f0
	std		%f2, [$out + `16*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
___
}
$code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	std		%f2, [$out + `16*$i+8`]

	mov		10, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key

___

# aes_t4_set_decrypt_key: calls .Lset_encrypt_key (the return address in
# %o7 is parked in %o5 across the call), then reverses the order of the
# 16-byte round keys in place.  It depends on the callee leaving the round
# count in $tmp and $out untouched.  .Lkey_flip exchanges two round keys
# from the front with two from the back per pass and runs (rounds+2)/4
# times.  Returns 0 in %o0.
$code.=<<___;
.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov		%o7, %o5
	call		.Lset_encrypt_key
	nop

	mov		%o5, %o7
	sll		$tmp, 4, $inp		! $tmp is number of rounds
	add		$tmp, 2, $tmp
	add		$out, $inp, $inp	! $inp=$out+16*rounds
	srl		$tmp, 2, $tmp		! $tmp=(rounds+2)/4

.Lkey_flip:
	ldd		[$out + 0], %f0
	ldd		[$out + 8], %f2
	ldd		[$out + 16], %f4
	ldd		[$out + 24], %f6
	ldd		[$inp + 0], %f8
	ldd		[$inp + 8], %f10
	ldd		[$inp - 16], %f12
	ldd		[$inp - 8], %f14
	sub		$tmp, 1, $tmp
	std		%f0, [$inp + 0]
	std		%f2, [$inp + 8]
	std		%f4, [$inp - 16]
	std		%f6, [$inp - 8]
	std		%f8, [$out + 0]
	std		%f10, [$out + 8]
	std		%f12, [$out + 16]
	std		%f14, [$out + 24]
	add		$out, 32, $out
	brnz		$tmp, .Lkey_flip
	sub		$inp, 32, $inp

	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
___
}

{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align	32
432e1051a39Sopenharmony_ci_aes128_encrypt_1x: 433e1051a39Sopenharmony_ci___ 434e1051a39Sopenharmony_cifor ($i=0; $i<4; $i++) { 435e1051a39Sopenharmony_ci $code.=<<___; 436e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 437e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 438e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 439e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 440e1051a39Sopenharmony_ci___ 441e1051a39Sopenharmony_ci} 442e1051a39Sopenharmony_ci$code.=<<___; 443e1051a39Sopenharmony_ci aes_eround01 %f48, %f0, %f2, %f4 444e1051a39Sopenharmony_ci aes_eround23 %f50, %f0, %f2, %f2 445e1051a39Sopenharmony_ci aes_eround01_l %f52, %f4, %f2, %f0 446e1051a39Sopenharmony_ci retl 447e1051a39Sopenharmony_ci aes_eround23_l %f54, %f4, %f2, %f2 448e1051a39Sopenharmony_ci.type _aes128_encrypt_1x,#function 449e1051a39Sopenharmony_ci.size _aes128_encrypt_1x,.-_aes128_encrypt_1x 450e1051a39Sopenharmony_ci 451e1051a39Sopenharmony_ci.align 32 452e1051a39Sopenharmony_ci_aes128_encrypt_2x: 453e1051a39Sopenharmony_ci___ 454e1051a39Sopenharmony_cifor ($i=0; $i<4; $i++) { 455e1051a39Sopenharmony_ci $code.=<<___; 456e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 457e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 458e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 459e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 460e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 461e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 462e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 463e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 464e1051a39Sopenharmony_ci___ 465e1051a39Sopenharmony_ci} 466e1051a39Sopenharmony_ci$code.=<<___; 467e1051a39Sopenharmony_ci aes_eround01 %f48, %f0, %f2, %f8 468e1051a39Sopenharmony_ci aes_eround23 %f50, %f0, %f2, %f2 469e1051a39Sopenharmony_ci aes_eround01 
%f48, %f4, %f6, %f10 470e1051a39Sopenharmony_ci aes_eround23 %f50, %f4, %f6, %f6 471e1051a39Sopenharmony_ci aes_eround01_l %f52, %f8, %f2, %f0 472e1051a39Sopenharmony_ci aes_eround23_l %f54, %f8, %f2, %f2 473e1051a39Sopenharmony_ci aes_eround01_l %f52, %f10, %f6, %f4 474e1051a39Sopenharmony_ci retl 475e1051a39Sopenharmony_ci aes_eround23_l %f54, %f10, %f6, %f6 476e1051a39Sopenharmony_ci.type _aes128_encrypt_2x,#function 477e1051a39Sopenharmony_ci.size _aes128_encrypt_2x,.-_aes128_encrypt_2x 478e1051a39Sopenharmony_ci 479e1051a39Sopenharmony_ci.align 32 480e1051a39Sopenharmony_ci_aes128_loadkey: 481e1051a39Sopenharmony_ci ldx [$key + 0], %g4 482e1051a39Sopenharmony_ci ldx [$key + 8], %g5 483e1051a39Sopenharmony_ci___ 484e1051a39Sopenharmony_cifor ($i=2; $i<22;$i++) { # load key schedule 485e1051a39Sopenharmony_ci $code.=<<___; 486e1051a39Sopenharmony_ci ldd [$key + `8*$i`], %f`12+2*$i` 487e1051a39Sopenharmony_ci___ 488e1051a39Sopenharmony_ci} 489e1051a39Sopenharmony_ci$code.=<<___; 490e1051a39Sopenharmony_ci retl 491e1051a39Sopenharmony_ci nop 492e1051a39Sopenharmony_ci.type _aes128_loadkey,#function 493e1051a39Sopenharmony_ci.size _aes128_loadkey,.-_aes128_loadkey 494e1051a39Sopenharmony_ci_aes128_load_enckey=_aes128_loadkey 495e1051a39Sopenharmony_ci_aes128_load_deckey=_aes128_loadkey 496e1051a39Sopenharmony_ci 497e1051a39Sopenharmony_ci___ 498e1051a39Sopenharmony_ci 499e1051a39Sopenharmony_ci&alg_cbc_encrypt_implement("aes",128); 500e1051a39Sopenharmony_ciif ($::evp) { 501e1051a39Sopenharmony_ci &alg_ctr32_implement("aes",128); 502e1051a39Sopenharmony_ci &alg_xts_implement("aes",128,"en"); 503e1051a39Sopenharmony_ci &alg_xts_implement("aes",128,"de"); 504e1051a39Sopenharmony_ci} 505e1051a39Sopenharmony_ci&alg_cbc_decrypt_implement("aes",128); 506e1051a39Sopenharmony_ci 507e1051a39Sopenharmony_ci$code.=<<___; 508e1051a39Sopenharmony_ci.align 32 509e1051a39Sopenharmony_ci_aes128_decrypt_1x: 510e1051a39Sopenharmony_ci___ 511e1051a39Sopenharmony_cifor ($i=0; $i<4; 
$i++) { 512e1051a39Sopenharmony_ci $code.=<<___; 513e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 514e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 515e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 516e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 517e1051a39Sopenharmony_ci___ 518e1051a39Sopenharmony_ci} 519e1051a39Sopenharmony_ci$code.=<<___; 520e1051a39Sopenharmony_ci aes_dround01 %f48, %f0, %f2, %f4 521e1051a39Sopenharmony_ci aes_dround23 %f50, %f0, %f2, %f2 522e1051a39Sopenharmony_ci aes_dround01_l %f52, %f4, %f2, %f0 523e1051a39Sopenharmony_ci retl 524e1051a39Sopenharmony_ci aes_dround23_l %f54, %f4, %f2, %f2 525e1051a39Sopenharmony_ci.type _aes128_decrypt_1x,#function 526e1051a39Sopenharmony_ci.size _aes128_decrypt_1x,.-_aes128_decrypt_1x 527e1051a39Sopenharmony_ci 528e1051a39Sopenharmony_ci.align 32 529e1051a39Sopenharmony_ci_aes128_decrypt_2x: 530e1051a39Sopenharmony_ci___ 531e1051a39Sopenharmony_cifor ($i=0; $i<4; $i++) { 532e1051a39Sopenharmony_ci $code.=<<___; 533e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 534e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 535e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 536e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 537e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 538e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 539e1051a39Sopenharmony_ci aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 540e1051a39Sopenharmony_ci aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 541e1051a39Sopenharmony_ci___ 542e1051a39Sopenharmony_ci} 543e1051a39Sopenharmony_ci$code.=<<___; 544e1051a39Sopenharmony_ci aes_dround01 %f48, %f0, %f2, %f8 545e1051a39Sopenharmony_ci aes_dround23 %f50, %f0, %f2, %f2 546e1051a39Sopenharmony_ci aes_dround01 %f48, %f4, %f6, %f10 547e1051a39Sopenharmony_ci aes_dround23 %f50, %f4, %f6, %f6 548e1051a39Sopenharmony_ci aes_dround01_l 
%f52, %f8, %f2, %f0 549e1051a39Sopenharmony_ci aes_dround23_l %f54, %f8, %f2, %f2 550e1051a39Sopenharmony_ci aes_dround01_l %f52, %f10, %f6, %f4 551e1051a39Sopenharmony_ci retl 552e1051a39Sopenharmony_ci aes_dround23_l %f54, %f10, %f6, %f6 553e1051a39Sopenharmony_ci.type _aes128_decrypt_2x,#function 554e1051a39Sopenharmony_ci.size _aes128_decrypt_2x,.-_aes128_decrypt_2x 555e1051a39Sopenharmony_ci___ 556e1051a39Sopenharmony_ci 557e1051a39Sopenharmony_ci$code.=<<___; 558e1051a39Sopenharmony_ci.align 32 559e1051a39Sopenharmony_ci_aes192_encrypt_1x: 560e1051a39Sopenharmony_ci___ 561e1051a39Sopenharmony_cifor ($i=0; $i<5; $i++) { 562e1051a39Sopenharmony_ci $code.=<<___; 563e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 564e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 565e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 566e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 567e1051a39Sopenharmony_ci___ 568e1051a39Sopenharmony_ci} 569e1051a39Sopenharmony_ci$code.=<<___; 570e1051a39Sopenharmony_ci aes_eround01 %f56, %f0, %f2, %f4 571e1051a39Sopenharmony_ci aes_eround23 %f58, %f0, %f2, %f2 572e1051a39Sopenharmony_ci aes_eround01_l %f60, %f4, %f2, %f0 573e1051a39Sopenharmony_ci retl 574e1051a39Sopenharmony_ci aes_eround23_l %f62, %f4, %f2, %f2 575e1051a39Sopenharmony_ci.type _aes192_encrypt_1x,#function 576e1051a39Sopenharmony_ci.size _aes192_encrypt_1x,.-_aes192_encrypt_1x 577e1051a39Sopenharmony_ci 578e1051a39Sopenharmony_ci.align 32 579e1051a39Sopenharmony_ci_aes192_encrypt_2x: 580e1051a39Sopenharmony_ci___ 581e1051a39Sopenharmony_cifor ($i=0; $i<5; $i++) { 582e1051a39Sopenharmony_ci $code.=<<___; 583e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 584e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 585e1051a39Sopenharmony_ci aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 586e1051a39Sopenharmony_ci aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 587e1051a39Sopenharmony_ci 
aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f56, %f0, %f2, %f8
	aes_eround23	%f58, %f0, %f2, %f2
	aes_eround01	%f56, %f4, %f6, %f10
	aes_eround23	%f58, %f4, %f6, %f6
	aes_eround01_l	%f60, %f8, %f2, %f0
	aes_eround23_l	%f62, %f8, %f2, %f2
	aes_eround01_l	%f60, %f10, %f6, %f4
	retl
	aes_eround23_l	%f62, %f10, %f6, %f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x

___

######################################################################
# _aes256_encrypt_1x
#
# Runs 14 aes_eround01/aes_eround23 pairs over the state in %f0/%f2,
# with %f4 as scratch; the last pair uses the *_l instruction forms.
# _aes192_loadkey (below) only fills %f16-%f62 from key offsets
# 16..200, so %f16-%f22 do double duty here: they feed the first two
# pairs, are reloaded from offsets 208..232 for the last two pairs,
# and are restored from offsets 16..40 before returning.  The final
# restoring ldd sits in the retl delay slot.
$code.=<<___;
.align	32
_aes256_encrypt_1x:
	aes_eround01	%f16, %f0, %f2, %f4
	aes_eround23	%f18, %f0, %f2, %f2
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_eround01	%f20, %f4, %f2, %f0
	aes_eround23	%f22, %f4, %f2, %f2
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
# Ten middle pairs, two per iteration, reading %f24-%f62.
for ($i = 1; $i < 6; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f16, %f0, %f2, %f4
	aes_eround23	%f18, %f0, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_eround01_l	%f20, %f4, %f2, %f0
	aes_eround23_l	%f22, %f4, %f2, %f2
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_encrypt_1x,#function
.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x

___

######################################################################
# _aes256_encrypt_2x
#
# Same round sequence as _aes256_encrypt_1x, applied to two states at
# once: %f0/%f2 (scratch %f8) and %f4/%f6 (scratch %f10).  The
# instructions of the two states are interleaved, and %f16-%f22 are
# reloaded and restored exactly as in the 1x routine.
$code.=<<___;
.align	32
_aes256_encrypt_2x:
	aes_eround01	%f16, %f0, %f2, %f8
	aes_eround23	%f18, %f0, %f2, %f2
	aes_eround01	%f16, %f4, %f6, %f10
	aes_eround23	%f18, %f4, %f6, %f6
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_eround01	%f20, %f8, %f2, %f0
	aes_eround23	%f22, %f8, %f2, %f2
	aes_eround01	%f20, %f10, %f6, %f4
	aes_eround23	%f22, %f10, %f6, %f6
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
# Ten middle pairs per state, reading %f24-%f62.
for ($i = 1; $i < 6; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f16, %f0, %f2, %f8
	aes_eround23	%f18, %f0, %f2, %f2
	aes_eround01	%f16, %f4, %f6, %f10
	aes_eround23	%f18, %f4, %f6, %f6
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_eround01_l	%f20, %f8, %f2, %f0
	aes_eround23_l	%f22, %f8, %f2, %f2
	aes_eround01_l	%f20, %f10, %f6, %f4
	aes_eround23_l	%f22, %f10, %f6, %f6
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_encrypt_2x,#function
.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x

___

######################################################################
# _aes192_loadkey
#
# Loads the doublewords at key offsets 0 and 8 into %g4/%g5, then the
# 24 doublewords at offsets 16..200 into the even-numbered registers
# %f16..%f62.  The 256-bit and the enc/dec-specific loader names are
# plain symbol aliases of this one routine.
$code.=<<___;
.align	32
_aes192_loadkey:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
___
for ($i = 2; $i < 26; $i++) {			# key offsets 16..200
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_aes192_loadkey,#function
.size	_aes192_loadkey,.-_aes192_loadkey
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
___

# Instantiate the mode templates for 256- and 192-bit keys; the CTR32
# and XTS variants are generated only when $::evp is set.  The
# generators are defined outside this file section; presumably each
# one appends to $code, in which case the call order below is also
# the order of the emitted code -- keep it as is.
&alg_cbc_encrypt_implement("aes",256);
&alg_cbc_encrypt_implement("aes",192);
if ($::evp) {
    &alg_ctr32_implement("aes",256);
    &alg_xts_implement("aes",256,"en");
    &alg_xts_implement("aes",256,"de");
    &alg_ctr32_implement("aes",192);
}
&alg_cbc_decrypt_implement("aes",192);
&alg_cbc_decrypt_implement("aes",256);

######################################################################
# _aes256_decrypt_1x
#
# Decrypt counterpart of _aes256_encrypt_1x: 14 aes_dround01/
# aes_dround23 pairs over the state in %f0/%f2 (scratch %f4), the
# last pair in *_l form, with the same reload of %f16-%f22 from key
# offsets 208..232 and restore from offsets 16..40.
$code.=<<___;
.align	32
_aes256_decrypt_1x:
	aes_dround01	%f16, %f0, %f2, %f4
	aes_dround23	%f18, %f0, %f2, %f2
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_dround01	%f20, %f4, %f2, %f0
	aes_dround23	%f22, %f4, %f2, %f2
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
# Ten middle pairs, two per iteration, reading %f24-%f62.
for ($i = 1; $i < 6; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f16, %f0, %f2, %f4
	aes_dround23	%f18, %f0, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_dround01_l	%f20, %f4, %f2, %f0
	aes_dround23_l	%f22, %f4, %f2, %f2
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x

___

######################################################################
# _aes256_decrypt_2x
#
# Two-state version of _aes256_decrypt_1x: states %f0/%f2 (scratch
# %f8) and %f4/%f6 (scratch %f10), instructions interleaved.
$code.=<<___;
.align	32
_aes256_decrypt_2x:
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_dround01	%f20, %f8, %f2, %f0
	aes_dround23	%f22, %f8, %f2, %f2
	aes_dround01	%f20, %f10, %f6, %f4
	aes_dround23	%f22, %f10, %f6, %f6
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
# Ten middle pairs per state, reading %f24-%f62.
for ($i = 1; $i < 6; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_dround01_l	%f20, %f8, %f2, %f0
	aes_dround23_l	%f22, %f8, %f2, %f2
	aes_dround01_l	%f20, %f10, %f6, %f4
	aes_dround23_l	%f22, %f10, %f6, %f6
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x

___

######################################################################
# _aes192_decrypt_1x
#
# 12 aes_dround01/aes_dround23 pairs over the state in %f0/%f2
# (scratch %f4).  All round keys are read straight from %f16-%f62,
# so nothing is reloaded; the last aes_dround23_l executes in the
# retl delay slot.
$code.=<<___;
.align	32
_aes192_decrypt_1x:
___
# First ten pairs, two per iteration, reading %f16-%f54.
for ($i = 0; $i < 5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f4
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01_l	%f60, %f4, %f2, %f0
	retl
	aes_dround23_l	%f62, %f4, %f2, %f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x

___

######################################################################
# _aes192_decrypt_2x
#
# Two-state version of _aes192_decrypt_1x: states %f0/%f2 (scratch
# %f8) and %f4/%f6 (scratch %f10), instructions interleaved.
$code.=<<___;
.align	32
_aes192_decrypt_2x:
___
# First ten pairs per state, reading %f16-%f54.
for ($i = 0; $i < 5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f8
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01	%f56, %f4, %f6, %f10
	aes_dround23	%f58, %f4, %f6, %f6
	aes_dround01_l	%f60, %f8, %f2, %f0
	aes_dround23_l	%f62, %f8, %f2, %f2
	aes_dround01_l	%f60, %f10, %f6, %f4
	retl
	aes_dround23_l	%f62, %f10, %f6, %f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
___
}}}

# Everything below is emitted only for non-EVP builds: the generic
# AES_* entry points are provided on top of the aes_t4_* routines.
if (!$::evp) {

# AES_encrypt and AES_decrypt are plain symbol aliases.
$code.=<<___;
.global	AES_encrypt
AES_encrypt=aes_t4_encrypt
.global	AES_decrypt
AES_decrypt=aes_t4_decrypt
___

# AES_set_encrypt_key: argument checks, then a branch into
# aes_t4_set_encrypt_key with the registers untouched.
#   %o0 = -1  if %o2 is not a multiple of 8, or %o0 or %o2 is zero
#   %o0 = -2  if %o1 has a bit set outside 0x1c0, or %o1 < 128
# The annulled (",a") branches run their delay-slot mov only when
# taken, so the error code is written on the failing path only.
# (Presumably %o0/%o1/%o2 are the user key, bit length and AES_KEY
# pointer of the C prototype -- confirm against the header.)
$code.=<<___;
.global	AES_set_encrypt_key
.align	32
AES_set_encrypt_key:
	andcc		%o2, 7, %g0		! check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		aes_t4_set_encrypt_key
	nop
1:	retl
	nop
.type	AES_set_encrypt_key,#function
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

___

# AES_set_decrypt_key: identical checks and return codes, branching
# into aes_t4_set_decrypt_key instead.
$code.=<<___;
.global	AES_set_decrypt_key
.align	32
AES_set_decrypt_key:
	andcc		%o2, 7, %g0		! check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		aes_t4_set_decrypt_key
	nop
1:	retl
	nop
.type	AES_set_decrypt_key,#function
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

# Argument registers of AES_cbc_encrypt, %o0..%o5 in order.
my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

# AES_cbc_encrypt: dispatcher.  Reads the word at [$key + 240]
# (presumably the round count stored in the key schedule -- confirm)
# and compares it with 12; that cmp sits in the delay slot of the
# non-annulled brz, so the condition codes are valid on both paths.
#   value < 12  -> aes128_t4_cbc_{en,de}crypt
#   value == 12 -> aes192_t4_cbc_{en,de}crypt
#   otherwise   -> aes256_t4_cbc_{en,de}crypt
# $enc == 0 selects the decrypt targets.
$code.=<<___;
.globl	AES_cbc_encrypt
.align	32
AES_cbc_encrypt:
	ld		[$key + 240], %g1
	nop
	brz		$enc, .Lcbc_decrypt
	cmp		%g1, 12

	bl,pt		%icc, aes128_t4_cbc_encrypt
	nop
	be,pn		%icc, aes192_t4_cbc_encrypt
	nop
	ba		aes256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	bl,pt		%icc, aes128_t4_cbc_decrypt
	nop
	be,pn		%icc, aes192_t4_cbc_decrypt
	nop
	ba		aes256_t4_cbc_decrypt
	nop
.type	AES_cbc_encrypt,#function
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}

# Identification string, emitted for every build flavour.
$code.=<<___;
.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

# Hand the accumulated text to the emitter defined outside this file
# section; a failed close of STDOUT is treated as fatal.
&emit_assembler();

close STDOUT or die "error closing STDOUT: $!";