#! /usr/bin/env perl
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Version 2.1.
#
# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
# [you'll notice a lot of resemblance], such as compressed S-boxes
# in little-endian byte order, prefetch of these tables in CBC mode,
# as well as avoiding L1 cache aliasing between stack frame and key
# schedule and already mentioned tables, compressed Td4...
#
# Performance in number of cycles per processed byte for 128-bit key:
#
#		ECB encrypt	ECB decrypt	CBC large chunk
# AMD64		33		43		13.0
# EM64T		38		56		18.6(*)
# Core 2	30		42		14.5(*)
# Atom		65		86		32.1(*)
#
# (*) with hyper-threading off

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 mode is selected either by the flavour name or by an .asm output file.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.  $dir keeps the trailing path separator of $0.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator,
# which is run with the same perl binary ($^X) and given $flavour and $output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

$verticalspin=1;	# unlike 32-bit version $verticalspin performs
			# ~15% better on both AMD and Intel cores
$speed_limit=512;	# see aes-586.pl for details

# $code accumulates the assembly text generated by the rest of the script.
$code=".text\n";

# Register aliases for the first three 32-bit state words.
$s0="%eax";
$s1="%ebx";
$s2="%ecx";
# Remaining register aliases.  $acc0..$acc2 are 32-bit scratch registers;
# $mask80/$maskfe/$mask1b name the 64-bit forms of the same three registers.
$s3="%edx";
$acc0="%esi";	$mask80="%rsi";
$acc1="%edi";	$maskfe="%rdi";
$acc2="%ebp";	$mask1b="%rbp";
$inp="%r8";
$out="%r9";
$t0="%r10d";
$t1="%r11d";
$t2="%r12d";
$rnds="%r13d";
$sbox="%r14";
$key="%r15";

# hi(REG): rewrite %eax/%rax-style names (a..d only) to the high-byte
# register name (%ah etc.).  Other names are returned unchanged.
sub hi()	{ my $r=shift;	$r =~ s/%[er]([a-d])x/%${1}h/;	$r; }
# lo(REG): rewrite a register name to its low-byte form:
# %eax -> %al, %esi -> %sil, %r10/%r10d -> %r10b.
sub lo()	{ my $r=shift;	$r =~ s/%[er]([a-d])x/%${1}l/;
			$r =~ s/%[er]([sd]i)/%${1}l/;
			$r =~ s/%(r[0-9]+)[d]?/%${1}b/;	$r; }
# LO(REG): rewrite a 64-bit register name to its 32-bit form:
# %rax -> %eax, %r8 -> %r8d.
sub LO()	{ my $r=shift;	$r =~ s/%r([a-z]+)/%e${1}/;
			$r =~ s/%r([0-9]+)/%r${1}d/;	$r; }
# _data_word(LIST): append one ".long" line per argument to $code, with the
# value written twice on that line.
sub _data_word()
{ my $i;
    while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
# data_word(LIST): append all arguments to $code as a single comma-separated
# ".long" line (the last argument is popped first so it gets no trailing comma).
sub data_word()
{ my $i;
  my $last=pop(@_);
    $code.=".long\t";
    while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
    $code.=sprintf"0x%08x\n",$last;
}

# data_byte(LIST): like data_word(), but emits a ".byte" line and masks every
# value to its low 8 bits.
sub data_byte()
{ my $i;
  my $last=pop(@_);
    $code.=".byte\t";
    while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
    $code.=sprintf"0x%02x\n",$last&0xff;
}

# encvert(): append the "vertical spin" body of .Lenc_loop to $code.  The
# emitted code indexes the table at $sbox with bytes of $s0..$s3, advances
# $key by 16 and xors the accumulated words with the four words at $key.
# Uses %r8d as a fourth temporary, which overwrites $inp.
sub encvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	# favor 3-way issue Opteron pipeline...
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	mov	0($sbox,$acc0,8),$t0
	mov	0($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t2

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	movzb	`&lo("$s3")`,$acc2
	xor	3($sbox,$acc0,8),$t0
	xor	3($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t3

	movzb	`&hi("$s3")`,$acc0
	shr	\$16,$s2
	movzb	`&hi("$s0")`,$acc2
	xor	3($sbox,$acc0,8),$t2
	shr	\$16,$s3
	xor	3($sbox,$acc2,8),$t3

	shr	\$16,$s1
	lea	16($key),$key
	shr	\$16,$s0

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	xor	2($sbox,$acc0,8),$t0
	xor	2($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	movzb	`&lo("$s1")`,$acc2
	xor	1($sbox,$acc0,8),$t0
	xor	1($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t3

	mov	12($key),$s3
	movzb	`&hi("$s1")`,$acc1
	movzb	`&hi("$s2")`,$acc2
	mov	0($key),$s0
	xor	1($sbox,$acc1,8),$t2
	xor	1($sbox,$acc2,8),$t3

	mov	4($key),$s1
	mov	8($key),$s2
	xor	$t0,$s0
	xor	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}

# enclastvert(): append the "vertical spin" code that follows .Lenc_loop.
# Each table entry is loaded whole and masked down to a single byte lane
# before being combined; the result is xored with the words at 16($key).
# Uses %r8d as a fourth temporary, which overwrites $inp.
sub enclastvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	movzb	2($sbox,$acc0,8),$t0
	movzb	2($sbox,$acc1,8),$t1
	movzb	2($sbox,$acc2,8),$t2

	movzb	`&lo("$s3")`,$acc0
	movzb	`&hi("$s1")`,$acc1
	movzb	`&hi("$s2")`,$acc2
	movzb	2($sbox,$acc0,8),$t3
	mov	0($sbox,$acc1,8),$acc1	#$t0
	mov	0($sbox,$acc2,8),$acc2	#$t1

	and	\$0x0000ff00,$acc1
	and	\$0x0000ff00,$acc2

	xor	$acc1,$t0
	xor	$acc2,$t1
	shr	\$16,$s2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	shr	\$16,$s3
	mov	0($sbox,$acc0,8),$acc0	#$t2
	mov	0($sbox,$acc1,8),$acc1	#$t3

	and	\$0x0000ff00,$acc0
	and	\$0x0000ff00,$acc1
	shr	\$16,$s1
	xor	$acc0,$t2
	xor	$acc1,$t3
	shr	\$16,$s0

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	mov	0($sbox,$acc0,8),$acc0	#$t0
	mov	0($sbox,$acc1,8),$acc1	#$t1
	mov	0($sbox,$acc2,8),$acc2	#$t2

	and	\$0x00ff0000,$acc0
	and	\$0x00ff0000,$acc1
	and	\$0x00ff0000,$acc2

	xor	$acc0,$t0
	xor	$acc1,$t1
	xor	$acc2,$t2

	movzb	`&lo("$s1")`,$acc0
	movzb	`&hi("$s3")`,$acc1
	movzb	`&hi("$s0")`,$acc2
	mov	0($sbox,$acc0,8),$acc0	#$t3
	mov	2($sbox,$acc1,8),$acc1	#$t0
	mov	2($sbox,$acc2,8),$acc2	#$t1

	and	\$0x00ff0000,$acc0
	and	\$0xff000000,$acc1
	and	\$0xff000000,$acc2

	xor	$acc0,$t3
	xor	$acc1,$t0
	xor	$acc2,$t1

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	mov	16+12($key),$s3
	mov	2($sbox,$acc0,8),$acc0	#$t2
	mov	2($sbox,$acc1,8),$acc1	#$t3
	mov	16+0($key),$s0

	and	\$0xff000000,$acc0
	and	\$0xff000000,$acc1

	xor	$acc0,$t2
	xor	$acc1,$t3

	mov	16+4($key),$s1
	mov	16+8($key),$s2
	xor	$t0,$s0
	xor	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}

# encstep(I, S0, S1, S2, S3): non-vertical alternative to encvert(); appends
# the code computing output word I (0..3) from the four given state registers.
# For I==3 the state registers themselves serve as temporaries, and $t0..$t2
# are copied back into S1..S3 at the end.  Note that the lexical $out here
# shadows the global $out register alias.
sub encstep()
{ my ($i,@s) = @_;
  my $tmp0=$acc0;
  my $tmp1=$acc1;
  my $tmp2=$acc2;
  my $out=($t0,$t1,$t2,$s[0])[$i];

	if ($i==3) {
		$tmp0=$s[1];
		$tmp1=$s[2];
		$tmp2=$s[3];
	}
	$code.="	movzb	".&lo($s[0]).",$out\n";
	$code.="	mov	$s[2],$tmp1\n"		if ($i!=3);
	$code.="	lea	16($key),$key\n"	if ($i==0);

	$code.="	movzb	".&hi($s[1]).",$tmp0\n";
	$code.="	mov	0($sbox,$out,8),$out\n";

	$code.="	shr	\$16,$tmp1\n";
	$code.="	mov	$s[3],$tmp2\n"		if ($i!=3);
	$code.="	xor	3($sbox,$tmp0,8),$out\n";

	$code.="	movzb	".&lo($tmp1).",$tmp1\n";
	$code.="	shr	\$24,$tmp2\n";
	$code.="	xor	4*$i($key),$out\n";

	$code.="	xor	2($sbox,$tmp1,8),$out\n";
	$code.="	xor	1($sbox,$tmp2,8),$out\n";

	$code.="	mov	$t0,$s[1]\n"		if ($i==3);
	$code.="	mov	$t1,$s[2]\n"		if ($i==3);
	$code.="	mov	$t2,$s[3]\n"		if ($i==3);
	$code.="\n";
}

# enclast(I, S0, S1, S2, S3): non-vertical alternative to enclastvert(); same
# calling convention and register shuffling as encstep(), but each table
# entry is masked to one byte lane and no key material is mixed in here (the
# caller appends the key xor separately).
sub enclast()
{ my ($i,@s)=@_;
  my $tmp0=$acc0;
  my $tmp1=$acc1;
  my $tmp2=$acc2;
  my $out=($t0,$t1,$t2,$s[0])[$i];

	if ($i==3) {
		$tmp0=$s[1];
		$tmp1=$s[2];
		$tmp2=$s[3];
	}
	$code.="	movzb	".&lo($s[0]).",$out\n";
	$code.="	mov	$s[2],$tmp1\n"		if ($i!=3);

	$code.="	mov	2($sbox,$out,8),$out\n";
	$code.="	shr	\$16,$tmp1\n";
	$code.="	mov	$s[3],$tmp2\n"		if ($i!=3);

	$code.="	and	\$0x000000ff,$out\n";
	$code.="	movzb	".&hi($s[1]).",$tmp0\n";
	$code.="	movzb	".&lo($tmp1).",$tmp1\n";
	$code.="	shr	\$24,$tmp2\n";

	$code.="	mov	0($sbox,$tmp0,8),$tmp0\n";
	$code.="	mov	0($sbox,$tmp1,8),$tmp1\n";
	$code.="	mov	2($sbox,$tmp2,8),$tmp2\n";

	$code.="	and	\$0x0000ff00,$tmp0\n";
	$code.="	and	\$0x00ff0000,$tmp1\n";
	$code.="	and	\$0xff000000,$tmp2\n";

	$code.="	xor	$tmp0,$out\n";
	$code.="	mov	$t0,$s[1]\n"		if ($i==3);
	$code.="	xor	$tmp1,$out\n";
	$code.="	mov	$t1,$s[2]\n"		if ($i==3);
	$code.="	xor	$tmp2,$out\n";
	$code.="	mov	$t2,$s[3]\n"		if ($i==3);
	$code.="\n";
}

# Emit _x86_64_AES_encrypt: initial key xor, a loop counted down from
# key->rounds-1, then the final-round code.  $verticalspin selects between the
# *vert() generators and the per-word encstep()/enclast() generators.
$code.=<<___;
.type	_x86_64_AES_encrypt,\@abi-omnipotent
.align	16
_x86_64_AES_encrypt:
.cfi_startproc
	xor	0($key),$s0			# xor with key
	xor	4($key),$s1
	xor	8($key),$s2
	xor	12($key),$s3

	mov	240($key),$rnds			# load key->rounds
	sub	\$1,$rnds
	jmp	.Lenc_loop
.align	16
.Lenc_loop:
___
	if ($verticalspin) { &encvert(); }
	else {	&encstep(0,$s0,$s1,$s2,$s3);
		&encstep(1,$s1,$s2,$s3,$s0);
		&encstep(2,$s2,$s3,$s0,$s1);
		&encstep(3,$s3,$s0,$s1,$s2);
	}
$code.=<<___;
	sub	\$1,$rnds
	jnz	.Lenc_loop
___
	if ($verticalspin) { &enclastvert(); }
	else {	&enclast(0,$s0,$s1,$s2,$s3);
		&enclast(1,$s1,$s2,$s3,$s0);
		&enclast(2,$s2,$s3,$s0,$s1);
		&enclast(3,$s3,$s0,$s1,$s2);
		$code.=<<___;
	xor	16+0($key),$s0		# xor with key
	xor	16+4($key),$s1
	xor	16+8($key),$s2
	xor	16+12($key),$s3
___
	}
$code.=<<___;
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_x86_64_AES_encrypt,.-_x86_64_AES_encrypt
___

# it's possible to implement this by shifting tN by 8, filling least
# significant byte with byte load and finally bswap-ing at the end,
# but such partial register load kills Core 2...
#
# enccompactvert(): append the byte-table substitution step used by
# _x86_64_AES_encrypt_compact.  Every lookup is a single-byte load from
# $sbox; the bytes are shifted into place and merged into $s0..$s3.
# Uses %r8d, %r9d and %r13d as extra temporaries.
sub enccompactvert()
{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d");

$code.=<<___;
	movzb	`&lo("$s0")`,$t0
	movzb	`&lo("$s1")`,$t1
	movzb	`&lo("$s2")`,$t2
	movzb	`&lo("$s3")`,$t3
	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	shr	\$16,$s2
	movzb	`&hi("$s3")`,$acc2
	movzb	($sbox,$t0,1),$t0
	movzb	($sbox,$t1,1),$t1
	movzb	($sbox,$t2,1),$t2
	movzb	($sbox,$t3,1),$t3

	movzb	($sbox,$acc0,1),$t4	#$t0
	movzb	`&hi("$s0")`,$acc0
	movzb	($sbox,$acc1,1),$t5	#$t1
	movzb	`&lo("$s2")`,$acc1
	movzb	($sbox,$acc2,1),$acc2	#$t2
	movzb	($sbox,$acc0,1),$acc0	#$t3

	shl	\$8,$t4
	shr	\$16,$s3
	shl	\$8,$t5
	xor	$t4,$t0
	shr	\$16,$s0
	movzb	`&lo("$s3")`,$t4
	shr	\$16,$s1
	xor	$t5,$t1
	shl	\$8,$acc2
	movzb	`&lo("$s0")`,$t5
	movzb	($sbox,$acc1,1),$acc1	#$t0
	xor	$acc2,$t2

	shl	\$8,$acc0
	movzb	`&lo("$s1")`,$acc2
	shl	\$16,$acc1
	xor	$acc0,$t3
	movzb	($sbox,$t4,1),$t4	#$t1
	movzb	`&hi("$s3")`,$acc0
	movzb	($sbox,$t5,1),$t5	#$t2
	xor	$acc1,$t0

	shr	\$8,$s2
	movzb	`&hi("$s0")`,$acc1
	shl	\$16,$t4
	shr	\$8,$s1
	shl	\$16,$t5
	xor	$t4,$t1
	movzb	($sbox,$acc2,1),$acc2	#$t3
	movzb	($sbox,$acc0,1),$acc0	#$t0
	movzb	($sbox,$acc1,1),$acc1	#$t1
	movzb	($sbox,$s2,1),$s3	#$t3
	movzb	($sbox,$s1,1),$s2	#$t2

	shl	\$16,$acc2
	xor	$t5,$t2
	shl	\$24,$acc0
	xor	$acc2,$t3
	shl	\$24,$acc1
	xor	$acc0,$t0
	shl	\$24,$s3
	xor	$acc1,$t1
	shl	\$24,$s2
	mov	$t0,$s0
	mov	$t1,$s1
	xor	$t2,$s2
	xor	$t3,$s3
___
}

# enctransform_ref(REG): append the single-word form of the transform that
# enctransform() applies to all four state words.  It is not called anywhere
# in the visible part of the file.
sub enctransform_ref()
{ my $sn = shift;
  my ($acc,$r2,$tmp)=("%r8d","%r9d","%r13d");

$code.=<<___;
	mov	$sn,$acc
	and	\$0x80808080,$acc
	mov	$acc,$tmp
	shr	\$7,$tmp
	lea	($sn,$sn),$r2
	sub	$tmp,$acc
	and	\$0xfefefefe,$r2
	and	\$0x1b1b1b1b,$acc
	mov	$sn,$tmp
	xor	$acc,$r2

	xor	$r2,$sn
	rol	\$24,$sn
	xor	$r2,$sn
	ror	\$16,$tmp
	xor	$tmp,$sn
	ror	\$8,$tmp
	xor	$tmp,$sn
___
}

# unlike decrypt case it does not pay off to parallelize enctransform
#
# enctransform(): append the transform of $s0..$s3 used between rounds of
# _x86_64_AES_encrypt_compact, processing the words two at a time, with four
# loads from $sbox (offsets 0, 64, 128, 192) interleaved as a prefetch.
sub enctransform()
{ my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");

$code.=<<___;
	mov	\$0x80808080,$t0
	mov	\$0x80808080,$t1
	and	$s0,$t0
	and	$s1,$t1
	mov	$t0,$acc0
	mov	$t1,$acc1
	shr	\$7,$t0
	lea	($s0,$s0),$r20
	shr	\$7,$t1
	lea	($s1,$s1),$r21
	sub	$t0,$acc0
	sub	$t1,$acc1
	and	\$0xfefefefe,$r20
	and	\$0xfefefefe,$r21
	and	\$0x1b1b1b1b,$acc0
	and	\$0x1b1b1b1b,$acc1
	mov	$s0,$t0
	mov	$s1,$t1
	xor	$acc0,$r20
	xor	$acc1,$r21

	xor	$r20,$s0
	xor	$r21,$s1
	mov	\$0x80808080,$t2
	rol	\$24,$s0
	mov	\$0x80808080,$t3
	rol	\$24,$s1
	and	$s2,$t2
	and	$s3,$t3
	xor	$r20,$s0
	xor	$r21,$s1
	mov	$t2,$acc0
	ror	\$16,$t0
	mov	$t3,$acc1
	ror	\$16,$t1
	lea	($s2,$s2),$r20
	shr	\$7,$t2
	xor	$t0,$s0
	shr	\$7,$t3
	xor	$t1,$s1
	ror	\$8,$t0
	lea	($s3,$s3),$r21
	ror	\$8,$t1
	sub	$t2,$acc0
	sub	$t3,$acc1
	xor	$t0,$s0
	xor	$t1,$s1

	and	\$0xfefefefe,$r20
	and	\$0xfefefefe,$r21
	and	\$0x1b1b1b1b,$acc0
	and	\$0x1b1b1b1b,$acc1
	mov	$s2,$t2
	mov	$s3,$t3
	xor	$acc0,$r20
	xor	$acc1,$r21

	ror	\$16,$t2
	xor	$r20,$s2
	ror	\$16,$t3
	xor	$r21,$s3
	rol	\$24,$s2
	mov	0($sbox),$acc0			# prefetch Te4
	rol	\$24,$s3
	xor	$r20,$s2
	mov	64($sbox),$acc1
	xor	$r21,$s3
	mov	128($sbox),$r20
	xor	$t2,$s2
	ror	\$8,$t2
	xor	$t3,$s3
	ror	\$8,$t3
	xor	$t2,$s2
	mov	192($sbox),$r21
	xor	$t3,$s3
___
}

# Emit _x86_64_AES_encrypt_compact: prefetch the table at $sbox, then loop
# (key xor, enccompactvert, enctransform) until $key reaches the pointer
# stored at 16(%rsp), and finish with one more key xor.
$code.=<<___;
.type	_x86_64_AES_encrypt_compact,\@abi-omnipotent
.align	16
_x86_64_AES_encrypt_compact:
.cfi_startproc
	lea	128($sbox),$inp			# size optimization
	mov	0-128($inp),$acc1		# prefetch Te4
	mov	32-128($inp),$acc2
	mov	64-128($inp),$t0
	mov	96-128($inp),$t1
	mov	128-128($inp),$acc1
	mov	160-128($inp),$acc2
	mov	192-128($inp),$t0
	mov	224-128($inp),$t1
	jmp	.Lenc_loop_compact
.align	16
.Lenc_loop_compact:
		xor	0($key),$s0		# xor with key
		xor	4($key),$s1
		xor	8($key),$s2
		xor	12($key),$s3
		lea	16($key),$key
___
		&enccompactvert();
$code.=<<___;
		cmp	16(%rsp),$key
		je	.Lenc_compact_done
___
		&enctransform();
$code.=<<___;
	jmp	.Lenc_loop_compact
.align	16
.Lenc_compact_done:
	xor	0($key),$s0
	xor	4($key),$s1
	xor	8($key),$s2
	xor	12($key),$s3
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
___

# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Emit the public entry point (also exported as the hidden alias
# asm_AES_encrypt).  It saves six registers, builds a 64-byte-aligned frame
# positioned relative to the key schedule, loads the 16-byte input, selects
# one of the table copies at .LAES_Te+2048, calls
# _x86_64_AES_encrypt_compact, stores the 16-byte result and restores the
# saved registers from the original stack pointer.
$code.=<<___;
.globl	AES_encrypt
.type	AES_encrypt,\@function,3
.align	16
.globl	asm_AES_encrypt
.hidden	asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
.cfi_startproc
	endbranch
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15

	# allocate frame "above" key schedule
	lea	-63(%rdx),%rcx	# %rdx is key argument
	and	\$-64,%rsp
	sub	%rsp,%rcx
	neg	%rcx
	and	\$0x3c0,%rcx
	sub	%rcx,%rsp
	sub	\$32,%rsp

	mov	%rsi,16(%rsp)	# save out
	mov	%rax,24(%rsp)	# save original stack pointer
.cfi_cfa_expression	%rsp+24,deref,+8
.Lenc_prologue:

	mov	%rdx,$key
	mov	240($key),$rnds	# load rounds

	mov	0(%rdi),$s0	# load input vector
	mov	4(%rdi),$s1
	mov	8(%rdi),$s2
	mov	12(%rdi),$s3

	shl	\$4,$rnds
	lea	($key,$rnds),%rbp
	mov	$key,(%rsp)	# key schedule
	mov	%rbp,8(%rsp)	# end of key schedule

	# pick Te4 copy which can't "overlap" with stack frame or key schedule
	lea	.LAES_Te+2048(%rip),$sbox
	lea	768(%rsp),%rbp
	sub	$sbox,%rbp
	and	\$0x300,%rbp
	lea	($sbox,%rbp),$sbox

	call	_x86_64_AES_encrypt_compact

	mov	16(%rsp),$out	# restore out
	mov	24(%rsp),%rsi	# restore saved stack pointer
.cfi_def_cfa	%rsi,8
	mov	$s0,0($out)	# write output vector
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lenc_epilogue:
	ret
.cfi_endproc
.size	AES_encrypt,.-AES_encrypt
___

#------------------------------------------------------------------#

# decvert(): decrypt-side counterpart of encvert().  Same table-lookup
# structure, but the state bytes feeding each output word are taken from
# different registers.  Uses %r8d as a fourth temporary, which overwrites
# $inp.
sub decvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	# favor 3-way issue Opteron pipeline...
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	mov	0($sbox,$acc0,8),$t0
	mov	0($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t2

	movzb	`&hi("$s3")`,$acc0
	movzb	`&hi("$s0")`,$acc1
	movzb	`&lo("$s3")`,$acc2
	xor	3($sbox,$acc0,8),$t0
	xor	3($sbox,$acc1,8),$t1
	mov	0($sbox,$acc2,8),$t3

	movzb	`&hi("$s1")`,$acc0
	shr	\$16,$s0
	movzb	`&hi("$s2")`,$acc2
	xor	3($sbox,$acc0,8),$t2
	shr	\$16,$s3
	xor	3($sbox,$acc2,8),$t3

	shr	\$16,$s1
	lea	16($key),$key
	shr	\$16,$s2

	movzb	`&lo("$s2")`,$acc0
	movzb	`&lo("$s3")`,$acc1
	movzb	`&lo("$s0")`,$acc2
	xor	2($sbox,$acc0,8),$t0
	xor	2($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t2

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	movzb	`&lo("$s1")`,$acc2
	xor	1($sbox,$acc0,8),$t0
	xor	1($sbox,$acc1,8),$t1
	xor	2($sbox,$acc2,8),$t3

	movzb	`&hi("$s3")`,$acc0
	mov	12($key),$s3
	movzb	`&hi("$s0")`,$acc2
	xor	1($sbox,$acc0,8),$t2
	mov	0($key),$s0
	xor	1($sbox,$acc2,8),$t3

	xor	$t0,$s0
	mov	4($key),$s1
	mov	8($key),$s2
	xor	$t2,$s2
	xor	$t1,$s1
	xor	$t3,$s3
___
}

sub declastvert()
{ my $t3="%r8d";	# zaps $inp!

$code.=<<___;
	lea	2048($sbox),$sbox	# size optimization
	movzb	`&lo("$s0")`,$acc0
	movzb	`&lo("$s1")`,$acc1
	movzb	`&lo("$s2")`,$acc2
	movzb	($sbox,$acc0,1),$t0
	movzb	($sbox,$acc1,1),$t1
	movzb	($sbox,$acc2,1),$t2

	movzb	`&lo("$s3")`,$acc0
	movzb	`&hi("$s3")`,$acc1
	movzb	`&hi("$s0")`,$acc2
	movzb	($sbox,$acc0,1),$t3
	movzb	($sbox,$acc1,1),$acc1	#$t0
	movzb	($sbox,$acc2,1),$acc2	#$t1

	shl	\$8,$acc1
	shl	\$8,$acc2

	xor	$acc1,$t0
	xor	$acc2,$t1
	shr	\$16,$s3

	movzb	`&hi("$s1")`,$acc0
	movzb	`&hi("$s2")`,$acc1
	shr	\$16,$s0
	movzb	($sbox,$acc0,1),$acc0	#$t2
	movzb	($sbox,$acc1,1),$acc1	#$t3

	shl	\$8,$acc0
784e1051a39Sopenharmony_ci shl \$8,$acc1 785e1051a39Sopenharmony_ci shr \$16,$s1 786e1051a39Sopenharmony_ci xor $acc0,$t2 787e1051a39Sopenharmony_ci xor $acc1,$t3 788e1051a39Sopenharmony_ci shr \$16,$s2 789e1051a39Sopenharmony_ci 790e1051a39Sopenharmony_ci movzb `&lo("$s2")`,$acc0 791e1051a39Sopenharmony_ci movzb `&lo("$s3")`,$acc1 792e1051a39Sopenharmony_ci movzb `&lo("$s0")`,$acc2 793e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$acc0 #$t0 794e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$acc1 #$t1 795e1051a39Sopenharmony_ci movzb ($sbox,$acc2,1),$acc2 #$t2 796e1051a39Sopenharmony_ci 797e1051a39Sopenharmony_ci shl \$16,$acc0 798e1051a39Sopenharmony_ci shl \$16,$acc1 799e1051a39Sopenharmony_ci shl \$16,$acc2 800e1051a39Sopenharmony_ci 801e1051a39Sopenharmony_ci xor $acc0,$t0 802e1051a39Sopenharmony_ci xor $acc1,$t1 803e1051a39Sopenharmony_ci xor $acc2,$t2 804e1051a39Sopenharmony_ci 805e1051a39Sopenharmony_ci movzb `&lo("$s1")`,$acc0 806e1051a39Sopenharmony_ci movzb `&hi("$s1")`,$acc1 807e1051a39Sopenharmony_ci movzb `&hi("$s2")`,$acc2 808e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$acc0 #$t3 809e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$acc1 #$t0 810e1051a39Sopenharmony_ci movzb ($sbox,$acc2,1),$acc2 #$t1 811e1051a39Sopenharmony_ci 812e1051a39Sopenharmony_ci shl \$16,$acc0 813e1051a39Sopenharmony_ci shl \$24,$acc1 814e1051a39Sopenharmony_ci shl \$24,$acc2 815e1051a39Sopenharmony_ci 816e1051a39Sopenharmony_ci xor $acc0,$t3 817e1051a39Sopenharmony_ci xor $acc1,$t0 818e1051a39Sopenharmony_ci xor $acc2,$t1 819e1051a39Sopenharmony_ci 820e1051a39Sopenharmony_ci movzb `&hi("$s3")`,$acc0 821e1051a39Sopenharmony_ci movzb `&hi("$s0")`,$acc1 822e1051a39Sopenharmony_ci mov 16+12($key),$s3 823e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$acc0 #$t2 824e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$acc1 #$t3 825e1051a39Sopenharmony_ci mov 16+0($key),$s0 826e1051a39Sopenharmony_ci 827e1051a39Sopenharmony_ci shl \$24,$acc0 828e1051a39Sopenharmony_ci shl \$24,$acc1 
829e1051a39Sopenharmony_ci 830e1051a39Sopenharmony_ci xor $acc0,$t2 831e1051a39Sopenharmony_ci xor $acc1,$t3 832e1051a39Sopenharmony_ci 833e1051a39Sopenharmony_ci mov 16+4($key),$s1 834e1051a39Sopenharmony_ci mov 16+8($key),$s2 835e1051a39Sopenharmony_ci lea -2048($sbox),$sbox 836e1051a39Sopenharmony_ci xor $t0,$s0 837e1051a39Sopenharmony_ci xor $t1,$s1 838e1051a39Sopenharmony_ci xor $t2,$s2 839e1051a39Sopenharmony_ci xor $t3,$s3 840e1051a39Sopenharmony_ci___ 841e1051a39Sopenharmony_ci} 842e1051a39Sopenharmony_ci 843e1051a39Sopenharmony_cisub decstep() 844e1051a39Sopenharmony_ci{ my ($i,@s) = @_; 845e1051a39Sopenharmony_ci my $tmp0=$acc0; 846e1051a39Sopenharmony_ci my $tmp1=$acc1; 847e1051a39Sopenharmony_ci my $tmp2=$acc2; 848e1051a39Sopenharmony_ci my $out=($t0,$t1,$t2,$s[0])[$i]; 849e1051a39Sopenharmony_ci 850e1051a39Sopenharmony_ci $code.=" mov $s[0],$out\n" if ($i!=3); 851e1051a39Sopenharmony_ci $tmp1=$s[2] if ($i==3); 852e1051a39Sopenharmony_ci $code.=" mov $s[2],$tmp1\n" if ($i!=3); 853e1051a39Sopenharmony_ci $code.=" and \$0xFF,$out\n"; 854e1051a39Sopenharmony_ci 855e1051a39Sopenharmony_ci $code.=" mov 0($sbox,$out,8),$out\n"; 856e1051a39Sopenharmony_ci $code.=" shr \$16,$tmp1\n"; 857e1051a39Sopenharmony_ci $tmp2=$s[3] if ($i==3); 858e1051a39Sopenharmony_ci $code.=" mov $s[3],$tmp2\n" if ($i!=3); 859e1051a39Sopenharmony_ci 860e1051a39Sopenharmony_ci $tmp0=$s[1] if ($i==3); 861e1051a39Sopenharmony_ci $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 862e1051a39Sopenharmony_ci $code.=" and \$0xFF,$tmp1\n"; 863e1051a39Sopenharmony_ci $code.=" shr \$24,$tmp2\n"; 864e1051a39Sopenharmony_ci 865e1051a39Sopenharmony_ci $code.=" xor 3($sbox,$tmp0,8),$out\n"; 866e1051a39Sopenharmony_ci $code.=" xor 2($sbox,$tmp1,8),$out\n"; 867e1051a39Sopenharmony_ci $code.=" xor 1($sbox,$tmp2,8),$out\n"; 868e1051a39Sopenharmony_ci 869e1051a39Sopenharmony_ci $code.=" mov $t2,$s[1]\n" if ($i==3); 870e1051a39Sopenharmony_ci $code.=" mov $t1,$s[2]\n" if ($i==3); 871e1051a39Sopenharmony_ci $code.=" 
mov $t0,$s[3]\n" if ($i==3); 872e1051a39Sopenharmony_ci $code.="\n"; 873e1051a39Sopenharmony_ci} 874e1051a39Sopenharmony_ci 875e1051a39Sopenharmony_cisub declast() 876e1051a39Sopenharmony_ci{ my ($i,@s)=@_; 877e1051a39Sopenharmony_ci my $tmp0=$acc0; 878e1051a39Sopenharmony_ci my $tmp1=$acc1; 879e1051a39Sopenharmony_ci my $tmp2=$acc2; 880e1051a39Sopenharmony_ci my $out=($t0,$t1,$t2,$s[0])[$i]; 881e1051a39Sopenharmony_ci 882e1051a39Sopenharmony_ci $code.=" mov $s[0],$out\n" if ($i!=3); 883e1051a39Sopenharmony_ci $tmp1=$s[2] if ($i==3); 884e1051a39Sopenharmony_ci $code.=" mov $s[2],$tmp1\n" if ($i!=3); 885e1051a39Sopenharmony_ci $code.=" and \$0xFF,$out\n"; 886e1051a39Sopenharmony_ci 887e1051a39Sopenharmony_ci $code.=" movzb 2048($sbox,$out,1),$out\n"; 888e1051a39Sopenharmony_ci $code.=" shr \$16,$tmp1\n"; 889e1051a39Sopenharmony_ci $tmp2=$s[3] if ($i==3); 890e1051a39Sopenharmony_ci $code.=" mov $s[3],$tmp2\n" if ($i!=3); 891e1051a39Sopenharmony_ci 892e1051a39Sopenharmony_ci $tmp0=$s[1] if ($i==3); 893e1051a39Sopenharmony_ci $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 894e1051a39Sopenharmony_ci $code.=" and \$0xFF,$tmp1\n"; 895e1051a39Sopenharmony_ci $code.=" shr \$24,$tmp2\n"; 896e1051a39Sopenharmony_ci 897e1051a39Sopenharmony_ci $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n"; 898e1051a39Sopenharmony_ci $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n"; 899e1051a39Sopenharmony_ci $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n"; 900e1051a39Sopenharmony_ci 901e1051a39Sopenharmony_ci $code.=" shl \$8,$tmp0\n"; 902e1051a39Sopenharmony_ci $code.=" shl \$16,$tmp1\n"; 903e1051a39Sopenharmony_ci $code.=" shl \$24,$tmp2\n"; 904e1051a39Sopenharmony_ci 905e1051a39Sopenharmony_ci $code.=" xor $tmp0,$out\n"; 906e1051a39Sopenharmony_ci $code.=" mov $t2,$s[1]\n" if ($i==3); 907e1051a39Sopenharmony_ci $code.=" xor $tmp1,$out\n"; 908e1051a39Sopenharmony_ci $code.=" mov $t1,$s[2]\n" if ($i==3); 909e1051a39Sopenharmony_ci $code.=" xor $tmp2,$out\n"; 910e1051a39Sopenharmony_ci $code.=" mov $t0,$s[3]\n" 
if ($i==3); 911e1051a39Sopenharmony_ci $code.="\n"; 912e1051a39Sopenharmony_ci} 913e1051a39Sopenharmony_ci 914e1051a39Sopenharmony_ci$code.=<<___; 915e1051a39Sopenharmony_ci.type _x86_64_AES_decrypt,\@abi-omnipotent 916e1051a39Sopenharmony_ci.align 16 917e1051a39Sopenharmony_ci_x86_64_AES_decrypt: 918e1051a39Sopenharmony_ci.cfi_startproc 919e1051a39Sopenharmony_ci xor 0($key),$s0 # xor with key 920e1051a39Sopenharmony_ci xor 4($key),$s1 921e1051a39Sopenharmony_ci xor 8($key),$s2 922e1051a39Sopenharmony_ci xor 12($key),$s3 923e1051a39Sopenharmony_ci 924e1051a39Sopenharmony_ci mov 240($key),$rnds # load key->rounds 925e1051a39Sopenharmony_ci sub \$1,$rnds 926e1051a39Sopenharmony_ci jmp .Ldec_loop 927e1051a39Sopenharmony_ci.align 16 928e1051a39Sopenharmony_ci.Ldec_loop: 929e1051a39Sopenharmony_ci___ 930e1051a39Sopenharmony_ci if ($verticalspin) { &decvert(); } 931e1051a39Sopenharmony_ci else { &decstep(0,$s0,$s3,$s2,$s1); 932e1051a39Sopenharmony_ci &decstep(1,$s1,$s0,$s3,$s2); 933e1051a39Sopenharmony_ci &decstep(2,$s2,$s1,$s0,$s3); 934e1051a39Sopenharmony_ci &decstep(3,$s3,$s2,$s1,$s0); 935e1051a39Sopenharmony_ci $code.=<<___; 936e1051a39Sopenharmony_ci lea 16($key),$key 937e1051a39Sopenharmony_ci xor 0($key),$s0 # xor with key 938e1051a39Sopenharmony_ci xor 4($key),$s1 939e1051a39Sopenharmony_ci xor 8($key),$s2 940e1051a39Sopenharmony_ci xor 12($key),$s3 941e1051a39Sopenharmony_ci___ 942e1051a39Sopenharmony_ci } 943e1051a39Sopenharmony_ci$code.=<<___; 944e1051a39Sopenharmony_ci sub \$1,$rnds 945e1051a39Sopenharmony_ci jnz .Ldec_loop 946e1051a39Sopenharmony_ci___ 947e1051a39Sopenharmony_ci if ($verticalspin) { &declastvert(); } 948e1051a39Sopenharmony_ci else { &declast(0,$s0,$s3,$s2,$s1); 949e1051a39Sopenharmony_ci &declast(1,$s1,$s0,$s3,$s2); 950e1051a39Sopenharmony_ci &declast(2,$s2,$s1,$s0,$s3); 951e1051a39Sopenharmony_ci &declast(3,$s3,$s2,$s1,$s0); 952e1051a39Sopenharmony_ci $code.=<<___; 953e1051a39Sopenharmony_ci xor 16+0($key),$s0 # xor with key 
954e1051a39Sopenharmony_ci xor 16+4($key),$s1 955e1051a39Sopenharmony_ci xor 16+8($key),$s2 956e1051a39Sopenharmony_ci xor 16+12($key),$s3 957e1051a39Sopenharmony_ci___ 958e1051a39Sopenharmony_ci } 959e1051a39Sopenharmony_ci$code.=<<___; 960e1051a39Sopenharmony_ci .byte 0xf3,0xc3 # rep ret 961e1051a39Sopenharmony_ci.cfi_endproc 962e1051a39Sopenharmony_ci.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt 963e1051a39Sopenharmony_ci___ 964e1051a39Sopenharmony_ci 965e1051a39Sopenharmony_cisub deccompactvert() 966e1051a39Sopenharmony_ci{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d"); 967e1051a39Sopenharmony_ci 968e1051a39Sopenharmony_ci$code.=<<___; 969e1051a39Sopenharmony_ci movzb `&lo("$s0")`,$t0 970e1051a39Sopenharmony_ci movzb `&lo("$s1")`,$t1 971e1051a39Sopenharmony_ci movzb `&lo("$s2")`,$t2 972e1051a39Sopenharmony_ci movzb `&lo("$s3")`,$t3 973e1051a39Sopenharmony_ci movzb `&hi("$s3")`,$acc0 974e1051a39Sopenharmony_ci movzb `&hi("$s0")`,$acc1 975e1051a39Sopenharmony_ci shr \$16,$s3 976e1051a39Sopenharmony_ci movzb `&hi("$s1")`,$acc2 977e1051a39Sopenharmony_ci movzb ($sbox,$t0,1),$t0 978e1051a39Sopenharmony_ci movzb ($sbox,$t1,1),$t1 979e1051a39Sopenharmony_ci movzb ($sbox,$t2,1),$t2 980e1051a39Sopenharmony_ci movzb ($sbox,$t3,1),$t3 981e1051a39Sopenharmony_ci 982e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$t4 #$t0 983e1051a39Sopenharmony_ci movzb `&hi("$s2")`,$acc0 984e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$t5 #$t1 985e1051a39Sopenharmony_ci movzb ($sbox,$acc2,1),$acc2 #$t2 986e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$acc0 #$t3 987e1051a39Sopenharmony_ci 988e1051a39Sopenharmony_ci shr \$16,$s2 989e1051a39Sopenharmony_ci shl \$8,$t5 990e1051a39Sopenharmony_ci shl \$8,$t4 991e1051a39Sopenharmony_ci movzb `&lo("$s2")`,$acc1 992e1051a39Sopenharmony_ci shr \$16,$s0 993e1051a39Sopenharmony_ci xor $t4,$t0 994e1051a39Sopenharmony_ci shr \$16,$s1 995e1051a39Sopenharmony_ci movzb `&lo("$s3")`,$t4 996e1051a39Sopenharmony_ci 997e1051a39Sopenharmony_ci shl \$8,$acc2 
998e1051a39Sopenharmony_ci xor $t5,$t1 999e1051a39Sopenharmony_ci shl \$8,$acc0 1000e1051a39Sopenharmony_ci movzb `&lo("$s0")`,$t5 1001e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$acc1 #$t0 1002e1051a39Sopenharmony_ci xor $acc2,$t2 1003e1051a39Sopenharmony_ci movzb `&lo("$s1")`,$acc2 1004e1051a39Sopenharmony_ci 1005e1051a39Sopenharmony_ci shl \$16,$acc1 1006e1051a39Sopenharmony_ci xor $acc0,$t3 1007e1051a39Sopenharmony_ci movzb ($sbox,$t4,1),$t4 #$t1 1008e1051a39Sopenharmony_ci movzb `&hi("$s1")`,$acc0 1009e1051a39Sopenharmony_ci movzb ($sbox,$acc2,1),$acc2 #$t3 1010e1051a39Sopenharmony_ci xor $acc1,$t0 1011e1051a39Sopenharmony_ci movzb ($sbox,$t5,1),$t5 #$t2 1012e1051a39Sopenharmony_ci movzb `&hi("$s2")`,$acc1 1013e1051a39Sopenharmony_ci 1014e1051a39Sopenharmony_ci shl \$16,$acc2 1015e1051a39Sopenharmony_ci shl \$16,$t4 1016e1051a39Sopenharmony_ci shl \$16,$t5 1017e1051a39Sopenharmony_ci xor $acc2,$t3 1018e1051a39Sopenharmony_ci movzb `&hi("$s3")`,$acc2 1019e1051a39Sopenharmony_ci xor $t4,$t1 1020e1051a39Sopenharmony_ci shr \$8,$s0 1021e1051a39Sopenharmony_ci xor $t5,$t2 1022e1051a39Sopenharmony_ci 1023e1051a39Sopenharmony_ci movzb ($sbox,$acc0,1),$acc0 #$t0 1024e1051a39Sopenharmony_ci movzb ($sbox,$acc1,1),$s1 #$t1 1025e1051a39Sopenharmony_ci movzb ($sbox,$acc2,1),$s2 #$t2 1026e1051a39Sopenharmony_ci movzb ($sbox,$s0,1),$s3 #$t3 1027e1051a39Sopenharmony_ci 1028e1051a39Sopenharmony_ci mov $t0,$s0 1029e1051a39Sopenharmony_ci shl \$24,$acc0 1030e1051a39Sopenharmony_ci shl \$24,$s1 1031e1051a39Sopenharmony_ci shl \$24,$s2 1032e1051a39Sopenharmony_ci xor $acc0,$s0 1033e1051a39Sopenharmony_ci shl \$24,$s3 1034e1051a39Sopenharmony_ci xor $t1,$s1 1035e1051a39Sopenharmony_ci xor $t2,$s2 1036e1051a39Sopenharmony_ci xor $t3,$s3 1037e1051a39Sopenharmony_ci___ 1038e1051a39Sopenharmony_ci} 1039e1051a39Sopenharmony_ci 1040e1051a39Sopenharmony_ci# parallelized version! 
input is pair of 64-bit values: %rax=s1.s0 1041e1051a39Sopenharmony_ci# and %rcx=s3.s2, output is four 32-bit values in %eax=s0, %ebx=s1, 1042e1051a39Sopenharmony_ci# %ecx=s2 and %edx=s3. 1043e1051a39Sopenharmony_cisub dectransform() 1044e1051a39Sopenharmony_ci{ my ($tp10,$tp20,$tp40,$tp80,$acc0)=("%rax","%r8", "%r9", "%r10","%rbx"); 1045e1051a39Sopenharmony_ci my ($tp18,$tp28,$tp48,$tp88,$acc8)=("%rcx","%r11","%r12","%r13","%rdx"); 1046e1051a39Sopenharmony_ci my $prefetch = shift; 1047e1051a39Sopenharmony_ci 1048e1051a39Sopenharmony_ci$code.=<<___; 1049e1051a39Sopenharmony_ci mov $mask80,$tp40 1050e1051a39Sopenharmony_ci mov $mask80,$tp48 1051e1051a39Sopenharmony_ci and $tp10,$tp40 1052e1051a39Sopenharmony_ci and $tp18,$tp48 1053e1051a39Sopenharmony_ci mov $tp40,$acc0 1054e1051a39Sopenharmony_ci mov $tp48,$acc8 1055e1051a39Sopenharmony_ci shr \$7,$tp40 1056e1051a39Sopenharmony_ci lea ($tp10,$tp10),$tp20 1057e1051a39Sopenharmony_ci shr \$7,$tp48 1058e1051a39Sopenharmony_ci lea ($tp18,$tp18),$tp28 1059e1051a39Sopenharmony_ci sub $tp40,$acc0 1060e1051a39Sopenharmony_ci sub $tp48,$acc8 1061e1051a39Sopenharmony_ci and $maskfe,$tp20 1062e1051a39Sopenharmony_ci and $maskfe,$tp28 1063e1051a39Sopenharmony_ci and $mask1b,$acc0 1064e1051a39Sopenharmony_ci and $mask1b,$acc8 1065e1051a39Sopenharmony_ci xor $acc0,$tp20 1066e1051a39Sopenharmony_ci xor $acc8,$tp28 1067e1051a39Sopenharmony_ci mov $mask80,$tp80 1068e1051a39Sopenharmony_ci mov $mask80,$tp88 1069e1051a39Sopenharmony_ci 1070e1051a39Sopenharmony_ci and $tp20,$tp80 1071e1051a39Sopenharmony_ci and $tp28,$tp88 1072e1051a39Sopenharmony_ci mov $tp80,$acc0 1073e1051a39Sopenharmony_ci mov $tp88,$acc8 1074e1051a39Sopenharmony_ci shr \$7,$tp80 1075e1051a39Sopenharmony_ci lea ($tp20,$tp20),$tp40 1076e1051a39Sopenharmony_ci shr \$7,$tp88 1077e1051a39Sopenharmony_ci lea ($tp28,$tp28),$tp48 1078e1051a39Sopenharmony_ci sub $tp80,$acc0 1079e1051a39Sopenharmony_ci sub $tp88,$acc8 1080e1051a39Sopenharmony_ci and $maskfe,$tp40 
1081e1051a39Sopenharmony_ci and $maskfe,$tp48 1082e1051a39Sopenharmony_ci and $mask1b,$acc0 1083e1051a39Sopenharmony_ci and $mask1b,$acc8 1084e1051a39Sopenharmony_ci xor $acc0,$tp40 1085e1051a39Sopenharmony_ci xor $acc8,$tp48 1086e1051a39Sopenharmony_ci mov $mask80,$tp80 1087e1051a39Sopenharmony_ci mov $mask80,$tp88 1088e1051a39Sopenharmony_ci 1089e1051a39Sopenharmony_ci and $tp40,$tp80 1090e1051a39Sopenharmony_ci and $tp48,$tp88 1091e1051a39Sopenharmony_ci mov $tp80,$acc0 1092e1051a39Sopenharmony_ci mov $tp88,$acc8 1093e1051a39Sopenharmony_ci shr \$7,$tp80 1094e1051a39Sopenharmony_ci xor $tp10,$tp20 # tp2^=tp1 1095e1051a39Sopenharmony_ci shr \$7,$tp88 1096e1051a39Sopenharmony_ci xor $tp18,$tp28 # tp2^=tp1 1097e1051a39Sopenharmony_ci sub $tp80,$acc0 1098e1051a39Sopenharmony_ci sub $tp88,$acc8 1099e1051a39Sopenharmony_ci lea ($tp40,$tp40),$tp80 1100e1051a39Sopenharmony_ci lea ($tp48,$tp48),$tp88 1101e1051a39Sopenharmony_ci xor $tp10,$tp40 # tp4^=tp1 1102e1051a39Sopenharmony_ci xor $tp18,$tp48 # tp4^=tp1 1103e1051a39Sopenharmony_ci and $maskfe,$tp80 1104e1051a39Sopenharmony_ci and $maskfe,$tp88 1105e1051a39Sopenharmony_ci and $mask1b,$acc0 1106e1051a39Sopenharmony_ci and $mask1b,$acc8 1107e1051a39Sopenharmony_ci xor $acc0,$tp80 1108e1051a39Sopenharmony_ci xor $acc8,$tp88 1109e1051a39Sopenharmony_ci 1110e1051a39Sopenharmony_ci xor $tp80,$tp10 # tp1^=tp8 1111e1051a39Sopenharmony_ci xor $tp88,$tp18 # tp1^=tp8 1112e1051a39Sopenharmony_ci xor $tp80,$tp20 # tp2^tp1^=tp8 1113e1051a39Sopenharmony_ci xor $tp88,$tp28 # tp2^tp1^=tp8 1114e1051a39Sopenharmony_ci mov $tp10,$acc0 1115e1051a39Sopenharmony_ci mov $tp18,$acc8 1116e1051a39Sopenharmony_ci xor $tp80,$tp40 # tp4^tp1^=tp8 1117e1051a39Sopenharmony_ci shr \$32,$acc0 1118e1051a39Sopenharmony_ci xor $tp88,$tp48 # tp4^tp1^=tp8 1119e1051a39Sopenharmony_ci shr \$32,$acc8 1120e1051a39Sopenharmony_ci xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1 1121e1051a39Sopenharmony_ci rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8) 
1122e1051a39Sopenharmony_ci xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1 1123e1051a39Sopenharmony_ci rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8) 1124e1051a39Sopenharmony_ci xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 1125e1051a39Sopenharmony_ci rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8) 1126e1051a39Sopenharmony_ci xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 1127e1051a39Sopenharmony_ci 1128e1051a39Sopenharmony_ci rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8) 1129e1051a39Sopenharmony_ci xor `&LO("$tp80")`,`&LO("$tp10")` 1130e1051a39Sopenharmony_ci shr \$32,$tp80 1131e1051a39Sopenharmony_ci xor `&LO("$tp88")`,`&LO("$tp18")` 1132e1051a39Sopenharmony_ci shr \$32,$tp88 1133e1051a39Sopenharmony_ci xor `&LO("$tp80")`,`&LO("$acc0")` 1134e1051a39Sopenharmony_ci xor `&LO("$tp88")`,`&LO("$acc8")` 1135e1051a39Sopenharmony_ci 1136e1051a39Sopenharmony_ci mov $tp20,$tp80 1137e1051a39Sopenharmony_ci rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24) 1138e1051a39Sopenharmony_ci mov $tp28,$tp88 1139e1051a39Sopenharmony_ci rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24) 1140e1051a39Sopenharmony_ci shr \$32,$tp80 1141e1051a39Sopenharmony_ci xor `&LO("$tp20")`,`&LO("$tp10")` 1142e1051a39Sopenharmony_ci shr \$32,$tp88 1143e1051a39Sopenharmony_ci xor `&LO("$tp28")`,`&LO("$tp18")` 1144e1051a39Sopenharmony_ci rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24) 1145e1051a39Sopenharmony_ci mov $tp40,$tp20 1146e1051a39Sopenharmony_ci rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24) 1147e1051a39Sopenharmony_ci mov $tp48,$tp28 1148e1051a39Sopenharmony_ci shr \$32,$tp20 1149e1051a39Sopenharmony_ci xor `&LO("$tp80")`,`&LO("$acc0")` 1150e1051a39Sopenharmony_ci shr \$32,$tp28 1151e1051a39Sopenharmony_ci xor `&LO("$tp88")`,`&LO("$acc8")` 1152e1051a39Sopenharmony_ci 1153e1051a39Sopenharmony_ci `"mov 0($sbox),$mask80" if ($prefetch)` 1154e1051a39Sopenharmony_ci rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16) 1155e1051a39Sopenharmony_ci `"mov 64($sbox),$maskfe" if ($prefetch)` 
1156e1051a39Sopenharmony_ci rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16) 1157e1051a39Sopenharmony_ci `"mov 128($sbox),$mask1b" if ($prefetch)` 1158e1051a39Sopenharmony_ci rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16) 1159e1051a39Sopenharmony_ci `"mov 192($sbox),$tp80" if ($prefetch)` 1160e1051a39Sopenharmony_ci xor `&LO("$tp40")`,`&LO("$tp10")` 1161e1051a39Sopenharmony_ci rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16) 1162e1051a39Sopenharmony_ci xor `&LO("$tp48")`,`&LO("$tp18")` 1163e1051a39Sopenharmony_ci `"mov 256($sbox),$tp88" if ($prefetch)` 1164e1051a39Sopenharmony_ci xor `&LO("$tp20")`,`&LO("$acc0")` 1165e1051a39Sopenharmony_ci xor `&LO("$tp28")`,`&LO("$acc8")` 1166e1051a39Sopenharmony_ci___ 1167e1051a39Sopenharmony_ci} 1168e1051a39Sopenharmony_ci 1169e1051a39Sopenharmony_ci$code.=<<___; 1170e1051a39Sopenharmony_ci.type _x86_64_AES_decrypt_compact,\@abi-omnipotent 1171e1051a39Sopenharmony_ci.align 16 1172e1051a39Sopenharmony_ci_x86_64_AES_decrypt_compact: 1173e1051a39Sopenharmony_ci.cfi_startproc 1174e1051a39Sopenharmony_ci lea 128($sbox),$inp # size optimization 1175e1051a39Sopenharmony_ci mov 0-128($inp),$acc1 # prefetch Td4 1176e1051a39Sopenharmony_ci mov 32-128($inp),$acc2 1177e1051a39Sopenharmony_ci mov 64-128($inp),$t0 1178e1051a39Sopenharmony_ci mov 96-128($inp),$t1 1179e1051a39Sopenharmony_ci mov 128-128($inp),$acc1 1180e1051a39Sopenharmony_ci mov 160-128($inp),$acc2 1181e1051a39Sopenharmony_ci mov 192-128($inp),$t0 1182e1051a39Sopenharmony_ci mov 224-128($inp),$t1 1183e1051a39Sopenharmony_ci jmp .Ldec_loop_compact 1184e1051a39Sopenharmony_ci 1185e1051a39Sopenharmony_ci.align 16 1186e1051a39Sopenharmony_ci.Ldec_loop_compact: 1187e1051a39Sopenharmony_ci xor 0($key),$s0 # xor with key 1188e1051a39Sopenharmony_ci xor 4($key),$s1 1189e1051a39Sopenharmony_ci xor 8($key),$s2 1190e1051a39Sopenharmony_ci xor 12($key),$s3 1191e1051a39Sopenharmony_ci lea 16($key),$key 1192e1051a39Sopenharmony_ci___ 1193e1051a39Sopenharmony_ci &deccompactvert(); 
1194e1051a39Sopenharmony_ci$code.=<<___; 1195e1051a39Sopenharmony_ci cmp 16(%rsp),$key 1196e1051a39Sopenharmony_ci je .Ldec_compact_done 1197e1051a39Sopenharmony_ci 1198e1051a39Sopenharmony_ci mov 256+0($sbox),$mask80 1199e1051a39Sopenharmony_ci shl \$32,%rbx 1200e1051a39Sopenharmony_ci shl \$32,%rdx 1201e1051a39Sopenharmony_ci mov 256+8($sbox),$maskfe 1202e1051a39Sopenharmony_ci or %rbx,%rax 1203e1051a39Sopenharmony_ci or %rdx,%rcx 1204e1051a39Sopenharmony_ci mov 256+16($sbox),$mask1b 1205e1051a39Sopenharmony_ci___ 1206e1051a39Sopenharmony_ci &dectransform(1); 1207e1051a39Sopenharmony_ci$code.=<<___; 1208e1051a39Sopenharmony_ci jmp .Ldec_loop_compact 1209e1051a39Sopenharmony_ci.align 16 1210e1051a39Sopenharmony_ci.Ldec_compact_done: 1211e1051a39Sopenharmony_ci xor 0($key),$s0 1212e1051a39Sopenharmony_ci xor 4($key),$s1 1213e1051a39Sopenharmony_ci xor 8($key),$s2 1214e1051a39Sopenharmony_ci xor 12($key),$s3 1215e1051a39Sopenharmony_ci .byte 0xf3,0xc3 # rep ret 1216e1051a39Sopenharmony_ci.cfi_endproc 1217e1051a39Sopenharmony_ci.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact 1218e1051a39Sopenharmony_ci___ 1219e1051a39Sopenharmony_ci 1220e1051a39Sopenharmony_ci# void AES_decrypt (const void *inp,void *out,const AES_KEY *key); 1221e1051a39Sopenharmony_ci$code.=<<___; 1222e1051a39Sopenharmony_ci.globl AES_decrypt 1223e1051a39Sopenharmony_ci.type AES_decrypt,\@function,3 1224e1051a39Sopenharmony_ci.align 16 1225e1051a39Sopenharmony_ci.globl asm_AES_decrypt 1226e1051a39Sopenharmony_ci.hidden asm_AES_decrypt 1227e1051a39Sopenharmony_ciasm_AES_decrypt: 1228e1051a39Sopenharmony_ciAES_decrypt: 1229e1051a39Sopenharmony_ci.cfi_startproc 1230e1051a39Sopenharmony_ci endbranch 1231e1051a39Sopenharmony_ci mov %rsp,%rax 1232e1051a39Sopenharmony_ci.cfi_def_cfa_register %rax 1233e1051a39Sopenharmony_ci push %rbx 1234e1051a39Sopenharmony_ci.cfi_push %rbx 1235e1051a39Sopenharmony_ci push %rbp 1236e1051a39Sopenharmony_ci.cfi_push %rbp 1237e1051a39Sopenharmony_ci push %r12 
1238e1051a39Sopenharmony_ci.cfi_push %r12 1239e1051a39Sopenharmony_ci push %r13 1240e1051a39Sopenharmony_ci.cfi_push %r13 1241e1051a39Sopenharmony_ci push %r14 1242e1051a39Sopenharmony_ci.cfi_push %r14 1243e1051a39Sopenharmony_ci push %r15 1244e1051a39Sopenharmony_ci.cfi_push %r15 1245e1051a39Sopenharmony_ci 1246e1051a39Sopenharmony_ci # allocate frame "above" key schedule 1247e1051a39Sopenharmony_ci lea -63(%rdx),%rcx # %rdx is key argument 1248e1051a39Sopenharmony_ci and \$-64,%rsp 1249e1051a39Sopenharmony_ci sub %rsp,%rcx 1250e1051a39Sopenharmony_ci neg %rcx 1251e1051a39Sopenharmony_ci and \$0x3c0,%rcx 1252e1051a39Sopenharmony_ci sub %rcx,%rsp 1253e1051a39Sopenharmony_ci sub \$32,%rsp 1254e1051a39Sopenharmony_ci 1255e1051a39Sopenharmony_ci mov %rsi,16(%rsp) # save out 1256e1051a39Sopenharmony_ci mov %rax,24(%rsp) # save original stack pointer 1257e1051a39Sopenharmony_ci.cfi_cfa_expression %rsp+24,deref,+8 1258e1051a39Sopenharmony_ci.Ldec_prologue: 1259e1051a39Sopenharmony_ci 1260e1051a39Sopenharmony_ci mov %rdx,$key 1261e1051a39Sopenharmony_ci mov 240($key),$rnds # load rounds 1262e1051a39Sopenharmony_ci 1263e1051a39Sopenharmony_ci mov 0(%rdi),$s0 # load input vector 1264e1051a39Sopenharmony_ci mov 4(%rdi),$s1 1265e1051a39Sopenharmony_ci mov 8(%rdi),$s2 1266e1051a39Sopenharmony_ci mov 12(%rdi),$s3 1267e1051a39Sopenharmony_ci 1268e1051a39Sopenharmony_ci shl \$4,$rnds 1269e1051a39Sopenharmony_ci lea ($key,$rnds),%rbp 1270e1051a39Sopenharmony_ci mov $key,(%rsp) # key schedule 1271e1051a39Sopenharmony_ci mov %rbp,8(%rsp) # end of key schedule 1272e1051a39Sopenharmony_ci 1273e1051a39Sopenharmony_ci # pick Td4 copy which can't "overlap" with stack frame or key schedule 1274e1051a39Sopenharmony_ci lea .LAES_Td+2048(%rip),$sbox 1275e1051a39Sopenharmony_ci lea 768(%rsp),%rbp 1276e1051a39Sopenharmony_ci sub $sbox,%rbp 1277e1051a39Sopenharmony_ci and \$0x300,%rbp 1278e1051a39Sopenharmony_ci lea ($sbox,%rbp),$sbox 1279e1051a39Sopenharmony_ci shr \$3,%rbp # recall "magic" 
constants! 1280e1051a39Sopenharmony_ci add %rbp,$sbox 1281e1051a39Sopenharmony_ci 1282e1051a39Sopenharmony_ci call _x86_64_AES_decrypt_compact 1283e1051a39Sopenharmony_ci 1284e1051a39Sopenharmony_ci mov 16(%rsp),$out # restore out 1285e1051a39Sopenharmony_ci mov 24(%rsp),%rsi # restore saved stack pointer 1286e1051a39Sopenharmony_ci.cfi_def_cfa %rsi,8 1287e1051a39Sopenharmony_ci mov $s0,0($out) # write output vector 1288e1051a39Sopenharmony_ci mov $s1,4($out) 1289e1051a39Sopenharmony_ci mov $s2,8($out) 1290e1051a39Sopenharmony_ci mov $s3,12($out) 1291e1051a39Sopenharmony_ci 1292e1051a39Sopenharmony_ci mov -48(%rsi),%r15 1293e1051a39Sopenharmony_ci.cfi_restore %r15 1294e1051a39Sopenharmony_ci mov -40(%rsi),%r14 1295e1051a39Sopenharmony_ci.cfi_restore %r14 1296e1051a39Sopenharmony_ci mov -32(%rsi),%r13 1297e1051a39Sopenharmony_ci.cfi_restore %r13 1298e1051a39Sopenharmony_ci mov -24(%rsi),%r12 1299e1051a39Sopenharmony_ci.cfi_restore %r12 1300e1051a39Sopenharmony_ci mov -16(%rsi),%rbp 1301e1051a39Sopenharmony_ci.cfi_restore %rbp 1302e1051a39Sopenharmony_ci mov -8(%rsi),%rbx 1303e1051a39Sopenharmony_ci.cfi_restore %rbx 1304e1051a39Sopenharmony_ci lea (%rsi),%rsp 1305e1051a39Sopenharmony_ci.cfi_def_cfa_register %rsp 1306e1051a39Sopenharmony_ci.Ldec_epilogue: 1307e1051a39Sopenharmony_ci ret 1308e1051a39Sopenharmony_ci.cfi_endproc 1309e1051a39Sopenharmony_ci.size AES_decrypt,.-AES_decrypt 1310e1051a39Sopenharmony_ci___ 1311e1051a39Sopenharmony_ci#------------------------------------------------------------------# 1312e1051a39Sopenharmony_ci 1313e1051a39Sopenharmony_cisub enckey() 1314e1051a39Sopenharmony_ci{ 1315e1051a39Sopenharmony_ci$code.=<<___; 1316e1051a39Sopenharmony_ci movz %dl,%esi # rk[i]>>0 1317e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1318e1051a39Sopenharmony_ci movz %dh,%esi # rk[i]>>8 1319e1051a39Sopenharmony_ci shl \$24,%ebx 1320e1051a39Sopenharmony_ci xor %ebx,%eax 1321e1051a39Sopenharmony_ci 1322e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 
1323e1051a39Sopenharmony_ci shr \$16,%edx 1324e1051a39Sopenharmony_ci movz %dl,%esi # rk[i]>>16 1325e1051a39Sopenharmony_ci xor %ebx,%eax 1326e1051a39Sopenharmony_ci 1327e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1328e1051a39Sopenharmony_ci movz %dh,%esi # rk[i]>>24 1329e1051a39Sopenharmony_ci shl \$8,%ebx 1330e1051a39Sopenharmony_ci xor %ebx,%eax 1331e1051a39Sopenharmony_ci 1332e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1333e1051a39Sopenharmony_ci shl \$16,%ebx 1334e1051a39Sopenharmony_ci xor %ebx,%eax 1335e1051a39Sopenharmony_ci 1336e1051a39Sopenharmony_ci xor 1024-128(%rbp,%rcx,4),%eax # rcon 1337e1051a39Sopenharmony_ci___ 1338e1051a39Sopenharmony_ci} 1339e1051a39Sopenharmony_ci 1340e1051a39Sopenharmony_ci# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 1341e1051a39Sopenharmony_ci# AES_KEY *key) 1342e1051a39Sopenharmony_ci$code.=<<___; 1343e1051a39Sopenharmony_ci.globl AES_set_encrypt_key 1344e1051a39Sopenharmony_ci.type AES_set_encrypt_key,\@function,3 1345e1051a39Sopenharmony_ci.align 16 1346e1051a39Sopenharmony_ciAES_set_encrypt_key: 1347e1051a39Sopenharmony_ci.cfi_startproc 1348e1051a39Sopenharmony_ci endbranch 1349e1051a39Sopenharmony_ci push %rbx 1350e1051a39Sopenharmony_ci.cfi_push %rbx 1351e1051a39Sopenharmony_ci push %rbp 1352e1051a39Sopenharmony_ci.cfi_push %rbp 1353e1051a39Sopenharmony_ci push %r12 # redundant, but allows to share 1354e1051a39Sopenharmony_ci.cfi_push %r12 1355e1051a39Sopenharmony_ci push %r13 # exception handler... 
1356e1051a39Sopenharmony_ci.cfi_push %r13 1357e1051a39Sopenharmony_ci push %r14 1358e1051a39Sopenharmony_ci.cfi_push %r14 1359e1051a39Sopenharmony_ci push %r15 1360e1051a39Sopenharmony_ci.cfi_push %r15 1361e1051a39Sopenharmony_ci sub \$8,%rsp 1362e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 8 1363e1051a39Sopenharmony_ci.Lenc_key_prologue: 1364e1051a39Sopenharmony_ci 1365e1051a39Sopenharmony_ci call _x86_64_AES_set_encrypt_key 1366e1051a39Sopenharmony_ci 1367e1051a39Sopenharmony_ci mov 40(%rsp),%rbp 1368e1051a39Sopenharmony_ci.cfi_restore %rbp 1369e1051a39Sopenharmony_ci mov 48(%rsp),%rbx 1370e1051a39Sopenharmony_ci.cfi_restore %rbx 1371e1051a39Sopenharmony_ci add \$56,%rsp 1372e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset -56 1373e1051a39Sopenharmony_ci.Lenc_key_epilogue: 1374e1051a39Sopenharmony_ci ret 1375e1051a39Sopenharmony_ci.cfi_endproc 1376e1051a39Sopenharmony_ci.size AES_set_encrypt_key,.-AES_set_encrypt_key 1377e1051a39Sopenharmony_ci 1378e1051a39Sopenharmony_ci.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent 1379e1051a39Sopenharmony_ci.align 16 1380e1051a39Sopenharmony_ci_x86_64_AES_set_encrypt_key: 1381e1051a39Sopenharmony_ci.cfi_startproc 1382e1051a39Sopenharmony_ci mov %esi,%ecx # %ecx=bits 1383e1051a39Sopenharmony_ci mov %rdi,%rsi # %rsi=userKey 1384e1051a39Sopenharmony_ci mov %rdx,%rdi # %rdi=key 1385e1051a39Sopenharmony_ci 1386e1051a39Sopenharmony_ci test \$-1,%rsi 1387e1051a39Sopenharmony_ci jz .Lbadpointer 1388e1051a39Sopenharmony_ci test \$-1,%rdi 1389e1051a39Sopenharmony_ci jz .Lbadpointer 1390e1051a39Sopenharmony_ci 1391e1051a39Sopenharmony_ci lea .LAES_Te(%rip),%rbp 1392e1051a39Sopenharmony_ci lea 2048+128(%rbp),%rbp 1393e1051a39Sopenharmony_ci 1394e1051a39Sopenharmony_ci # prefetch Te4 1395e1051a39Sopenharmony_ci mov 0-128(%rbp),%eax 1396e1051a39Sopenharmony_ci mov 32-128(%rbp),%ebx 1397e1051a39Sopenharmony_ci mov 64-128(%rbp),%r8d 1398e1051a39Sopenharmony_ci mov 96-128(%rbp),%edx 1399e1051a39Sopenharmony_ci mov 128-128(%rbp),%eax 
1400e1051a39Sopenharmony_ci mov 160-128(%rbp),%ebx 1401e1051a39Sopenharmony_ci mov 192-128(%rbp),%r8d 1402e1051a39Sopenharmony_ci mov 224-128(%rbp),%edx 1403e1051a39Sopenharmony_ci 1404e1051a39Sopenharmony_ci cmp \$128,%ecx 1405e1051a39Sopenharmony_ci je .L10rounds 1406e1051a39Sopenharmony_ci cmp \$192,%ecx 1407e1051a39Sopenharmony_ci je .L12rounds 1408e1051a39Sopenharmony_ci cmp \$256,%ecx 1409e1051a39Sopenharmony_ci je .L14rounds 1410e1051a39Sopenharmony_ci mov \$-2,%rax # invalid number of bits 1411e1051a39Sopenharmony_ci jmp .Lexit 1412e1051a39Sopenharmony_ci 1413e1051a39Sopenharmony_ci.L10rounds: 1414e1051a39Sopenharmony_ci mov 0(%rsi),%rax # copy first 4 dwords 1415e1051a39Sopenharmony_ci mov 8(%rsi),%rdx 1416e1051a39Sopenharmony_ci mov %rax,0(%rdi) 1417e1051a39Sopenharmony_ci mov %rdx,8(%rdi) 1418e1051a39Sopenharmony_ci 1419e1051a39Sopenharmony_ci shr \$32,%rdx 1420e1051a39Sopenharmony_ci xor %ecx,%ecx 1421e1051a39Sopenharmony_ci jmp .L10shortcut 1422e1051a39Sopenharmony_ci.align 4 1423e1051a39Sopenharmony_ci.L10loop: 1424e1051a39Sopenharmony_ci mov 0(%rdi),%eax # rk[0] 1425e1051a39Sopenharmony_ci mov 12(%rdi),%edx # rk[3] 1426e1051a39Sopenharmony_ci.L10shortcut: 1427e1051a39Sopenharmony_ci___ 1428e1051a39Sopenharmony_ci &enckey (); 1429e1051a39Sopenharmony_ci$code.=<<___; 1430e1051a39Sopenharmony_ci mov %eax,16(%rdi) # rk[4] 1431e1051a39Sopenharmony_ci xor 4(%rdi),%eax 1432e1051a39Sopenharmony_ci mov %eax,20(%rdi) # rk[5] 1433e1051a39Sopenharmony_ci xor 8(%rdi),%eax 1434e1051a39Sopenharmony_ci mov %eax,24(%rdi) # rk[6] 1435e1051a39Sopenharmony_ci xor 12(%rdi),%eax 1436e1051a39Sopenharmony_ci mov %eax,28(%rdi) # rk[7] 1437e1051a39Sopenharmony_ci add \$1,%ecx 1438e1051a39Sopenharmony_ci lea 16(%rdi),%rdi 1439e1051a39Sopenharmony_ci cmp \$10,%ecx 1440e1051a39Sopenharmony_ci jl .L10loop 1441e1051a39Sopenharmony_ci 1442e1051a39Sopenharmony_ci movl \$10,80(%rdi) # setup number of rounds 1443e1051a39Sopenharmony_ci xor %rax,%rax 1444e1051a39Sopenharmony_ci jmp 
.Lexit 1445e1051a39Sopenharmony_ci 1446e1051a39Sopenharmony_ci.L12rounds: 1447e1051a39Sopenharmony_ci mov 0(%rsi),%rax # copy first 6 dwords 1448e1051a39Sopenharmony_ci mov 8(%rsi),%rbx 1449e1051a39Sopenharmony_ci mov 16(%rsi),%rdx 1450e1051a39Sopenharmony_ci mov %rax,0(%rdi) 1451e1051a39Sopenharmony_ci mov %rbx,8(%rdi) 1452e1051a39Sopenharmony_ci mov %rdx,16(%rdi) 1453e1051a39Sopenharmony_ci 1454e1051a39Sopenharmony_ci shr \$32,%rdx 1455e1051a39Sopenharmony_ci xor %ecx,%ecx 1456e1051a39Sopenharmony_ci jmp .L12shortcut 1457e1051a39Sopenharmony_ci.align 4 1458e1051a39Sopenharmony_ci.L12loop: 1459e1051a39Sopenharmony_ci mov 0(%rdi),%eax # rk[0] 1460e1051a39Sopenharmony_ci mov 20(%rdi),%edx # rk[5] 1461e1051a39Sopenharmony_ci.L12shortcut: 1462e1051a39Sopenharmony_ci___ 1463e1051a39Sopenharmony_ci &enckey (); 1464e1051a39Sopenharmony_ci$code.=<<___; 1465e1051a39Sopenharmony_ci mov %eax,24(%rdi) # rk[6] 1466e1051a39Sopenharmony_ci xor 4(%rdi),%eax 1467e1051a39Sopenharmony_ci mov %eax,28(%rdi) # rk[7] 1468e1051a39Sopenharmony_ci xor 8(%rdi),%eax 1469e1051a39Sopenharmony_ci mov %eax,32(%rdi) # rk[8] 1470e1051a39Sopenharmony_ci xor 12(%rdi),%eax 1471e1051a39Sopenharmony_ci mov %eax,36(%rdi) # rk[9] 1472e1051a39Sopenharmony_ci 1473e1051a39Sopenharmony_ci cmp \$7,%ecx 1474e1051a39Sopenharmony_ci je .L12break 1475e1051a39Sopenharmony_ci add \$1,%ecx 1476e1051a39Sopenharmony_ci 1477e1051a39Sopenharmony_ci xor 16(%rdi),%eax 1478e1051a39Sopenharmony_ci mov %eax,40(%rdi) # rk[10] 1479e1051a39Sopenharmony_ci xor 20(%rdi),%eax 1480e1051a39Sopenharmony_ci mov %eax,44(%rdi) # rk[11] 1481e1051a39Sopenharmony_ci 1482e1051a39Sopenharmony_ci lea 24(%rdi),%rdi 1483e1051a39Sopenharmony_ci jmp .L12loop 1484e1051a39Sopenharmony_ci.L12break: 1485e1051a39Sopenharmony_ci movl \$12,72(%rdi) # setup number of rounds 1486e1051a39Sopenharmony_ci xor %rax,%rax 1487e1051a39Sopenharmony_ci jmp .Lexit 1488e1051a39Sopenharmony_ci 1489e1051a39Sopenharmony_ci.L14rounds: 1490e1051a39Sopenharmony_ci mov 
0(%rsi),%rax # copy first 8 dwords 1491e1051a39Sopenharmony_ci mov 8(%rsi),%rbx 1492e1051a39Sopenharmony_ci mov 16(%rsi),%rcx 1493e1051a39Sopenharmony_ci mov 24(%rsi),%rdx 1494e1051a39Sopenharmony_ci mov %rax,0(%rdi) 1495e1051a39Sopenharmony_ci mov %rbx,8(%rdi) 1496e1051a39Sopenharmony_ci mov %rcx,16(%rdi) 1497e1051a39Sopenharmony_ci mov %rdx,24(%rdi) 1498e1051a39Sopenharmony_ci 1499e1051a39Sopenharmony_ci shr \$32,%rdx 1500e1051a39Sopenharmony_ci xor %ecx,%ecx 1501e1051a39Sopenharmony_ci jmp .L14shortcut 1502e1051a39Sopenharmony_ci.align 4 1503e1051a39Sopenharmony_ci.L14loop: 1504e1051a39Sopenharmony_ci mov 0(%rdi),%eax # rk[0] 1505e1051a39Sopenharmony_ci mov 28(%rdi),%edx # rk[4] 1506e1051a39Sopenharmony_ci.L14shortcut: 1507e1051a39Sopenharmony_ci___ 1508e1051a39Sopenharmony_ci &enckey (); 1509e1051a39Sopenharmony_ci$code.=<<___; 1510e1051a39Sopenharmony_ci mov %eax,32(%rdi) # rk[8] 1511e1051a39Sopenharmony_ci xor 4(%rdi),%eax 1512e1051a39Sopenharmony_ci mov %eax,36(%rdi) # rk[9] 1513e1051a39Sopenharmony_ci xor 8(%rdi),%eax 1514e1051a39Sopenharmony_ci mov %eax,40(%rdi) # rk[10] 1515e1051a39Sopenharmony_ci xor 12(%rdi),%eax 1516e1051a39Sopenharmony_ci mov %eax,44(%rdi) # rk[11] 1517e1051a39Sopenharmony_ci 1518e1051a39Sopenharmony_ci cmp \$6,%ecx 1519e1051a39Sopenharmony_ci je .L14break 1520e1051a39Sopenharmony_ci add \$1,%ecx 1521e1051a39Sopenharmony_ci 1522e1051a39Sopenharmony_ci mov %eax,%edx 1523e1051a39Sopenharmony_ci mov 16(%rdi),%eax # rk[4] 1524e1051a39Sopenharmony_ci movz %dl,%esi # rk[11]>>0 1525e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1526e1051a39Sopenharmony_ci movz %dh,%esi # rk[11]>>8 1527e1051a39Sopenharmony_ci xor %ebx,%eax 1528e1051a39Sopenharmony_ci 1529e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1530e1051a39Sopenharmony_ci shr \$16,%edx 1531e1051a39Sopenharmony_ci shl \$8,%ebx 1532e1051a39Sopenharmony_ci movz %dl,%esi # rk[11]>>16 1533e1051a39Sopenharmony_ci xor %ebx,%eax 1534e1051a39Sopenharmony_ci 1535e1051a39Sopenharmony_ci 
movzb -128(%rbp,%rsi),%ebx 1536e1051a39Sopenharmony_ci movz %dh,%esi # rk[11]>>24 1537e1051a39Sopenharmony_ci shl \$16,%ebx 1538e1051a39Sopenharmony_ci xor %ebx,%eax 1539e1051a39Sopenharmony_ci 1540e1051a39Sopenharmony_ci movzb -128(%rbp,%rsi),%ebx 1541e1051a39Sopenharmony_ci shl \$24,%ebx 1542e1051a39Sopenharmony_ci xor %ebx,%eax 1543e1051a39Sopenharmony_ci 1544e1051a39Sopenharmony_ci mov %eax,48(%rdi) # rk[12] 1545e1051a39Sopenharmony_ci xor 20(%rdi),%eax 1546e1051a39Sopenharmony_ci mov %eax,52(%rdi) # rk[13] 1547e1051a39Sopenharmony_ci xor 24(%rdi),%eax 1548e1051a39Sopenharmony_ci mov %eax,56(%rdi) # rk[14] 1549e1051a39Sopenharmony_ci xor 28(%rdi),%eax 1550e1051a39Sopenharmony_ci mov %eax,60(%rdi) # rk[15] 1551e1051a39Sopenharmony_ci 1552e1051a39Sopenharmony_ci lea 32(%rdi),%rdi 1553e1051a39Sopenharmony_ci jmp .L14loop 1554e1051a39Sopenharmony_ci.L14break: 1555e1051a39Sopenharmony_ci movl \$14,48(%rdi) # setup number of rounds 1556e1051a39Sopenharmony_ci xor %rax,%rax 1557e1051a39Sopenharmony_ci jmp .Lexit 1558e1051a39Sopenharmony_ci 1559e1051a39Sopenharmony_ci.Lbadpointer: 1560e1051a39Sopenharmony_ci mov \$-1,%rax 1561e1051a39Sopenharmony_ci.Lexit: 1562e1051a39Sopenharmony_ci .byte 0xf3,0xc3 # rep ret 1563e1051a39Sopenharmony_ci.cfi_endproc 1564e1051a39Sopenharmony_ci.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key 1565e1051a39Sopenharmony_ci___ 1566e1051a39Sopenharmony_ci 1567e1051a39Sopenharmony_cisub deckey_ref() 1568e1051a39Sopenharmony_ci{ my ($i,$ptr,$te,$td) = @_; 1569e1051a39Sopenharmony_ci my ($tp1,$tp2,$tp4,$tp8,$acc)=("%eax","%ebx","%edi","%edx","%r8d"); 1570e1051a39Sopenharmony_ci$code.=<<___; 1571e1051a39Sopenharmony_ci mov $i($ptr),$tp1 1572e1051a39Sopenharmony_ci mov $tp1,$acc 1573e1051a39Sopenharmony_ci and \$0x80808080,$acc 1574e1051a39Sopenharmony_ci mov $acc,$tp4 1575e1051a39Sopenharmony_ci shr \$7,$tp4 1576e1051a39Sopenharmony_ci lea 0($tp1,$tp1),$tp2 1577e1051a39Sopenharmony_ci sub $tp4,$acc 1578e1051a39Sopenharmony_ci and 
\$0xfefefefe,$tp2 1579e1051a39Sopenharmony_ci and \$0x1b1b1b1b,$acc 1580e1051a39Sopenharmony_ci xor $tp2,$acc 1581e1051a39Sopenharmony_ci mov $acc,$tp2 1582e1051a39Sopenharmony_ci 1583e1051a39Sopenharmony_ci and \$0x80808080,$acc 1584e1051a39Sopenharmony_ci mov $acc,$tp8 1585e1051a39Sopenharmony_ci shr \$7,$tp8 1586e1051a39Sopenharmony_ci lea 0($tp2,$tp2),$tp4 1587e1051a39Sopenharmony_ci sub $tp8,$acc 1588e1051a39Sopenharmony_ci and \$0xfefefefe,$tp4 1589e1051a39Sopenharmony_ci and \$0x1b1b1b1b,$acc 1590e1051a39Sopenharmony_ci xor $tp1,$tp2 # tp2^tp1 1591e1051a39Sopenharmony_ci xor $tp4,$acc 1592e1051a39Sopenharmony_ci mov $acc,$tp4 1593e1051a39Sopenharmony_ci 1594e1051a39Sopenharmony_ci and \$0x80808080,$acc 1595e1051a39Sopenharmony_ci mov $acc,$tp8 1596e1051a39Sopenharmony_ci shr \$7,$tp8 1597e1051a39Sopenharmony_ci sub $tp8,$acc 1598e1051a39Sopenharmony_ci lea 0($tp4,$tp4),$tp8 1599e1051a39Sopenharmony_ci xor $tp1,$tp4 # tp4^tp1 1600e1051a39Sopenharmony_ci and \$0xfefefefe,$tp8 1601e1051a39Sopenharmony_ci and \$0x1b1b1b1b,$acc 1602e1051a39Sopenharmony_ci xor $acc,$tp8 1603e1051a39Sopenharmony_ci 1604e1051a39Sopenharmony_ci xor $tp8,$tp1 # tp1^tp8 1605e1051a39Sopenharmony_ci rol \$8,$tp1 # ROTATE(tp1^tp8,8) 1606e1051a39Sopenharmony_ci xor $tp8,$tp2 # tp2^tp1^tp8 1607e1051a39Sopenharmony_ci xor $tp8,$tp4 # tp4^tp1^tp8 1608e1051a39Sopenharmony_ci xor $tp2,$tp8 1609e1051a39Sopenharmony_ci xor $tp4,$tp8 # tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2 1610e1051a39Sopenharmony_ci 1611e1051a39Sopenharmony_ci xor $tp8,$tp1 1612e1051a39Sopenharmony_ci rol \$24,$tp2 # ROTATE(tp2^tp1^tp8,24) 1613e1051a39Sopenharmony_ci xor $tp2,$tp1 1614e1051a39Sopenharmony_ci rol \$16,$tp4 # ROTATE(tp4^tp1^tp8,16) 1615e1051a39Sopenharmony_ci xor $tp4,$tp1 1616e1051a39Sopenharmony_ci 1617e1051a39Sopenharmony_ci mov $tp1,$i($ptr) 1618e1051a39Sopenharmony_ci___ 1619e1051a39Sopenharmony_ci} 1620e1051a39Sopenharmony_ci 1621e1051a39Sopenharmony_ci# int AES_set_decrypt_key(const unsigned char 
#			*userKey, const int bits,
#			AES_KEY *key)
#
# Emits AES_set_decrypt_key.  The generated routine:
#   1. saves %rbx, %rbp, %r12-%r15 and the key pointer (%rdx) on the stack;
#   2. calls _x86_64_AES_set_encrypt_key to build the encryption schedule
#      and, if it returns non-zero in %eax, jumps to .Labort so that value
#      is returned unchanged;
#   3. reads the round count from byte offset 240 of the schedule and swaps
#      the 16-byte round keys end-for-end (.Linvert);
#   4. loads three 64-bit constants from 40/48/56 bytes past
#      .LAES_Te+2048+1024 into $mask80/$maskfe/$mask1b;
#   5. for each of the (rounds-1) round keys starting with the second one,
#      loads it into %rax/%rcx, runs the code emitted by dectransform() and
#      stores %eax/%ebx/%ecx/%edx back (.Lpermute);
#   6. returns 0 in %rax.
# $key, $mask80, $maskfe and $mask1b are register names defined earlier in
# this file.
$code.=<<___;
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function,3
.align	16
AES_set_decrypt_key:
.cfi_startproc
	endbranch
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdx			# save key schedule
.cfi_adjust_cfa_offset	8
.Ldec_key_prologue:

	call	_x86_64_AES_set_encrypt_key
	mov	(%rsp),%r8		# restore key schedule
	cmp	\$0,%eax
	jne	.Labort

	mov	240(%r8),%r14d		# pull number of rounds
	xor	%rdi,%rdi
	lea	(%rdi,%r14d,4),%rcx
	mov	%r8,%rsi
	lea	(%r8,%rcx,4),%rdi	# pointer to last chunk
.align	4
.Linvert:
		mov	0(%rsi),%rax
		mov	8(%rsi),%rbx
		mov	0(%rdi),%rcx
		mov	8(%rdi),%rdx
		mov	%rax,0(%rdi)
		mov	%rbx,8(%rdi)
		mov	%rcx,0(%rsi)
		mov	%rdx,8(%rsi)
		lea	16(%rsi),%rsi
		lea	-16(%rdi),%rdi
		cmp	%rsi,%rdi
	jne	.Linvert

	lea	.LAES_Te+2048+1024(%rip),%rax	# rcon

	mov	40(%rax),$mask80
	mov	48(%rax),$maskfe
	mov	56(%rax),$mask1b

	mov	%r8,$key
	sub	\$1,%r14d
.align	4
.Lpermute:
		lea	16($key),$key
		mov	0($key),%rax
		mov	8($key),%rcx
___
		# splice in the per-round-key transform emitted by dectransform()
		&dectransform ();
$code.=<<___;
		mov	%eax,0($key)
		mov	%ebx,4($key)
		mov	%ecx,8($key)
		mov	%edx,12($key)
		sub	\$1,%r14d
	jnz	.Lpermute

	xor	%rax,%rax
.Labort:
	mov	8(%rsp),%r15
.cfi_restore	%r15
	mov	16(%rsp),%r14
.cfi_restore	%r14
	mov	24(%rsp),%r13
.cfi_restore	%r13
	mov	32(%rsp),%r12
.cfi_restore	%r12
	mov	40(%rsp),%rbp
.cfi_restore	%rbp
	mov	48(%rsp),%rbx
.cfi_restore	%rbx
	add	\$56,%rsp
.cfi_adjust_cfa_offset	-56
.Ldec_key_epilogue:
	ret
.cfi_endproc
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#			size_t length, const AES_KEY *key,
#			unsigned char *ivp,const int enc);
{
# stack frame layout
# -8(%rsp)		return address
my $keyp="0(%rsp)";		# one to pass as $key
my $keyend="8(%rsp)";		# &(keyp->rd_key[4*keyp->rounds])
my $_rsp="16(%rsp)";		# saved %rsp
my $_inp="24(%rsp)";		# copy of 1st parameter, inp
my $_out="32(%rsp)";		# copy of 2nd parameter, out
my $_len="40(%rsp)";		# copy of 3rd parameter, length
my $_key="48(%rsp)";		# copy of 4th parameter, key
my $_ivp="56(%rsp)";		# copy of 5th parameter, ivp
my $ivec="64(%rsp)";		# ivec[16]
my $aes_key="80(%rsp)";		# copy of aes_key
my $mark="80+240(%rsp)";	# copy of aes_key->rounds

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function,6
.align	16
.extern	OPENSSL_ia32cap_P
.globl	asm_AES_cbc_encrypt
.hidden	asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
.cfi_startproc
	endbranch
	cmp	\$0,%rdx	# check length
	je	.Lcbc_epilogue
1747e1051a39Sopenharmony_ci pushfq 1748e1051a39Sopenharmony_ci# This could be .cfi_push 49, but libunwind fails on registers it does not 1749e1051a39Sopenharmony_ci# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. 1750e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 8 1751e1051a39Sopenharmony_ci push %rbx 1752e1051a39Sopenharmony_ci.cfi_push %rbx 1753e1051a39Sopenharmony_ci push %rbp 1754e1051a39Sopenharmony_ci.cfi_push %rbp 1755e1051a39Sopenharmony_ci push %r12 1756e1051a39Sopenharmony_ci.cfi_push %r12 1757e1051a39Sopenharmony_ci push %r13 1758e1051a39Sopenharmony_ci.cfi_push %r13 1759e1051a39Sopenharmony_ci push %r14 1760e1051a39Sopenharmony_ci.cfi_push %r14 1761e1051a39Sopenharmony_ci push %r15 1762e1051a39Sopenharmony_ci.cfi_push %r15 1763e1051a39Sopenharmony_ci.Lcbc_prologue: 1764e1051a39Sopenharmony_ci 1765e1051a39Sopenharmony_ci cld 1766e1051a39Sopenharmony_ci mov %r9d,%r9d # clear upper half of enc 1767e1051a39Sopenharmony_ci 1768e1051a39Sopenharmony_ci lea .LAES_Te(%rip),$sbox 1769e1051a39Sopenharmony_ci lea .LAES_Td(%rip),%r10 1770e1051a39Sopenharmony_ci cmp \$0,%r9 1771e1051a39Sopenharmony_ci cmoveq %r10,$sbox 1772e1051a39Sopenharmony_ci 1773e1051a39Sopenharmony_ci.cfi_remember_state 1774e1051a39Sopenharmony_ci mov OPENSSL_ia32cap_P(%rip),%r10d 1775e1051a39Sopenharmony_ci cmp \$$speed_limit,%rdx 1776e1051a39Sopenharmony_ci jb .Lcbc_slow_prologue 1777e1051a39Sopenharmony_ci test \$15,%rdx 1778e1051a39Sopenharmony_ci jnz .Lcbc_slow_prologue 1779e1051a39Sopenharmony_ci bt \$28,%r10d 1780e1051a39Sopenharmony_ci jc .Lcbc_slow_prologue 1781e1051a39Sopenharmony_ci 1782e1051a39Sopenharmony_ci # allocate aligned stack frame... 1783e1051a39Sopenharmony_ci lea -88-248(%rsp),$key 1784e1051a39Sopenharmony_ci and \$-64,$key 1785e1051a39Sopenharmony_ci 1786e1051a39Sopenharmony_ci # ... 
and make sure it doesn't alias with AES_T[ed] modulo 4096 1787e1051a39Sopenharmony_ci mov $sbox,%r10 1788e1051a39Sopenharmony_ci lea 2304($sbox),%r11 1789e1051a39Sopenharmony_ci mov $key,%r12 1790e1051a39Sopenharmony_ci and \$0xFFF,%r10 # s = $sbox&0xfff 1791e1051a39Sopenharmony_ci and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff 1792e1051a39Sopenharmony_ci and \$0xFFF,%r12 # p = %rsp&0xfff 1793e1051a39Sopenharmony_ci 1794e1051a39Sopenharmony_ci cmp %r11,%r12 # if (p=>e) %rsp =- (p-e); 1795e1051a39Sopenharmony_ci jb .Lcbc_te_break_out 1796e1051a39Sopenharmony_ci sub %r11,%r12 1797e1051a39Sopenharmony_ci sub %r12,$key 1798e1051a39Sopenharmony_ci jmp .Lcbc_te_ok 1799e1051a39Sopenharmony_ci.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz 1800e1051a39Sopenharmony_ci sub %r10,%r12 1801e1051a39Sopenharmony_ci and \$0xFFF,%r12 1802e1051a39Sopenharmony_ci add \$320,%r12 1803e1051a39Sopenharmony_ci sub %r12,$key 1804e1051a39Sopenharmony_ci.align 4 1805e1051a39Sopenharmony_ci.Lcbc_te_ok: 1806e1051a39Sopenharmony_ci 1807e1051a39Sopenharmony_ci xchg %rsp,$key 1808e1051a39Sopenharmony_ci.cfi_def_cfa_register $key 1809e1051a39Sopenharmony_ci #add \$8,%rsp # reserve for return address! 
1810e1051a39Sopenharmony_ci mov $key,$_rsp # save %rsp 1811e1051a39Sopenharmony_ci.cfi_cfa_expression $_rsp,deref,+64 1812e1051a39Sopenharmony_ci.Lcbc_fast_body: 1813e1051a39Sopenharmony_ci mov %rdi,$_inp # save copy of inp 1814e1051a39Sopenharmony_ci mov %rsi,$_out # save copy of out 1815e1051a39Sopenharmony_ci mov %rdx,$_len # save copy of len 1816e1051a39Sopenharmony_ci mov %rcx,$_key # save copy of key 1817e1051a39Sopenharmony_ci mov %r8,$_ivp # save copy of ivp 1818e1051a39Sopenharmony_ci movl \$0,$mark # copy of aes_key->rounds = 0; 1819e1051a39Sopenharmony_ci mov %r8,%rbp # rearrange input arguments 1820e1051a39Sopenharmony_ci mov %r9,%rbx 1821e1051a39Sopenharmony_ci mov %rsi,$out 1822e1051a39Sopenharmony_ci mov %rdi,$inp 1823e1051a39Sopenharmony_ci mov %rcx,$key 1824e1051a39Sopenharmony_ci 1825e1051a39Sopenharmony_ci mov 240($key),%eax # key->rounds 1826e1051a39Sopenharmony_ci # do we copy key schedule to stack? 1827e1051a39Sopenharmony_ci mov $key,%r10 1828e1051a39Sopenharmony_ci sub $sbox,%r10 1829e1051a39Sopenharmony_ci and \$0xfff,%r10 1830e1051a39Sopenharmony_ci cmp \$2304,%r10 1831e1051a39Sopenharmony_ci jb .Lcbc_do_ecopy 1832e1051a39Sopenharmony_ci cmp \$4096-248,%r10 1833e1051a39Sopenharmony_ci jb .Lcbc_skip_ecopy 1834e1051a39Sopenharmony_ci.align 4 1835e1051a39Sopenharmony_ci.Lcbc_do_ecopy: 1836e1051a39Sopenharmony_ci mov $key,%rsi 1837e1051a39Sopenharmony_ci lea $aes_key,%rdi 1838e1051a39Sopenharmony_ci lea $aes_key,$key 1839e1051a39Sopenharmony_ci mov \$240/8,%ecx 1840e1051a39Sopenharmony_ci .long 0x90A548F3 # rep movsq 1841e1051a39Sopenharmony_ci mov %eax,(%rdi) # copy aes_key->rounds 1842e1051a39Sopenharmony_ci.Lcbc_skip_ecopy: 1843e1051a39Sopenharmony_ci mov $key,$keyp # save key pointer 1844e1051a39Sopenharmony_ci 1845e1051a39Sopenharmony_ci mov \$18,%ecx 1846e1051a39Sopenharmony_ci.align 4 1847e1051a39Sopenharmony_ci.Lcbc_prefetch_te: 1848e1051a39Sopenharmony_ci mov 0($sbox),%r10 1849e1051a39Sopenharmony_ci mov 32($sbox),%r11 
1850e1051a39Sopenharmony_ci mov 64($sbox),%r12 1851e1051a39Sopenharmony_ci mov 96($sbox),%r13 1852e1051a39Sopenharmony_ci lea 128($sbox),$sbox 1853e1051a39Sopenharmony_ci sub \$1,%ecx 1854e1051a39Sopenharmony_ci jnz .Lcbc_prefetch_te 1855e1051a39Sopenharmony_ci lea -2304($sbox),$sbox 1856e1051a39Sopenharmony_ci 1857e1051a39Sopenharmony_ci cmp \$0,%rbx 1858e1051a39Sopenharmony_ci je .LFAST_DECRYPT 1859e1051a39Sopenharmony_ci 1860e1051a39Sopenharmony_ci#----------------------------- ENCRYPT -----------------------------# 1861e1051a39Sopenharmony_ci mov 0(%rbp),$s0 # load iv 1862e1051a39Sopenharmony_ci mov 4(%rbp),$s1 1863e1051a39Sopenharmony_ci mov 8(%rbp),$s2 1864e1051a39Sopenharmony_ci mov 12(%rbp),$s3 1865e1051a39Sopenharmony_ci 1866e1051a39Sopenharmony_ci.align 4 1867e1051a39Sopenharmony_ci.Lcbc_fast_enc_loop: 1868e1051a39Sopenharmony_ci xor 0($inp),$s0 1869e1051a39Sopenharmony_ci xor 4($inp),$s1 1870e1051a39Sopenharmony_ci xor 8($inp),$s2 1871e1051a39Sopenharmony_ci xor 12($inp),$s3 1872e1051a39Sopenharmony_ci mov $keyp,$key # restore key 1873e1051a39Sopenharmony_ci mov $inp,$_inp # if ($verticalspin) save inp 1874e1051a39Sopenharmony_ci 1875e1051a39Sopenharmony_ci call _x86_64_AES_encrypt 1876e1051a39Sopenharmony_ci 1877e1051a39Sopenharmony_ci mov $_inp,$inp # if ($verticalspin) restore inp 1878e1051a39Sopenharmony_ci mov $_len,%r10 1879e1051a39Sopenharmony_ci mov $s0,0($out) 1880e1051a39Sopenharmony_ci mov $s1,4($out) 1881e1051a39Sopenharmony_ci mov $s2,8($out) 1882e1051a39Sopenharmony_ci mov $s3,12($out) 1883e1051a39Sopenharmony_ci 1884e1051a39Sopenharmony_ci lea 16($inp),$inp 1885e1051a39Sopenharmony_ci lea 16($out),$out 1886e1051a39Sopenharmony_ci sub \$16,%r10 1887e1051a39Sopenharmony_ci test \$-16,%r10 1888e1051a39Sopenharmony_ci mov %r10,$_len 1889e1051a39Sopenharmony_ci jnz .Lcbc_fast_enc_loop 1890e1051a39Sopenharmony_ci mov $_ivp,%rbp # restore ivp 1891e1051a39Sopenharmony_ci mov $s0,0(%rbp) # save ivec 1892e1051a39Sopenharmony_ci mov $s1,4(%rbp) 
1893e1051a39Sopenharmony_ci mov $s2,8(%rbp) 1894e1051a39Sopenharmony_ci mov $s3,12(%rbp) 1895e1051a39Sopenharmony_ci 1896e1051a39Sopenharmony_ci jmp .Lcbc_fast_cleanup 1897e1051a39Sopenharmony_ci 1898e1051a39Sopenharmony_ci#----------------------------- DECRYPT -----------------------------# 1899e1051a39Sopenharmony_ci.align 16 1900e1051a39Sopenharmony_ci.LFAST_DECRYPT: 1901e1051a39Sopenharmony_ci cmp $inp,$out 1902e1051a39Sopenharmony_ci je .Lcbc_fast_dec_in_place 1903e1051a39Sopenharmony_ci 1904e1051a39Sopenharmony_ci mov %rbp,$ivec 1905e1051a39Sopenharmony_ci.align 4 1906e1051a39Sopenharmony_ci.Lcbc_fast_dec_loop: 1907e1051a39Sopenharmony_ci mov 0($inp),$s0 # read input 1908e1051a39Sopenharmony_ci mov 4($inp),$s1 1909e1051a39Sopenharmony_ci mov 8($inp),$s2 1910e1051a39Sopenharmony_ci mov 12($inp),$s3 1911e1051a39Sopenharmony_ci mov $keyp,$key # restore key 1912e1051a39Sopenharmony_ci mov $inp,$_inp # if ($verticalspin) save inp 1913e1051a39Sopenharmony_ci 1914e1051a39Sopenharmony_ci call _x86_64_AES_decrypt 1915e1051a39Sopenharmony_ci 1916e1051a39Sopenharmony_ci mov $ivec,%rbp # load ivp 1917e1051a39Sopenharmony_ci mov $_inp,$inp # if ($verticalspin) restore inp 1918e1051a39Sopenharmony_ci mov $_len,%r10 # load len 1919e1051a39Sopenharmony_ci xor 0(%rbp),$s0 # xor iv 1920e1051a39Sopenharmony_ci xor 4(%rbp),$s1 1921e1051a39Sopenharmony_ci xor 8(%rbp),$s2 1922e1051a39Sopenharmony_ci xor 12(%rbp),$s3 1923e1051a39Sopenharmony_ci mov $inp,%rbp # current input, next iv 1924e1051a39Sopenharmony_ci 1925e1051a39Sopenharmony_ci sub \$16,%r10 1926e1051a39Sopenharmony_ci mov %r10,$_len # update len 1927e1051a39Sopenharmony_ci mov %rbp,$ivec # update ivp 1928e1051a39Sopenharmony_ci 1929e1051a39Sopenharmony_ci mov $s0,0($out) # write output 1930e1051a39Sopenharmony_ci mov $s1,4($out) 1931e1051a39Sopenharmony_ci mov $s2,8($out) 1932e1051a39Sopenharmony_ci mov $s3,12($out) 1933e1051a39Sopenharmony_ci 1934e1051a39Sopenharmony_ci lea 16($inp),$inp 1935e1051a39Sopenharmony_ci lea 
16($out),$out 1936e1051a39Sopenharmony_ci jnz .Lcbc_fast_dec_loop 1937e1051a39Sopenharmony_ci mov $_ivp,%r12 # load user ivp 1938e1051a39Sopenharmony_ci mov 0(%rbp),%r10 # load iv 1939e1051a39Sopenharmony_ci mov 8(%rbp),%r11 1940e1051a39Sopenharmony_ci mov %r10,0(%r12) # copy back to user 1941e1051a39Sopenharmony_ci mov %r11,8(%r12) 1942e1051a39Sopenharmony_ci jmp .Lcbc_fast_cleanup 1943e1051a39Sopenharmony_ci 1944e1051a39Sopenharmony_ci.align 16 1945e1051a39Sopenharmony_ci.Lcbc_fast_dec_in_place: 1946e1051a39Sopenharmony_ci mov 0(%rbp),%r10 # copy iv to stack 1947e1051a39Sopenharmony_ci mov 8(%rbp),%r11 1948e1051a39Sopenharmony_ci mov %r10,0+$ivec 1949e1051a39Sopenharmony_ci mov %r11,8+$ivec 1950e1051a39Sopenharmony_ci.align 4 1951e1051a39Sopenharmony_ci.Lcbc_fast_dec_in_place_loop: 1952e1051a39Sopenharmony_ci mov 0($inp),$s0 # load input 1953e1051a39Sopenharmony_ci mov 4($inp),$s1 1954e1051a39Sopenharmony_ci mov 8($inp),$s2 1955e1051a39Sopenharmony_ci mov 12($inp),$s3 1956e1051a39Sopenharmony_ci mov $keyp,$key # restore key 1957e1051a39Sopenharmony_ci mov $inp,$_inp # if ($verticalspin) save inp 1958e1051a39Sopenharmony_ci 1959e1051a39Sopenharmony_ci call _x86_64_AES_decrypt 1960e1051a39Sopenharmony_ci 1961e1051a39Sopenharmony_ci mov $_inp,$inp # if ($verticalspin) restore inp 1962e1051a39Sopenharmony_ci mov $_len,%r10 1963e1051a39Sopenharmony_ci xor 0+$ivec,$s0 1964e1051a39Sopenharmony_ci xor 4+$ivec,$s1 1965e1051a39Sopenharmony_ci xor 8+$ivec,$s2 1966e1051a39Sopenharmony_ci xor 12+$ivec,$s3 1967e1051a39Sopenharmony_ci 1968e1051a39Sopenharmony_ci mov 0($inp),%r11 # load input 1969e1051a39Sopenharmony_ci mov 8($inp),%r12 1970e1051a39Sopenharmony_ci sub \$16,%r10 1971e1051a39Sopenharmony_ci jz .Lcbc_fast_dec_in_place_done 1972e1051a39Sopenharmony_ci 1973e1051a39Sopenharmony_ci mov %r11,0+$ivec # copy input to iv 1974e1051a39Sopenharmony_ci mov %r12,8+$ivec 1975e1051a39Sopenharmony_ci 1976e1051a39Sopenharmony_ci mov $s0,0($out) # save output [zaps input] 
1977e1051a39Sopenharmony_ci mov $s1,4($out) 1978e1051a39Sopenharmony_ci mov $s2,8($out) 1979e1051a39Sopenharmony_ci mov $s3,12($out) 1980e1051a39Sopenharmony_ci 1981e1051a39Sopenharmony_ci lea 16($inp),$inp 1982e1051a39Sopenharmony_ci lea 16($out),$out 1983e1051a39Sopenharmony_ci mov %r10,$_len 1984e1051a39Sopenharmony_ci jmp .Lcbc_fast_dec_in_place_loop 1985e1051a39Sopenharmony_ci.Lcbc_fast_dec_in_place_done: 1986e1051a39Sopenharmony_ci mov $_ivp,%rdi 1987e1051a39Sopenharmony_ci mov %r11,0(%rdi) # copy iv back to user 1988e1051a39Sopenharmony_ci mov %r12,8(%rdi) 1989e1051a39Sopenharmony_ci 1990e1051a39Sopenharmony_ci mov $s0,0($out) # save output [zaps input] 1991e1051a39Sopenharmony_ci mov $s1,4($out) 1992e1051a39Sopenharmony_ci mov $s2,8($out) 1993e1051a39Sopenharmony_ci mov $s3,12($out) 1994e1051a39Sopenharmony_ci 1995e1051a39Sopenharmony_ci.align 4 1996e1051a39Sopenharmony_ci.Lcbc_fast_cleanup: 1997e1051a39Sopenharmony_ci cmpl \$0,$mark # was the key schedule copied? 1998e1051a39Sopenharmony_ci lea $aes_key,%rdi 1999e1051a39Sopenharmony_ci je .Lcbc_exit 2000e1051a39Sopenharmony_ci mov \$240/8,%ecx 2001e1051a39Sopenharmony_ci xor %rax,%rax 2002e1051a39Sopenharmony_ci .long 0x90AB48F3 # rep stosq 2003e1051a39Sopenharmony_ci 2004e1051a39Sopenharmony_ci jmp .Lcbc_exit 2005e1051a39Sopenharmony_ci 2006e1051a39Sopenharmony_ci#--------------------------- SLOW ROUTINE ---------------------------# 2007e1051a39Sopenharmony_ci.align 16 2008e1051a39Sopenharmony_ci.Lcbc_slow_prologue: 2009e1051a39Sopenharmony_ci.cfi_restore_state 2010e1051a39Sopenharmony_ci # allocate aligned stack frame... 2011e1051a39Sopenharmony_ci lea -88(%rsp),%rbp 2012e1051a39Sopenharmony_ci and \$-64,%rbp 2013e1051a39Sopenharmony_ci # ... 
just "above" key schedule 2014e1051a39Sopenharmony_ci lea -88-63(%rcx),%r10 2015e1051a39Sopenharmony_ci sub %rbp,%r10 2016e1051a39Sopenharmony_ci neg %r10 2017e1051a39Sopenharmony_ci and \$0x3c0,%r10 2018e1051a39Sopenharmony_ci sub %r10,%rbp 2019e1051a39Sopenharmony_ci 2020e1051a39Sopenharmony_ci xchg %rsp,%rbp 2021e1051a39Sopenharmony_ci.cfi_def_cfa_register %rbp 2022e1051a39Sopenharmony_ci #add \$8,%rsp # reserve for return address! 2023e1051a39Sopenharmony_ci mov %rbp,$_rsp # save %rsp 2024e1051a39Sopenharmony_ci.cfi_cfa_expression $_rsp,deref,+64 2025e1051a39Sopenharmony_ci.Lcbc_slow_body: 2026e1051a39Sopenharmony_ci #mov %rdi,$_inp # save copy of inp 2027e1051a39Sopenharmony_ci #mov %rsi,$_out # save copy of out 2028e1051a39Sopenharmony_ci #mov %rdx,$_len # save copy of len 2029e1051a39Sopenharmony_ci #mov %rcx,$_key # save copy of key 2030e1051a39Sopenharmony_ci mov %r8,$_ivp # save copy of ivp 2031e1051a39Sopenharmony_ci mov %r8,%rbp # rearrange input arguments 2032e1051a39Sopenharmony_ci mov %r9,%rbx 2033e1051a39Sopenharmony_ci mov %rsi,$out 2034e1051a39Sopenharmony_ci mov %rdi,$inp 2035e1051a39Sopenharmony_ci mov %rcx,$key 2036e1051a39Sopenharmony_ci mov %rdx,%r10 2037e1051a39Sopenharmony_ci 2038e1051a39Sopenharmony_ci mov 240($key),%eax 2039e1051a39Sopenharmony_ci mov $key,$keyp # save key pointer 2040e1051a39Sopenharmony_ci shl \$4,%eax 2041e1051a39Sopenharmony_ci lea ($key,%rax),%rax 2042e1051a39Sopenharmony_ci mov %rax,$keyend 2043e1051a39Sopenharmony_ci 2044e1051a39Sopenharmony_ci # pick Te4 copy which can't "overlap" with stack frame or key schedule 2045e1051a39Sopenharmony_ci lea 2048($sbox),$sbox 2046e1051a39Sopenharmony_ci lea 768-8(%rsp),%rax 2047e1051a39Sopenharmony_ci sub $sbox,%rax 2048e1051a39Sopenharmony_ci and \$0x300,%rax 2049e1051a39Sopenharmony_ci lea ($sbox,%rax),$sbox 2050e1051a39Sopenharmony_ci 2051e1051a39Sopenharmony_ci cmp \$0,%rbx 2052e1051a39Sopenharmony_ci je .LSLOW_DECRYPT 2053e1051a39Sopenharmony_ci 
2054e1051a39Sopenharmony_ci#--------------------------- SLOW ENCRYPT ---------------------------# 2055e1051a39Sopenharmony_ci test \$-16,%r10 # check upon length 2056e1051a39Sopenharmony_ci mov 0(%rbp),$s0 # load iv 2057e1051a39Sopenharmony_ci mov 4(%rbp),$s1 2058e1051a39Sopenharmony_ci mov 8(%rbp),$s2 2059e1051a39Sopenharmony_ci mov 12(%rbp),$s3 2060e1051a39Sopenharmony_ci jz .Lcbc_slow_enc_tail # short input... 2061e1051a39Sopenharmony_ci 2062e1051a39Sopenharmony_ci.align 4 2063e1051a39Sopenharmony_ci.Lcbc_slow_enc_loop: 2064e1051a39Sopenharmony_ci xor 0($inp),$s0 2065e1051a39Sopenharmony_ci xor 4($inp),$s1 2066e1051a39Sopenharmony_ci xor 8($inp),$s2 2067e1051a39Sopenharmony_ci xor 12($inp),$s3 2068e1051a39Sopenharmony_ci mov $keyp,$key # restore key 2069e1051a39Sopenharmony_ci mov $inp,$_inp # save inp 2070e1051a39Sopenharmony_ci mov $out,$_out # save out 2071e1051a39Sopenharmony_ci mov %r10,$_len # save len 2072e1051a39Sopenharmony_ci 2073e1051a39Sopenharmony_ci call _x86_64_AES_encrypt_compact 2074e1051a39Sopenharmony_ci 2075e1051a39Sopenharmony_ci mov $_inp,$inp # restore inp 2076e1051a39Sopenharmony_ci mov $_out,$out # restore out 2077e1051a39Sopenharmony_ci mov $_len,%r10 # restore len 2078e1051a39Sopenharmony_ci mov $s0,0($out) 2079e1051a39Sopenharmony_ci mov $s1,4($out) 2080e1051a39Sopenharmony_ci mov $s2,8($out) 2081e1051a39Sopenharmony_ci mov $s3,12($out) 2082e1051a39Sopenharmony_ci 2083e1051a39Sopenharmony_ci lea 16($inp),$inp 2084e1051a39Sopenharmony_ci lea 16($out),$out 2085e1051a39Sopenharmony_ci sub \$16,%r10 2086e1051a39Sopenharmony_ci test \$-16,%r10 2087e1051a39Sopenharmony_ci jnz .Lcbc_slow_enc_loop 2088e1051a39Sopenharmony_ci test \$15,%r10 2089e1051a39Sopenharmony_ci jnz .Lcbc_slow_enc_tail 2090e1051a39Sopenharmony_ci mov $_ivp,%rbp # restore ivp 2091e1051a39Sopenharmony_ci mov $s0,0(%rbp) # save ivec 2092e1051a39Sopenharmony_ci mov $s1,4(%rbp) 2093e1051a39Sopenharmony_ci mov $s2,8(%rbp) 2094e1051a39Sopenharmony_ci mov $s3,12(%rbp) 
2095e1051a39Sopenharmony_ci 2096e1051a39Sopenharmony_ci jmp .Lcbc_exit 2097e1051a39Sopenharmony_ci 2098e1051a39Sopenharmony_ci.align 4 2099e1051a39Sopenharmony_ci.Lcbc_slow_enc_tail: 2100e1051a39Sopenharmony_ci mov %rax,%r11 2101e1051a39Sopenharmony_ci mov %rcx,%r12 2102e1051a39Sopenharmony_ci mov %r10,%rcx 2103e1051a39Sopenharmony_ci mov $inp,%rsi 2104e1051a39Sopenharmony_ci mov $out,%rdi 2105e1051a39Sopenharmony_ci .long 0x9066A4F3 # rep movsb 2106e1051a39Sopenharmony_ci mov \$16,%rcx # zero tail 2107e1051a39Sopenharmony_ci sub %r10,%rcx 2108e1051a39Sopenharmony_ci xor %rax,%rax 2109e1051a39Sopenharmony_ci .long 0x9066AAF3 # rep stosb 2110e1051a39Sopenharmony_ci mov $out,$inp # this is not a mistake! 2111e1051a39Sopenharmony_ci mov \$16,%r10 # len=16 2112e1051a39Sopenharmony_ci mov %r11,%rax 2113e1051a39Sopenharmony_ci mov %r12,%rcx 2114e1051a39Sopenharmony_ci jmp .Lcbc_slow_enc_loop # one more spin... 2115e1051a39Sopenharmony_ci#--------------------------- SLOW DECRYPT ---------------------------# 2116e1051a39Sopenharmony_ci.align 16 2117e1051a39Sopenharmony_ci.LSLOW_DECRYPT: 2118e1051a39Sopenharmony_ci shr \$3,%rax 2119e1051a39Sopenharmony_ci add %rax,$sbox # recall "magic" constants! 
2120e1051a39Sopenharmony_ci 2121e1051a39Sopenharmony_ci mov 0(%rbp),%r11 # copy iv to stack 2122e1051a39Sopenharmony_ci mov 8(%rbp),%r12 2123e1051a39Sopenharmony_ci mov %r11,0+$ivec 2124e1051a39Sopenharmony_ci mov %r12,8+$ivec 2125e1051a39Sopenharmony_ci 2126e1051a39Sopenharmony_ci.align 4 2127e1051a39Sopenharmony_ci.Lcbc_slow_dec_loop: 2128e1051a39Sopenharmony_ci mov 0($inp),$s0 # load input 2129e1051a39Sopenharmony_ci mov 4($inp),$s1 2130e1051a39Sopenharmony_ci mov 8($inp),$s2 2131e1051a39Sopenharmony_ci mov 12($inp),$s3 2132e1051a39Sopenharmony_ci mov $keyp,$key # restore key 2133e1051a39Sopenharmony_ci mov $inp,$_inp # save inp 2134e1051a39Sopenharmony_ci mov $out,$_out # save out 2135e1051a39Sopenharmony_ci mov %r10,$_len # save len 2136e1051a39Sopenharmony_ci 2137e1051a39Sopenharmony_ci call _x86_64_AES_decrypt_compact 2138e1051a39Sopenharmony_ci 2139e1051a39Sopenharmony_ci mov $_inp,$inp # restore inp 2140e1051a39Sopenharmony_ci mov $_out,$out # restore out 2141e1051a39Sopenharmony_ci mov $_len,%r10 2142e1051a39Sopenharmony_ci xor 0+$ivec,$s0 2143e1051a39Sopenharmony_ci xor 4+$ivec,$s1 2144e1051a39Sopenharmony_ci xor 8+$ivec,$s2 2145e1051a39Sopenharmony_ci xor 12+$ivec,$s3 2146e1051a39Sopenharmony_ci 2147e1051a39Sopenharmony_ci mov 0($inp),%r11 # load input 2148e1051a39Sopenharmony_ci mov 8($inp),%r12 2149e1051a39Sopenharmony_ci sub \$16,%r10 2150e1051a39Sopenharmony_ci jc .Lcbc_slow_dec_partial 2151e1051a39Sopenharmony_ci jz .Lcbc_slow_dec_done 2152e1051a39Sopenharmony_ci 2153e1051a39Sopenharmony_ci mov %r11,0+$ivec # copy input to iv 2154e1051a39Sopenharmony_ci mov %r12,8+$ivec 2155e1051a39Sopenharmony_ci 2156e1051a39Sopenharmony_ci mov $s0,0($out) # save output [can zap input] 2157e1051a39Sopenharmony_ci mov $s1,4($out) 2158e1051a39Sopenharmony_ci mov $s2,8($out) 2159e1051a39Sopenharmony_ci mov $s3,12($out) 2160e1051a39Sopenharmony_ci 2161e1051a39Sopenharmony_ci lea 16($inp),$inp 2162e1051a39Sopenharmony_ci lea 16($out),$out 2163e1051a39Sopenharmony_ci 
jmp .Lcbc_slow_dec_loop 2164e1051a39Sopenharmony_ci.Lcbc_slow_dec_done: 2165e1051a39Sopenharmony_ci mov $_ivp,%rdi 2166e1051a39Sopenharmony_ci mov %r11,0(%rdi) # copy iv back to user 2167e1051a39Sopenharmony_ci mov %r12,8(%rdi) 2168e1051a39Sopenharmony_ci 2169e1051a39Sopenharmony_ci mov $s0,0($out) # save output [can zap input] 2170e1051a39Sopenharmony_ci mov $s1,4($out) 2171e1051a39Sopenharmony_ci mov $s2,8($out) 2172e1051a39Sopenharmony_ci mov $s3,12($out) 2173e1051a39Sopenharmony_ci 2174e1051a39Sopenharmony_ci jmp .Lcbc_exit 2175e1051a39Sopenharmony_ci 2176e1051a39Sopenharmony_ci.align 4 2177e1051a39Sopenharmony_ci.Lcbc_slow_dec_partial: 2178e1051a39Sopenharmony_ci mov $_ivp,%rdi 2179e1051a39Sopenharmony_ci mov %r11,0(%rdi) # copy iv back to user 2180e1051a39Sopenharmony_ci mov %r12,8(%rdi) 2181e1051a39Sopenharmony_ci 2182e1051a39Sopenharmony_ci mov $s0,0+$ivec # save output to stack 2183e1051a39Sopenharmony_ci mov $s1,4+$ivec 2184e1051a39Sopenharmony_ci mov $s2,8+$ivec 2185e1051a39Sopenharmony_ci mov $s3,12+$ivec 2186e1051a39Sopenharmony_ci 2187e1051a39Sopenharmony_ci mov $out,%rdi 2188e1051a39Sopenharmony_ci lea $ivec,%rsi 2189e1051a39Sopenharmony_ci lea 16(%r10),%rcx 2190e1051a39Sopenharmony_ci .long 0x9066A4F3 # rep movsb 2191e1051a39Sopenharmony_ci jmp .Lcbc_exit 2192e1051a39Sopenharmony_ci 2193e1051a39Sopenharmony_ci.align 16 2194e1051a39Sopenharmony_ci.Lcbc_exit: 2195e1051a39Sopenharmony_ci mov $_rsp,%rsi 2196e1051a39Sopenharmony_ci.cfi_def_cfa %rsi,64 2197e1051a39Sopenharmony_ci mov (%rsi),%r15 2198e1051a39Sopenharmony_ci.cfi_restore %r15 2199e1051a39Sopenharmony_ci mov 8(%rsi),%r14 2200e1051a39Sopenharmony_ci.cfi_restore %r14 2201e1051a39Sopenharmony_ci mov 16(%rsi),%r13 2202e1051a39Sopenharmony_ci.cfi_restore %r13 2203e1051a39Sopenharmony_ci mov 24(%rsi),%r12 2204e1051a39Sopenharmony_ci.cfi_restore %r12 2205e1051a39Sopenharmony_ci mov 32(%rsi),%rbp 2206e1051a39Sopenharmony_ci.cfi_restore %rbp 2207e1051a39Sopenharmony_ci mov 40(%rsi),%rbx 
2208e1051a39Sopenharmony_ci.cfi_restore %rbx 2209e1051a39Sopenharmony_ci lea 48(%rsi),%rsp 2210e1051a39Sopenharmony_ci.cfi_def_cfa %rsp,16 2211e1051a39Sopenharmony_ci.Lcbc_popfq: 2212e1051a39Sopenharmony_ci popfq 2213e1051a39Sopenharmony_ci# This could be .cfi_pop 49, but libunwind fails on registers it does not 2214e1051a39Sopenharmony_ci# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. 2215e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset -8 2216e1051a39Sopenharmony_ci.Lcbc_epilogue: 2217e1051a39Sopenharmony_ci ret 2218e1051a39Sopenharmony_ci.cfi_endproc 2219e1051a39Sopenharmony_ci.size AES_cbc_encrypt,.-AES_cbc_encrypt 2220e1051a39Sopenharmony_ci___ 2221e1051a39Sopenharmony_ci} 2222e1051a39Sopenharmony_ci 2223e1051a39Sopenharmony_ci$code.=<<___; 2224e1051a39Sopenharmony_ci.align 64 2225e1051a39Sopenharmony_ci.LAES_Te: 2226e1051a39Sopenharmony_ci___ 2227e1051a39Sopenharmony_ci &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); 2228e1051a39Sopenharmony_ci &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); 2229e1051a39Sopenharmony_ci &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); 2230e1051a39Sopenharmony_ci &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec); 2231e1051a39Sopenharmony_ci &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa); 2232e1051a39Sopenharmony_ci &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb); 2233e1051a39Sopenharmony_ci &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45); 2234e1051a39Sopenharmony_ci &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b); 2235e1051a39Sopenharmony_ci &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c); 2236e1051a39Sopenharmony_ci &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83); 2237e1051a39Sopenharmony_ci &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9); 2238e1051a39Sopenharmony_ci &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a); 2239e1051a39Sopenharmony_ci &_data_word(0x0c040408, 
0x52c7c795, 0x65232346, 0x5ec3c39d); 2240e1051a39Sopenharmony_ci &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f); 2241e1051a39Sopenharmony_ci &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df); 2242e1051a39Sopenharmony_ci &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea); 2243e1051a39Sopenharmony_ci &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34); 2244e1051a39Sopenharmony_ci &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b); 2245e1051a39Sopenharmony_ci &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d); 2246e1051a39Sopenharmony_ci &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413); 2247e1051a39Sopenharmony_ci &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1); 2248e1051a39Sopenharmony_ci &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6); 2249e1051a39Sopenharmony_ci &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972); 2250e1051a39Sopenharmony_ci &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85); 2251e1051a39Sopenharmony_ci &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed); 2252e1051a39Sopenharmony_ci &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511); 2253e1051a39Sopenharmony_ci &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe); 2254e1051a39Sopenharmony_ci &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b); 2255e1051a39Sopenharmony_ci &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05); 2256e1051a39Sopenharmony_ci &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1); 2257e1051a39Sopenharmony_ci &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142); 2258e1051a39Sopenharmony_ci &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf); 2259e1051a39Sopenharmony_ci &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3); 2260e1051a39Sopenharmony_ci &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e); 2261e1051a39Sopenharmony_ci &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a); 
2262e1051a39Sopenharmony_ci &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6); 2263e1051a39Sopenharmony_ci &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3); 2264e1051a39Sopenharmony_ci &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b); 2265e1051a39Sopenharmony_ci &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428); 2266e1051a39Sopenharmony_ci &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad); 2267e1051a39Sopenharmony_ci &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14); 2268e1051a39Sopenharmony_ci &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8); 2269e1051a39Sopenharmony_ci &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4); 2270e1051a39Sopenharmony_ci &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2); 2271e1051a39Sopenharmony_ci &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda); 2272e1051a39Sopenharmony_ci &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949); 2273e1051a39Sopenharmony_ci &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf); 2274e1051a39Sopenharmony_ci &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810); 2275e1051a39Sopenharmony_ci &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c); 2276e1051a39Sopenharmony_ci &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697); 2277e1051a39Sopenharmony_ci &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e); 2278e1051a39Sopenharmony_ci &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f); 2279e1051a39Sopenharmony_ci &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc); 2280e1051a39Sopenharmony_ci &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c); 2281e1051a39Sopenharmony_ci &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969); 2282e1051a39Sopenharmony_ci &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27); 2283e1051a39Sopenharmony_ci &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122); 2284e1051a39Sopenharmony_ci 
&_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433); 2285e1051a39Sopenharmony_ci &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9); 2286e1051a39Sopenharmony_ci &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5); 2287e1051a39Sopenharmony_ci &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a); 2288e1051a39Sopenharmony_ci &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0); 2289e1051a39Sopenharmony_ci &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e); 2290e1051a39Sopenharmony_ci &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c); 2291e1051a39Sopenharmony_ci 2292e1051a39Sopenharmony_ci#Te4 # four copies of Te4 to choose from to avoid L1 aliasing 2293e1051a39Sopenharmony_ci &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); 2294e1051a39Sopenharmony_ci &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); 2295e1051a39Sopenharmony_ci &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); 2296e1051a39Sopenharmony_ci &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); 2297e1051a39Sopenharmony_ci &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); 2298e1051a39Sopenharmony_ci &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); 2299e1051a39Sopenharmony_ci &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); 2300e1051a39Sopenharmony_ci &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); 2301e1051a39Sopenharmony_ci &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); 2302e1051a39Sopenharmony_ci &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); 2303e1051a39Sopenharmony_ci &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); 2304e1051a39Sopenharmony_ci &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); 2305e1051a39Sopenharmony_ci &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); 2306e1051a39Sopenharmony_ci &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); 2307e1051a39Sopenharmony_ci &data_byte(0x51, 0xa3, 
0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); 2308e1051a39Sopenharmony_ci &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); 2309e1051a39Sopenharmony_ci &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); 2310e1051a39Sopenharmony_ci &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); 2311e1051a39Sopenharmony_ci &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); 2312e1051a39Sopenharmony_ci &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); 2313e1051a39Sopenharmony_ci &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); 2314e1051a39Sopenharmony_ci &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); 2315e1051a39Sopenharmony_ci &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); 2316e1051a39Sopenharmony_ci &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); 2317e1051a39Sopenharmony_ci &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); 2318e1051a39Sopenharmony_ci &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); 2319e1051a39Sopenharmony_ci &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); 2320e1051a39Sopenharmony_ci &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); 2321e1051a39Sopenharmony_ci &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); 2322e1051a39Sopenharmony_ci &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); 2323e1051a39Sopenharmony_ci &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); 2324e1051a39Sopenharmony_ci &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); 2325e1051a39Sopenharmony_ci 2326e1051a39Sopenharmony_ci &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); 2327e1051a39Sopenharmony_ci &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); 2328e1051a39Sopenharmony_ci &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); 2329e1051a39Sopenharmony_ci &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); 2330e1051a39Sopenharmony_ci &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 
0xcc); 2331e1051a39Sopenharmony_ci &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); 2332e1051a39Sopenharmony_ci &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); 2333e1051a39Sopenharmony_ci &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); 2334e1051a39Sopenharmony_ci &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); 2335e1051a39Sopenharmony_ci &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); 2336e1051a39Sopenharmony_ci &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); 2337e1051a39Sopenharmony_ci &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); 2338e1051a39Sopenharmony_ci &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); 2339e1051a39Sopenharmony_ci &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); 2340e1051a39Sopenharmony_ci &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); 2341e1051a39Sopenharmony_ci &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); 2342e1051a39Sopenharmony_ci &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); 2343e1051a39Sopenharmony_ci &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); 2344e1051a39Sopenharmony_ci &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); 2345e1051a39Sopenharmony_ci &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); 2346e1051a39Sopenharmony_ci &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); 2347e1051a39Sopenharmony_ci &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); 2348e1051a39Sopenharmony_ci &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); 2349e1051a39Sopenharmony_ci &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); 2350e1051a39Sopenharmony_ci &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); 2351e1051a39Sopenharmony_ci &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); 2352e1051a39Sopenharmony_ci &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); 2353e1051a39Sopenharmony_ci &data_byte(0x61, 0x35, 0x57, 
0xb9, 0x86, 0xc1, 0x1d, 0x9e); 2354e1051a39Sopenharmony_ci &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); 2355e1051a39Sopenharmony_ci &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); 2356e1051a39Sopenharmony_ci &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); 2357e1051a39Sopenharmony_ci &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); 2358e1051a39Sopenharmony_ci 2359e1051a39Sopenharmony_ci &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); 2360e1051a39Sopenharmony_ci &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); 2361e1051a39Sopenharmony_ci &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); 2362e1051a39Sopenharmony_ci &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); 2363e1051a39Sopenharmony_ci &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); 2364e1051a39Sopenharmony_ci &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); 2365e1051a39Sopenharmony_ci &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); 2366e1051a39Sopenharmony_ci &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); 2367e1051a39Sopenharmony_ci &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); 2368e1051a39Sopenharmony_ci &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); 2369e1051a39Sopenharmony_ci &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); 2370e1051a39Sopenharmony_ci &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); 2371e1051a39Sopenharmony_ci &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); 2372e1051a39Sopenharmony_ci &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); 2373e1051a39Sopenharmony_ci &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); 2374e1051a39Sopenharmony_ci &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); 2375e1051a39Sopenharmony_ci &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); 2376e1051a39Sopenharmony_ci &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); 
2377e1051a39Sopenharmony_ci &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); 2378e1051a39Sopenharmony_ci &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); 2379e1051a39Sopenharmony_ci &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); 2380e1051a39Sopenharmony_ci &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); 2381e1051a39Sopenharmony_ci &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); 2382e1051a39Sopenharmony_ci &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); 2383e1051a39Sopenharmony_ci &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); 2384e1051a39Sopenharmony_ci &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); 2385e1051a39Sopenharmony_ci &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); 2386e1051a39Sopenharmony_ci &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); 2387e1051a39Sopenharmony_ci &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); 2388e1051a39Sopenharmony_ci &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); 2389e1051a39Sopenharmony_ci &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); 2390e1051a39Sopenharmony_ci &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); 2391e1051a39Sopenharmony_ci 2392e1051a39Sopenharmony_ci &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); 2393e1051a39Sopenharmony_ci &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); 2394e1051a39Sopenharmony_ci &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); 2395e1051a39Sopenharmony_ci &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); 2396e1051a39Sopenharmony_ci &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); 2397e1051a39Sopenharmony_ci &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); 2398e1051a39Sopenharmony_ci &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); 2399e1051a39Sopenharmony_ci &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); 2400e1051a39Sopenharmony_ci 
&data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); 2401e1051a39Sopenharmony_ci &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); 2402e1051a39Sopenharmony_ci &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); 2403e1051a39Sopenharmony_ci &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); 2404e1051a39Sopenharmony_ci &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); 2405e1051a39Sopenharmony_ci &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); 2406e1051a39Sopenharmony_ci &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); 2407e1051a39Sopenharmony_ci &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); 2408e1051a39Sopenharmony_ci &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); 2409e1051a39Sopenharmony_ci &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); 2410e1051a39Sopenharmony_ci &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); 2411e1051a39Sopenharmony_ci &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); 2412e1051a39Sopenharmony_ci &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); 2413e1051a39Sopenharmony_ci &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); 2414e1051a39Sopenharmony_ci &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); 2415e1051a39Sopenharmony_ci &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); 2416e1051a39Sopenharmony_ci &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); 2417e1051a39Sopenharmony_ci &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); 2418e1051a39Sopenharmony_ci &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); 2419e1051a39Sopenharmony_ci &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); 2420e1051a39Sopenharmony_ci &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); 2421e1051a39Sopenharmony_ci &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); 2422e1051a39Sopenharmony_ci &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); 
2423e1051a39Sopenharmony_ci &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); 2424e1051a39Sopenharmony_ci#rcon: 2425e1051a39Sopenharmony_ci$code.=<<___; 2426e1051a39Sopenharmony_ci .long 0x00000001, 0x00000002, 0x00000004, 0x00000008 2427e1051a39Sopenharmony_ci .long 0x00000010, 0x00000020, 0x00000040, 0x00000080 2428e1051a39Sopenharmony_ci .long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 2429e1051a39Sopenharmony_ci .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b 2430e1051a39Sopenharmony_ci___ 2431e1051a39Sopenharmony_ci$code.=<<___; 2432e1051a39Sopenharmony_ci.align 64 2433e1051a39Sopenharmony_ci.LAES_Td: 2434e1051a39Sopenharmony_ci___ 2435e1051a39Sopenharmony_ci &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); 2436e1051a39Sopenharmony_ci &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); 2437e1051a39Sopenharmony_ci &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); 2438e1051a39Sopenharmony_ci &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5); 2439e1051a39Sopenharmony_ci &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d); 2440e1051a39Sopenharmony_ci &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b); 2441e1051a39Sopenharmony_ci &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295); 2442e1051a39Sopenharmony_ci &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e); 2443e1051a39Sopenharmony_ci &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927); 2444e1051a39Sopenharmony_ci &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d); 2445e1051a39Sopenharmony_ci &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362); 2446e1051a39Sopenharmony_ci &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9); 2447e1051a39Sopenharmony_ci &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52); 2448e1051a39Sopenharmony_ci &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566); 2449e1051a39Sopenharmony_ci &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3); 
2450e1051a39Sopenharmony_ci &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed); 2451e1051a39Sopenharmony_ci &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e); 2452e1051a39Sopenharmony_ci &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4); 2453e1051a39Sopenharmony_ci &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4); 2454e1051a39Sopenharmony_ci &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd); 2455e1051a39Sopenharmony_ci &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d); 2456e1051a39Sopenharmony_ci &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060); 2457e1051a39Sopenharmony_ci &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967); 2458e1051a39Sopenharmony_ci &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879); 2459e1051a39Sopenharmony_ci &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000); 2460e1051a39Sopenharmony_ci &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c); 2461e1051a39Sopenharmony_ci &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36); 2462e1051a39Sopenharmony_ci &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624); 2463e1051a39Sopenharmony_ci &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b); 2464e1051a39Sopenharmony_ci &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c); 2465e1051a39Sopenharmony_ci &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12); 2466e1051a39Sopenharmony_ci &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14); 2467e1051a39Sopenharmony_ci &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3); 2468e1051a39Sopenharmony_ci &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b); 2469e1051a39Sopenharmony_ci &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8); 2470e1051a39Sopenharmony_ci &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684); 2471e1051a39Sopenharmony_ci &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7); 2472e1051a39Sopenharmony_ci 
&_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177); 2473e1051a39Sopenharmony_ci &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947); 2474e1051a39Sopenharmony_ci &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322); 2475e1051a39Sopenharmony_ci &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498); 2476e1051a39Sopenharmony_ci &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f); 2477e1051a39Sopenharmony_ci &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54); 2478e1051a39Sopenharmony_ci &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382); 2479e1051a39Sopenharmony_ci &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf); 2480e1051a39Sopenharmony_ci &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb); 2481e1051a39Sopenharmony_ci &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83); 2482e1051a39Sopenharmony_ci &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef); 2483e1051a39Sopenharmony_ci &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029); 2484e1051a39Sopenharmony_ci &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235); 2485e1051a39Sopenharmony_ci &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733); 2486e1051a39Sopenharmony_ci &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117); 2487e1051a39Sopenharmony_ci &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4); 2488e1051a39Sopenharmony_ci &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546); 2489e1051a39Sopenharmony_ci &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb); 2490e1051a39Sopenharmony_ci &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d); 2491e1051a39Sopenharmony_ci &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb); 2492e1051a39Sopenharmony_ci &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a); 2493e1051a39Sopenharmony_ci &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773); 2494e1051a39Sopenharmony_ci &_data_word(0xeacdf753, 0x5baafd5f, 
0x146f3ddf, 0x86db4478);
	&_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
	&_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
	&_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
	&_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);

#Td4:	# four copies of Td4 to choose from to avoid L1 aliasing
{
    # The 256-byte Td4 table, listed once as 32 rows of eight bytes. Every
    # row is handed to data_byte() on its own, so each copy is emitted by
    # the same sequence of 32 calls.
    my @td4_rows = (
	[0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38],
	[0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb],
	[0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87],
	[0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb],
	[0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d],
	[0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e],
	[0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2],
	[0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25],
	[0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16],
	[0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92],
	[0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda],
	[0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84],
	[0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a],
	[0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06],
	[0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02],
	[0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b],
	[0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea],
	[0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73],
	[0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85],
	[0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e],
	[0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89],
	[0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b],
	[0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20],
	[0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4],
	[0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31],
	[0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f],
	[0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d],
	[0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef],
	[0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0],
	[0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61],
	[0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26],
	[0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d],
    );

    # Each of the four copies is followed by the same two rows of .long
    # constants.
    foreach my $copy (1..4) {
	foreach my $row (@td4_rows) {
	    &data_byte(@$row);
	}
	$code.=<<___;
	.long	0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
	.long	0x1b1b1b1b, 0x1b1b1b1b, 0, 0
___
    }

    # Identification string and alignment closing the table area.
    $code.=<<___;
.asciz	"AES for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align	64
___
}

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64 only. Three handlers are emitted (block, key, cbc). Each one
# writes the interrupted function's saved registers back into *context,
# then falls into the shared exit at .Lcommon_seh_exit, which copies the
# context to disp->ContextRecord, calls RtlVirtualUnwind and returns
# ExceptionContinueSearch. The .pdata/.xdata records that follow bind the
# handlers to the five public entry points.
if ($win64) {
    # Registers carrying the four handler arguments.
    ($rec,$frame,$context,$disp)=("%rcx","%rdx","%r8","%r9");

    # One row per covered function: name, handler kind and, for the
    # block/key handlers, the prologue/epilogue labels passed to the
    # handler as HandlerData[]. The cbc handler takes no HandlerData.
    my @seh_table = (
	["AES_encrypt",         "block", ".Lenc_prologue",     ".Lenc_epilogue"],
	["AES_decrypt",         "block", ".Ldec_prologue",     ".Ldec_epilogue"],
	["AES_set_encrypt_key", "key",   ".Lenc_key_prologue", ".Lenc_key_epilogue"],
	["AES_set_decrypt_key", "key",   ".Ldec_key_prologue", ".Ldec_key_epilogue"],
	["AES_cbc_encrypt",     "cbc"],
    );

    # Entry sequence shared by all three handlers: save registers and
    # flags, reserve 64 bytes of stack, fetch context->Rax and context->Rip.
    my $seh_entry=<<___;
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip
___

    # Write the six recovered registers back into *context.
    my $seh_save_regs=<<___;
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15
___

    # Common tail of every handler: recover %rdi/%rsi relative to %rax and
    # store them together with the stack pointer into *context.
    my $seh_save_frame=<<___;
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi
___

    $code.=".extern\t__imp_RtlVirtualUnwind\n";

    # block_se_handler and key_se_handler are the same code except for
    # their label names and the one instruction that moves %rax to the
    # top of the saved-register area.
    foreach my $variant (
	["block", 'mov	24(%rax),%rax		# pull saved real stack pointer'],
	["key",   'lea	56(%rax),%rax']) {
	my ($kind,$find_frame)=@$variant;
	$code.=<<___;
.type	${kind}_se_handler,\@abi-omnipotent
.align	16
${kind}_se_handler:
$seh_entry
	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lin_${kind}_prologue

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lin_${kind}_prologue

	$find_frame

	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
$seh_save_regs
.Lin_${kind}_prologue:
$seh_save_frame
	jmp	.Lcommon_seh_exit
.size	${kind}_se_handler,.-${kind}_se_handler

___
    }

    # cbc_se_handler compares context->Rip against fixed labels of the CBC
    # routine instead of HandlerData[], and it carries the exit sequence
    # that the other two handlers jump to.
    # NOTE(review): "$_rsp" in the assembly comment below is interpolated
    # by Perl, exactly as in the original text; it is presumably set
    # earlier in this file - confirm.
    $code.=<<___;
.type	cbc_se_handler,\@abi-omnipotent
.align	16
cbc_se_handler:
$seh_entry
	lea	.Lcbc_prologue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_prologue
	jb	.Lin_cbc_prologue

	lea	.Lcbc_fast_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_fast_body
	jb	.Lin_cbc_frame_setup

	lea	.Lcbc_slow_prologue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_slow_prologue
	jb	.Lin_cbc_body

	lea	.Lcbc_slow_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_slow_body
	jb	.Lin_cbc_frame_setup

.Lin_cbc_body:
	mov	152($context),%rax	# pull context->Rsp

	lea	.Lcbc_epilogue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lcbc_epilogue
	jae	.Lin_cbc_prologue

	lea	8(%rax),%rax

	lea	.Lcbc_popfq(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lcbc_popfq
	jae	.Lin_cbc_prologue

	mov	`16-8`(%rax),%rax	# biased $_rsp
	lea	56(%rax),%rax

.Lin_cbc_frame_setup:
	mov	-16(%rax),%rbx
	mov	-24(%rax),%rbp
	mov	-32(%rax),%r12
	mov	-40(%rax),%r13
	mov	-48(%rax),%r14
	mov	-56(%rax),%r15
$seh_save_regs
.Lin_cbc_prologue:
$seh_save_frame
.Lcommon_seh_exit:

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	cbc_se_handler,.-cbc_se_handler

___

    # .pdata: one begin/end/info triple of image-relative addresses per
    # covered function, in table order.
    $code.=".section\t.pdata\n.align\t4\n";
    foreach my $entry (@seh_table) {
	my $func=$entry->[0];
	$code.=<<___;
	.rva	.LSEH_begin_$func
	.rva	.LSEH_end_$func
	.rva	.LSEH_info_$func

___
    }

    # .xdata: the info record each .pdata triple points at; it names the
    # handler and, where present, the HandlerData[] label pair.
    $code.=".section\t.xdata\n.align\t8\n";
    foreach my $entry (@seh_table) {
	my ($func,$kind,@bounds)=@$entry;
	$code.=".LSEH_info_${func}:\n\t.byte\t9,0,0,0\n\t.rva\t${kind}_se_handler\n";
	$code.="\t.rva\t".join(",",@bounds)."\t# HandlerData[]\n" if (@bounds);
    }
}

# Replace every `expr` placeholder in the generated text with the value
# of the Perl expression between the backticks.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT was aliased to the pipe feeding the x86_64-xlate.pl translator at
# the top of the file, so a failed close is reported as a fatal error.
close STDOUT or die "error closing STDOUT: $!";