#! /usr/bin/env perl
# Copyright 2014-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for ARMv8 AES instructions. The
# module is endian-agnostic in sense that it supports both big- and
# little-endian cases. As does it support both 32- and 64-bit modes
# of operation. Latter is achieved by limiting amount of utilized
# registers to 16, which implies additional NEON load and integer
# instructions. This has no effect on mighty Apple A7, where results
# are literally equal to the theoretical estimates based on AES
# instruction latencies and issue rates. On Cortex-A53, an in-order
# execution core, this costs up to 10-15%, which is partially
# compensated by implementing dedicated code path for 128-bit
# CBC encrypt case. On Cortex-A57 parallelizable mode performance
# seems to be limited by sheer amount of NEON instructions...
#
# April 2019
#
# Key to performance of parallelize-able modes is round instruction
# interleaving. But which factor to use? There is optimal one for
# each combination of instruction latency and issue rate, beyond
# which increasing interleave factor doesn't pay off. While on cons
# side we have code size increase and resource waste on platforms for
# which interleave factor is too high. In other words you want it to
# be just right. So far interleave factor of 3x was serving well all
# platforms. But for ThunderX2 optimal interleave factor was measured
# to be 5x...
#
# Performance in cycles per byte processed with 128-bit key:
#
#		CBC enc		CBC dec		CTR
# Apple A7	2.39		1.20		1.20
# Cortex-A53	1.32		1.17/1.29(**)	1.36/1.46
# Cortex-A57(*)	1.95		0.82/0.85	0.89/0.93
# Cortex-A72	1.33		0.85/0.88	0.92/0.96
# Denver	1.96		0.65/0.86	0.76/0.80
# Mongoose	1.33		1.23/1.20	1.30/1.20
# Kryo		1.26		0.87/0.94	1.00/1.00
# ThunderX2	5.95		1.25		1.30
#
# (*)	original 3.64/1.34/1.32 results were for r0p0 revision
#	and are still same even for updated module;
# (**)	numbers after slash are for 32-bit code, which is 3x-
#	interleaved;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# $dir is the directory part of the script path, trailing separator included.
# arm-xlate.pl is looked for next to this script first, then under
# ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through arm-xlate.pl,
# which is run with the current perl interpreter ($^X).
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Common prefix of every generated symbol.
$prefix="aes_v8";

# Directive used by the INST() macro below to emit raw bytes:
# "DCB" for flavours matching /win/, ".byte" otherwise.
$_byte = ($flavour =~ /win/ ? "DCB" : ".byte");

$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
___
# Flavours matching /64/ get the armv8-a+crypto header, all others the
# armv7-a/NEON header together with the INST() byte-emitting macro.
$code.=".arch armv8-a+crypto\n.text\n" if ($flavour =~ /64/);
$code.=<<___ if ($flavour !~ /64/);
.arch	armv7-a	// don't confuse not-so-latest binutils with argv8 :-)
.fpu	neon
#ifdef __thumb2__
.syntax	unified
.thumb
# define INST(a,b,c,d) $_byte c,d|0xc,a,b
#else
.code	32
# define INST(a,b,c,d) $_byte a,b,c,d
#endif

.text
___

# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
# maintain both 32- and 64-bit codes within single module and
# transliterate common code to either flavour with regex voodoo.
#
{{{
# Key schedule generators: ${prefix}_set_encrypt_key and
# ${prefix}_set_decrypt_key.
#
# Integer registers used by the generated code:
#   $inp    (x0)  pointer the key material is loaded from
#   $bits   (w1)  key size in bits; only 128, 192 and 256 pass the checks
#                 below, and the register is later reused as a loop counter
#   $out    (x2)  pointer the round keys are stored to
#   $ptr    (x3)  return code while validating, then pointer into .Lrcon
#   $rounds (w12) round count (10, 12 or 14) stored after the schedule
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));


#
# This file generates .s file for 64-bit and 32-bit CPUs.
# We don't implement .rodata on 32-bit CPUs yet.
#
$code.=".rodata\n" if ($flavour =~ /64/);
$code.=<<___;
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
___
$code.=".previous\n" if ($flavour =~ /64/);

# ${prefix}_set_encrypt_key. The generated routine returns in x0:
#   -1 if $inp or $out is zero,
#   -2 if $bits is below 128, above 256, or has any of the low six bits set,
#    0 after the schedule and the round count have been stored.
# .Lenc_key is a local alias of the entry point; ${prefix}_set_decrypt_key
# branches to it with "bl".
$code.=<<___;
.globl	${prefix}_set_encrypt_key
.type	${prefix}_set_encrypt_key,%function
.align	5
${prefix}_set_encrypt_key:
.Lenc_key:
___
$code.=<<___ if ($flavour =~ /64/);
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___;
	mov	$ptr,#-1
	cmp	$inp,#0
	b.eq	.Lenc_key_abort
	cmp	$out,#0
	b.eq	.Lenc_key_abort
	mov	$ptr,#-2
	cmp	$bits,#128
	b.lt	.Lenc_key_abort
	cmp	$bits,#256
	b.gt	.Lenc_key_abort
	tst	$bits,#0x3f
	b.ne	.Lenc_key_abort

___
# Point $ptr at .Lrcon. The 64-bit flavours keep the table in .rodata and
# therefore need the adrp/add pair.
$code.=<<___ if ($flavour =~ /64/);
	adrp	$ptr,.Lrcon
	add	$ptr,$ptr,:lo12:.Lrcon
___
# The 32-bit path is selected with "!~ /64/", like every other 32-bit
# fragment in this file, so that a flavour string containing neither "32"
# nor "64" still gets $ptr pointed at .Lrcon before it is dereferenced below.
$code.=<<___ if ($flavour !~ /64/);
	adr	$ptr,.Lrcon
___
# The flags of "cmp $bits,#192" select the 128-, 192- or 256-bit expansion
# loop; the loads and moves in between are expected to leave them untouched.
$code.=<<___;
	cmp	$bits,#192

	veor	$zero,$zero,$zero
	vld1.8	{$in0},[$inp],#16
	mov	$bits,#8		// reuse $bits
	vld1.32	{$rcon,$mask},[$ptr],#32

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	b.ne	.Loop128

	vld1.32	{$rcon},[$ptr]

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out]
	add	$out,$out,#0x50

	mov	$rounds,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{$in1},[$inp],#8
	vmov.i8	$key,#8			// borrow $key
	vst1.32	{$in0},[$out],#16
	vsub.i8	$mask,$mask,$key	// adjust the mask

.Loop192:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
#ifdef __ARMEB__
	vst1.32	{$in1},[$out],#16
	sub	$out,$out,#8
#else
	vst1.32	{$in1},[$out],#8
#endif
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp

	vdup.32	$tmp,${in0}[3]
	veor	$tmp,$tmp,$in1
	veor	$key,$key,$rcon
	vext.8	$in1,$zero,$in1,#12
	vshl.u8	$rcon,$rcon,#1
	veor	$in1,$in1,$tmp
	veor	$in0,$in0,$key
	veor	$in1,$in1,$key
	vst1.32	{$in0},[$out],#16
	b.ne	.Loop192

	mov	$rounds,#12
	add	$out,$out,#0x20
	b	.Ldone

.align	4
.L256:
	vld1.8	{$in1},[$inp]
	mov	$bits,#7
	mov	$rounds,#14
	vst1.32	{$in0},[$out],#16

.Loop256:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in1},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out],#16
	b.eq	.Ldone

	vdup.32	$key,${in0}[3]		// just splat
	vext.8	$tmp,$zero,$in1,#12
	aese	$key,$zero

	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp

	veor	$in1,$in1,$key
	b	.Loop256

.Ldone:
	str	$rounds,[$out]
	mov	$ptr,#0

.Lenc_key_abort:
	mov	x0,$ptr			// return value
	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)`
	ret
.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key

.globl	${prefix}_set_decrypt_key
.type	${prefix}_set_decrypt_key,%function
.align	5
${prefix}_set_decrypt_key:
___
# ${prefix}_set_decrypt_key: runs .Lenc_key and gives up if it returned
# non-zero. Otherwise it walks the schedule from both ends towards the
# middle, swapping round keys: the outermost pair is swapped unchanged,
# every other round key goes through aesimc on the way.
$code.=<<___ if ($flavour =~ /64/);
	.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___ if ($flavour !~ /64/);
	stmdb	sp!,{r4,lr}
___
$code.=<<___;
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	$out,$out,#240		// restore original $out
	mov	x4,#-16
	add	$inp,$out,x12,lsl#4	// end of key schedule

	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16

.Loop_imc:
	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16
	cmp	$inp,$out
	b.hi	.Loop_imc

	vld1.32	{v0.16b},[$out]
	aesimc	v0.16b,v0.16b
	vst1.32	{v0.16b},[$inp]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
___
$code.=<<___ if ($flavour !~ /64/);
	ldmia	sp!,{r4,pc}
___
$code.=<<___ if ($flavour =~ /64/);
	ldp	x29,x30,[sp],#16
	.inst	0xd50323bf		// autiasp
	ret
___
$code.=<<___;
.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
___
}}}
{{{
# gen_block($dir) appends the single-block routine ${prefix}_${dir}crypt
# to $code. $dir is "en" or "de": "en" selects aese/aesmc, anything else
# selects aesd/aesimc.
#
# Generated code: x0 = block to read, x1 = where the result is written,
# x2 = key schedule. The round count is read from [x2,#240] and two rounds
# are processed per loop iteration.
sub gen_block {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my $rounds="w3";
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..2));

$code.=<<___;
.globl	${prefix}_${dir}crypt
.type	${prefix}_${dir}crypt,%function
.align	5
${prefix}_${dir}crypt:
	ldr	$rounds,[$key,#240]
	vld1.32	{$rndkey0},[$key],#16
	vld1.8	{$inout},[$inp]
	sub	$rounds,$rounds,#2
	vld1.32	{$rndkey1},[$key],#16

.Loop_${dir}c:
	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key],#16
	subs	$rounds,$rounds,#2
	aes$e	$inout,$rndkey1
	aes$mc	$inout,$inout
	vld1.32	{$rndkey1},[$key],#16
	b.gt	.Loop_${dir}c

	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key]
	aes$e	$inout,$rndkey1
	veor	$inout,$inout,$rndkey0

	vst1.8	{$inout},[$out]
	ret
.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}

# Performance in cycles per byte.
# Processed with AES-ECB different key size.
# It shows the value before and after optimization as below:
# (before/after):
#
#		AES-128-ECB		AES-192-ECB		AES-256-ECB
# Cortex-A57	1.85/0.82		2.16/0.96		2.47/1.10
# Cortex-A72	1.64/0.85		1.82/0.99		2.13/1.14

# Optimization is implemented by loop unrolling and interleaving.
# Commonly, we choose the unrolling factor as 5, if the input
# data size smaller than 5 blocks, but not smaller than 3 blocks,
# choose 3 as the unrolling factor.
# If the input data size dsize >= 5*16 bytes, then take 5 blocks
# as one iteration, every loop the left size lsize -= 5*16.
# If 5*16 > lsize >= 3*16 bytes, take 3 blocks as one iteration,
# every loop lsize -=3*16.
# If lsize < 3*16 bytes, treat them as the tail, interleave the
# two blocks AES instructions.
428e1051a39Sopenharmony_ci# There is one special case, if the original input data size dsize 429e1051a39Sopenharmony_ci# = 16 bytes, we will treat it seperately to improve the 430e1051a39Sopenharmony_ci# performance: one independent code block without LR, FP load and 431e1051a39Sopenharmony_ci# store, just looks like what the original ECB implementation does. 432e1051a39Sopenharmony_ci 433e1051a39Sopenharmony_ci{{{ 434e1051a39Sopenharmony_cimy ($inp,$out,$len,$key)=map("x$_",(0..3)); 435e1051a39Sopenharmony_cimy ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8"); 436e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); 437e1051a39Sopenharmony_ci 438e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); 439e1051a39Sopenharmony_ci 440e1051a39Sopenharmony_ci### q7 last round key 441e1051a39Sopenharmony_ci### q10-q15 q7 Last 7 round keys 442e1051a39Sopenharmony_ci### q8-q9 preloaded round keys except last 7 keys for big size 443e1051a39Sopenharmony_ci### q5, q6, q8-q9 preloaded round keys except last 7 keys for only 16 byte 444e1051a39Sopenharmony_ci 445e1051a39Sopenharmony_ci{ 446e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); 447e1051a39Sopenharmony_ci 448e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3); # used only in 64-bit mode 449e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4); 450e1051a39Sopenharmony_ciif ($flavour =~ /64/) { 451e1051a39Sopenharmony_ci ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); 452e1051a39Sopenharmony_ci} 453e1051a39Sopenharmony_ci 454e1051a39Sopenharmony_ci$code.=<<___; 455e1051a39Sopenharmony_ci.globl ${prefix}_ecb_encrypt 456e1051a39Sopenharmony_ci.type ${prefix}_ecb_encrypt,%function 457e1051a39Sopenharmony_ci.align 5 458e1051a39Sopenharmony_ci${prefix}_ecb_encrypt: 459e1051a39Sopenharmony_ci___ 460e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 461e1051a39Sopenharmony_ci subs $len,$len,#16 462e1051a39Sopenharmony_ci // 
Original input data size bigger than 16, jump to big size processing. 463e1051a39Sopenharmony_ci b.ne .Lecb_big_size 464e1051a39Sopenharmony_ci vld1.8 {$dat0},[$inp] 465e1051a39Sopenharmony_ci cmp $enc,#0 // en- or decrypting? 466e1051a39Sopenharmony_ci ldr $rounds,[$key,#240] 467e1051a39Sopenharmony_ci vld1.32 {q5-q6},[$key],#32 // load key schedule... 468e1051a39Sopenharmony_ci 469e1051a39Sopenharmony_ci b.eq .Lecb_small_dec 470e1051a39Sopenharmony_ci aese $dat0,q5 471e1051a39Sopenharmony_ci aesmc $dat0,$dat0 472e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key],#32 // load key schedule... 473e1051a39Sopenharmony_ci aese $dat0,q6 474e1051a39Sopenharmony_ci aesmc $dat0,$dat0 475e1051a39Sopenharmony_ci subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-ecb processing 476e1051a39Sopenharmony_ci b.eq .Lecb_128_enc 477e1051a39Sopenharmony_ci.Lecb_round_loop: 478e1051a39Sopenharmony_ci aese $dat0,q8 479e1051a39Sopenharmony_ci aesmc $dat0,$dat0 480e1051a39Sopenharmony_ci vld1.32 {q8},[$key],#16 // load key schedule... 481e1051a39Sopenharmony_ci aese $dat0,q9 482e1051a39Sopenharmony_ci aesmc $dat0,$dat0 483e1051a39Sopenharmony_ci vld1.32 {q9},[$key],#16 // load key schedule... 484e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 // bias 485e1051a39Sopenharmony_ci b.gt .Lecb_round_loop 486e1051a39Sopenharmony_ci.Lecb_128_enc: 487e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key],#32 // load key schedule... 488e1051a39Sopenharmony_ci aese $dat0,q8 489e1051a39Sopenharmony_ci aesmc $dat0,$dat0 490e1051a39Sopenharmony_ci aese $dat0,q9 491e1051a39Sopenharmony_ci aesmc $dat0,$dat0 492e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key],#32 // load key schedule... 493e1051a39Sopenharmony_ci aese $dat0,q10 494e1051a39Sopenharmony_ci aesmc $dat0,$dat0 495e1051a39Sopenharmony_ci aese $dat0,q11 496e1051a39Sopenharmony_ci aesmc $dat0,$dat0 497e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key],#32 // load key schedule... 
498e1051a39Sopenharmony_ci aese $dat0,q12 499e1051a39Sopenharmony_ci aesmc $dat0,$dat0 500e1051a39Sopenharmony_ci aese $dat0,q13 501e1051a39Sopenharmony_ci aesmc $dat0,$dat0 502e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key] 503e1051a39Sopenharmony_ci aese $dat0,q14 504e1051a39Sopenharmony_ci aesmc $dat0,$dat0 505e1051a39Sopenharmony_ci aese $dat0,q15 506e1051a39Sopenharmony_ci veor $dat0,$dat0,$rndlast 507e1051a39Sopenharmony_ci vst1.8 {$dat0},[$out] 508e1051a39Sopenharmony_ci b .Lecb_Final_abort 509e1051a39Sopenharmony_ci.Lecb_small_dec: 510e1051a39Sopenharmony_ci aesd $dat0,q5 511e1051a39Sopenharmony_ci aesimc $dat0,$dat0 512e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key],#32 // load key schedule... 513e1051a39Sopenharmony_ci aesd $dat0,q6 514e1051a39Sopenharmony_ci aesimc $dat0,$dat0 515e1051a39Sopenharmony_ci subs $rounds,$rounds,#10 // bias 516e1051a39Sopenharmony_ci b.eq .Lecb_128_dec 517e1051a39Sopenharmony_ci.Lecb_dec_round_loop: 518e1051a39Sopenharmony_ci aesd $dat0,q8 519e1051a39Sopenharmony_ci aesimc $dat0,$dat0 520e1051a39Sopenharmony_ci vld1.32 {q8},[$key],#16 // load key schedule... 521e1051a39Sopenharmony_ci aesd $dat0,q9 522e1051a39Sopenharmony_ci aesimc $dat0,$dat0 523e1051a39Sopenharmony_ci vld1.32 {q9},[$key],#16 // load key schedule... 524e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 // bias 525e1051a39Sopenharmony_ci b.gt .Lecb_dec_round_loop 526e1051a39Sopenharmony_ci.Lecb_128_dec: 527e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key],#32 // load key schedule... 528e1051a39Sopenharmony_ci aesd $dat0,q8 529e1051a39Sopenharmony_ci aesimc $dat0,$dat0 530e1051a39Sopenharmony_ci aesd $dat0,q9 531e1051a39Sopenharmony_ci aesimc $dat0,$dat0 532e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key],#32 // load key schedule... 
533e1051a39Sopenharmony_ci aesd $dat0,q10 534e1051a39Sopenharmony_ci aesimc $dat0,$dat0 535e1051a39Sopenharmony_ci aesd $dat0,q11 536e1051a39Sopenharmony_ci aesimc $dat0,$dat0 537e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key],#32 // load key schedule... 538e1051a39Sopenharmony_ci aesd $dat0,q12 539e1051a39Sopenharmony_ci aesimc $dat0,$dat0 540e1051a39Sopenharmony_ci aesd $dat0,q13 541e1051a39Sopenharmony_ci aesimc $dat0,$dat0 542e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key] 543e1051a39Sopenharmony_ci aesd $dat0,q14 544e1051a39Sopenharmony_ci aesimc $dat0,$dat0 545e1051a39Sopenharmony_ci aesd $dat0,q15 546e1051a39Sopenharmony_ci veor $dat0,$dat0,$rndlast 547e1051a39Sopenharmony_ci vst1.8 {$dat0},[$out] 548e1051a39Sopenharmony_ci b .Lecb_Final_abort 549e1051a39Sopenharmony_ci.Lecb_big_size: 550e1051a39Sopenharmony_ci___ 551e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 552e1051a39Sopenharmony_ci stp x29,x30,[sp,#-16]! 553e1051a39Sopenharmony_ci add x29,sp,#0 554e1051a39Sopenharmony_ci___ 555e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 556e1051a39Sopenharmony_ci mov ip,sp 557e1051a39Sopenharmony_ci stmdb sp!,{r4-r8,lr} 558e1051a39Sopenharmony_ci vstmdb sp!,{d8-d15} @ ABI specification says so 559e1051a39Sopenharmony_ci ldmia ip,{r4-r5} @ load remaining args 560e1051a39Sopenharmony_ci subs $len,$len,#16 561e1051a39Sopenharmony_ci___ 562e1051a39Sopenharmony_ci$code.=<<___; 563e1051a39Sopenharmony_ci mov $step,#16 564e1051a39Sopenharmony_ci b.lo .Lecb_done 565e1051a39Sopenharmony_ci cclr $step,eq 566e1051a39Sopenharmony_ci 567e1051a39Sopenharmony_ci cmp $enc,#0 // en- or decrypting? 568e1051a39Sopenharmony_ci ldr $rounds,[$key,#240] 569e1051a39Sopenharmony_ci and $len,$len,#-16 570e1051a39Sopenharmony_ci vld1.8 {$dat},[$inp],$step 571e1051a39Sopenharmony_ci 572e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key] // load key schedule... 
573e1051a39Sopenharmony_ci sub $rounds,$rounds,#6 574e1051a39Sopenharmony_ci add $key_,$key,x5,lsl#4 // pointer to last 7 round keys 575e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 576e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key_],#32 577e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key_],#32 578e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key_],#32 579e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key_] 580e1051a39Sopenharmony_ci 581e1051a39Sopenharmony_ci add $key_,$key,#32 582e1051a39Sopenharmony_ci mov $cnt,$rounds 583e1051a39Sopenharmony_ci b.eq .Lecb_dec 584e1051a39Sopenharmony_ci 585e1051a39Sopenharmony_ci vld1.8 {$dat1},[$inp],#16 586e1051a39Sopenharmony_ci subs $len,$len,#32 // bias 587e1051a39Sopenharmony_ci add $cnt,$rounds,#2 588e1051a39Sopenharmony_ci vorr $in1,$dat1,$dat1 589e1051a39Sopenharmony_ci vorr $dat2,$dat1,$dat1 590e1051a39Sopenharmony_ci vorr $dat1,$dat,$dat 591e1051a39Sopenharmony_ci b.lo .Lecb_enc_tail 592e1051a39Sopenharmony_ci 593e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 594e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 595e1051a39Sopenharmony_ci___ 596e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 597e1051a39Sopenharmony_ci cmp $len,#32 598e1051a39Sopenharmony_ci b.lo .Loop3x_ecb_enc 599e1051a39Sopenharmony_ci 600e1051a39Sopenharmony_ci vld1.8 {$dat3},[$inp],#16 601e1051a39Sopenharmony_ci vld1.8 {$dat4},[$inp],#16 602e1051a39Sopenharmony_ci sub $len,$len,#32 // bias 603e1051a39Sopenharmony_ci mov $cnt,$rounds 604e1051a39Sopenharmony_ci 605e1051a39Sopenharmony_ci.Loop5x_ecb_enc: 606e1051a39Sopenharmony_ci aese $dat0,q8 607e1051a39Sopenharmony_ci aesmc $dat0,$dat0 608e1051a39Sopenharmony_ci aese $dat1,q8 609e1051a39Sopenharmony_ci aesmc $dat1,$dat1 610e1051a39Sopenharmony_ci aese $dat2,q8 611e1051a39Sopenharmony_ci aesmc $dat2,$dat2 612e1051a39Sopenharmony_ci aese $dat3,q8 613e1051a39Sopenharmony_ci aesmc $dat3,$dat3 614e1051a39Sopenharmony_ci aese $dat4,q8 615e1051a39Sopenharmony_ci aesmc $dat4,$dat4 
616e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 617e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 618e1051a39Sopenharmony_ci aese $dat0,q9 619e1051a39Sopenharmony_ci aesmc $dat0,$dat0 620e1051a39Sopenharmony_ci aese $dat1,q9 621e1051a39Sopenharmony_ci aesmc $dat1,$dat1 622e1051a39Sopenharmony_ci aese $dat2,q9 623e1051a39Sopenharmony_ci aesmc $dat2,$dat2 624e1051a39Sopenharmony_ci aese $dat3,q9 625e1051a39Sopenharmony_ci aesmc $dat3,$dat3 626e1051a39Sopenharmony_ci aese $dat4,q9 627e1051a39Sopenharmony_ci aesmc $dat4,$dat4 628e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 629e1051a39Sopenharmony_ci b.gt .Loop5x_ecb_enc 630e1051a39Sopenharmony_ci 631e1051a39Sopenharmony_ci aese $dat0,q8 632e1051a39Sopenharmony_ci aesmc $dat0,$dat0 633e1051a39Sopenharmony_ci aese $dat1,q8 634e1051a39Sopenharmony_ci aesmc $dat1,$dat1 635e1051a39Sopenharmony_ci aese $dat2,q8 636e1051a39Sopenharmony_ci aesmc $dat2,$dat2 637e1051a39Sopenharmony_ci aese $dat3,q8 638e1051a39Sopenharmony_ci aesmc $dat3,$dat3 639e1051a39Sopenharmony_ci aese $dat4,q8 640e1051a39Sopenharmony_ci aesmc $dat4,$dat4 641e1051a39Sopenharmony_ci cmp $len,#0x40 // because .Lecb_enc_tail4x 642e1051a39Sopenharmony_ci sub $len,$len,#0x50 643e1051a39Sopenharmony_ci 644e1051a39Sopenharmony_ci aese $dat0,q9 645e1051a39Sopenharmony_ci aesmc $dat0,$dat0 646e1051a39Sopenharmony_ci aese $dat1,q9 647e1051a39Sopenharmony_ci aesmc $dat1,$dat1 648e1051a39Sopenharmony_ci aese $dat2,q9 649e1051a39Sopenharmony_ci aesmc $dat2,$dat2 650e1051a39Sopenharmony_ci aese $dat3,q9 651e1051a39Sopenharmony_ci aesmc $dat3,$dat3 652e1051a39Sopenharmony_ci aese $dat4,q9 653e1051a39Sopenharmony_ci aesmc $dat4,$dat4 654e1051a39Sopenharmony_ci csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo 655e1051a39Sopenharmony_ci mov $key_,$key 656e1051a39Sopenharmony_ci 657e1051a39Sopenharmony_ci aese $dat0,q10 658e1051a39Sopenharmony_ci aesmc $dat0,$dat0 659e1051a39Sopenharmony_ci aese $dat1,q10 660e1051a39Sopenharmony_ci aesmc $dat1,$dat1 
661e1051a39Sopenharmony_ci aese $dat2,q10 662e1051a39Sopenharmony_ci aesmc $dat2,$dat2 663e1051a39Sopenharmony_ci aese $dat3,q10 664e1051a39Sopenharmony_ci aesmc $dat3,$dat3 665e1051a39Sopenharmony_ci aese $dat4,q10 666e1051a39Sopenharmony_ci aesmc $dat4,$dat4 667e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 668e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat4 669e1051a39Sopenharmony_ci // are loaded with last "words" 670e1051a39Sopenharmony_ci add x6,$len,#0x60 // because .Lecb_enc_tail4x 671e1051a39Sopenharmony_ci 672e1051a39Sopenharmony_ci aese $dat0,q11 673e1051a39Sopenharmony_ci aesmc $dat0,$dat0 674e1051a39Sopenharmony_ci aese $dat1,q11 675e1051a39Sopenharmony_ci aesmc $dat1,$dat1 676e1051a39Sopenharmony_ci aese $dat2,q11 677e1051a39Sopenharmony_ci aesmc $dat2,$dat2 678e1051a39Sopenharmony_ci aese $dat3,q11 679e1051a39Sopenharmony_ci aesmc $dat3,$dat3 680e1051a39Sopenharmony_ci aese $dat4,q11 681e1051a39Sopenharmony_ci aesmc $dat4,$dat4 682e1051a39Sopenharmony_ci 683e1051a39Sopenharmony_ci aese $dat0,q12 684e1051a39Sopenharmony_ci aesmc $dat0,$dat0 685e1051a39Sopenharmony_ci aese $dat1,q12 686e1051a39Sopenharmony_ci aesmc $dat1,$dat1 687e1051a39Sopenharmony_ci aese $dat2,q12 688e1051a39Sopenharmony_ci aesmc $dat2,$dat2 689e1051a39Sopenharmony_ci aese $dat3,q12 690e1051a39Sopenharmony_ci aesmc $dat3,$dat3 691e1051a39Sopenharmony_ci aese $dat4,q12 692e1051a39Sopenharmony_ci aesmc $dat4,$dat4 693e1051a39Sopenharmony_ci 694e1051a39Sopenharmony_ci aese $dat0,q13 695e1051a39Sopenharmony_ci aesmc $dat0,$dat0 696e1051a39Sopenharmony_ci aese $dat1,q13 697e1051a39Sopenharmony_ci aesmc $dat1,$dat1 698e1051a39Sopenharmony_ci aese $dat2,q13 699e1051a39Sopenharmony_ci aesmc $dat2,$dat2 700e1051a39Sopenharmony_ci aese $dat3,q13 701e1051a39Sopenharmony_ci aesmc $dat3,$dat3 702e1051a39Sopenharmony_ci aese $dat4,q13 703e1051a39Sopenharmony_ci aesmc $dat4,$dat4 704e1051a39Sopenharmony_ci 705e1051a39Sopenharmony_ci aese $dat0,q14 
706e1051a39Sopenharmony_ci aesmc $dat0,$dat0 707e1051a39Sopenharmony_ci aese $dat1,q14 708e1051a39Sopenharmony_ci aesmc $dat1,$dat1 709e1051a39Sopenharmony_ci aese $dat2,q14 710e1051a39Sopenharmony_ci aesmc $dat2,$dat2 711e1051a39Sopenharmony_ci aese $dat3,q14 712e1051a39Sopenharmony_ci aesmc $dat3,$dat3 713e1051a39Sopenharmony_ci aese $dat4,q14 714e1051a39Sopenharmony_ci aesmc $dat4,$dat4 715e1051a39Sopenharmony_ci 716e1051a39Sopenharmony_ci aese $dat0,q15 717e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 718e1051a39Sopenharmony_ci aese $dat1,q15 719e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 720e1051a39Sopenharmony_ci aese $dat2,q15 721e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 722e1051a39Sopenharmony_ci aese $dat3,q15 723e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 724e1051a39Sopenharmony_ci aese $dat4,q15 725e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 726e1051a39Sopenharmony_ci cbz x6,.Lecb_enc_tail4x 727e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 728e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$dat0 729e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 730e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 731e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 732e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 733e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 734e1051a39Sopenharmony_ci veor $tmp3,$rndlast,$dat3 735e1051a39Sopenharmony_ci vorr $dat3,$in3,$in3 736e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$dat4 737e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 738e1051a39Sopenharmony_ci vorr $dat4,$in4,$in4 739e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 740e1051a39Sopenharmony_ci mov $cnt,$rounds 741e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 742e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 743e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 744e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 745e1051a39Sopenharmony_ci b.hs .Loop5x_ecb_enc 746e1051a39Sopenharmony_ci 
747e1051a39Sopenharmony_ci add $len,$len,#0x50 748e1051a39Sopenharmony_ci cbz $len,.Lecb_done 749e1051a39Sopenharmony_ci 750e1051a39Sopenharmony_ci add $cnt,$rounds,#2 751e1051a39Sopenharmony_ci subs $len,$len,#0x30 752e1051a39Sopenharmony_ci vorr $dat0,$in2,$in2 753e1051a39Sopenharmony_ci vorr $dat1,$in3,$in3 754e1051a39Sopenharmony_ci vorr $dat2,$in4,$in4 755e1051a39Sopenharmony_ci b.lo .Lecb_enc_tail 756e1051a39Sopenharmony_ci 757e1051a39Sopenharmony_ci b .Loop3x_ecb_enc 758e1051a39Sopenharmony_ci 759e1051a39Sopenharmony_ci.align 4 760e1051a39Sopenharmony_ci.Lecb_enc_tail4x: 761e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 762e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 763e1051a39Sopenharmony_ci veor $tmp3,$rndlast,$dat3 764e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$dat4 765e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 766e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 767e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 768e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 769e1051a39Sopenharmony_ci 770e1051a39Sopenharmony_ci b .Lecb_done 771e1051a39Sopenharmony_ci.align 4 772e1051a39Sopenharmony_ci___ 773e1051a39Sopenharmony_ci$code.=<<___; 774e1051a39Sopenharmony_ci.Loop3x_ecb_enc: 775e1051a39Sopenharmony_ci aese $dat0,q8 776e1051a39Sopenharmony_ci aesmc $dat0,$dat0 777e1051a39Sopenharmony_ci aese $dat1,q8 778e1051a39Sopenharmony_ci aesmc $dat1,$dat1 779e1051a39Sopenharmony_ci aese $dat2,q8 780e1051a39Sopenharmony_ci aesmc $dat2,$dat2 781e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 782e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 783e1051a39Sopenharmony_ci aese $dat0,q9 784e1051a39Sopenharmony_ci aesmc $dat0,$dat0 785e1051a39Sopenharmony_ci aese $dat1,q9 786e1051a39Sopenharmony_ci aesmc $dat1,$dat1 787e1051a39Sopenharmony_ci aese $dat2,q9 788e1051a39Sopenharmony_ci aesmc $dat2,$dat2 789e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 790e1051a39Sopenharmony_ci b.gt .Loop3x_ecb_enc 791e1051a39Sopenharmony_ci 792e1051a39Sopenharmony_ci aese 
$dat0,q8 793e1051a39Sopenharmony_ci aesmc $dat0,$dat0 794e1051a39Sopenharmony_ci aese $dat1,q8 795e1051a39Sopenharmony_ci aesmc $dat1,$dat1 796e1051a39Sopenharmony_ci aese $dat2,q8 797e1051a39Sopenharmony_ci aesmc $dat2,$dat2 798e1051a39Sopenharmony_ci subs $len,$len,#0x30 799e1051a39Sopenharmony_ci mov.lo x6,$len // x6, $cnt, is zero at this point 800e1051a39Sopenharmony_ci aese $dat0,q9 801e1051a39Sopenharmony_ci aesmc $dat0,$dat0 802e1051a39Sopenharmony_ci aese $dat1,q9 803e1051a39Sopenharmony_ci aesmc $dat1,$dat1 804e1051a39Sopenharmony_ci aese $dat2,q9 805e1051a39Sopenharmony_ci aesmc $dat2,$dat2 806e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 807e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat2 808e1051a39Sopenharmony_ci // are loaded with last "words" 809e1051a39Sopenharmony_ci mov $key_,$key 810e1051a39Sopenharmony_ci aese $dat0,q12 811e1051a39Sopenharmony_ci aesmc $dat0,$dat0 812e1051a39Sopenharmony_ci aese $dat1,q12 813e1051a39Sopenharmony_ci aesmc $dat1,$dat1 814e1051a39Sopenharmony_ci aese $dat2,q12 815e1051a39Sopenharmony_ci aesmc $dat2,$dat2 816e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 817e1051a39Sopenharmony_ci aese $dat0,q13 818e1051a39Sopenharmony_ci aesmc $dat0,$dat0 819e1051a39Sopenharmony_ci aese $dat1,q13 820e1051a39Sopenharmony_ci aesmc $dat1,$dat1 821e1051a39Sopenharmony_ci aese $dat2,q13 822e1051a39Sopenharmony_ci aesmc $dat2,$dat2 823e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 824e1051a39Sopenharmony_ci aese $dat0,q14 825e1051a39Sopenharmony_ci aesmc $dat0,$dat0 826e1051a39Sopenharmony_ci aese $dat1,q14 827e1051a39Sopenharmony_ci aesmc $dat1,$dat1 828e1051a39Sopenharmony_ci aese $dat2,q14 829e1051a39Sopenharmony_ci aesmc $dat2,$dat2 830e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 831e1051a39Sopenharmony_ci aese $dat0,q15 832e1051a39Sopenharmony_ci aese $dat1,q15 833e1051a39Sopenharmony_ci aese $dat2,q15 834e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 
835e1051a39Sopenharmony_ci add $cnt,$rounds,#2 836e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$dat0 837e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 838e1051a39Sopenharmony_ci veor $dat2,$dat2,$rndlast 839e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 840e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 841e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 842e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 843e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 844e1051a39Sopenharmony_ci vst1.8 {$dat2},[$out],#16 845e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 846e1051a39Sopenharmony_ci b.hs .Loop3x_ecb_enc 847e1051a39Sopenharmony_ci 848e1051a39Sopenharmony_ci cmn $len,#0x30 849e1051a39Sopenharmony_ci b.eq .Lecb_done 850e1051a39Sopenharmony_ci nop 851e1051a39Sopenharmony_ci 852e1051a39Sopenharmony_ci.Lecb_enc_tail: 853e1051a39Sopenharmony_ci aese $dat1,q8 854e1051a39Sopenharmony_ci aesmc $dat1,$dat1 855e1051a39Sopenharmony_ci aese $dat2,q8 856e1051a39Sopenharmony_ci aesmc $dat2,$dat2 857e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 858e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 859e1051a39Sopenharmony_ci aese $dat1,q9 860e1051a39Sopenharmony_ci aesmc $dat1,$dat1 861e1051a39Sopenharmony_ci aese $dat2,q9 862e1051a39Sopenharmony_ci aesmc $dat2,$dat2 863e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 864e1051a39Sopenharmony_ci b.gt .Lecb_enc_tail 865e1051a39Sopenharmony_ci 866e1051a39Sopenharmony_ci aese $dat1,q8 867e1051a39Sopenharmony_ci aesmc $dat1,$dat1 868e1051a39Sopenharmony_ci aese $dat2,q8 869e1051a39Sopenharmony_ci aesmc $dat2,$dat2 870e1051a39Sopenharmony_ci aese $dat1,q9 871e1051a39Sopenharmony_ci aesmc $dat1,$dat1 872e1051a39Sopenharmony_ci aese $dat2,q9 873e1051a39Sopenharmony_ci aesmc $dat2,$dat2 874e1051a39Sopenharmony_ci aese $dat1,q12 875e1051a39Sopenharmony_ci aesmc $dat1,$dat1 876e1051a39Sopenharmony_ci aese $dat2,q12 877e1051a39Sopenharmony_ci aesmc $dat2,$dat2 878e1051a39Sopenharmony_ci cmn $len,#0x20 879e1051a39Sopenharmony_ci 
aese $dat1,q13 880e1051a39Sopenharmony_ci aesmc $dat1,$dat1 881e1051a39Sopenharmony_ci aese $dat2,q13 882e1051a39Sopenharmony_ci aesmc $dat2,$dat2 883e1051a39Sopenharmony_ci aese $dat1,q14 884e1051a39Sopenharmony_ci aesmc $dat1,$dat1 885e1051a39Sopenharmony_ci aese $dat2,q14 886e1051a39Sopenharmony_ci aesmc $dat2,$dat2 887e1051a39Sopenharmony_ci aese $dat1,q15 888e1051a39Sopenharmony_ci aese $dat2,q15 889e1051a39Sopenharmony_ci b.eq .Lecb_enc_one 890e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 891e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 892e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 893e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 894e1051a39Sopenharmony_ci b .Lecb_done 895e1051a39Sopenharmony_ci 896e1051a39Sopenharmony_ci.Lecb_enc_one: 897e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat2 898e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 899e1051a39Sopenharmony_ci b .Lecb_done 900e1051a39Sopenharmony_ci___ 901e1051a39Sopenharmony_ci 902e1051a39Sopenharmony_ci$code.=<<___; 903e1051a39Sopenharmony_ci.align 5 904e1051a39Sopenharmony_ci.Lecb_dec: 905e1051a39Sopenharmony_ci vld1.8 {$dat1},[$inp],#16 906e1051a39Sopenharmony_ci subs $len,$len,#32 // bias 907e1051a39Sopenharmony_ci add $cnt,$rounds,#2 908e1051a39Sopenharmony_ci vorr $in1,$dat1,$dat1 909e1051a39Sopenharmony_ci vorr $dat2,$dat1,$dat1 910e1051a39Sopenharmony_ci vorr $dat1,$dat,$dat 911e1051a39Sopenharmony_ci b.lo .Lecb_dec_tail 912e1051a39Sopenharmony_ci 913e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 914e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 915e1051a39Sopenharmony_ci___ 916e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 917e1051a39Sopenharmony_ci cmp $len,#32 918e1051a39Sopenharmony_ci b.lo .Loop3x_ecb_dec 919e1051a39Sopenharmony_ci 920e1051a39Sopenharmony_ci vld1.8 {$dat3},[$inp],#16 921e1051a39Sopenharmony_ci vld1.8 {$dat4},[$inp],#16 922e1051a39Sopenharmony_ci sub $len,$len,#32 // bias 923e1051a39Sopenharmony_ci mov $cnt,$rounds 924e1051a39Sopenharmony_ci 
925e1051a39Sopenharmony_ci.Loop5x_ecb_dec: 926e1051a39Sopenharmony_ci aesd $dat0,q8 927e1051a39Sopenharmony_ci aesimc $dat0,$dat0 928e1051a39Sopenharmony_ci aesd $dat1,q8 929e1051a39Sopenharmony_ci aesimc $dat1,$dat1 930e1051a39Sopenharmony_ci aesd $dat2,q8 931e1051a39Sopenharmony_ci aesimc $dat2,$dat2 932e1051a39Sopenharmony_ci aesd $dat3,q8 933e1051a39Sopenharmony_ci aesimc $dat3,$dat3 934e1051a39Sopenharmony_ci aesd $dat4,q8 935e1051a39Sopenharmony_ci aesimc $dat4,$dat4 936e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 937e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 938e1051a39Sopenharmony_ci aesd $dat0,q9 939e1051a39Sopenharmony_ci aesimc $dat0,$dat0 940e1051a39Sopenharmony_ci aesd $dat1,q9 941e1051a39Sopenharmony_ci aesimc $dat1,$dat1 942e1051a39Sopenharmony_ci aesd $dat2,q9 943e1051a39Sopenharmony_ci aesimc $dat2,$dat2 944e1051a39Sopenharmony_ci aesd $dat3,q9 945e1051a39Sopenharmony_ci aesimc $dat3,$dat3 946e1051a39Sopenharmony_ci aesd $dat4,q9 947e1051a39Sopenharmony_ci aesimc $dat4,$dat4 948e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 949e1051a39Sopenharmony_ci b.gt .Loop5x_ecb_dec 950e1051a39Sopenharmony_ci 951e1051a39Sopenharmony_ci aesd $dat0,q8 952e1051a39Sopenharmony_ci aesimc $dat0,$dat0 953e1051a39Sopenharmony_ci aesd $dat1,q8 954e1051a39Sopenharmony_ci aesimc $dat1,$dat1 955e1051a39Sopenharmony_ci aesd $dat2,q8 956e1051a39Sopenharmony_ci aesimc $dat2,$dat2 957e1051a39Sopenharmony_ci aesd $dat3,q8 958e1051a39Sopenharmony_ci aesimc $dat3,$dat3 959e1051a39Sopenharmony_ci aesd $dat4,q8 960e1051a39Sopenharmony_ci aesimc $dat4,$dat4 961e1051a39Sopenharmony_ci cmp $len,#0x40 // because .Lecb_tail4x 962e1051a39Sopenharmony_ci sub $len,$len,#0x50 963e1051a39Sopenharmony_ci 964e1051a39Sopenharmony_ci aesd $dat0,q9 965e1051a39Sopenharmony_ci aesimc $dat0,$dat0 966e1051a39Sopenharmony_ci aesd $dat1,q9 967e1051a39Sopenharmony_ci aesimc $dat1,$dat1 968e1051a39Sopenharmony_ci aesd $dat2,q9 969e1051a39Sopenharmony_ci aesimc $dat2,$dat2 970e1051a39Sopenharmony_ci 
aesd $dat3,q9 971e1051a39Sopenharmony_ci aesimc $dat3,$dat3 972e1051a39Sopenharmony_ci aesd $dat4,q9 973e1051a39Sopenharmony_ci aesimc $dat4,$dat4 974e1051a39Sopenharmony_ci csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo 975e1051a39Sopenharmony_ci mov $key_,$key 976e1051a39Sopenharmony_ci 977e1051a39Sopenharmony_ci aesd $dat0,q10 978e1051a39Sopenharmony_ci aesimc $dat0,$dat0 979e1051a39Sopenharmony_ci aesd $dat1,q10 980e1051a39Sopenharmony_ci aesimc $dat1,$dat1 981e1051a39Sopenharmony_ci aesd $dat2,q10 982e1051a39Sopenharmony_ci aesimc $dat2,$dat2 983e1051a39Sopenharmony_ci aesd $dat3,q10 984e1051a39Sopenharmony_ci aesimc $dat3,$dat3 985e1051a39Sopenharmony_ci aesd $dat4,q10 986e1051a39Sopenharmony_ci aesimc $dat4,$dat4 987e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 988e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat4 989e1051a39Sopenharmony_ci // are loaded with last "words" 990e1051a39Sopenharmony_ci add x6,$len,#0x60 // because .Lecb_tail4x 991e1051a39Sopenharmony_ci 992e1051a39Sopenharmony_ci aesd $dat0,q11 993e1051a39Sopenharmony_ci aesimc $dat0,$dat0 994e1051a39Sopenharmony_ci aesd $dat1,q11 995e1051a39Sopenharmony_ci aesimc $dat1,$dat1 996e1051a39Sopenharmony_ci aesd $dat2,q11 997e1051a39Sopenharmony_ci aesimc $dat2,$dat2 998e1051a39Sopenharmony_ci aesd $dat3,q11 999e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1000e1051a39Sopenharmony_ci aesd $dat4,q11 1001e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1002e1051a39Sopenharmony_ci 1003e1051a39Sopenharmony_ci aesd $dat0,q12 1004e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1005e1051a39Sopenharmony_ci aesd $dat1,q12 1006e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1007e1051a39Sopenharmony_ci aesd $dat2,q12 1008e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1009e1051a39Sopenharmony_ci aesd $dat3,q12 1010e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1011e1051a39Sopenharmony_ci aesd $dat4,q12 1012e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1013e1051a39Sopenharmony_ci 
1014e1051a39Sopenharmony_ci aesd $dat0,q13 1015e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1016e1051a39Sopenharmony_ci aesd $dat1,q13 1017e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1018e1051a39Sopenharmony_ci aesd $dat2,q13 1019e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1020e1051a39Sopenharmony_ci aesd $dat3,q13 1021e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1022e1051a39Sopenharmony_ci aesd $dat4,q13 1023e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1024e1051a39Sopenharmony_ci 1025e1051a39Sopenharmony_ci aesd $dat0,q14 1026e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1027e1051a39Sopenharmony_ci aesd $dat1,q14 1028e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1029e1051a39Sopenharmony_ci aesd $dat2,q14 1030e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1031e1051a39Sopenharmony_ci aesd $dat3,q14 1032e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1033e1051a39Sopenharmony_ci aesd $dat4,q14 1034e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1035e1051a39Sopenharmony_ci 1036e1051a39Sopenharmony_ci aesd $dat0,q15 1037e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 1038e1051a39Sopenharmony_ci aesd $dat1,q15 1039e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 1040e1051a39Sopenharmony_ci aesd $dat2,q15 1041e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 1042e1051a39Sopenharmony_ci aesd $dat3,q15 1043e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 1044e1051a39Sopenharmony_ci aesd $dat4,q15 1045e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 1046e1051a39Sopenharmony_ci cbz x6,.Lecb_tail4x 1047e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 1048e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$dat0 1049e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 1050e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 1051e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 1052e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 1053e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 1054e1051a39Sopenharmony_ci veor $tmp3,$rndlast,$dat3 1055e1051a39Sopenharmony_ci vorr $dat3,$in3,$in3 
1056e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$dat4 1057e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 1058e1051a39Sopenharmony_ci vorr $dat4,$in4,$in4 1059e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1060e1051a39Sopenharmony_ci mov $cnt,$rounds 1061e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1062e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 1063e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 1064e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 1065e1051a39Sopenharmony_ci b.hs .Loop5x_ecb_dec 1066e1051a39Sopenharmony_ci 1067e1051a39Sopenharmony_ci add $len,$len,#0x50 1068e1051a39Sopenharmony_ci cbz $len,.Lecb_done 1069e1051a39Sopenharmony_ci 1070e1051a39Sopenharmony_ci add $cnt,$rounds,#2 1071e1051a39Sopenharmony_ci subs $len,$len,#0x30 1072e1051a39Sopenharmony_ci vorr $dat0,$in2,$in2 1073e1051a39Sopenharmony_ci vorr $dat1,$in3,$in3 1074e1051a39Sopenharmony_ci vorr $dat2,$in4,$in4 1075e1051a39Sopenharmony_ci b.lo .Lecb_dec_tail 1076e1051a39Sopenharmony_ci 1077e1051a39Sopenharmony_ci b .Loop3x_ecb_dec 1078e1051a39Sopenharmony_ci 1079e1051a39Sopenharmony_ci.align 4 1080e1051a39Sopenharmony_ci.Lecb_tail4x: 1081e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 1082e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 1083e1051a39Sopenharmony_ci veor $tmp3,$rndlast,$dat3 1084e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$dat4 1085e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1086e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1087e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 1088e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 1089e1051a39Sopenharmony_ci 1090e1051a39Sopenharmony_ci b .Lecb_done 1091e1051a39Sopenharmony_ci.align 4 1092e1051a39Sopenharmony_ci___ 1093e1051a39Sopenharmony_ci$code.=<<___; 1094e1051a39Sopenharmony_ci.Loop3x_ecb_dec: 1095e1051a39Sopenharmony_ci aesd $dat0,q8 1096e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1097e1051a39Sopenharmony_ci aesd $dat1,q8 1098e1051a39Sopenharmony_ci aesimc $dat1,$dat1 
1099e1051a39Sopenharmony_ci aesd $dat2,q8 1100e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1101e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1102e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1103e1051a39Sopenharmony_ci aesd $dat0,q9 1104e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1105e1051a39Sopenharmony_ci aesd $dat1,q9 1106e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1107e1051a39Sopenharmony_ci aesd $dat2,q9 1108e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1109e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 1110e1051a39Sopenharmony_ci b.gt .Loop3x_ecb_dec 1111e1051a39Sopenharmony_ci 1112e1051a39Sopenharmony_ci aesd $dat0,q8 1113e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1114e1051a39Sopenharmony_ci aesd $dat1,q8 1115e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1116e1051a39Sopenharmony_ci aesd $dat2,q8 1117e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1118e1051a39Sopenharmony_ci subs $len,$len,#0x30 1119e1051a39Sopenharmony_ci mov.lo x6,$len // x6, $cnt, is zero at this point 1120e1051a39Sopenharmony_ci aesd $dat0,q9 1121e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1122e1051a39Sopenharmony_ci aesd $dat1,q9 1123e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1124e1051a39Sopenharmony_ci aesd $dat2,q9 1125e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1126e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 1127e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat2 1128e1051a39Sopenharmony_ci // are loaded with last "words" 1129e1051a39Sopenharmony_ci mov $key_,$key 1130e1051a39Sopenharmony_ci aesd $dat0,q12 1131e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1132e1051a39Sopenharmony_ci aesd $dat1,q12 1133e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1134e1051a39Sopenharmony_ci aesd $dat2,q12 1135e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1136e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 1137e1051a39Sopenharmony_ci aesd $dat0,q13 1138e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1139e1051a39Sopenharmony_ci aesd $dat1,q13 1140e1051a39Sopenharmony_ci aesimc $dat1,$dat1 
1141e1051a39Sopenharmony_ci aesd $dat2,q13 1142e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1143e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 1144e1051a39Sopenharmony_ci aesd $dat0,q14 1145e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1146e1051a39Sopenharmony_ci aesd $dat1,q14 1147e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1148e1051a39Sopenharmony_ci aesd $dat2,q14 1149e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1150e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 1151e1051a39Sopenharmony_ci aesd $dat0,q15 1152e1051a39Sopenharmony_ci aesd $dat1,q15 1153e1051a39Sopenharmony_ci aesd $dat2,q15 1154e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 1155e1051a39Sopenharmony_ci add $cnt,$rounds,#2 1156e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$dat0 1157e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 1158e1051a39Sopenharmony_ci veor $dat2,$dat2,$rndlast 1159e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 1160e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 1161e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 1162e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1163e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 1164e1051a39Sopenharmony_ci vst1.8 {$dat2},[$out],#16 1165e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 1166e1051a39Sopenharmony_ci b.hs .Loop3x_ecb_dec 1167e1051a39Sopenharmony_ci 1168e1051a39Sopenharmony_ci cmn $len,#0x30 1169e1051a39Sopenharmony_ci b.eq .Lecb_done 1170e1051a39Sopenharmony_ci nop 1171e1051a39Sopenharmony_ci 1172e1051a39Sopenharmony_ci.Lecb_dec_tail: 1173e1051a39Sopenharmony_ci aesd $dat1,q8 1174e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1175e1051a39Sopenharmony_ci aesd $dat2,q8 1176e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1177e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1178e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1179e1051a39Sopenharmony_ci aesd $dat1,q9 1180e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1181e1051a39Sopenharmony_ci aesd $dat2,q9 1182e1051a39Sopenharmony_ci aesimc $dat2,$dat2 
1183e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 1184e1051a39Sopenharmony_ci b.gt .Lecb_dec_tail 1185e1051a39Sopenharmony_ci 1186e1051a39Sopenharmony_ci aesd $dat1,q8 1187e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1188e1051a39Sopenharmony_ci aesd $dat2,q8 1189e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1190e1051a39Sopenharmony_ci aesd $dat1,q9 1191e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1192e1051a39Sopenharmony_ci aesd $dat2,q9 1193e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1194e1051a39Sopenharmony_ci aesd $dat1,q12 1195e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1196e1051a39Sopenharmony_ci aesd $dat2,q12 1197e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1198e1051a39Sopenharmony_ci cmn $len,#0x20 1199e1051a39Sopenharmony_ci aesd $dat1,q13 1200e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1201e1051a39Sopenharmony_ci aesd $dat2,q13 1202e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1203e1051a39Sopenharmony_ci aesd $dat1,q14 1204e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1205e1051a39Sopenharmony_ci aesd $dat2,q14 1206e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1207e1051a39Sopenharmony_ci aesd $dat1,q15 1208e1051a39Sopenharmony_ci aesd $dat2,q15 1209e1051a39Sopenharmony_ci b.eq .Lecb_dec_one 1210e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat1 1211e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$dat2 1212e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1213e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1214e1051a39Sopenharmony_ci b .Lecb_done 1215e1051a39Sopenharmony_ci 1216e1051a39Sopenharmony_ci.Lecb_dec_one: 1217e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$dat2 1218e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1219e1051a39Sopenharmony_ci 1220e1051a39Sopenharmony_ci.Lecb_done: 1221e1051a39Sopenharmony_ci___ 1222e1051a39Sopenharmony_ci} 1223e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 1224e1051a39Sopenharmony_ci vldmia sp!,{d8-d15} 1225e1051a39Sopenharmony_ci ldmia sp!,{r4-r8,pc} 1226e1051a39Sopenharmony_ci___ 1227e1051a39Sopenharmony_ci$code.=<<___ if 
($flavour =~ /64/); 1228e1051a39Sopenharmony_ci ldr x29,[sp],#16 1229e1051a39Sopenharmony_ci___ 1230e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1231e1051a39Sopenharmony_ci.Lecb_Final_abort: 1232e1051a39Sopenharmony_ci ret 1233e1051a39Sopenharmony_ci___ 1234e1051a39Sopenharmony_ci$code.=<<___; 1235e1051a39Sopenharmony_ci.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt 1236e1051a39Sopenharmony_ci___ 1237e1051a39Sopenharmony_ci}}} 1238e1051a39Sopenharmony_ci{{{ 1239e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; 1240e1051a39Sopenharmony_cimy ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); 1241e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); 1242e1051a39Sopenharmony_ci 1243e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); 1244e1051a39Sopenharmony_cimy ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key); 1245e1051a39Sopenharmony_ci 1246e1051a39Sopenharmony_ci### q8-q15 preloaded key schedule 1247e1051a39Sopenharmony_ci 1248e1051a39Sopenharmony_ci$code.=<<___; 1249e1051a39Sopenharmony_ci.globl ${prefix}_cbc_encrypt 1250e1051a39Sopenharmony_ci.type ${prefix}_cbc_encrypt,%function 1251e1051a39Sopenharmony_ci.align 5 1252e1051a39Sopenharmony_ci${prefix}_cbc_encrypt: 1253e1051a39Sopenharmony_ci___ 1254e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1255e1051a39Sopenharmony_ci stp x29,x30,[sp,#-16]! 
1256e1051a39Sopenharmony_ci add x29,sp,#0 1257e1051a39Sopenharmony_ci___ 1258e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 1259e1051a39Sopenharmony_ci mov ip,sp 1260e1051a39Sopenharmony_ci stmdb sp!,{r4-r8,lr} 1261e1051a39Sopenharmony_ci vstmdb sp!,{d8-d15} @ ABI specification says so 1262e1051a39Sopenharmony_ci ldmia ip,{r4-r5} @ load remaining args 1263e1051a39Sopenharmony_ci___ 1264e1051a39Sopenharmony_ci$code.=<<___; 1265e1051a39Sopenharmony_ci subs $len,$len,#16 1266e1051a39Sopenharmony_ci mov $step,#16 1267e1051a39Sopenharmony_ci b.lo .Lcbc_abort 1268e1051a39Sopenharmony_ci cclr $step,eq 1269e1051a39Sopenharmony_ci 1270e1051a39Sopenharmony_ci cmp $enc,#0 // en- or decrypting? 1271e1051a39Sopenharmony_ci ldr $rounds,[$key,#240] 1272e1051a39Sopenharmony_ci and $len,$len,#-16 1273e1051a39Sopenharmony_ci vld1.8 {$ivec},[$ivp] 1274e1051a39Sopenharmony_ci vld1.8 {$dat},[$inp],$step 1275e1051a39Sopenharmony_ci 1276e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key] // load key schedule... 
1277e1051a39Sopenharmony_ci sub $rounds,$rounds,#6 1278e1051a39Sopenharmony_ci add $key_,$key,x5,lsl#4 // pointer to last 7 round keys 1279e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 1280e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key_],#32 1281e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key_],#32 1282e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key_],#32 1283e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key_] 1284e1051a39Sopenharmony_ci 1285e1051a39Sopenharmony_ci add $key_,$key,#32 1286e1051a39Sopenharmony_ci mov $cnt,$rounds 1287e1051a39Sopenharmony_ci b.eq .Lcbc_dec 1288e1051a39Sopenharmony_ci 1289e1051a39Sopenharmony_ci cmp $rounds,#2 1290e1051a39Sopenharmony_ci veor $dat,$dat,$ivec 1291e1051a39Sopenharmony_ci veor $rndzero_n_last,q8,$rndlast 1292e1051a39Sopenharmony_ci b.eq .Lcbc_enc128 1293e1051a39Sopenharmony_ci 1294e1051a39Sopenharmony_ci vld1.32 {$in0-$in1},[$key_] 1295e1051a39Sopenharmony_ci add $key_,$key,#16 1296e1051a39Sopenharmony_ci add $key4,$key,#16*4 1297e1051a39Sopenharmony_ci add $key5,$key,#16*5 1298e1051a39Sopenharmony_ci aese $dat,q8 1299e1051a39Sopenharmony_ci aesmc $dat,$dat 1300e1051a39Sopenharmony_ci add $key6,$key,#16*6 1301e1051a39Sopenharmony_ci add $key7,$key,#16*7 1302e1051a39Sopenharmony_ci b .Lenter_cbc_enc 1303e1051a39Sopenharmony_ci 1304e1051a39Sopenharmony_ci.align 4 1305e1051a39Sopenharmony_ci.Loop_cbc_enc: 1306e1051a39Sopenharmony_ci aese $dat,q8 1307e1051a39Sopenharmony_ci aesmc $dat,$dat 1308e1051a39Sopenharmony_ci vst1.8 {$ivec},[$out],#16 1309e1051a39Sopenharmony_ci.Lenter_cbc_enc: 1310e1051a39Sopenharmony_ci aese $dat,q9 1311e1051a39Sopenharmony_ci aesmc $dat,$dat 1312e1051a39Sopenharmony_ci aese $dat,$in0 1313e1051a39Sopenharmony_ci aesmc $dat,$dat 1314e1051a39Sopenharmony_ci vld1.32 {q8},[$key4] 1315e1051a39Sopenharmony_ci cmp $rounds,#4 1316e1051a39Sopenharmony_ci aese $dat,$in1 1317e1051a39Sopenharmony_ci aesmc $dat,$dat 1318e1051a39Sopenharmony_ci vld1.32 {q9},[$key5] 1319e1051a39Sopenharmony_ci b.eq .Lcbc_enc192 
1320e1051a39Sopenharmony_ci 1321e1051a39Sopenharmony_ci aese $dat,q8 1322e1051a39Sopenharmony_ci aesmc $dat,$dat 1323e1051a39Sopenharmony_ci vld1.32 {q8},[$key6] 1324e1051a39Sopenharmony_ci aese $dat,q9 1325e1051a39Sopenharmony_ci aesmc $dat,$dat 1326e1051a39Sopenharmony_ci vld1.32 {q9},[$key7] 1327e1051a39Sopenharmony_ci nop 1328e1051a39Sopenharmony_ci 1329e1051a39Sopenharmony_ci.Lcbc_enc192: 1330e1051a39Sopenharmony_ci aese $dat,q8 1331e1051a39Sopenharmony_ci aesmc $dat,$dat 1332e1051a39Sopenharmony_ci subs $len,$len,#16 1333e1051a39Sopenharmony_ci aese $dat,q9 1334e1051a39Sopenharmony_ci aesmc $dat,$dat 1335e1051a39Sopenharmony_ci cclr $step,eq 1336e1051a39Sopenharmony_ci aese $dat,q10 1337e1051a39Sopenharmony_ci aesmc $dat,$dat 1338e1051a39Sopenharmony_ci aese $dat,q11 1339e1051a39Sopenharmony_ci aesmc $dat,$dat 1340e1051a39Sopenharmony_ci vld1.8 {q8},[$inp],$step 1341e1051a39Sopenharmony_ci aese $dat,q12 1342e1051a39Sopenharmony_ci aesmc $dat,$dat 1343e1051a39Sopenharmony_ci veor q8,q8,$rndzero_n_last 1344e1051a39Sopenharmony_ci aese $dat,q13 1345e1051a39Sopenharmony_ci aesmc $dat,$dat 1346e1051a39Sopenharmony_ci vld1.32 {q9},[$key_] // re-pre-load rndkey[1] 1347e1051a39Sopenharmony_ci aese $dat,q14 1348e1051a39Sopenharmony_ci aesmc $dat,$dat 1349e1051a39Sopenharmony_ci aese $dat,q15 1350e1051a39Sopenharmony_ci veor $ivec,$dat,$rndlast 1351e1051a39Sopenharmony_ci b.hs .Loop_cbc_enc 1352e1051a39Sopenharmony_ci 1353e1051a39Sopenharmony_ci vst1.8 {$ivec},[$out],#16 1354e1051a39Sopenharmony_ci b .Lcbc_done 1355e1051a39Sopenharmony_ci 1356e1051a39Sopenharmony_ci.align 5 1357e1051a39Sopenharmony_ci.Lcbc_enc128: 1358e1051a39Sopenharmony_ci vld1.32 {$in0-$in1},[$key_] 1359e1051a39Sopenharmony_ci aese $dat,q8 1360e1051a39Sopenharmony_ci aesmc $dat,$dat 1361e1051a39Sopenharmony_ci b .Lenter_cbc_enc128 1362e1051a39Sopenharmony_ci.Loop_cbc_enc128: 1363e1051a39Sopenharmony_ci aese $dat,q8 1364e1051a39Sopenharmony_ci aesmc $dat,$dat 1365e1051a39Sopenharmony_ci vst1.8 
{$ivec},[$out],#16 1366e1051a39Sopenharmony_ci.Lenter_cbc_enc128: 1367e1051a39Sopenharmony_ci aese $dat,q9 1368e1051a39Sopenharmony_ci aesmc $dat,$dat 1369e1051a39Sopenharmony_ci subs $len,$len,#16 1370e1051a39Sopenharmony_ci aese $dat,$in0 1371e1051a39Sopenharmony_ci aesmc $dat,$dat 1372e1051a39Sopenharmony_ci cclr $step,eq 1373e1051a39Sopenharmony_ci aese $dat,$in1 1374e1051a39Sopenharmony_ci aesmc $dat,$dat 1375e1051a39Sopenharmony_ci aese $dat,q10 1376e1051a39Sopenharmony_ci aesmc $dat,$dat 1377e1051a39Sopenharmony_ci aese $dat,q11 1378e1051a39Sopenharmony_ci aesmc $dat,$dat 1379e1051a39Sopenharmony_ci vld1.8 {q8},[$inp],$step 1380e1051a39Sopenharmony_ci aese $dat,q12 1381e1051a39Sopenharmony_ci aesmc $dat,$dat 1382e1051a39Sopenharmony_ci aese $dat,q13 1383e1051a39Sopenharmony_ci aesmc $dat,$dat 1384e1051a39Sopenharmony_ci aese $dat,q14 1385e1051a39Sopenharmony_ci aesmc $dat,$dat 1386e1051a39Sopenharmony_ci veor q8,q8,$rndzero_n_last 1387e1051a39Sopenharmony_ci aese $dat,q15 1388e1051a39Sopenharmony_ci veor $ivec,$dat,$rndlast 1389e1051a39Sopenharmony_ci b.hs .Loop_cbc_enc128 1390e1051a39Sopenharmony_ci 1391e1051a39Sopenharmony_ci vst1.8 {$ivec},[$out],#16 1392e1051a39Sopenharmony_ci b .Lcbc_done 1393e1051a39Sopenharmony_ci___ 1394e1051a39Sopenharmony_ci{ 1395e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); 1396e1051a39Sopenharmony_ci 1397e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3); # used only in 64-bit mode 1398e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4); 1399e1051a39Sopenharmony_ciif ($flavour =~ /64/) { 1400e1051a39Sopenharmony_ci ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); 1401e1051a39Sopenharmony_ci} 1402e1051a39Sopenharmony_ci 1403e1051a39Sopenharmony_ci$code.=<<___; 1404e1051a39Sopenharmony_ci.align 5 1405e1051a39Sopenharmony_ci.Lcbc_dec: 1406e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 1407e1051a39Sopenharmony_ci subs $len,$len,#32 // bias 1408e1051a39Sopenharmony_ci add $cnt,$rounds,#2 
1409e1051a39Sopenharmony_ci vorr $in1,$dat,$dat 1410e1051a39Sopenharmony_ci vorr $dat1,$dat,$dat 1411e1051a39Sopenharmony_ci vorr $in2,$dat2,$dat2 1412e1051a39Sopenharmony_ci b.lo .Lcbc_dec_tail 1413e1051a39Sopenharmony_ci 1414e1051a39Sopenharmony_ci vorr $dat1,$dat2,$dat2 1415e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 1416e1051a39Sopenharmony_ci vorr $in0,$dat,$dat 1417e1051a39Sopenharmony_ci vorr $in1,$dat1,$dat1 1418e1051a39Sopenharmony_ci vorr $in2,$dat2,$dat2 1419e1051a39Sopenharmony_ci___ 1420e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1421e1051a39Sopenharmony_ci cmp $len,#32 1422e1051a39Sopenharmony_ci b.lo .Loop3x_cbc_dec 1423e1051a39Sopenharmony_ci 1424e1051a39Sopenharmony_ci vld1.8 {$dat3},[$inp],#16 1425e1051a39Sopenharmony_ci vld1.8 {$dat4},[$inp],#16 1426e1051a39Sopenharmony_ci sub $len,$len,#32 // bias 1427e1051a39Sopenharmony_ci mov $cnt,$rounds 1428e1051a39Sopenharmony_ci vorr $in3,$dat3,$dat3 1429e1051a39Sopenharmony_ci vorr $in4,$dat4,$dat4 1430e1051a39Sopenharmony_ci 1431e1051a39Sopenharmony_ci.Loop5x_cbc_dec: 1432e1051a39Sopenharmony_ci aesd $dat0,q8 1433e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1434e1051a39Sopenharmony_ci aesd $dat1,q8 1435e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1436e1051a39Sopenharmony_ci aesd $dat2,q8 1437e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1438e1051a39Sopenharmony_ci aesd $dat3,q8 1439e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1440e1051a39Sopenharmony_ci aesd $dat4,q8 1441e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1442e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1443e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1444e1051a39Sopenharmony_ci aesd $dat0,q9 1445e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1446e1051a39Sopenharmony_ci aesd $dat1,q9 1447e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1448e1051a39Sopenharmony_ci aesd $dat2,q9 1449e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1450e1051a39Sopenharmony_ci aesd $dat3,q9 1451e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1452e1051a39Sopenharmony_ci aesd 
$dat4,q9 1453e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1454e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 1455e1051a39Sopenharmony_ci b.gt .Loop5x_cbc_dec 1456e1051a39Sopenharmony_ci 1457e1051a39Sopenharmony_ci aesd $dat0,q8 1458e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1459e1051a39Sopenharmony_ci aesd $dat1,q8 1460e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1461e1051a39Sopenharmony_ci aesd $dat2,q8 1462e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1463e1051a39Sopenharmony_ci aesd $dat3,q8 1464e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1465e1051a39Sopenharmony_ci aesd $dat4,q8 1466e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1467e1051a39Sopenharmony_ci cmp $len,#0x40 // because .Lcbc_tail4x 1468e1051a39Sopenharmony_ci sub $len,$len,#0x50 1469e1051a39Sopenharmony_ci 1470e1051a39Sopenharmony_ci aesd $dat0,q9 1471e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1472e1051a39Sopenharmony_ci aesd $dat1,q9 1473e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1474e1051a39Sopenharmony_ci aesd $dat2,q9 1475e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1476e1051a39Sopenharmony_ci aesd $dat3,q9 1477e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1478e1051a39Sopenharmony_ci aesd $dat4,q9 1479e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1480e1051a39Sopenharmony_ci csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo 1481e1051a39Sopenharmony_ci mov $key_,$key 1482e1051a39Sopenharmony_ci 1483e1051a39Sopenharmony_ci aesd $dat0,q10 1484e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1485e1051a39Sopenharmony_ci aesd $dat1,q10 1486e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1487e1051a39Sopenharmony_ci aesd $dat2,q10 1488e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1489e1051a39Sopenharmony_ci aesd $dat3,q10 1490e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1491e1051a39Sopenharmony_ci aesd $dat4,q10 1492e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1493e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 1494e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat4 1495e1051a39Sopenharmony_ci 
// are loaded with last "words" 1496e1051a39Sopenharmony_ci add x6,$len,#0x60 // because .Lcbc_tail4x 1497e1051a39Sopenharmony_ci 1498e1051a39Sopenharmony_ci aesd $dat0,q11 1499e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1500e1051a39Sopenharmony_ci aesd $dat1,q11 1501e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1502e1051a39Sopenharmony_ci aesd $dat2,q11 1503e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1504e1051a39Sopenharmony_ci aesd $dat3,q11 1505e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1506e1051a39Sopenharmony_ci aesd $dat4,q11 1507e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1508e1051a39Sopenharmony_ci 1509e1051a39Sopenharmony_ci aesd $dat0,q12 1510e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1511e1051a39Sopenharmony_ci aesd $dat1,q12 1512e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1513e1051a39Sopenharmony_ci aesd $dat2,q12 1514e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1515e1051a39Sopenharmony_ci aesd $dat3,q12 1516e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1517e1051a39Sopenharmony_ci aesd $dat4,q12 1518e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1519e1051a39Sopenharmony_ci 1520e1051a39Sopenharmony_ci aesd $dat0,q13 1521e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1522e1051a39Sopenharmony_ci aesd $dat1,q13 1523e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1524e1051a39Sopenharmony_ci aesd $dat2,q13 1525e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1526e1051a39Sopenharmony_ci aesd $dat3,q13 1527e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1528e1051a39Sopenharmony_ci aesd $dat4,q13 1529e1051a39Sopenharmony_ci aesimc $dat4,$dat4 1530e1051a39Sopenharmony_ci 1531e1051a39Sopenharmony_ci aesd $dat0,q14 1532e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1533e1051a39Sopenharmony_ci aesd $dat1,q14 1534e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1535e1051a39Sopenharmony_ci aesd $dat2,q14 1536e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1537e1051a39Sopenharmony_ci aesd $dat3,q14 1538e1051a39Sopenharmony_ci aesimc $dat3,$dat3 1539e1051a39Sopenharmony_ci aesd $dat4,q14 1540e1051a39Sopenharmony_ci 
aesimc $dat4,$dat4 1541e1051a39Sopenharmony_ci 1542e1051a39Sopenharmony_ci veor $tmp0,$ivec,$rndlast 1543e1051a39Sopenharmony_ci aesd $dat0,q15 1544e1051a39Sopenharmony_ci veor $tmp1,$in0,$rndlast 1545e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 1546e1051a39Sopenharmony_ci aesd $dat1,q15 1547e1051a39Sopenharmony_ci veor $tmp2,$in1,$rndlast 1548e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 1549e1051a39Sopenharmony_ci aesd $dat2,q15 1550e1051a39Sopenharmony_ci veor $tmp3,$in2,$rndlast 1551e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 1552e1051a39Sopenharmony_ci aesd $dat3,q15 1553e1051a39Sopenharmony_ci veor $tmp4,$in3,$rndlast 1554e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 1555e1051a39Sopenharmony_ci aesd $dat4,q15 1556e1051a39Sopenharmony_ci vorr $ivec,$in4,$in4 1557e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 1558e1051a39Sopenharmony_ci cbz x6,.Lcbc_tail4x 1559e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 1560e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 1561e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 1562e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 1563e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 1564e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 1565e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 1566e1051a39Sopenharmony_ci veor $tmp3,$tmp3,$dat3 1567e1051a39Sopenharmony_ci vorr $dat3,$in3,$in3 1568e1051a39Sopenharmony_ci veor $tmp4,$tmp4,$dat4 1569e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 1570e1051a39Sopenharmony_ci vorr $dat4,$in4,$in4 1571e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1572e1051a39Sopenharmony_ci mov $cnt,$rounds 1573e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1574e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 1575e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 1576e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 1577e1051a39Sopenharmony_ci b.hs .Loop5x_cbc_dec 1578e1051a39Sopenharmony_ci 1579e1051a39Sopenharmony_ci add $len,$len,#0x50 
1580e1051a39Sopenharmony_ci cbz $len,.Lcbc_done 1581e1051a39Sopenharmony_ci 1582e1051a39Sopenharmony_ci add $cnt,$rounds,#2 1583e1051a39Sopenharmony_ci subs $len,$len,#0x30 1584e1051a39Sopenharmony_ci vorr $dat0,$in2,$in2 1585e1051a39Sopenharmony_ci vorr $in0,$in2,$in2 1586e1051a39Sopenharmony_ci vorr $dat1,$in3,$in3 1587e1051a39Sopenharmony_ci vorr $in1,$in3,$in3 1588e1051a39Sopenharmony_ci vorr $dat2,$in4,$in4 1589e1051a39Sopenharmony_ci vorr $in2,$in4,$in4 1590e1051a39Sopenharmony_ci b.lo .Lcbc_dec_tail 1591e1051a39Sopenharmony_ci 1592e1051a39Sopenharmony_ci b .Loop3x_cbc_dec 1593e1051a39Sopenharmony_ci 1594e1051a39Sopenharmony_ci.align 4 1595e1051a39Sopenharmony_ci.Lcbc_tail4x: 1596e1051a39Sopenharmony_ci veor $tmp1,$tmp0,$dat1 1597e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 1598e1051a39Sopenharmony_ci veor $tmp3,$tmp3,$dat3 1599e1051a39Sopenharmony_ci veor $tmp4,$tmp4,$dat4 1600e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1601e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1602e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 1603e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 1604e1051a39Sopenharmony_ci 1605e1051a39Sopenharmony_ci b .Lcbc_done 1606e1051a39Sopenharmony_ci.align 4 1607e1051a39Sopenharmony_ci___ 1608e1051a39Sopenharmony_ci$code.=<<___; 1609e1051a39Sopenharmony_ci.Loop3x_cbc_dec: 1610e1051a39Sopenharmony_ci aesd $dat0,q8 1611e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1612e1051a39Sopenharmony_ci aesd $dat1,q8 1613e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1614e1051a39Sopenharmony_ci aesd $dat2,q8 1615e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1616e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1617e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1618e1051a39Sopenharmony_ci aesd $dat0,q9 1619e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1620e1051a39Sopenharmony_ci aesd $dat1,q9 1621e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1622e1051a39Sopenharmony_ci aesd $dat2,q9 1623e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1624e1051a39Sopenharmony_ci 
vld1.32 {q9},[$key_],#16 1625e1051a39Sopenharmony_ci b.gt .Loop3x_cbc_dec 1626e1051a39Sopenharmony_ci 1627e1051a39Sopenharmony_ci aesd $dat0,q8 1628e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1629e1051a39Sopenharmony_ci aesd $dat1,q8 1630e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1631e1051a39Sopenharmony_ci aesd $dat2,q8 1632e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1633e1051a39Sopenharmony_ci veor $tmp0,$ivec,$rndlast 1634e1051a39Sopenharmony_ci subs $len,$len,#0x30 1635e1051a39Sopenharmony_ci veor $tmp1,$in0,$rndlast 1636e1051a39Sopenharmony_ci mov.lo x6,$len // x6, $cnt, is zero at this point 1637e1051a39Sopenharmony_ci aesd $dat0,q9 1638e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1639e1051a39Sopenharmony_ci aesd $dat1,q9 1640e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1641e1051a39Sopenharmony_ci aesd $dat2,q9 1642e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1643e1051a39Sopenharmony_ci veor $tmp2,$in1,$rndlast 1644e1051a39Sopenharmony_ci add $inp,$inp,x6 // $inp is adjusted in such way that 1645e1051a39Sopenharmony_ci // at exit from the loop $dat1-$dat2 1646e1051a39Sopenharmony_ci // are loaded with last "words" 1647e1051a39Sopenharmony_ci vorr $ivec,$in2,$in2 1648e1051a39Sopenharmony_ci mov $key_,$key 1649e1051a39Sopenharmony_ci aesd $dat0,q12 1650e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1651e1051a39Sopenharmony_ci aesd $dat1,q12 1652e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1653e1051a39Sopenharmony_ci aesd $dat2,q12 1654e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1655e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 1656e1051a39Sopenharmony_ci aesd $dat0,q13 1657e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1658e1051a39Sopenharmony_ci aesd $dat1,q13 1659e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1660e1051a39Sopenharmony_ci aesd $dat2,q13 1661e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1662e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 1663e1051a39Sopenharmony_ci aesd $dat0,q14 1664e1051a39Sopenharmony_ci aesimc $dat0,$dat0 1665e1051a39Sopenharmony_ci aesd 
$dat1,q14 1666e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1667e1051a39Sopenharmony_ci aesd $dat2,q14 1668e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1669e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 1670e1051a39Sopenharmony_ci aesd $dat0,q15 1671e1051a39Sopenharmony_ci aesd $dat1,q15 1672e1051a39Sopenharmony_ci aesd $dat2,q15 1673e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 1674e1051a39Sopenharmony_ci add $cnt,$rounds,#2 1675e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 1676e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 1677e1051a39Sopenharmony_ci veor $dat2,$dat2,$tmp2 1678e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 1679e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 1680e1051a39Sopenharmony_ci vorr $dat0,$in0,$in0 1681e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1682e1051a39Sopenharmony_ci vorr $dat1,$in1,$in1 1683e1051a39Sopenharmony_ci vst1.8 {$dat2},[$out],#16 1684e1051a39Sopenharmony_ci vorr $dat2,$in2,$in2 1685e1051a39Sopenharmony_ci b.hs .Loop3x_cbc_dec 1686e1051a39Sopenharmony_ci 1687e1051a39Sopenharmony_ci cmn $len,#0x30 1688e1051a39Sopenharmony_ci b.eq .Lcbc_done 1689e1051a39Sopenharmony_ci nop 1690e1051a39Sopenharmony_ci 1691e1051a39Sopenharmony_ci.Lcbc_dec_tail: 1692e1051a39Sopenharmony_ci aesd $dat1,q8 1693e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1694e1051a39Sopenharmony_ci aesd $dat2,q8 1695e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1696e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1697e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1698e1051a39Sopenharmony_ci aesd $dat1,q9 1699e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1700e1051a39Sopenharmony_ci aesd $dat2,q9 1701e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1702e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 1703e1051a39Sopenharmony_ci b.gt .Lcbc_dec_tail 1704e1051a39Sopenharmony_ci 1705e1051a39Sopenharmony_ci aesd $dat1,q8 1706e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1707e1051a39Sopenharmony_ci aesd $dat2,q8 
1708e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1709e1051a39Sopenharmony_ci aesd $dat1,q9 1710e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1711e1051a39Sopenharmony_ci aesd $dat2,q9 1712e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1713e1051a39Sopenharmony_ci aesd $dat1,q12 1714e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1715e1051a39Sopenharmony_ci aesd $dat2,q12 1716e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1717e1051a39Sopenharmony_ci cmn $len,#0x20 1718e1051a39Sopenharmony_ci aesd $dat1,q13 1719e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1720e1051a39Sopenharmony_ci aesd $dat2,q13 1721e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1722e1051a39Sopenharmony_ci veor $tmp1,$ivec,$rndlast 1723e1051a39Sopenharmony_ci aesd $dat1,q14 1724e1051a39Sopenharmony_ci aesimc $dat1,$dat1 1725e1051a39Sopenharmony_ci aesd $dat2,q14 1726e1051a39Sopenharmony_ci aesimc $dat2,$dat2 1727e1051a39Sopenharmony_ci veor $tmp2,$in1,$rndlast 1728e1051a39Sopenharmony_ci aesd $dat1,q15 1729e1051a39Sopenharmony_ci aesd $dat2,q15 1730e1051a39Sopenharmony_ci b.eq .Lcbc_dec_one 1731e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 1732e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 1733e1051a39Sopenharmony_ci vorr $ivec,$in2,$in2 1734e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1735e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 1736e1051a39Sopenharmony_ci b .Lcbc_done 1737e1051a39Sopenharmony_ci 1738e1051a39Sopenharmony_ci.Lcbc_dec_one: 1739e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat2 1740e1051a39Sopenharmony_ci vorr $ivec,$in2,$in2 1741e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 1742e1051a39Sopenharmony_ci 1743e1051a39Sopenharmony_ci.Lcbc_done: 1744e1051a39Sopenharmony_ci vst1.8 {$ivec},[$ivp] 1745e1051a39Sopenharmony_ci.Lcbc_abort: 1746e1051a39Sopenharmony_ci___ 1747e1051a39Sopenharmony_ci} 1748e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 1749e1051a39Sopenharmony_ci vldmia sp!,{d8-d15} 1750e1051a39Sopenharmony_ci ldmia sp!,{r4-r8,pc} 1751e1051a39Sopenharmony_ci___ 
1752e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1753e1051a39Sopenharmony_ci ldr x29,[sp],#16 1754e1051a39Sopenharmony_ci ret 1755e1051a39Sopenharmony_ci___ 1756e1051a39Sopenharmony_ci$code.=<<___; 1757e1051a39Sopenharmony_ci.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt 1758e1051a39Sopenharmony_ci___ 1759e1051a39Sopenharmony_ci}}} 1760e1051a39Sopenharmony_ci{{{ 1761e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); 1762e1051a39Sopenharmony_cimy ($rounds,$cnt,$key_)=("w5","w6","x7"); 1763e1051a39Sopenharmony_cimy ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12)); 1764e1051a39Sopenharmony_cimy $step="x12"; # aliases with $tctr2 1765e1051a39Sopenharmony_ci 1766e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); 1767e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); 1768e1051a39Sopenharmony_ci 1769e1051a39Sopenharmony_ci# used only in 64-bit mode... 1770e1051a39Sopenharmony_cimy ($dat3,$dat4,$in3,$in4)=map("q$_",(16..23)); 1771e1051a39Sopenharmony_ci 1772e1051a39Sopenharmony_cimy ($dat,$tmp)=($dat0,$tmp0); 1773e1051a39Sopenharmony_ci 1774e1051a39Sopenharmony_ci### q8-q15 preloaded key schedule 1775e1051a39Sopenharmony_ci 1776e1051a39Sopenharmony_ci$code.=<<___; 1777e1051a39Sopenharmony_ci.globl ${prefix}_ctr32_encrypt_blocks 1778e1051a39Sopenharmony_ci.type ${prefix}_ctr32_encrypt_blocks,%function 1779e1051a39Sopenharmony_ci.align 5 1780e1051a39Sopenharmony_ci${prefix}_ctr32_encrypt_blocks: 1781e1051a39Sopenharmony_ci___ 1782e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1783e1051a39Sopenharmony_ci stp x29,x30,[sp,#-16]! 
1784e1051a39Sopenharmony_ci add x29,sp,#0 1785e1051a39Sopenharmony_ci___ 1786e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 1787e1051a39Sopenharmony_ci mov ip,sp 1788e1051a39Sopenharmony_ci stmdb sp!,{r4-r10,lr} 1789e1051a39Sopenharmony_ci vstmdb sp!,{d8-d15} @ ABI specification says so 1790e1051a39Sopenharmony_ci ldr r4, [ip] @ load remaining arg 1791e1051a39Sopenharmony_ci___ 1792e1051a39Sopenharmony_ci$code.=<<___; 1793e1051a39Sopenharmony_ci ldr $rounds,[$key,#240] 1794e1051a39Sopenharmony_ci 1795e1051a39Sopenharmony_ci ldr $ctr, [$ivp, #12] 1796e1051a39Sopenharmony_ci#ifdef __ARMEB__ 1797e1051a39Sopenharmony_ci vld1.8 {$dat0},[$ivp] 1798e1051a39Sopenharmony_ci#else 1799e1051a39Sopenharmony_ci vld1.32 {$dat0},[$ivp] 1800e1051a39Sopenharmony_ci#endif 1801e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key] // load key schedule... 1802e1051a39Sopenharmony_ci sub $rounds,$rounds,#4 1803e1051a39Sopenharmony_ci mov $step,#16 1804e1051a39Sopenharmony_ci cmp $len,#2 1805e1051a39Sopenharmony_ci add $key_,$key,x5,lsl#4 // pointer to last 5 round keys 1806e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 1807e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key_],#32 1808e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key_],#32 1809e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key_] 1810e1051a39Sopenharmony_ci add $key_,$key,#32 1811e1051a39Sopenharmony_ci mov $cnt,$rounds 1812e1051a39Sopenharmony_ci cclr $step,lo 1813e1051a39Sopenharmony_ci#ifndef __ARMEB__ 1814e1051a39Sopenharmony_ci rev $ctr, $ctr 1815e1051a39Sopenharmony_ci#endif 1816e1051a39Sopenharmony_ci___ 1817e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1818e1051a39Sopenharmony_ci vorr $dat1,$dat0,$dat0 1819e1051a39Sopenharmony_ci add $tctr1, $ctr, #1 1820e1051a39Sopenharmony_ci vorr $dat2,$dat0,$dat0 1821e1051a39Sopenharmony_ci add $ctr, $ctr, #2 1822e1051a39Sopenharmony_ci vorr $ivec,$dat0,$dat0 1823e1051a39Sopenharmony_ci rev $tctr1, $tctr1 1824e1051a39Sopenharmony_ci vmov.32 ${dat1}[3],$tctr1 
1825e1051a39Sopenharmony_ci b.ls .Lctr32_tail 1826e1051a39Sopenharmony_ci rev $tctr2, $ctr 1827e1051a39Sopenharmony_ci sub $len,$len,#3 // bias 1828e1051a39Sopenharmony_ci vmov.32 ${dat2}[3],$tctr2 1829e1051a39Sopenharmony_ci___ 1830e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 1831e1051a39Sopenharmony_ci add $tctr1, $ctr, #1 1832e1051a39Sopenharmony_ci vorr $ivec,$dat0,$dat0 1833e1051a39Sopenharmony_ci rev $tctr1, $tctr1 1834e1051a39Sopenharmony_ci vmov.32 ${ivec}[3],$tctr1 1835e1051a39Sopenharmony_ci add $ctr, $ctr, #2 1836e1051a39Sopenharmony_ci vorr $dat1,$ivec,$ivec 1837e1051a39Sopenharmony_ci b.ls .Lctr32_tail 1838e1051a39Sopenharmony_ci rev $tctr2, $ctr 1839e1051a39Sopenharmony_ci vmov.32 ${ivec}[3],$tctr2 1840e1051a39Sopenharmony_ci sub $len,$len,#3 // bias 1841e1051a39Sopenharmony_ci vorr $dat2,$ivec,$ivec 1842e1051a39Sopenharmony_ci___ 1843e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 1844e1051a39Sopenharmony_ci cmp $len,#32 1845e1051a39Sopenharmony_ci b.lo .Loop3x_ctr32 1846e1051a39Sopenharmony_ci 1847e1051a39Sopenharmony_ci add w13,$ctr,#1 1848e1051a39Sopenharmony_ci add w14,$ctr,#2 1849e1051a39Sopenharmony_ci vorr $dat3,$dat0,$dat0 1850e1051a39Sopenharmony_ci rev w13,w13 1851e1051a39Sopenharmony_ci vorr $dat4,$dat0,$dat0 1852e1051a39Sopenharmony_ci rev w14,w14 1853e1051a39Sopenharmony_ci vmov.32 ${dat3}[3],w13 1854e1051a39Sopenharmony_ci sub $len,$len,#2 // bias 1855e1051a39Sopenharmony_ci vmov.32 ${dat4}[3],w14 1856e1051a39Sopenharmony_ci add $ctr,$ctr,#2 1857e1051a39Sopenharmony_ci b .Loop5x_ctr32 1858e1051a39Sopenharmony_ci 1859e1051a39Sopenharmony_ci.align 4 1860e1051a39Sopenharmony_ci.Loop5x_ctr32: 1861e1051a39Sopenharmony_ci aese $dat0,q8 1862e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1863e1051a39Sopenharmony_ci aese $dat1,q8 1864e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1865e1051a39Sopenharmony_ci aese $dat2,q8 1866e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1867e1051a39Sopenharmony_ci aese $dat3,q8 
1868e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1869e1051a39Sopenharmony_ci aese $dat4,q8 1870e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1871e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 1872e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 1873e1051a39Sopenharmony_ci aese $dat0,q9 1874e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1875e1051a39Sopenharmony_ci aese $dat1,q9 1876e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1877e1051a39Sopenharmony_ci aese $dat2,q9 1878e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1879e1051a39Sopenharmony_ci aese $dat3,q9 1880e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1881e1051a39Sopenharmony_ci aese $dat4,q9 1882e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1883e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 1884e1051a39Sopenharmony_ci b.gt .Loop5x_ctr32 1885e1051a39Sopenharmony_ci 1886e1051a39Sopenharmony_ci mov $key_,$key 1887e1051a39Sopenharmony_ci aese $dat0,q8 1888e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1889e1051a39Sopenharmony_ci aese $dat1,q8 1890e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1891e1051a39Sopenharmony_ci aese $dat2,q8 1892e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1893e1051a39Sopenharmony_ci aese $dat3,q8 1894e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1895e1051a39Sopenharmony_ci aese $dat4,q8 1896e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1897e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 1898e1051a39Sopenharmony_ci 1899e1051a39Sopenharmony_ci aese $dat0,q9 1900e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1901e1051a39Sopenharmony_ci aese $dat1,q9 1902e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1903e1051a39Sopenharmony_ci aese $dat2,q9 1904e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1905e1051a39Sopenharmony_ci aese $dat3,q9 1906e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1907e1051a39Sopenharmony_ci aese $dat4,q9 1908e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1909e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 1910e1051a39Sopenharmony_ci 1911e1051a39Sopenharmony_ci aese $dat0,q12 
1912e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1913e1051a39Sopenharmony_ci add $tctr0,$ctr,#1 1914e1051a39Sopenharmony_ci add $tctr1,$ctr,#2 1915e1051a39Sopenharmony_ci aese $dat1,q12 1916e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1917e1051a39Sopenharmony_ci add $tctr2,$ctr,#3 1918e1051a39Sopenharmony_ci add w13,$ctr,#4 1919e1051a39Sopenharmony_ci aese $dat2,q12 1920e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1921e1051a39Sopenharmony_ci add w14,$ctr,#5 1922e1051a39Sopenharmony_ci rev $tctr0,$tctr0 1923e1051a39Sopenharmony_ci aese $dat3,q12 1924e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1925e1051a39Sopenharmony_ci rev $tctr1,$tctr1 1926e1051a39Sopenharmony_ci rev $tctr2,$tctr2 1927e1051a39Sopenharmony_ci aese $dat4,q12 1928e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1929e1051a39Sopenharmony_ci rev w13,w13 1930e1051a39Sopenharmony_ci rev w14,w14 1931e1051a39Sopenharmony_ci 1932e1051a39Sopenharmony_ci aese $dat0,q13 1933e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1934e1051a39Sopenharmony_ci aese $dat1,q13 1935e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1936e1051a39Sopenharmony_ci aese $dat2,q13 1937e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1938e1051a39Sopenharmony_ci aese $dat3,q13 1939e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1940e1051a39Sopenharmony_ci aese $dat4,q13 1941e1051a39Sopenharmony_ci aesmc $dat4,$dat4 1942e1051a39Sopenharmony_ci 1943e1051a39Sopenharmony_ci aese $dat0,q14 1944e1051a39Sopenharmony_ci aesmc $dat0,$dat0 1945e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 1946e1051a39Sopenharmony_ci aese $dat1,q14 1947e1051a39Sopenharmony_ci aesmc $dat1,$dat1 1948e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 1949e1051a39Sopenharmony_ci aese $dat2,q14 1950e1051a39Sopenharmony_ci aesmc $dat2,$dat2 1951e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 1952e1051a39Sopenharmony_ci aese $dat3,q14 1953e1051a39Sopenharmony_ci aesmc $dat3,$dat3 1954e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 1955e1051a39Sopenharmony_ci aese $dat4,q14 1956e1051a39Sopenharmony_ci aesmc 
$dat4,$dat4 1957e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 1958e1051a39Sopenharmony_ci 1959e1051a39Sopenharmony_ci aese $dat0,q15 1960e1051a39Sopenharmony_ci veor $in0,$in0,$rndlast 1961e1051a39Sopenharmony_ci aese $dat1,q15 1962e1051a39Sopenharmony_ci veor $in1,$in1,$rndlast 1963e1051a39Sopenharmony_ci aese $dat2,q15 1964e1051a39Sopenharmony_ci veor $in2,$in2,$rndlast 1965e1051a39Sopenharmony_ci aese $dat3,q15 1966e1051a39Sopenharmony_ci veor $in3,$in3,$rndlast 1967e1051a39Sopenharmony_ci aese $dat4,q15 1968e1051a39Sopenharmony_ci veor $in4,$in4,$rndlast 1969e1051a39Sopenharmony_ci 1970e1051a39Sopenharmony_ci veor $in0,$in0,$dat0 1971e1051a39Sopenharmony_ci vorr $dat0,$ivec,$ivec 1972e1051a39Sopenharmony_ci veor $in1,$in1,$dat1 1973e1051a39Sopenharmony_ci vorr $dat1,$ivec,$ivec 1974e1051a39Sopenharmony_ci veor $in2,$in2,$dat2 1975e1051a39Sopenharmony_ci vorr $dat2,$ivec,$ivec 1976e1051a39Sopenharmony_ci veor $in3,$in3,$dat3 1977e1051a39Sopenharmony_ci vorr $dat3,$ivec,$ivec 1978e1051a39Sopenharmony_ci veor $in4,$in4,$dat4 1979e1051a39Sopenharmony_ci vorr $dat4,$ivec,$ivec 1980e1051a39Sopenharmony_ci 1981e1051a39Sopenharmony_ci vst1.8 {$in0},[$out],#16 1982e1051a39Sopenharmony_ci vmov.32 ${dat0}[3],$tctr0 1983e1051a39Sopenharmony_ci vst1.8 {$in1},[$out],#16 1984e1051a39Sopenharmony_ci vmov.32 ${dat1}[3],$tctr1 1985e1051a39Sopenharmony_ci vst1.8 {$in2},[$out],#16 1986e1051a39Sopenharmony_ci vmov.32 ${dat2}[3],$tctr2 1987e1051a39Sopenharmony_ci vst1.8 {$in3},[$out],#16 1988e1051a39Sopenharmony_ci vmov.32 ${dat3}[3],w13 1989e1051a39Sopenharmony_ci vst1.8 {$in4},[$out],#16 1990e1051a39Sopenharmony_ci vmov.32 ${dat4}[3],w14 1991e1051a39Sopenharmony_ci 1992e1051a39Sopenharmony_ci mov $cnt,$rounds 1993e1051a39Sopenharmony_ci cbz $len,.Lctr32_done 1994e1051a39Sopenharmony_ci 1995e1051a39Sopenharmony_ci add $ctr,$ctr,#5 1996e1051a39Sopenharmony_ci subs $len,$len,#5 1997e1051a39Sopenharmony_ci b.hs .Loop5x_ctr32 1998e1051a39Sopenharmony_ci 1999e1051a39Sopenharmony_ci 
add $len,$len,#5 2000e1051a39Sopenharmony_ci sub $ctr,$ctr,#5 2001e1051a39Sopenharmony_ci 2002e1051a39Sopenharmony_ci cmp $len,#2 2003e1051a39Sopenharmony_ci mov $step,#16 2004e1051a39Sopenharmony_ci cclr $step,lo 2005e1051a39Sopenharmony_ci b.ls .Lctr32_tail 2006e1051a39Sopenharmony_ci 2007e1051a39Sopenharmony_ci sub $len,$len,#3 // bias 2008e1051a39Sopenharmony_ci add $ctr,$ctr,#3 2009e1051a39Sopenharmony_ci___ 2010e1051a39Sopenharmony_ci$code.=<<___; 2011e1051a39Sopenharmony_ci b .Loop3x_ctr32 2012e1051a39Sopenharmony_ci 2013e1051a39Sopenharmony_ci.align 4 2014e1051a39Sopenharmony_ci.Loop3x_ctr32: 2015e1051a39Sopenharmony_ci aese $dat0,q8 2016e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2017e1051a39Sopenharmony_ci aese $dat1,q8 2018e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2019e1051a39Sopenharmony_ci aese $dat2,q8 2020e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2021e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 2022e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 2023e1051a39Sopenharmony_ci aese $dat0,q9 2024e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2025e1051a39Sopenharmony_ci aese $dat1,q9 2026e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2027e1051a39Sopenharmony_ci aese $dat2,q9 2028e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2029e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 2030e1051a39Sopenharmony_ci b.gt .Loop3x_ctr32 2031e1051a39Sopenharmony_ci 2032e1051a39Sopenharmony_ci aese $dat0,q8 2033e1051a39Sopenharmony_ci aesmc $tmp0,$dat0 2034e1051a39Sopenharmony_ci aese $dat1,q8 2035e1051a39Sopenharmony_ci aesmc $tmp1,$dat1 2036e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 2037e1051a39Sopenharmony_ci___ 2038e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2039e1051a39Sopenharmony_ci vorr $dat0,$ivec,$ivec 2040e1051a39Sopenharmony_ci___ 2041e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2042e1051a39Sopenharmony_ci add $tctr0,$ctr,#1 2043e1051a39Sopenharmony_ci___ 2044e1051a39Sopenharmony_ci$code.=<<___; 2045e1051a39Sopenharmony_ci aese $dat2,q8 
2046e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2047e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 2048e1051a39Sopenharmony_ci___ 2049e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2050e1051a39Sopenharmony_ci vorr $dat1,$ivec,$ivec 2051e1051a39Sopenharmony_ci___ 2052e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2053e1051a39Sopenharmony_ci rev $tctr0,$tctr0 2054e1051a39Sopenharmony_ci___ 2055e1051a39Sopenharmony_ci$code.=<<___; 2056e1051a39Sopenharmony_ci aese $tmp0,q9 2057e1051a39Sopenharmony_ci aesmc $tmp0,$tmp0 2058e1051a39Sopenharmony_ci aese $tmp1,q9 2059e1051a39Sopenharmony_ci aesmc $tmp1,$tmp1 2060e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 2061e1051a39Sopenharmony_ci mov $key_,$key 2062e1051a39Sopenharmony_ci aese $dat2,q9 2063e1051a39Sopenharmony_ci aesmc $tmp2,$dat2 2064e1051a39Sopenharmony_ci___ 2065e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2066e1051a39Sopenharmony_ci vorr $dat2,$ivec,$ivec 2067e1051a39Sopenharmony_ci add $tctr0,$ctr,#1 2068e1051a39Sopenharmony_ci___ 2069e1051a39Sopenharmony_ci$code.=<<___; 2070e1051a39Sopenharmony_ci aese $tmp0,q12 2071e1051a39Sopenharmony_ci aesmc $tmp0,$tmp0 2072e1051a39Sopenharmony_ci aese $tmp1,q12 2073e1051a39Sopenharmony_ci aesmc $tmp1,$tmp1 2074e1051a39Sopenharmony_ci veor $in0,$in0,$rndlast 2075e1051a39Sopenharmony_ci add $tctr1,$ctr,#2 2076e1051a39Sopenharmony_ci aese $tmp2,q12 2077e1051a39Sopenharmony_ci aesmc $tmp2,$tmp2 2078e1051a39Sopenharmony_ci veor $in1,$in1,$rndlast 2079e1051a39Sopenharmony_ci add $ctr,$ctr,#3 2080e1051a39Sopenharmony_ci aese $tmp0,q13 2081e1051a39Sopenharmony_ci aesmc $tmp0,$tmp0 2082e1051a39Sopenharmony_ci aese $tmp1,q13 2083e1051a39Sopenharmony_ci aesmc $tmp1,$tmp1 2084e1051a39Sopenharmony_ci veor $in2,$in2,$rndlast 2085e1051a39Sopenharmony_ci___ 2086e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2087e1051a39Sopenharmony_ci rev $tctr0,$tctr0 2088e1051a39Sopenharmony_ci aese $tmp2,q13 2089e1051a39Sopenharmony_ci aesmc 
$tmp2,$tmp2 2090e1051a39Sopenharmony_ci vmov.32 ${dat0}[3], $tctr0 2091e1051a39Sopenharmony_ci___ 2092e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2093e1051a39Sopenharmony_ci vmov.32 ${ivec}[3], $tctr0 2094e1051a39Sopenharmony_ci aese $tmp2,q13 2095e1051a39Sopenharmony_ci aesmc $tmp2,$tmp2 2096e1051a39Sopenharmony_ci vorr $dat0,$ivec,$ivec 2097e1051a39Sopenharmony_ci___ 2098e1051a39Sopenharmony_ci$code.=<<___; 2099e1051a39Sopenharmony_ci rev $tctr1,$tctr1 2100e1051a39Sopenharmony_ci aese $tmp0,q14 2101e1051a39Sopenharmony_ci aesmc $tmp0,$tmp0 2102e1051a39Sopenharmony_ci___ 2103e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2104e1051a39Sopenharmony_ci vmov.32 ${ivec}[3], $tctr1 2105e1051a39Sopenharmony_ci rev $tctr2,$ctr 2106e1051a39Sopenharmony_ci___ 2107e1051a39Sopenharmony_ci$code.=<<___; 2108e1051a39Sopenharmony_ci aese $tmp1,q14 2109e1051a39Sopenharmony_ci aesmc $tmp1,$tmp1 2110e1051a39Sopenharmony_ci___ 2111e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2112e1051a39Sopenharmony_ci vmov.32 ${dat1}[3], $tctr1 2113e1051a39Sopenharmony_ci rev $tctr2,$ctr 2114e1051a39Sopenharmony_ci aese $tmp2,q14 2115e1051a39Sopenharmony_ci aesmc $tmp2,$tmp2 2116e1051a39Sopenharmony_ci vmov.32 ${dat2}[3], $tctr2 2117e1051a39Sopenharmony_ci___ 2118e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2119e1051a39Sopenharmony_ci vorr $dat1,$ivec,$ivec 2120e1051a39Sopenharmony_ci vmov.32 ${ivec}[3], $tctr2 2121e1051a39Sopenharmony_ci aese $tmp2,q14 2122e1051a39Sopenharmony_ci aesmc $tmp2,$tmp2 2123e1051a39Sopenharmony_ci vorr $dat2,$ivec,$ivec 2124e1051a39Sopenharmony_ci___ 2125e1051a39Sopenharmony_ci$code.=<<___; 2126e1051a39Sopenharmony_ci subs $len,$len,#3 2127e1051a39Sopenharmony_ci aese $tmp0,q15 2128e1051a39Sopenharmony_ci aese $tmp1,q15 2129e1051a39Sopenharmony_ci aese $tmp2,q15 2130e1051a39Sopenharmony_ci 2131e1051a39Sopenharmony_ci veor $in0,$in0,$tmp0 2132e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 
2133e1051a39Sopenharmony_ci vst1.8 {$in0},[$out],#16 2134e1051a39Sopenharmony_ci veor $in1,$in1,$tmp1 2135e1051a39Sopenharmony_ci mov $cnt,$rounds 2136e1051a39Sopenharmony_ci vst1.8 {$in1},[$out],#16 2137e1051a39Sopenharmony_ci veor $in2,$in2,$tmp2 2138e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 2139e1051a39Sopenharmony_ci vst1.8 {$in2},[$out],#16 2140e1051a39Sopenharmony_ci b.hs .Loop3x_ctr32 2141e1051a39Sopenharmony_ci 2142e1051a39Sopenharmony_ci adds $len,$len,#3 2143e1051a39Sopenharmony_ci b.eq .Lctr32_done 2144e1051a39Sopenharmony_ci cmp $len,#1 2145e1051a39Sopenharmony_ci mov $step,#16 2146e1051a39Sopenharmony_ci cclr $step,eq 2147e1051a39Sopenharmony_ci 2148e1051a39Sopenharmony_ci.Lctr32_tail: 2149e1051a39Sopenharmony_ci aese $dat0,q8 2150e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2151e1051a39Sopenharmony_ci aese $dat1,q8 2152e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2153e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 2154e1051a39Sopenharmony_ci subs $cnt,$cnt,#2 2155e1051a39Sopenharmony_ci aese $dat0,q9 2156e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2157e1051a39Sopenharmony_ci aese $dat1,q9 2158e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2159e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 2160e1051a39Sopenharmony_ci b.gt .Lctr32_tail 2161e1051a39Sopenharmony_ci 2162e1051a39Sopenharmony_ci aese $dat0,q8 2163e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2164e1051a39Sopenharmony_ci aese $dat1,q8 2165e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2166e1051a39Sopenharmony_ci aese $dat0,q9 2167e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2168e1051a39Sopenharmony_ci aese $dat1,q9 2169e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2170e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],$step 2171e1051a39Sopenharmony_ci aese $dat0,q12 2172e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2173e1051a39Sopenharmony_ci aese $dat1,q12 2174e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2175e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp] 2176e1051a39Sopenharmony_ci aese $dat0,q13 
2177e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2178e1051a39Sopenharmony_ci aese $dat1,q13 2179e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2180e1051a39Sopenharmony_ci veor $in0,$in0,$rndlast 2181e1051a39Sopenharmony_ci aese $dat0,q14 2182e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2183e1051a39Sopenharmony_ci aese $dat1,q14 2184e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2185e1051a39Sopenharmony_ci veor $in1,$in1,$rndlast 2186e1051a39Sopenharmony_ci aese $dat0,q15 2187e1051a39Sopenharmony_ci aese $dat1,q15 2188e1051a39Sopenharmony_ci 2189e1051a39Sopenharmony_ci cmp $len,#1 2190e1051a39Sopenharmony_ci veor $in0,$in0,$dat0 2191e1051a39Sopenharmony_ci veor $in1,$in1,$dat1 2192e1051a39Sopenharmony_ci vst1.8 {$in0},[$out],#16 2193e1051a39Sopenharmony_ci b.eq .Lctr32_done 2194e1051a39Sopenharmony_ci vst1.8 {$in1},[$out] 2195e1051a39Sopenharmony_ci 2196e1051a39Sopenharmony_ci.Lctr32_done: 2197e1051a39Sopenharmony_ci___ 2198e1051a39Sopenharmony_ci$code.=<<___ if ($flavour !~ /64/); 2199e1051a39Sopenharmony_ci vldmia sp!,{d8-d15} 2200e1051a39Sopenharmony_ci ldmia sp!,{r4-r10,pc} 2201e1051a39Sopenharmony_ci___ 2202e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2203e1051a39Sopenharmony_ci ldr x29,[sp],#16 2204e1051a39Sopenharmony_ci ret 2205e1051a39Sopenharmony_ci___ 2206e1051a39Sopenharmony_ci$code.=<<___; 2207e1051a39Sopenharmony_ci.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks 2208e1051a39Sopenharmony_ci___ 2209e1051a39Sopenharmony_ci}}} 2210e1051a39Sopenharmony_ci# Performance in cycles per byte. 2211e1051a39Sopenharmony_ci# Processed with AES-XTS different key size. 
2212e1051a39Sopenharmony_ci# It shows the value before and after optimization as below: 2213e1051a39Sopenharmony_ci# (before/after): 2214e1051a39Sopenharmony_ci# 2215e1051a39Sopenharmony_ci# AES-128-XTS AES-256-XTS 2216e1051a39Sopenharmony_ci# Cortex-A57 3.36/1.09 4.02/1.37 2217e1051a39Sopenharmony_ci# Cortex-A72 3.03/1.02 3.28/1.33 2218e1051a39Sopenharmony_ci 2219e1051a39Sopenharmony_ci# Optimization is implemented by loop unrolling and interleaving. 2220e1051a39Sopenharmony_ci# Commonly, we choose the unrolling factor as 5, if the input 2221e1051a39Sopenharmony_ci# data size is smaller than 5 blocks, but not smaller than 3 blocks, 2222e1051a39Sopenharmony_ci# choose 3 as the unrolling factor. 2223e1051a39Sopenharmony_ci# If the input data size dsize >= 5*16 bytes, then take 5 blocks 2224e1051a39Sopenharmony_ci# as one iteration, every loop the left size lsize -= 5*16. 2225e1051a39Sopenharmony_ci# If lsize < 5*16 bytes, treat them as the tail. Note: left 4*16 bytes 2226e1051a39Sopenharmony_ci# will be processed specially, which is integrated into the 5*16 bytes 2227e1051a39Sopenharmony_ci# loop to improve the efficiency. 2228e1051a39Sopenharmony_ci# There is one special case, if the original input data size dsize 2229e1051a39Sopenharmony_ci# = 16 bytes, we will treat it separately to improve the 2230e1051a39Sopenharmony_ci# performance: one independent code block without LR, FP load and 2231e1051a39Sopenharmony_ci# store. 2232e1051a39Sopenharmony_ci# Encryption will process the (length -tailcnt) bytes as mentioned 2233e1051a39Sopenharmony_ci# previously, then encrypt the composite block as last second 2234e1051a39Sopenharmony_ci# cipher block. 
2235e1051a39Sopenharmony_ci# Decryption will process the (length -tailcnt -1) bytes as mentioned 2236e1051a39Sopenharmony_ci# previously, then decrypt the last second cipher block to get the 2237e1051a39Sopenharmony_ci# last plain block(tail), decrypt the composite block as last second 2238e1051a39Sopenharmony_ci# plain text block. 2239e1051a39Sopenharmony_ci 2240e1051a39Sopenharmony_ci{{{ 2241e1051a39Sopenharmony_cimy ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); 2242e1051a39Sopenharmony_cimy ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); 2243e1051a39Sopenharmony_cimy ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); 2244e1051a39Sopenharmony_cimy ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); 2245e1051a39Sopenharmony_cimy ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); 2246e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); 2247e1051a39Sopenharmony_cimy ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b"); 2248e1051a39Sopenharmony_cimy ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); 2249e1051a39Sopenharmony_cimy ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); 2250e1051a39Sopenharmony_ci 2251e1051a39Sopenharmony_cimy ($tmpin)=("v26.16b"); 2252e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); 2253e1051a39Sopenharmony_ci 2254e1051a39Sopenharmony_ci# q7 last round key 2255e1051a39Sopenharmony_ci# q10-q15, q7 Last 7 round keys 2256e1051a39Sopenharmony_ci# q8-q9 preloaded round keys except last 7 keys for big size 2257e1051a39Sopenharmony_ci# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte 2258e1051a39Sopenharmony_ci 2259e1051a39Sopenharmony_ci 2260e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); 2261e1051a39Sopenharmony_ci 2262e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3); # used only in 64-bit mode 
2263e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4); 2264e1051a39Sopenharmony_ciif ($flavour =~ /64/) { 2265e1051a39Sopenharmony_ci ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); 2266e1051a39Sopenharmony_ci} 2267e1051a39Sopenharmony_ci 2268e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2269e1051a39Sopenharmony_ci.globl ${prefix}_xts_encrypt 2270e1051a39Sopenharmony_ci.type ${prefix}_xts_encrypt,%function 2271e1051a39Sopenharmony_ci.align 5 2272e1051a39Sopenharmony_ci${prefix}_xts_encrypt: 2273e1051a39Sopenharmony_ci___ 2274e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2275e1051a39Sopenharmony_ci cmp $len,#16 2276e1051a39Sopenharmony_ci // Original input data size bigger than 16, jump to big size processing. 2277e1051a39Sopenharmony_ci b.ne .Lxts_enc_big_size 2278e1051a39Sopenharmony_ci // Encrypt the iv with key2, as the first XEX iv. 2279e1051a39Sopenharmony_ci ldr $rounds,[$key2,#240] 2280e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2281e1051a39Sopenharmony_ci vld1.8 {$iv0},[$ivp] 2282e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 2283e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2284e1051a39Sopenharmony_ci 2285e1051a39Sopenharmony_ci.Loop_enc_iv_enc: 2286e1051a39Sopenharmony_ci aese $iv0,$dat 2287e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2288e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2289e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2290e1051a39Sopenharmony_ci aese $iv0,$dat1 2291e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2292e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2293e1051a39Sopenharmony_ci b.gt .Loop_enc_iv_enc 2294e1051a39Sopenharmony_ci 2295e1051a39Sopenharmony_ci aese $iv0,$dat 2296e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2297e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2] 2298e1051a39Sopenharmony_ci aese $iv0,$dat1 2299e1051a39Sopenharmony_ci veor $iv0,$iv0,$dat 2300e1051a39Sopenharmony_ci 2301e1051a39Sopenharmony_ci vld1.8 {$dat0},[$inp] 2302e1051a39Sopenharmony_ci veor 
$dat0,$iv0,$dat0 2303e1051a39Sopenharmony_ci 2304e1051a39Sopenharmony_ci ldr $rounds,[$key1,#240] 2305e1051a39Sopenharmony_ci vld1.32 {q20-q21},[$key1],#32 // load key schedule... 2306e1051a39Sopenharmony_ci 2307e1051a39Sopenharmony_ci aese $dat0,q20 2308e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2309e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key1],#32 // load key schedule... 2310e1051a39Sopenharmony_ci aese $dat0,q21 2311e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2312e1051a39Sopenharmony_ci subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-xts processing 2313e1051a39Sopenharmony_ci b.eq .Lxts_128_enc 2314e1051a39Sopenharmony_ci.Lxts_enc_round_loop: 2315e1051a39Sopenharmony_ci aese $dat0,q8 2316e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2317e1051a39Sopenharmony_ci vld1.32 {q8},[$key1],#16 // load key schedule... 2318e1051a39Sopenharmony_ci aese $dat0,q9 2319e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2320e1051a39Sopenharmony_ci vld1.32 {q9},[$key1],#16 // load key schedule... 2321e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 // bias 2322e1051a39Sopenharmony_ci b.gt .Lxts_enc_round_loop 2323e1051a39Sopenharmony_ci.Lxts_128_enc: 2324e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key1],#32 // load key schedule... 2325e1051a39Sopenharmony_ci aese $dat0,q8 2326e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2327e1051a39Sopenharmony_ci aese $dat0,q9 2328e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2329e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key1],#32 // load key schedule... 2330e1051a39Sopenharmony_ci aese $dat0,q10 2331e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2332e1051a39Sopenharmony_ci aese $dat0,q11 2333e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2334e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key1],#32 // load key schedule... 
2335e1051a39Sopenharmony_ci aese $dat0,q12 2336e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2337e1051a39Sopenharmony_ci aese $dat0,q13 2338e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2339e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key1] 2340e1051a39Sopenharmony_ci aese $dat0,q14 2341e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2342e1051a39Sopenharmony_ci aese $dat0,q15 2343e1051a39Sopenharmony_ci veor $dat0,$dat0,$rndlast 2344e1051a39Sopenharmony_ci veor $dat0,$dat0,$iv0 2345e1051a39Sopenharmony_ci vst1.8 {$dat0},[$out] 2346e1051a39Sopenharmony_ci b .Lxts_enc_final_abort 2347e1051a39Sopenharmony_ci 2348e1051a39Sopenharmony_ci.align 4 2349e1051a39Sopenharmony_ci.Lxts_enc_big_size: 2350e1051a39Sopenharmony_ci___ 2351e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2352e1051a39Sopenharmony_ci stp $constnumx,$tmpinp,[sp,#-64]! 2353e1051a39Sopenharmony_ci stp $tailcnt,$midnumx,[sp,#48] 2354e1051a39Sopenharmony_ci stp $ivd10,$ivd20,[sp,#32] 2355e1051a39Sopenharmony_ci stp $ivd30,$ivd40,[sp,#16] 2356e1051a39Sopenharmony_ci 2357e1051a39Sopenharmony_ci // tailcnt store the tail value of length%16. 2358e1051a39Sopenharmony_ci and $tailcnt,$len,#0xf 2359e1051a39Sopenharmony_ci and $len,$len,#-16 2360e1051a39Sopenharmony_ci subs $len,$len,#16 2361e1051a39Sopenharmony_ci mov $step,#16 2362e1051a39Sopenharmony_ci b.lo .Lxts_abort 2363e1051a39Sopenharmony_ci csel $step,xzr,$step,eq 2364e1051a39Sopenharmony_ci 2365e1051a39Sopenharmony_ci // Firstly, encrypt the iv with key2, as the first iv of XEX. 
2366e1051a39Sopenharmony_ci ldr $rounds,[$key2,#240] 2367e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2368e1051a39Sopenharmony_ci vld1.8 {$iv0},[$ivp] 2369e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 2370e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2371e1051a39Sopenharmony_ci 2372e1051a39Sopenharmony_ci.Loop_iv_enc: 2373e1051a39Sopenharmony_ci aese $iv0,$dat 2374e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2375e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2376e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2377e1051a39Sopenharmony_ci aese $iv0,$dat1 2378e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2379e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2380e1051a39Sopenharmony_ci b.gt .Loop_iv_enc 2381e1051a39Sopenharmony_ci 2382e1051a39Sopenharmony_ci aese $iv0,$dat 2383e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2384e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2] 2385e1051a39Sopenharmony_ci aese $iv0,$dat1 2386e1051a39Sopenharmony_ci veor $iv0,$iv0,$dat 2387e1051a39Sopenharmony_ci 2388e1051a39Sopenharmony_ci // The iv for second block 2389e1051a39Sopenharmony_ci // $ivl- iv(low), $ivh - iv(high) 2390e1051a39Sopenharmony_ci // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 2391e1051a39Sopenharmony_ci fmov $ivl,$ivd00 2392e1051a39Sopenharmony_ci fmov $ivh,$ivd01 2393e1051a39Sopenharmony_ci mov $constnum,#0x87 2394e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2395e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2396e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2397e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2398e1051a39Sopenharmony_ci fmov $ivd10,$ivl 2399e1051a39Sopenharmony_ci fmov $ivd11,$ivh 2400e1051a39Sopenharmony_ci 2401e1051a39Sopenharmony_ci ldr $rounds0,[$key1,#240] // next starting point 2402e1051a39Sopenharmony_ci vld1.8 {$dat},[$inp],$step 2403e1051a39Sopenharmony_ci 2404e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key1] // load key schedule... 
2405e1051a39Sopenharmony_ci sub $rounds0,$rounds0,#6 2406e1051a39Sopenharmony_ci add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys 2407e1051a39Sopenharmony_ci sub $rounds0,$rounds0,#2 2408e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key_],#32 2409e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key_],#32 2410e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key_],#32 2411e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key_] 2412e1051a39Sopenharmony_ci 2413e1051a39Sopenharmony_ci add $key_,$key1,#32 2414e1051a39Sopenharmony_ci mov $rounds,$rounds0 2415e1051a39Sopenharmony_ci 2416e1051a39Sopenharmony_ci // Encryption 2417e1051a39Sopenharmony_ci.Lxts_enc: 2418e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 2419e1051a39Sopenharmony_ci subs $len,$len,#32 // bias 2420e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 2421e1051a39Sopenharmony_ci vorr $in1,$dat,$dat 2422e1051a39Sopenharmony_ci vorr $dat1,$dat,$dat 2423e1051a39Sopenharmony_ci vorr $in3,$dat,$dat 2424e1051a39Sopenharmony_ci vorr $in2,$dat2,$dat2 2425e1051a39Sopenharmony_ci vorr $in4,$dat2,$dat2 2426e1051a39Sopenharmony_ci b.lo .Lxts_inner_enc_tail 2427e1051a39Sopenharmony_ci veor $dat,$dat,$iv0 // before encryption, xor with iv 2428e1051a39Sopenharmony_ci veor $dat2,$dat2,$iv1 2429e1051a39Sopenharmony_ci 2430e1051a39Sopenharmony_ci // The iv for third block 2431e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2432e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2433e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2434e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2435e1051a39Sopenharmony_ci fmov $ivd20,$ivl 2436e1051a39Sopenharmony_ci fmov $ivd21,$ivh 2437e1051a39Sopenharmony_ci 2438e1051a39Sopenharmony_ci 2439e1051a39Sopenharmony_ci vorr $dat1,$dat2,$dat2 2440e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 2441e1051a39Sopenharmony_ci vorr $in0,$dat,$dat 2442e1051a39Sopenharmony_ci vorr $in1,$dat1,$dat1 2443e1051a39Sopenharmony_ci veor $in2,$dat2,$iv2 // the third block 
2444e1051a39Sopenharmony_ci veor $dat2,$dat2,$iv2 2445e1051a39Sopenharmony_ci cmp $len,#32 2446e1051a39Sopenharmony_ci b.lo .Lxts_outer_enc_tail 2447e1051a39Sopenharmony_ci 2448e1051a39Sopenharmony_ci // The iv for fourth block 2449e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2450e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2451e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2452e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2453e1051a39Sopenharmony_ci fmov $ivd30,$ivl 2454e1051a39Sopenharmony_ci fmov $ivd31,$ivh 2455e1051a39Sopenharmony_ci 2456e1051a39Sopenharmony_ci vld1.8 {$dat3},[$inp],#16 2457e1051a39Sopenharmony_ci // The iv for fifth block 2458e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2459e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2460e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2461e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2462e1051a39Sopenharmony_ci fmov $ivd40,$ivl 2463e1051a39Sopenharmony_ci fmov $ivd41,$ivh 2464e1051a39Sopenharmony_ci 2465e1051a39Sopenharmony_ci vld1.8 {$dat4},[$inp],#16 2466e1051a39Sopenharmony_ci veor $dat3,$dat3,$iv3 // the fourth block 2467e1051a39Sopenharmony_ci veor $dat4,$dat4,$iv4 2468e1051a39Sopenharmony_ci sub $len,$len,#32 // bias 2469e1051a39Sopenharmony_ci mov $rounds,$rounds0 2470e1051a39Sopenharmony_ci b .Loop5x_xts_enc 2471e1051a39Sopenharmony_ci 2472e1051a39Sopenharmony_ci.align 4 2473e1051a39Sopenharmony_ci.Loop5x_xts_enc: 2474e1051a39Sopenharmony_ci aese $dat0,q8 2475e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2476e1051a39Sopenharmony_ci aese $dat1,q8 2477e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2478e1051a39Sopenharmony_ci aese $dat2,q8 2479e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2480e1051a39Sopenharmony_ci aese $dat3,q8 2481e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2482e1051a39Sopenharmony_ci aese $dat4,q8 2483e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2484e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 2485e1051a39Sopenharmony_ci subs 
$rounds,$rounds,#2 2486e1051a39Sopenharmony_ci aese $dat0,q9 2487e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2488e1051a39Sopenharmony_ci aese $dat1,q9 2489e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2490e1051a39Sopenharmony_ci aese $dat2,q9 2491e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2492e1051a39Sopenharmony_ci aese $dat3,q9 2493e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2494e1051a39Sopenharmony_ci aese $dat4,q9 2495e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2496e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 2497e1051a39Sopenharmony_ci b.gt .Loop5x_xts_enc 2498e1051a39Sopenharmony_ci 2499e1051a39Sopenharmony_ci aese $dat0,q8 2500e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2501e1051a39Sopenharmony_ci aese $dat1,q8 2502e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2503e1051a39Sopenharmony_ci aese $dat2,q8 2504e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2505e1051a39Sopenharmony_ci aese $dat3,q8 2506e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2507e1051a39Sopenharmony_ci aese $dat4,q8 2508e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2509e1051a39Sopenharmony_ci subs $len,$len,#0x50 // because .Lxts_enc_tail4x 2510e1051a39Sopenharmony_ci 2511e1051a39Sopenharmony_ci aese $dat0,q9 2512e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2513e1051a39Sopenharmony_ci aese $dat1,q9 2514e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2515e1051a39Sopenharmony_ci aese $dat2,q9 2516e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2517e1051a39Sopenharmony_ci aese $dat3,q9 2518e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2519e1051a39Sopenharmony_ci aese $dat4,q9 2520e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2521e1051a39Sopenharmony_ci csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo 2522e1051a39Sopenharmony_ci mov $key_,$key1 2523e1051a39Sopenharmony_ci 2524e1051a39Sopenharmony_ci aese $dat0,q10 2525e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2526e1051a39Sopenharmony_ci aese $dat1,q10 2527e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2528e1051a39Sopenharmony_ci aese $dat2,q10 2529e1051a39Sopenharmony_ci aesmc 
$dat2,$dat2 2530e1051a39Sopenharmony_ci aese $dat3,q10 2531e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2532e1051a39Sopenharmony_ci aese $dat4,q10 2533e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2534e1051a39Sopenharmony_ci add $inp,$inp,$xoffset // x0 is adjusted in such way that 2535e1051a39Sopenharmony_ci // at exit from the loop v1.16b-v26.16b 2536e1051a39Sopenharmony_ci // are loaded with last "words" 2537e1051a39Sopenharmony_ci add $xoffset,$len,#0x60 // because .Lxts_enc_tail4x 2538e1051a39Sopenharmony_ci 2539e1051a39Sopenharmony_ci aese $dat0,q11 2540e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2541e1051a39Sopenharmony_ci aese $dat1,q11 2542e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2543e1051a39Sopenharmony_ci aese $dat2,q11 2544e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2545e1051a39Sopenharmony_ci aese $dat3,q11 2546e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2547e1051a39Sopenharmony_ci aese $dat4,q11 2548e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2549e1051a39Sopenharmony_ci 2550e1051a39Sopenharmony_ci aese $dat0,q12 2551e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2552e1051a39Sopenharmony_ci aese $dat1,q12 2553e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2554e1051a39Sopenharmony_ci aese $dat2,q12 2555e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2556e1051a39Sopenharmony_ci aese $dat3,q12 2557e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2558e1051a39Sopenharmony_ci aese $dat4,q12 2559e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2560e1051a39Sopenharmony_ci 2561e1051a39Sopenharmony_ci aese $dat0,q13 2562e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2563e1051a39Sopenharmony_ci aese $dat1,q13 2564e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2565e1051a39Sopenharmony_ci aese $dat2,q13 2566e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2567e1051a39Sopenharmony_ci aese $dat3,q13 2568e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2569e1051a39Sopenharmony_ci aese $dat4,q13 2570e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2571e1051a39Sopenharmony_ci 2572e1051a39Sopenharmony_ci aese $dat0,q14 
2573e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2574e1051a39Sopenharmony_ci aese $dat1,q14 2575e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2576e1051a39Sopenharmony_ci aese $dat2,q14 2577e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2578e1051a39Sopenharmony_ci aese $dat3,q14 2579e1051a39Sopenharmony_ci aesmc $dat3,$dat3 2580e1051a39Sopenharmony_ci aese $dat4,q14 2581e1051a39Sopenharmony_ci aesmc $dat4,$dat4 2582e1051a39Sopenharmony_ci 2583e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$iv0 2584e1051a39Sopenharmony_ci aese $dat0,q15 2585e1051a39Sopenharmony_ci // The iv for first block of one iteration 2586e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2587e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2588e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2589e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2590e1051a39Sopenharmony_ci fmov $ivd00,$ivl 2591e1051a39Sopenharmony_ci fmov $ivd01,$ivh 2592e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$iv1 2593e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 2594e1051a39Sopenharmony_ci aese $dat1,q15 2595e1051a39Sopenharmony_ci // The iv for second block 2596e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2597e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2598e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2599e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2600e1051a39Sopenharmony_ci fmov $ivd10,$ivl 2601e1051a39Sopenharmony_ci fmov $ivd11,$ivh 2602e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$iv2 2603e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 2604e1051a39Sopenharmony_ci aese $dat2,q15 2605e1051a39Sopenharmony_ci // The iv for third block 2606e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2607e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2608e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2609e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2610e1051a39Sopenharmony_ci fmov $ivd20,$ivl 2611e1051a39Sopenharmony_ci fmov $ivd21,$ivh 2612e1051a39Sopenharmony_ci veor 
$tmp3,$rndlast,$iv3 2613e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 2614e1051a39Sopenharmony_ci aese $dat3,q15 2615e1051a39Sopenharmony_ci // The iv for fourth block 2616e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2617e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2618e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2619e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2620e1051a39Sopenharmony_ci fmov $ivd30,$ivl 2621e1051a39Sopenharmony_ci fmov $ivd31,$ivh 2622e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$iv4 2623e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 2624e1051a39Sopenharmony_ci aese $dat4,q15 2625e1051a39Sopenharmony_ci 2626e1051a39Sopenharmony_ci // The iv for fifth block 2627e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2628e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2629e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 2630e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 2631e1051a39Sopenharmony_ci fmov $ivd40,$ivl 2632e1051a39Sopenharmony_ci fmov $ivd41,$ivh 2633e1051a39Sopenharmony_ci 2634e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 2635e1051a39Sopenharmony_ci cbz $xoffset,.Lxts_enc_tail4x 2636e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 2637e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 2638e1051a39Sopenharmony_ci veor $dat0,$in0,$iv0 2639e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 2640e1051a39Sopenharmony_ci veor $dat1,$in1,$iv1 2641e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 2642e1051a39Sopenharmony_ci veor $dat2,$in2,$iv2 2643e1051a39Sopenharmony_ci veor $tmp3,$tmp3,$dat3 2644e1051a39Sopenharmony_ci veor $dat3,$in3,$iv3 2645e1051a39Sopenharmony_ci veor $tmp4,$tmp4,$dat4 2646e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 2647e1051a39Sopenharmony_ci veor $dat4,$in4,$iv4 2648e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 2649e1051a39Sopenharmony_ci mov $rounds,$rounds0 2650e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 2651e1051a39Sopenharmony_ci 
vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 2652e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 2653e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 2654e1051a39Sopenharmony_ci b.hs .Loop5x_xts_enc 2655e1051a39Sopenharmony_ci 2656e1051a39Sopenharmony_ci 2657e1051a39Sopenharmony_ci // If left 4 blocks, borrow the five block's processing. 2658e1051a39Sopenharmony_ci cmn $len,#0x10 2659e1051a39Sopenharmony_ci b.ne .Loop5x_enc_after 2660e1051a39Sopenharmony_ci vorr $iv4,$iv3,$iv3 2661e1051a39Sopenharmony_ci vorr $iv3,$iv2,$iv2 2662e1051a39Sopenharmony_ci vorr $iv2,$iv1,$iv1 2663e1051a39Sopenharmony_ci vorr $iv1,$iv0,$iv0 2664e1051a39Sopenharmony_ci fmov $ivl,$ivd40 2665e1051a39Sopenharmony_ci fmov $ivh,$ivd41 2666e1051a39Sopenharmony_ci veor $dat0,$iv0,$in0 2667e1051a39Sopenharmony_ci veor $dat1,$iv1,$in1 2668e1051a39Sopenharmony_ci veor $dat2,$in2,$iv2 2669e1051a39Sopenharmony_ci veor $dat3,$in3,$iv3 2670e1051a39Sopenharmony_ci veor $dat4,$in4,$iv4 2671e1051a39Sopenharmony_ci b.eq .Loop5x_xts_enc 2672e1051a39Sopenharmony_ci 2673e1051a39Sopenharmony_ci.Loop5x_enc_after: 2674e1051a39Sopenharmony_ci add $len,$len,#0x50 2675e1051a39Sopenharmony_ci cbz $len,.Lxts_enc_done 2676e1051a39Sopenharmony_ci 2677e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 2678e1051a39Sopenharmony_ci subs $len,$len,#0x30 2679e1051a39Sopenharmony_ci b.lo .Lxts_inner_enc_tail 2680e1051a39Sopenharmony_ci 2681e1051a39Sopenharmony_ci veor $dat0,$iv0,$in2 2682e1051a39Sopenharmony_ci veor $dat1,$iv1,$in3 2683e1051a39Sopenharmony_ci veor $dat2,$in4,$iv2 2684e1051a39Sopenharmony_ci b .Lxts_outer_enc_tail 2685e1051a39Sopenharmony_ci 2686e1051a39Sopenharmony_ci.align 4 2687e1051a39Sopenharmony_ci.Lxts_enc_tail4x: 2688e1051a39Sopenharmony_ci add $inp,$inp,#16 2689e1051a39Sopenharmony_ci veor $tmp1,$dat1,$tmp1 2690e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 2691e1051a39Sopenharmony_ci veor $tmp2,$dat2,$tmp2 2692e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 2693e1051a39Sopenharmony_ci veor 
$tmp3,$dat3,$tmp3 2694e1051a39Sopenharmony_ci veor $tmp4,$dat4,$tmp4 2695e1051a39Sopenharmony_ci vst1.8 {$tmp3-$tmp4},[$out],#32 2696e1051a39Sopenharmony_ci 2697e1051a39Sopenharmony_ci b .Lxts_enc_done 2698e1051a39Sopenharmony_ci.align 4 2699e1051a39Sopenharmony_ci.Lxts_outer_enc_tail: 2700e1051a39Sopenharmony_ci aese $dat0,q8 2701e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2702e1051a39Sopenharmony_ci aese $dat1,q8 2703e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2704e1051a39Sopenharmony_ci aese $dat2,q8 2705e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2706e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 2707e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2708e1051a39Sopenharmony_ci aese $dat0,q9 2709e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2710e1051a39Sopenharmony_ci aese $dat1,q9 2711e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2712e1051a39Sopenharmony_ci aese $dat2,q9 2713e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2714e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 2715e1051a39Sopenharmony_ci b.gt .Lxts_outer_enc_tail 2716e1051a39Sopenharmony_ci 2717e1051a39Sopenharmony_ci aese $dat0,q8 2718e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2719e1051a39Sopenharmony_ci aese $dat1,q8 2720e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2721e1051a39Sopenharmony_ci aese $dat2,q8 2722e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2723e1051a39Sopenharmony_ci veor $tmp0,$iv0,$rndlast 2724e1051a39Sopenharmony_ci subs $len,$len,#0x30 2725e1051a39Sopenharmony_ci // The iv for first block 2726e1051a39Sopenharmony_ci fmov $ivl,$ivd20 2727e1051a39Sopenharmony_ci fmov $ivh,$ivd21 2728e1051a39Sopenharmony_ci //mov $constnum,#0x87 2729e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2730e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2731e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr#31 2732e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl#1 2733e1051a39Sopenharmony_ci fmov $ivd00,$ivl 2734e1051a39Sopenharmony_ci fmov $ivd01,$ivh 2735e1051a39Sopenharmony_ci veor $tmp1,$iv1,$rndlast 
2736e1051a39Sopenharmony_ci csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point 2737e1051a39Sopenharmony_ci aese $dat0,q9 2738e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2739e1051a39Sopenharmony_ci aese $dat1,q9 2740e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2741e1051a39Sopenharmony_ci aese $dat2,q9 2742e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2743e1051a39Sopenharmony_ci veor $tmp2,$iv2,$rndlast 2744e1051a39Sopenharmony_ci 2745e1051a39Sopenharmony_ci add $xoffset,$xoffset,#0x20 2746e1051a39Sopenharmony_ci add $inp,$inp,$xoffset 2747e1051a39Sopenharmony_ci mov $key_,$key1 2748e1051a39Sopenharmony_ci 2749e1051a39Sopenharmony_ci aese $dat0,q12 2750e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2751e1051a39Sopenharmony_ci aese $dat1,q12 2752e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2753e1051a39Sopenharmony_ci aese $dat2,q12 2754e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2755e1051a39Sopenharmony_ci aese $dat0,q13 2756e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2757e1051a39Sopenharmony_ci aese $dat1,q13 2758e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2759e1051a39Sopenharmony_ci aese $dat2,q13 2760e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2761e1051a39Sopenharmony_ci aese $dat0,q14 2762e1051a39Sopenharmony_ci aesmc $dat0,$dat0 2763e1051a39Sopenharmony_ci aese $dat1,q14 2764e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2765e1051a39Sopenharmony_ci aese $dat2,q14 2766e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2767e1051a39Sopenharmony_ci aese $dat0,q15 2768e1051a39Sopenharmony_ci aese $dat1,q15 2769e1051a39Sopenharmony_ci aese $dat2,q15 2770e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 2771e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 2772e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 2773e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 2774e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 2775e1051a39Sopenharmony_ci veor $dat2,$dat2,$tmp2 2776e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 2777e1051a39Sopenharmony_ci vst1.8 
{$tmp0},[$out],#16 2778e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 2779e1051a39Sopenharmony_ci vst1.8 {$dat2},[$out],#16 2780e1051a39Sopenharmony_ci cmn $len,#0x30 2781e1051a39Sopenharmony_ci b.eq .Lxts_enc_done 2782e1051a39Sopenharmony_ci.Lxts_encxor_one: 2783e1051a39Sopenharmony_ci vorr $in3,$in1,$in1 2784e1051a39Sopenharmony_ci vorr $in4,$in2,$in2 2785e1051a39Sopenharmony_ci nop 2786e1051a39Sopenharmony_ci 2787e1051a39Sopenharmony_ci.Lxts_inner_enc_tail: 2788e1051a39Sopenharmony_ci cmn $len,#0x10 2789e1051a39Sopenharmony_ci veor $dat1,$in3,$iv0 2790e1051a39Sopenharmony_ci veor $dat2,$in4,$iv1 2791e1051a39Sopenharmony_ci b.eq .Lxts_enc_tail_loop 2792e1051a39Sopenharmony_ci veor $dat2,$in4,$iv0 2793e1051a39Sopenharmony_ci.Lxts_enc_tail_loop: 2794e1051a39Sopenharmony_ci aese $dat1,q8 2795e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2796e1051a39Sopenharmony_ci aese $dat2,q8 2797e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2798e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 2799e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2800e1051a39Sopenharmony_ci aese $dat1,q9 2801e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2802e1051a39Sopenharmony_ci aese $dat2,q9 2803e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2804e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 2805e1051a39Sopenharmony_ci b.gt .Lxts_enc_tail_loop 2806e1051a39Sopenharmony_ci 2807e1051a39Sopenharmony_ci aese $dat1,q8 2808e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2809e1051a39Sopenharmony_ci aese $dat2,q8 2810e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2811e1051a39Sopenharmony_ci aese $dat1,q9 2812e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2813e1051a39Sopenharmony_ci aese $dat2,q9 2814e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2815e1051a39Sopenharmony_ci aese $dat1,q12 2816e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2817e1051a39Sopenharmony_ci aese $dat2,q12 2818e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2819e1051a39Sopenharmony_ci cmn $len,#0x20 2820e1051a39Sopenharmony_ci aese $dat1,q13 2821e1051a39Sopenharmony_ci aesmc 
$dat1,$dat1 2822e1051a39Sopenharmony_ci aese $dat2,q13 2823e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2824e1051a39Sopenharmony_ci veor $tmp1,$iv0,$rndlast 2825e1051a39Sopenharmony_ci aese $dat1,q14 2826e1051a39Sopenharmony_ci aesmc $dat1,$dat1 2827e1051a39Sopenharmony_ci aese $dat2,q14 2828e1051a39Sopenharmony_ci aesmc $dat2,$dat2 2829e1051a39Sopenharmony_ci veor $tmp2,$iv1,$rndlast 2830e1051a39Sopenharmony_ci aese $dat1,q15 2831e1051a39Sopenharmony_ci aese $dat2,q15 2832e1051a39Sopenharmony_ci b.eq .Lxts_enc_one 2833e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 2834e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 2835e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 2836e1051a39Sopenharmony_ci vorr $iv0,$iv1,$iv1 2837e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 2838e1051a39Sopenharmony_ci fmov $ivl,$ivd10 2839e1051a39Sopenharmony_ci fmov $ivh,$ivd11 2840e1051a39Sopenharmony_ci mov $constnum,#0x87 2841e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2842e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2843e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 2844e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 2845e1051a39Sopenharmony_ci fmov $ivd00,$ivl 2846e1051a39Sopenharmony_ci fmov $ivd01,$ivh 2847e1051a39Sopenharmony_ci b .Lxts_enc_done 2848e1051a39Sopenharmony_ci 2849e1051a39Sopenharmony_ci.Lxts_enc_one: 2850e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat2 2851e1051a39Sopenharmony_ci vorr $iv0,$iv0,$iv0 2852e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 2853e1051a39Sopenharmony_ci fmov $ivl,$ivd00 2854e1051a39Sopenharmony_ci fmov $ivh,$ivd01 2855e1051a39Sopenharmony_ci mov $constnum,#0x87 2856e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 2857e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 2858e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 2859e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 2860e1051a39Sopenharmony_ci fmov $ivd00,$ivl 2861e1051a39Sopenharmony_ci fmov $ivd01,$ivh 2862e1051a39Sopenharmony_ci b 
.Lxts_enc_done 2863e1051a39Sopenharmony_ci.align 5 2864e1051a39Sopenharmony_ci.Lxts_enc_done: 2865e1051a39Sopenharmony_ci // Process the tail block with cipher stealing. 2866e1051a39Sopenharmony_ci tst $tailcnt,#0xf 2867e1051a39Sopenharmony_ci b.eq .Lxts_abort 2868e1051a39Sopenharmony_ci 2869e1051a39Sopenharmony_ci mov $tmpinp,$inp 2870e1051a39Sopenharmony_ci mov $tmpoutp,$out 2871e1051a39Sopenharmony_ci sub $out,$out,#16 2872e1051a39Sopenharmony_ci.composite_enc_loop: 2873e1051a39Sopenharmony_ci subs $tailcnt,$tailcnt,#1 2874e1051a39Sopenharmony_ci ldrb $l2outp,[$out,$tailcnt] 2875e1051a39Sopenharmony_ci ldrb $loutp,[$tmpinp,$tailcnt] 2876e1051a39Sopenharmony_ci strb $l2outp,[$tmpoutp,$tailcnt] 2877e1051a39Sopenharmony_ci strb $loutp,[$out,$tailcnt] 2878e1051a39Sopenharmony_ci b.gt .composite_enc_loop 2879e1051a39Sopenharmony_ci.Lxts_enc_load_done: 2880e1051a39Sopenharmony_ci vld1.8 {$tmpin},[$out] 2881e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$iv0 2882e1051a39Sopenharmony_ci 2883e1051a39Sopenharmony_ci // Encrypt the composite block to get the last second encrypted text block 2884e1051a39Sopenharmony_ci ldr $rounds,[$key1,#240] // load key schedule... 2885e1051a39Sopenharmony_ci vld1.32 {$dat},[$key1],#16 2886e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 2887e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key1],#16 // load key schedule... 
2888e1051a39Sopenharmony_ci.Loop_final_enc: 2889e1051a39Sopenharmony_ci aese $tmpin,$dat0 2890e1051a39Sopenharmony_ci aesmc $tmpin,$tmpin 2891e1051a39Sopenharmony_ci vld1.32 {$dat0},[$key1],#16 2892e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2893e1051a39Sopenharmony_ci aese $tmpin,$dat1 2894e1051a39Sopenharmony_ci aesmc $tmpin,$tmpin 2895e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key1],#16 2896e1051a39Sopenharmony_ci b.gt .Loop_final_enc 2897e1051a39Sopenharmony_ci 2898e1051a39Sopenharmony_ci aese $tmpin,$dat0 2899e1051a39Sopenharmony_ci aesmc $tmpin,$tmpin 2900e1051a39Sopenharmony_ci vld1.32 {$dat0},[$key1] 2901e1051a39Sopenharmony_ci aese $tmpin,$dat1 2902e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$dat0 2903e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$iv0 2904e1051a39Sopenharmony_ci vst1.8 {$tmpin},[$out] 2905e1051a39Sopenharmony_ci 2906e1051a39Sopenharmony_ci.Lxts_abort: 2907e1051a39Sopenharmony_ci ldp $tailcnt,$midnumx,[sp,#48] 2908e1051a39Sopenharmony_ci ldp $ivd10,$ivd20,[sp,#32] 2909e1051a39Sopenharmony_ci ldp $ivd30,$ivd40,[sp,#16] 2910e1051a39Sopenharmony_ci ldp $constnumx,$tmpinp,[sp],#64 2911e1051a39Sopenharmony_ci.Lxts_enc_final_abort: 2912e1051a39Sopenharmony_ci ret 2913e1051a39Sopenharmony_ci.size ${prefix}_xts_encrypt,.-${prefix}_xts_encrypt 2914e1051a39Sopenharmony_ci___ 2915e1051a39Sopenharmony_ci 2916e1051a39Sopenharmony_ci}}} 2917e1051a39Sopenharmony_ci{{{ 2918e1051a39Sopenharmony_cimy ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5)); 2919e1051a39Sopenharmony_cimy ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10"); 2920e1051a39Sopenharmony_cimy ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20"); 2921e1051a39Sopenharmony_cimy ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19"); 2922e1051a39Sopenharmony_cimy ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11"); 2923e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7)); 2924e1051a39Sopenharmony_cimy 
($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b"); 2925e1051a39Sopenharmony_cimy ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]"); 2926e1051a39Sopenharmony_cimy ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]"); 2927e1051a39Sopenharmony_ci 2928e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); 2929e1051a39Sopenharmony_ci 2930e1051a39Sopenharmony_ci# q7 last round key 2931e1051a39Sopenharmony_ci# q10-q15, q7 Last 7 round keys 2932e1051a39Sopenharmony_ci# q8-q9 preloaded round keys except last 7 keys for big size 2933e1051a39Sopenharmony_ci# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte 2934e1051a39Sopenharmony_ci 2935e1051a39Sopenharmony_ci{ 2936e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); 2937e1051a39Sopenharmony_ci 2938e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3); # used only in 64-bit mode 2939e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4); 2940e1051a39Sopenharmony_ciif ($flavour =~ /64/) { 2941e1051a39Sopenharmony_ci ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23)); 2942e1051a39Sopenharmony_ci} 2943e1051a39Sopenharmony_ci 2944e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2945e1051a39Sopenharmony_ci.globl ${prefix}_xts_decrypt 2946e1051a39Sopenharmony_ci.type ${prefix}_xts_decrypt,%function 2947e1051a39Sopenharmony_ci.align 5 2948e1051a39Sopenharmony_ci${prefix}_xts_decrypt: 2949e1051a39Sopenharmony_ci___ 2950e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 2951e1051a39Sopenharmony_ci cmp $len,#16 2952e1051a39Sopenharmony_ci // Original input data size bigger than 16, jump to big size processing. 2953e1051a39Sopenharmony_ci b.ne .Lxts_dec_big_size 2954e1051a39Sopenharmony_ci // Encrypt the iv with key2, as the first XEX iv. 
2955e1051a39Sopenharmony_ci ldr $rounds,[$key2,#240] 2956e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2957e1051a39Sopenharmony_ci vld1.8 {$iv0},[$ivp] 2958e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 2959e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2960e1051a39Sopenharmony_ci 2961e1051a39Sopenharmony_ci.Loop_dec_small_iv_enc: 2962e1051a39Sopenharmony_ci aese $iv0,$dat 2963e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2964e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 2965e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 2966e1051a39Sopenharmony_ci aese $iv0,$dat1 2967e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2968e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 2969e1051a39Sopenharmony_ci b.gt .Loop_dec_small_iv_enc 2970e1051a39Sopenharmony_ci 2971e1051a39Sopenharmony_ci aese $iv0,$dat 2972e1051a39Sopenharmony_ci aesmc $iv0,$iv0 2973e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2] 2974e1051a39Sopenharmony_ci aese $iv0,$dat1 2975e1051a39Sopenharmony_ci veor $iv0,$iv0,$dat 2976e1051a39Sopenharmony_ci 2977e1051a39Sopenharmony_ci vld1.8 {$dat0},[$inp] 2978e1051a39Sopenharmony_ci veor $dat0,$iv0,$dat0 2979e1051a39Sopenharmony_ci 2980e1051a39Sopenharmony_ci ldr $rounds,[$key1,#240] 2981e1051a39Sopenharmony_ci vld1.32 {q20-q21},[$key1],#32 // load key schedule... 2982e1051a39Sopenharmony_ci 2983e1051a39Sopenharmony_ci aesd $dat0,q20 2984e1051a39Sopenharmony_ci aesimc $dat0,$dat0 2985e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key1],#32 // load key schedule... 2986e1051a39Sopenharmony_ci aesd $dat0,q21 2987e1051a39Sopenharmony_ci aesimc $dat0,$dat0 2988e1051a39Sopenharmony_ci subs $rounds,$rounds,#10 // bias 2989e1051a39Sopenharmony_ci b.eq .Lxts_128_dec 2990e1051a39Sopenharmony_ci.Lxts_dec_round_loop: 2991e1051a39Sopenharmony_ci aesd $dat0,q8 2992e1051a39Sopenharmony_ci aesimc $dat0,$dat0 2993e1051a39Sopenharmony_ci vld1.32 {q8},[$key1],#16 // load key schedule... 
2994e1051a39Sopenharmony_ci aesd $dat0,q9 2995e1051a39Sopenharmony_ci aesimc $dat0,$dat0 2996e1051a39Sopenharmony_ci vld1.32 {q9},[$key1],#16 // load key schedule... 2997e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 // bias 2998e1051a39Sopenharmony_ci b.gt .Lxts_dec_round_loop 2999e1051a39Sopenharmony_ci.Lxts_128_dec: 3000e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key1],#32 // load key schedule... 3001e1051a39Sopenharmony_ci aesd $dat0,q8 3002e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3003e1051a39Sopenharmony_ci aesd $dat0,q9 3004e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3005e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key1],#32 // load key schedule... 3006e1051a39Sopenharmony_ci aesd $dat0,q10 3007e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3008e1051a39Sopenharmony_ci aesd $dat0,q11 3009e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3010e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key1],#32 // load key schedule... 3011e1051a39Sopenharmony_ci aesd $dat0,q12 3012e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3013e1051a39Sopenharmony_ci aesd $dat0,q13 3014e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3015e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key1] 3016e1051a39Sopenharmony_ci aesd $dat0,q14 3017e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3018e1051a39Sopenharmony_ci aesd $dat0,q15 3019e1051a39Sopenharmony_ci veor $dat0,$dat0,$rndlast 3020e1051a39Sopenharmony_ci veor $dat0,$iv0,$dat0 3021e1051a39Sopenharmony_ci vst1.8 {$dat0},[$out] 3022e1051a39Sopenharmony_ci b .Lxts_dec_final_abort 3023e1051a39Sopenharmony_ci.Lxts_dec_big_size: 3024e1051a39Sopenharmony_ci___ 3025e1051a39Sopenharmony_ci$code.=<<___ if ($flavour =~ /64/); 3026e1051a39Sopenharmony_ci stp $constnumx,$tmpinp,[sp,#-64]! 
3027e1051a39Sopenharmony_ci stp $tailcnt,$midnumx,[sp,#48] 3028e1051a39Sopenharmony_ci stp $ivd10,$ivd20,[sp,#32] 3029e1051a39Sopenharmony_ci stp $ivd30,$ivd40,[sp,#16] 3030e1051a39Sopenharmony_ci 3031e1051a39Sopenharmony_ci and $tailcnt,$len,#0xf 3032e1051a39Sopenharmony_ci and $len,$len,#-16 3033e1051a39Sopenharmony_ci subs $len,$len,#16 3034e1051a39Sopenharmony_ci mov $step,#16 3035e1051a39Sopenharmony_ci b.lo .Lxts_dec_abort 3036e1051a39Sopenharmony_ci 3037e1051a39Sopenharmony_ci // Encrypt the iv with key2, as the first XEX iv 3038e1051a39Sopenharmony_ci ldr $rounds,[$key2,#240] 3039e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 3040e1051a39Sopenharmony_ci vld1.8 {$iv0},[$ivp] 3041e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 3042e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 3043e1051a39Sopenharmony_ci 3044e1051a39Sopenharmony_ci.Loop_dec_iv_enc: 3045e1051a39Sopenharmony_ci aese $iv0,$dat 3046e1051a39Sopenharmony_ci aesmc $iv0,$iv0 3047e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2],#16 3048e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 3049e1051a39Sopenharmony_ci aese $iv0,$dat1 3050e1051a39Sopenharmony_ci aesmc $iv0,$iv0 3051e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key2],#16 3052e1051a39Sopenharmony_ci b.gt .Loop_dec_iv_enc 3053e1051a39Sopenharmony_ci 3054e1051a39Sopenharmony_ci aese $iv0,$dat 3055e1051a39Sopenharmony_ci aesmc $iv0,$iv0 3056e1051a39Sopenharmony_ci vld1.32 {$dat},[$key2] 3057e1051a39Sopenharmony_ci aese $iv0,$dat1 3058e1051a39Sopenharmony_ci veor $iv0,$iv0,$dat 3059e1051a39Sopenharmony_ci 3060e1051a39Sopenharmony_ci // The iv for second block 3061e1051a39Sopenharmony_ci // $ivl- iv(low), $ivh - iv(high) 3062e1051a39Sopenharmony_ci // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4 3063e1051a39Sopenharmony_ci fmov $ivl,$ivd00 3064e1051a39Sopenharmony_ci fmov $ivh,$ivd01 3065e1051a39Sopenharmony_ci mov $constnum,#0x87 3066e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3067e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 
3068e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3069e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3070e1051a39Sopenharmony_ci fmov $ivd10,$ivl 3071e1051a39Sopenharmony_ci fmov $ivd11,$ivh 3072e1051a39Sopenharmony_ci 3073e1051a39Sopenharmony_ci ldr $rounds0,[$key1,#240] // load rounds number 3074e1051a39Sopenharmony_ci 3075e1051a39Sopenharmony_ci // The iv for third block 3076e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3077e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3078e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3079e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3080e1051a39Sopenharmony_ci fmov $ivd20,$ivl 3081e1051a39Sopenharmony_ci fmov $ivd21,$ivh 3082e1051a39Sopenharmony_ci 3083e1051a39Sopenharmony_ci vld1.32 {q8-q9},[$key1] // load key schedule... 3084e1051a39Sopenharmony_ci sub $rounds0,$rounds0,#6 3085e1051a39Sopenharmony_ci add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys 3086e1051a39Sopenharmony_ci sub $rounds0,$rounds0,#2 3087e1051a39Sopenharmony_ci vld1.32 {q10-q11},[$key_],#32 // load key schedule... 
3088e1051a39Sopenharmony_ci vld1.32 {q12-q13},[$key_],#32 3089e1051a39Sopenharmony_ci vld1.32 {q14-q15},[$key_],#32 3090e1051a39Sopenharmony_ci vld1.32 {$rndlast},[$key_] 3091e1051a39Sopenharmony_ci 3092e1051a39Sopenharmony_ci // The iv for fourth block 3093e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3094e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3095e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3096e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3097e1051a39Sopenharmony_ci fmov $ivd30,$ivl 3098e1051a39Sopenharmony_ci fmov $ivd31,$ivh 3099e1051a39Sopenharmony_ci 3100e1051a39Sopenharmony_ci add $key_,$key1,#32 3101e1051a39Sopenharmony_ci mov $rounds,$rounds0 3102e1051a39Sopenharmony_ci b .Lxts_dec 3103e1051a39Sopenharmony_ci 3104e1051a39Sopenharmony_ci // Decryption 3105e1051a39Sopenharmony_ci.align 5 3106e1051a39Sopenharmony_ci.Lxts_dec: 3107e1051a39Sopenharmony_ci tst $tailcnt,#0xf 3108e1051a39Sopenharmony_ci b.eq .Lxts_dec_begin 3109e1051a39Sopenharmony_ci subs $len,$len,#16 3110e1051a39Sopenharmony_ci csel $step,xzr,$step,eq 3111e1051a39Sopenharmony_ci vld1.8 {$dat},[$inp],#16 3112e1051a39Sopenharmony_ci b.lo .Lxts_done 3113e1051a39Sopenharmony_ci sub $inp,$inp,#16 3114e1051a39Sopenharmony_ci.Lxts_dec_begin: 3115e1051a39Sopenharmony_ci vld1.8 {$dat},[$inp],$step 3116e1051a39Sopenharmony_ci subs $len,$len,#32 // bias 3117e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 3118e1051a39Sopenharmony_ci vorr $in1,$dat,$dat 3119e1051a39Sopenharmony_ci vorr $dat1,$dat,$dat 3120e1051a39Sopenharmony_ci vorr $in3,$dat,$dat 3121e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 3122e1051a39Sopenharmony_ci vorr $in2,$dat2,$dat2 3123e1051a39Sopenharmony_ci vorr $in4,$dat2,$dat2 3124e1051a39Sopenharmony_ci b.lo .Lxts_inner_dec_tail 3125e1051a39Sopenharmony_ci veor $dat,$dat,$iv0 // before decryt, xor with iv 3126e1051a39Sopenharmony_ci veor $dat2,$dat2,$iv1 3127e1051a39Sopenharmony_ci 3128e1051a39Sopenharmony_ci vorr $dat1,$dat2,$dat2 
3129e1051a39Sopenharmony_ci vld1.8 {$dat2},[$inp],#16 3130e1051a39Sopenharmony_ci vorr $in0,$dat,$dat 3131e1051a39Sopenharmony_ci vorr $in1,$dat1,$dat1 3132e1051a39Sopenharmony_ci veor $in2,$dat2,$iv2 // third block xox with third iv 3133e1051a39Sopenharmony_ci veor $dat2,$dat2,$iv2 3134e1051a39Sopenharmony_ci cmp $len,#32 3135e1051a39Sopenharmony_ci b.lo .Lxts_outer_dec_tail 3136e1051a39Sopenharmony_ci 3137e1051a39Sopenharmony_ci vld1.8 {$dat3},[$inp],#16 3138e1051a39Sopenharmony_ci 3139e1051a39Sopenharmony_ci // The iv for fifth block 3140e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3141e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3142e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3143e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3144e1051a39Sopenharmony_ci fmov $ivd40,$ivl 3145e1051a39Sopenharmony_ci fmov $ivd41,$ivh 3146e1051a39Sopenharmony_ci 3147e1051a39Sopenharmony_ci vld1.8 {$dat4},[$inp],#16 3148e1051a39Sopenharmony_ci veor $dat3,$dat3,$iv3 // the fourth block 3149e1051a39Sopenharmony_ci veor $dat4,$dat4,$iv4 3150e1051a39Sopenharmony_ci sub $len,$len,#32 // bias 3151e1051a39Sopenharmony_ci mov $rounds,$rounds0 3152e1051a39Sopenharmony_ci b .Loop5x_xts_dec 3153e1051a39Sopenharmony_ci 3154e1051a39Sopenharmony_ci.align 4 3155e1051a39Sopenharmony_ci.Loop5x_xts_dec: 3156e1051a39Sopenharmony_ci aesd $dat0,q8 3157e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3158e1051a39Sopenharmony_ci aesd $dat1,q8 3159e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3160e1051a39Sopenharmony_ci aesd $dat2,q8 3161e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3162e1051a39Sopenharmony_ci aesd $dat3,q8 3163e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3164e1051a39Sopenharmony_ci aesd $dat4,q8 3165e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3166e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // load key schedule... 
3167e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 3168e1051a39Sopenharmony_ci aesd $dat0,q9 3169e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3170e1051a39Sopenharmony_ci aesd $dat1,q9 3171e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3172e1051a39Sopenharmony_ci aesd $dat2,q9 3173e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3174e1051a39Sopenharmony_ci aesd $dat3,q9 3175e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3176e1051a39Sopenharmony_ci aesd $dat4,q9 3177e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3178e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // load key schedule... 3179e1051a39Sopenharmony_ci b.gt .Loop5x_xts_dec 3180e1051a39Sopenharmony_ci 3181e1051a39Sopenharmony_ci aesd $dat0,q8 3182e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3183e1051a39Sopenharmony_ci aesd $dat1,q8 3184e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3185e1051a39Sopenharmony_ci aesd $dat2,q8 3186e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3187e1051a39Sopenharmony_ci aesd $dat3,q8 3188e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3189e1051a39Sopenharmony_ci aesd $dat4,q8 3190e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3191e1051a39Sopenharmony_ci subs $len,$len,#0x50 // because .Lxts_dec_tail4x 3192e1051a39Sopenharmony_ci 3193e1051a39Sopenharmony_ci aesd $dat0,q9 3194e1051a39Sopenharmony_ci aesimc $dat0,$dat 3195e1051a39Sopenharmony_ci aesd $dat1,q9 3196e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3197e1051a39Sopenharmony_ci aesd $dat2,q9 3198e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3199e1051a39Sopenharmony_ci aesd $dat3,q9 3200e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3201e1051a39Sopenharmony_ci aesd $dat4,q9 3202e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3203e1051a39Sopenharmony_ci csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo 3204e1051a39Sopenharmony_ci mov $key_,$key1 3205e1051a39Sopenharmony_ci 3206e1051a39Sopenharmony_ci aesd $dat0,q10 3207e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3208e1051a39Sopenharmony_ci aesd $dat1,q10 3209e1051a39Sopenharmony_ci aesimc $dat1,$dat1 
3210e1051a39Sopenharmony_ci aesd $dat2,q10 3211e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3212e1051a39Sopenharmony_ci aesd $dat3,q10 3213e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3214e1051a39Sopenharmony_ci aesd $dat4,q10 3215e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3216e1051a39Sopenharmony_ci add $inp,$inp,$xoffset // x0 is adjusted in such way that 3217e1051a39Sopenharmony_ci // at exit from the loop v1.16b-v26.16b 3218e1051a39Sopenharmony_ci // are loaded with last "words" 3219e1051a39Sopenharmony_ci add $xoffset,$len,#0x60 // because .Lxts_dec_tail4x 3220e1051a39Sopenharmony_ci 3221e1051a39Sopenharmony_ci aesd $dat0,q11 3222e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3223e1051a39Sopenharmony_ci aesd $dat1,q11 3224e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3225e1051a39Sopenharmony_ci aesd $dat2,q11 3226e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3227e1051a39Sopenharmony_ci aesd $dat3,q11 3228e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3229e1051a39Sopenharmony_ci aesd $dat4,q11 3230e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3231e1051a39Sopenharmony_ci 3232e1051a39Sopenharmony_ci aesd $dat0,q12 3233e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3234e1051a39Sopenharmony_ci aesd $dat1,q12 3235e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3236e1051a39Sopenharmony_ci aesd $dat2,q12 3237e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3238e1051a39Sopenharmony_ci aesd $dat3,q12 3239e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3240e1051a39Sopenharmony_ci aesd $dat4,q12 3241e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3242e1051a39Sopenharmony_ci 3243e1051a39Sopenharmony_ci aesd $dat0,q13 3244e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3245e1051a39Sopenharmony_ci aesd $dat1,q13 3246e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3247e1051a39Sopenharmony_ci aesd $dat2,q13 3248e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3249e1051a39Sopenharmony_ci aesd $dat3,q13 3250e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3251e1051a39Sopenharmony_ci aesd $dat4,q13 3252e1051a39Sopenharmony_ci aesimc $dat4,$dat4 
3253e1051a39Sopenharmony_ci 3254e1051a39Sopenharmony_ci aesd $dat0,q14 3255e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3256e1051a39Sopenharmony_ci aesd $dat1,q14 3257e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3258e1051a39Sopenharmony_ci aesd $dat2,q14 3259e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3260e1051a39Sopenharmony_ci aesd $dat3,q14 3261e1051a39Sopenharmony_ci aesimc $dat3,$dat3 3262e1051a39Sopenharmony_ci aesd $dat4,q14 3263e1051a39Sopenharmony_ci aesimc $dat4,$dat4 3264e1051a39Sopenharmony_ci 3265e1051a39Sopenharmony_ci veor $tmp0,$rndlast,$iv0 3266e1051a39Sopenharmony_ci aesd $dat0,q15 3267e1051a39Sopenharmony_ci // The iv for first block of next iteration. 3268e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3269e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3270e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3271e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3272e1051a39Sopenharmony_ci fmov $ivd00,$ivl 3273e1051a39Sopenharmony_ci fmov $ivd01,$ivh 3274e1051a39Sopenharmony_ci veor $tmp1,$rndlast,$iv1 3275e1051a39Sopenharmony_ci vld1.8 {$in0},[$inp],#16 3276e1051a39Sopenharmony_ci aesd $dat1,q15 3277e1051a39Sopenharmony_ci // The iv for second block 3278e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3279e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3280e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3281e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3282e1051a39Sopenharmony_ci fmov $ivd10,$ivl 3283e1051a39Sopenharmony_ci fmov $ivd11,$ivh 3284e1051a39Sopenharmony_ci veor $tmp2,$rndlast,$iv2 3285e1051a39Sopenharmony_ci vld1.8 {$in1},[$inp],#16 3286e1051a39Sopenharmony_ci aesd $dat2,q15 3287e1051a39Sopenharmony_ci // The iv for third block 3288e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3289e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3290e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3291e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3292e1051a39Sopenharmony_ci fmov $ivd20,$ivl 
3293e1051a39Sopenharmony_ci fmov $ivd21,$ivh 3294e1051a39Sopenharmony_ci veor $tmp3,$rndlast,$iv3 3295e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 3296e1051a39Sopenharmony_ci aesd $dat3,q15 3297e1051a39Sopenharmony_ci // The iv for fourth block 3298e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3299e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3300e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3301e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3302e1051a39Sopenharmony_ci fmov $ivd30,$ivl 3303e1051a39Sopenharmony_ci fmov $ivd31,$ivh 3304e1051a39Sopenharmony_ci veor $tmp4,$rndlast,$iv4 3305e1051a39Sopenharmony_ci vld1.8 {$in3},[$inp],#16 3306e1051a39Sopenharmony_ci aesd $dat4,q15 3307e1051a39Sopenharmony_ci 3308e1051a39Sopenharmony_ci // The iv for fifth block 3309e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3310e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3311e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3312e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3313e1051a39Sopenharmony_ci fmov $ivd40,$ivl 3314e1051a39Sopenharmony_ci fmov $ivd41,$ivh 3315e1051a39Sopenharmony_ci 3316e1051a39Sopenharmony_ci vld1.8 {$in4},[$inp],#16 3317e1051a39Sopenharmony_ci cbz $xoffset,.Lxts_dec_tail4x 3318e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 3319e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 3320e1051a39Sopenharmony_ci veor $dat0,$in0,$iv0 3321e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 3322e1051a39Sopenharmony_ci veor $dat1,$in1,$iv1 3323e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 3324e1051a39Sopenharmony_ci veor $dat2,$in2,$iv2 3325e1051a39Sopenharmony_ci veor $tmp3,$tmp3,$dat3 3326e1051a39Sopenharmony_ci veor $dat3,$in3,$iv3 3327e1051a39Sopenharmony_ci veor $tmp4,$tmp4,$dat4 3328e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 3329e1051a39Sopenharmony_ci veor $dat4,$in4,$iv4 3330e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 3331e1051a39Sopenharmony_ci mov $rounds,$rounds0 
3332e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 3333e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 3334e1051a39Sopenharmony_ci vst1.8 {$tmp3},[$out],#16 3335e1051a39Sopenharmony_ci vst1.8 {$tmp4},[$out],#16 3336e1051a39Sopenharmony_ci b.hs .Loop5x_xts_dec 3337e1051a39Sopenharmony_ci 3338e1051a39Sopenharmony_ci cmn $len,#0x10 3339e1051a39Sopenharmony_ci b.ne .Loop5x_dec_after 3340e1051a39Sopenharmony_ci // If x2($len) equal to -0x10, the left blocks is 4. 3341e1051a39Sopenharmony_ci // After specially processing, utilize the five blocks processing again. 3342e1051a39Sopenharmony_ci // It will use the following IVs: $iv0,$iv0,$iv1,$iv2,$iv3. 3343e1051a39Sopenharmony_ci vorr $iv4,$iv3,$iv3 3344e1051a39Sopenharmony_ci vorr $iv3,$iv2,$iv2 3345e1051a39Sopenharmony_ci vorr $iv2,$iv1,$iv1 3346e1051a39Sopenharmony_ci vorr $iv1,$iv0,$iv0 3347e1051a39Sopenharmony_ci fmov $ivl,$ivd40 3348e1051a39Sopenharmony_ci fmov $ivh,$ivd41 3349e1051a39Sopenharmony_ci veor $dat0,$iv0,$in0 3350e1051a39Sopenharmony_ci veor $dat1,$iv1,$in1 3351e1051a39Sopenharmony_ci veor $dat2,$in2,$iv2 3352e1051a39Sopenharmony_ci veor $dat3,$in3,$iv3 3353e1051a39Sopenharmony_ci veor $dat4,$in4,$iv4 3354e1051a39Sopenharmony_ci b.eq .Loop5x_xts_dec 3355e1051a39Sopenharmony_ci 3356e1051a39Sopenharmony_ci.Loop5x_dec_after: 3357e1051a39Sopenharmony_ci add $len,$len,#0x50 3358e1051a39Sopenharmony_ci cbz $len,.Lxts_done 3359e1051a39Sopenharmony_ci 3360e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 3361e1051a39Sopenharmony_ci subs $len,$len,#0x30 3362e1051a39Sopenharmony_ci b.lo .Lxts_inner_dec_tail 3363e1051a39Sopenharmony_ci 3364e1051a39Sopenharmony_ci veor $dat0,$iv0,$in2 3365e1051a39Sopenharmony_ci veor $dat1,$iv1,$in3 3366e1051a39Sopenharmony_ci veor $dat2,$in4,$iv2 3367e1051a39Sopenharmony_ci b .Lxts_outer_dec_tail 3368e1051a39Sopenharmony_ci 3369e1051a39Sopenharmony_ci.align 4 3370e1051a39Sopenharmony_ci.Lxts_dec_tail4x: 3371e1051a39Sopenharmony_ci add $inp,$inp,#16 
3372e1051a39Sopenharmony_ci tst $tailcnt,#0xf 3373e1051a39Sopenharmony_ci veor $tmp1,$dat1,$tmp0 3374e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 3375e1051a39Sopenharmony_ci veor $tmp2,$dat2,$tmp2 3376e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 3377e1051a39Sopenharmony_ci veor $tmp3,$dat3,$tmp3 3378e1051a39Sopenharmony_ci veor $tmp4,$dat4,$tmp4 3379e1051a39Sopenharmony_ci vst1.8 {$tmp3-$tmp4},[$out],#32 3380e1051a39Sopenharmony_ci 3381e1051a39Sopenharmony_ci b.eq .Lxts_dec_abort 3382e1051a39Sopenharmony_ci vld1.8 {$dat0},[$inp],#16 3383e1051a39Sopenharmony_ci b .Lxts_done 3384e1051a39Sopenharmony_ci.align 4 3385e1051a39Sopenharmony_ci.Lxts_outer_dec_tail: 3386e1051a39Sopenharmony_ci aesd $dat0,q8 3387e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3388e1051a39Sopenharmony_ci aesd $dat1,q8 3389e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3390e1051a39Sopenharmony_ci aesd $dat2,q8 3391e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3392e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 3393e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 3394e1051a39Sopenharmony_ci aesd $dat0,q9 3395e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3396e1051a39Sopenharmony_ci aesd $dat1,q9 3397e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3398e1051a39Sopenharmony_ci aesd $dat2,q9 3399e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3400e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 3401e1051a39Sopenharmony_ci b.gt .Lxts_outer_dec_tail 3402e1051a39Sopenharmony_ci 3403e1051a39Sopenharmony_ci aesd $dat0,q8 3404e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3405e1051a39Sopenharmony_ci aesd $dat1,q8 3406e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3407e1051a39Sopenharmony_ci aesd $dat2,q8 3408e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3409e1051a39Sopenharmony_ci veor $tmp0,$iv0,$rndlast 3410e1051a39Sopenharmony_ci subs $len,$len,#0x30 3411e1051a39Sopenharmony_ci // The iv for first block 3412e1051a39Sopenharmony_ci fmov $ivl,$ivd20 3413e1051a39Sopenharmony_ci fmov $ivh,$ivd21 3414e1051a39Sopenharmony_ci mov 
$constnum,#0x87 3415e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3416e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3417e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3418e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3419e1051a39Sopenharmony_ci fmov $ivd00,$ivl 3420e1051a39Sopenharmony_ci fmov $ivd01,$ivh 3421e1051a39Sopenharmony_ci veor $tmp1,$iv1,$rndlast 3422e1051a39Sopenharmony_ci csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point 3423e1051a39Sopenharmony_ci aesd $dat0,q9 3424e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3425e1051a39Sopenharmony_ci aesd $dat1,q9 3426e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3427e1051a39Sopenharmony_ci aesd $dat2,q9 3428e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3429e1051a39Sopenharmony_ci veor $tmp2,$iv2,$rndlast 3430e1051a39Sopenharmony_ci // The iv for second block 3431e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3432e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3433e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3434e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3435e1051a39Sopenharmony_ci fmov $ivd10,$ivl 3436e1051a39Sopenharmony_ci fmov $ivd11,$ivh 3437e1051a39Sopenharmony_ci 3438e1051a39Sopenharmony_ci add $xoffset,$xoffset,#0x20 3439e1051a39Sopenharmony_ci add $inp,$inp,$xoffset // $inp is adjusted to the last data 3440e1051a39Sopenharmony_ci 3441e1051a39Sopenharmony_ci mov $key_,$key1 3442e1051a39Sopenharmony_ci 3443e1051a39Sopenharmony_ci // The iv for third block 3444e1051a39Sopenharmony_ci extr $midnumx,$ivh,$ivh,#32 3445e1051a39Sopenharmony_ci extr $ivh,$ivh,$ivl,#63 3446e1051a39Sopenharmony_ci and $tmpmw,$constnum,$midnum,asr #31 3447e1051a39Sopenharmony_ci eor $ivl,$tmpmx,$ivl,lsl #1 3448e1051a39Sopenharmony_ci fmov $ivd20,$ivl 3449e1051a39Sopenharmony_ci fmov $ivd21,$ivh 3450e1051a39Sopenharmony_ci 3451e1051a39Sopenharmony_ci aesd $dat0,q12 3452e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3453e1051a39Sopenharmony_ci aesd $dat1,q12 
3454e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3455e1051a39Sopenharmony_ci aesd $dat2,q12 3456e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3457e1051a39Sopenharmony_ci aesd $dat0,q13 3458e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3459e1051a39Sopenharmony_ci aesd $dat1,q13 3460e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3461e1051a39Sopenharmony_ci aesd $dat2,q13 3462e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3463e1051a39Sopenharmony_ci aesd $dat0,q14 3464e1051a39Sopenharmony_ci aesimc $dat0,$dat0 3465e1051a39Sopenharmony_ci aesd $dat1,q14 3466e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3467e1051a39Sopenharmony_ci aesd $dat2,q14 3468e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3469e1051a39Sopenharmony_ci vld1.8 {$in2},[$inp],#16 3470e1051a39Sopenharmony_ci aesd $dat0,q15 3471e1051a39Sopenharmony_ci aesd $dat1,q15 3472e1051a39Sopenharmony_ci aesd $dat2,q15 3473e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] 3474e1051a39Sopenharmony_ci add $rounds,$rounds0,#2 3475e1051a39Sopenharmony_ci veor $tmp0,$tmp0,$dat0 3476e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 3477e1051a39Sopenharmony_ci veor $dat2,$dat2,$tmp2 3478e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] 3479e1051a39Sopenharmony_ci vst1.8 {$tmp0},[$out],#16 3480e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 3481e1051a39Sopenharmony_ci vst1.8 {$dat2},[$out],#16 3482e1051a39Sopenharmony_ci 3483e1051a39Sopenharmony_ci cmn $len,#0x30 3484e1051a39Sopenharmony_ci add $len,$len,#0x30 3485e1051a39Sopenharmony_ci b.eq .Lxts_done 3486e1051a39Sopenharmony_ci sub $len,$len,#0x30 3487e1051a39Sopenharmony_ci vorr $in3,$in1,$in1 3488e1051a39Sopenharmony_ci vorr $in4,$in2,$in2 3489e1051a39Sopenharmony_ci nop 3490e1051a39Sopenharmony_ci 3491e1051a39Sopenharmony_ci.Lxts_inner_dec_tail: 3492e1051a39Sopenharmony_ci // $len == -0x10 means two blocks left. 
3493e1051a39Sopenharmony_ci cmn $len,#0x10 3494e1051a39Sopenharmony_ci veor $dat1,$in3,$iv0 3495e1051a39Sopenharmony_ci veor $dat2,$in4,$iv1 3496e1051a39Sopenharmony_ci b.eq .Lxts_dec_tail_loop 3497e1051a39Sopenharmony_ci veor $dat2,$in4,$iv0 3498e1051a39Sopenharmony_ci.Lxts_dec_tail_loop: 3499e1051a39Sopenharmony_ci aesd $dat1,q8 3500e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3501e1051a39Sopenharmony_ci aesd $dat2,q8 3502e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3503e1051a39Sopenharmony_ci vld1.32 {q8},[$key_],#16 3504e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 3505e1051a39Sopenharmony_ci aesd $dat1,q9 3506e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3507e1051a39Sopenharmony_ci aesd $dat2,q9 3508e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3509e1051a39Sopenharmony_ci vld1.32 {q9},[$key_],#16 3510e1051a39Sopenharmony_ci b.gt .Lxts_dec_tail_loop 3511e1051a39Sopenharmony_ci 3512e1051a39Sopenharmony_ci aesd $dat1,q8 3513e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3514e1051a39Sopenharmony_ci aesd $dat2,q8 3515e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3516e1051a39Sopenharmony_ci aesd $dat1,q9 3517e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3518e1051a39Sopenharmony_ci aesd $dat2,q9 3519e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3520e1051a39Sopenharmony_ci aesd $dat1,q12 3521e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3522e1051a39Sopenharmony_ci aesd $dat2,q12 3523e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3524e1051a39Sopenharmony_ci cmn $len,#0x20 3525e1051a39Sopenharmony_ci aesd $dat1,q13 3526e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3527e1051a39Sopenharmony_ci aesd $dat2,q13 3528e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3529e1051a39Sopenharmony_ci veor $tmp1,$iv0,$rndlast 3530e1051a39Sopenharmony_ci aesd $dat1,q14 3531e1051a39Sopenharmony_ci aesimc $dat1,$dat1 3532e1051a39Sopenharmony_ci aesd $dat2,q14 3533e1051a39Sopenharmony_ci aesimc $dat2,$dat2 3534e1051a39Sopenharmony_ci veor $tmp2,$iv1,$rndlast 3535e1051a39Sopenharmony_ci aesd $dat1,q15 
3536e1051a39Sopenharmony_ci aesd $dat2,q15 3537e1051a39Sopenharmony_ci b.eq .Lxts_dec_one 3538e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat1 3539e1051a39Sopenharmony_ci veor $tmp2,$tmp2,$dat2 3540e1051a39Sopenharmony_ci vorr $iv0,$iv2,$iv2 3541e1051a39Sopenharmony_ci vorr $iv1,$iv3,$iv3 3542e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 3543e1051a39Sopenharmony_ci vst1.8 {$tmp2},[$out],#16 3544e1051a39Sopenharmony_ci add $len,$len,#16 3545e1051a39Sopenharmony_ci b .Lxts_done 3546e1051a39Sopenharmony_ci 3547e1051a39Sopenharmony_ci.Lxts_dec_one: 3548e1051a39Sopenharmony_ci veor $tmp1,$tmp1,$dat2 3549e1051a39Sopenharmony_ci vorr $iv0,$iv1,$iv1 3550e1051a39Sopenharmony_ci vorr $iv1,$iv2,$iv2 3551e1051a39Sopenharmony_ci vst1.8 {$tmp1},[$out],#16 3552e1051a39Sopenharmony_ci add $len,$len,#32 3553e1051a39Sopenharmony_ci 3554e1051a39Sopenharmony_ci.Lxts_done: 3555e1051a39Sopenharmony_ci tst $tailcnt,#0xf 3556e1051a39Sopenharmony_ci b.eq .Lxts_dec_abort 3557e1051a39Sopenharmony_ci // Processing the last two blocks with cipher stealing. 3558e1051a39Sopenharmony_ci mov x7,x3 3559e1051a39Sopenharmony_ci cbnz x2,.Lxts_dec_1st_done 3560e1051a39Sopenharmony_ci vld1.8 {$dat0},[$inp],#16 3561e1051a39Sopenharmony_ci 3562e1051a39Sopenharmony_ci // Decrypt the last secod block to get the last plain text block 3563e1051a39Sopenharmony_ci.Lxts_dec_1st_done: 3564e1051a39Sopenharmony_ci eor $tmpin,$dat0,$iv1 3565e1051a39Sopenharmony_ci ldr $rounds,[$key1,#240] 3566e1051a39Sopenharmony_ci vld1.32 {$dat0},[$key1],#16 3567e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 3568e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key1],#16 3569e1051a39Sopenharmony_ci.Loop_final_2nd_dec: 3570e1051a39Sopenharmony_ci aesd $tmpin,$dat0 3571e1051a39Sopenharmony_ci aesimc $tmpin,$tmpin 3572e1051a39Sopenharmony_ci vld1.32 {$dat0},[$key1],#16 // load key schedule... 
3573e1051a39Sopenharmony_ci subs $rounds,$rounds,#2 3574e1051a39Sopenharmony_ci aesd $tmpin,$dat1 3575e1051a39Sopenharmony_ci aesimc $tmpin,$tmpin 3576e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key1],#16 // load key schedule... 3577e1051a39Sopenharmony_ci b.gt .Loop_final_2nd_dec 3578e1051a39Sopenharmony_ci 3579e1051a39Sopenharmony_ci aesd $tmpin,$dat0 3580e1051a39Sopenharmony_ci aesimc $tmpin,$tmpin 3581e1051a39Sopenharmony_ci vld1.32 {$dat0},[$key1] 3582e1051a39Sopenharmony_ci aesd $tmpin,$dat1 3583e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$dat0 3584e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$iv1 3585e1051a39Sopenharmony_ci vst1.8 {$tmpin},[$out] 3586e1051a39Sopenharmony_ci 3587e1051a39Sopenharmony_ci mov $tmpinp,$inp 3588e1051a39Sopenharmony_ci add $tmpoutp,$out,#16 3589e1051a39Sopenharmony_ci 3590e1051a39Sopenharmony_ci // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks 3591e1051a39Sopenharmony_ci // to get the last encrypted block. 3592e1051a39Sopenharmony_ci.composite_dec_loop: 3593e1051a39Sopenharmony_ci subs $tailcnt,$tailcnt,#1 3594e1051a39Sopenharmony_ci ldrb $l2outp,[$out,$tailcnt] 3595e1051a39Sopenharmony_ci ldrb $loutp,[$tmpinp,$tailcnt] 3596e1051a39Sopenharmony_ci strb $l2outp,[$tmpoutp,$tailcnt] 3597e1051a39Sopenharmony_ci strb $loutp,[$out,$tailcnt] 3598e1051a39Sopenharmony_ci b.gt .composite_dec_loop 3599e1051a39Sopenharmony_ci.Lxts_dec_load_done: 3600e1051a39Sopenharmony_ci vld1.8 {$tmpin},[$out] 3601e1051a39Sopenharmony_ci veor $tmpin,$tmpin,$iv0 3602e1051a39Sopenharmony_ci 3603e1051a39Sopenharmony_ci // Decrypt the composite block to get the last second plain text block 3604e1051a39Sopenharmony_ci ldr $rounds,[$key_,#240] 3605e1051a39Sopenharmony_ci vld1.32 {$dat},[$key_],#16 3606e1051a39Sopenharmony_ci sub $rounds,$rounds,#2 3607e1051a39Sopenharmony_ci vld1.32 {$dat1},[$key_],#16 3608e1051a39Sopenharmony_ci.Loop_final_dec: 3609e1051a39Sopenharmony_ci aesd $tmpin,$dat0 3610e1051a39Sopenharmony_ci aesimc 
$tmpin,$tmpin
 vld1.32 {$dat0},[$key_],#16 // load key schedule...
 subs $rounds,$rounds,#2
 aesd $tmpin,$dat1
 aesimc $tmpin,$tmpin
 vld1.32 {$dat1},[$key_],#16 // load key schedule...
 b.gt .Loop_final_dec

 aesd $tmpin,$dat0
 aesimc $tmpin,$tmpin
 vld1.32 {$dat0},[$key_]
 aesd $tmpin,$dat1
 veor $tmpin,$tmpin,$dat0
 veor $tmpin,$tmpin,$iv0
 vst1.8 {$tmpin},[$out]

.Lxts_dec_abort:
 ldp $tailcnt,$midnumx,[sp,#48]
 ldp $ivd10,$ivd20,[sp,#32]
 ldp $ivd30,$ivd40,[sp,#16]
 ldp $constnumx,$tmpinp,[sp],#64

.Lxts_dec_final_abort:
 ret
.size ${prefix}_xts_decrypt,.-${prefix}_xts_decrypt
___
}
}}}
# Append the closing preprocessor guard to the generated assembly text.
# NOTE(review): the matching #if is emitted earlier in the file -- not
# visible from this section.
$code.=<<___;
#endif
___
########################################
# Post-processing.  Everything accumulated in $code is rewritten one line
# at a time into the dialect selected by $flavour and printed to STDOUT.
# In both branches anything between back-ticks is first replaced by the
# result of eval()-ing it as Perl.  The substitutions that follow are
# order-sensitive: inside every "or" chain only the first one that
# matches is applied to a given line.
if ($flavour =~ /64/) {			######## 64-bit code
    # Base encodings of the four AES instructions for this flavour.
    my %opcode = (
        "aesd"  => 0x4e285800,
        "aese"  => 0x4e284800,
        "aesimc"=> 0x4e287800,
        "aesmc" => 0x4e286800,
    );

    # Turn "<mnemonic> <reg>,<reg>" into a raw ".inst" word: the first
    # register number is OR-ed into the low bits of the base encoding,
    # the second one is shifted left by 5.  Yields a false value when
    # the operands do not parse.  Nothing calls it at the moment: the
    # substitution that would do so is commented out in the loop below.
    local *unaes = sub {
        my ($insn, $operands) = @_;

        $operands =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o
            and sprintf(".inst\t0x%08x\t//%s %s",
                        $opcode{$insn} | $1 | ($2 << 5),
                        $insn, $operands);
    };

    for (split("\n", $code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

        # old->new registers: q0-q7 become v0-v7, q8 and above become
        # v(N+8); every renamed register gets a ".16b" arrangement.
        s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;
        # old->new style commentary: "@ " becomes "//"
        s/@\s/\/\//o;

        #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
        s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o	or
        s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel $2,$3,$2,$1/o	or
        s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
        s/vext\.8/ext/o		or
        s/vrev32\.8/rev32/o	or
        s/vtst\.8/cmtst/o	or
        s/vshr/ushr/o		or
        s/^(\s+)v/$1/o		or	# strip off v prefix
        s/\bbx\s+lr\b/ret/o;

        # Fix up remaining legacy suffixes.  The element-size suffix is
        # removed from the mnemonic and, for the 32- and 64-bit sizes,
        # the ".16b" arrangement written above is narrowed to match.
        s/\.[ui]?8//o;
        s/\.16b/\.8b/go if m/\],#8/o;
        if (s/\.[ui]?32//o) { s/\.16b/\.4s/go; }
        if (s/\.[ui]?64//o) { s/\.16b/\.2d/go; }
        s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;

        print $_, "\n";
    }
} else {				######## 32-bit code
    # Base encodings of the four AES instructions for this flavour.
    my %opcode = (
        "aesd"  => 0xf3b00340,
        "aese"  => 0xf3b00300,
        "aesimc"=> 0xf3b003c0,
        "aesmc" => 0xf3b00380,
    );

    # Encode "<mnemonic> <reg>,<reg>" by hand and emit it as
    # INST(b0,b1,b2,b3), least significant byte first.
    # NOTE(review): INST() is not defined in this section -- presumably a
    # macro supplied by the assembly prologue; confirm.
    local *unaes = sub {
        my ($insn, $operands) = @_;

        if ($operands =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
            my ($first, $second) = ($1, $2);
            # Each register number is split into its low three bits and
            # bit 3, which land in separate fields of the word.
            my $word = $opcode{$insn}
                     | (($first  & 7) << 13) | (($first  & 8) << 19)
                     | (($second & 7) << 1)  | (($second & 8) << 2);
            # since ARMv7 instructions are always encoded little-endian.
            # correct solution is to use .inst directive, but older
            # assemblers don't implement it:-(
            my @le_bytes = map { ($word >> $_) & 0xff } (0, 8, 16, 24);
            sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
                    @le_bytes, $insn, $operands;
        }
    };

    # "qD,{qT},qI" -> two vtbl.8 instructions, one for each d-register
    # half (2*N and 2*N+1) of the destination and index registers.
    sub unvtbl {
        my ($operands) = @_;

        $operands =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o
            and sprintf("vtbl.8 d%d,{q%d},d%d\n\t".
                        "vtbl.8 d%d,{q%d},d%d",
                        2*$1,   $2, 2*$3,
                        2*$1+1, $2, 2*$3+1);
    }

    # "qD,qS[lane]" -> vdup.32 from the d-register half that holds the
    # lane (lanes 0-1 in d(2*S), lanes 2-3 in d(2*S+1)).
    sub unvdup32 {
        my ($operands) = @_;

        $operands =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o
            and sprintf("vdup.32 q%d,d%d[%d]",
                        $1, 2*$2 + ($3 >> 1), $3 & 1);
    }

    # "qD[lane],<src>" -> vmov.32 into the matching d-register lane.
    sub unvmov32 {
        my ($operands) = @_;

        $operands =~ m/q([0-9]+)\[([0-3])\],(.*)/o
            and sprintf("vmov.32 d%d[%d],%s",
                        2*$1 + ($2 >> 1), $2 & 1, $3);
    }

    for (split("\n", $code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

        s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
        s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
        s/\/\/\s?/@ /o;				# new->old style commentary

        # fix up remaining new-style suffixes: post-increment written as
        # "],#imm" becomes "]!", and an 8-byte access through a q name
        # is rewritten to use the d register 2*N.
        s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
        s/\],#[0-9]+/]!/o;

        s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
        s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2 $1,#0/o	or
        s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
        s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
        s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
        s/^(\s+)b\./$1b/o				or
        s/^(\s+)ret/$1bx\tlr/o;

        # A conditional "mov.<cc>" becomes "mov<cc>", preceded by an
        # "it <cc>" line of its own.
        s/^(\s+)mov\.([a-z]+)/$1mov$2/
            and print " it $2\n";

        print $_, "\n";
    }
}

# Make sure buffered output really made it out before reporting success.
close(STDOUT) or die "error closing STDOUT: $!";