#! /usr/bin/env perl
# Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
#========================================================================
# Written by Fangming Fang <fangming.fang@arm.com> for the OpenSSL project,
# derived from https://github.com/ARM-software/AArch64cryptolib, original
# author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual
# licensed under OpenSSL and CRYPTOGAMS licenses depending on where you
# obtain it. For further details see http://www.openssl.org/~appro/cryptogams/.
#========================================================================
#
# Approach - assume we don't want to reload constants, so reserve ~half of vector register file for constants
#
# main loop to act on 4 16B blocks per iteration, and then do modulo of the accumulated intermediate hashes from the 4 blocks
#
#  ____________________________________________________
# |                                                    |
# | PRE                                                |
# |____________________________________________________|
# |                |                |                  |
# | CTR block 4k+8 | AES block 4k+4 | GHASH block 4k+0 |
# |________________|________________|__________________|
# |                |                |                  |
# | CTR block 4k+9 | AES block 4k+5 | GHASH block 4k+1 |
# |________________|________________|__________________|
# |                |                |                  |
# | CTR block 4k+10| AES block 4k+6 | GHASH block 4k+2 |
# |________________|________________|__________________|
# |                |                |                  |
# | CTR block 4k+11| AES block 4k+7 | GHASH block 4k+3 |
# |________________|____(mostly)____|__________________|
# |                                                    |
# | MODULO                                             |
# |____________________________________________________|
#
# PRE:
#     Ensure previous generated intermediate hash is aligned and merged with result for GHASH 4k+0
#     EXT     low_acc, low_acc, low_acc, #8
#     EOR     res_curr (4k+0), res_curr (4k+0), low_acc
#
# CTR block:
#     Increment and byte reverse counter in scalar registers and transfer to SIMD registers
#     REV     ctr32, rev_ctr32
#     ORR     ctr64, constctr96_top32, ctr32, LSL #32
#     INS     ctr_next.d[0], constctr96_bottom64      // Keeping this in scalar registers to free up space in SIMD RF
#     INS     ctr_next.d[1], ctr64X
#     ADD     rev_ctr32, #1
#
# AES block:
#     Do AES encryption/decryption on CTR block X and EOR it with input block X. The 256-bit key case is shown below as an example.
#     Doing small trick here of loading input in scalar registers, EORing with last key and then transferring
#     Given we are very constrained in our ASIMD registers this is quite important
#
#     Encrypt:
#         LDR     input_low, [ input_ptr ], #8
#         LDR     input_high, [ input_ptr ], #8
#         EOR     input_low, k14_low
#         EOR     input_high, k14_high
#         INS     res_curr.d[0], input_low
#         INS     res_curr.d[1], input_high
#         AESE    ctr_curr, k0; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k1; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k2; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k3; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k4; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k5; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k6; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k7; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k8; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k9; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k10; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k11; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k12; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k13
#         EOR     res_curr, res_curr, ctr_curr
#         ST1     { res_curr.16b }, [ output_ptr ], #16
#
#     Decrypt:
#         AESE    ctr_curr, k0; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k1; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k2; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k3; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k4; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k5; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k6; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k7; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k8; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k9; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k10; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k11; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k12; AESMC ctr_curr, ctr_curr
#         AESE    ctr_curr, k13
#         LDR     res_curr, [ input_ptr ], #16
#         EOR     res_curr, res_curr, ctr_curr
#         MOV     output_low, res_curr.d[0]
#         MOV     output_high, res_curr.d[1]
#         EOR     output_low, k14_low
#         EOR     output_high, k14_high
#         STP     output_low, output_high, [ output_ptr ], #16
#
# GHASH block X:
#     do 128b karatsuba polynomial multiplication on block
#     We only have 64b->128b polynomial multipliers, naively that means we need to do 4 64b multiplies to generate a 128b
#     multiplication:
#         Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64
#
#     The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies:
#         Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64
#
#     There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are
#     multiplying with "twisted" powers of H
#
#     Note: We can PMULL directly into the acc_x in first GHASH of the loop
#     Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical
#           path latency dominates the performance
#
#           This has a knock-on effect on register pressure, so we have to be a bit more clever with our temporary registers
#           than indicated here
#     REV64   res_curr, res_curr
#     INS     t_m.d[0], res_curr.d[1]
#     EOR     t_m.8B, t_m.8B, res_curr.8B
#     PMULL2  t_h, res_curr, HX
#     PMULL   t_l, res_curr, HX
#     PMULL   t_m, t_m, HX_k
#     EOR     acc_h, acc_h, t_h
#     EOR     acc_l, acc_l, t_l
#     EOR     acc_m, acc_m, t_m
#
# MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them
#         There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo
#         with a reversed constant
#     EOR     acc_m, acc_m, acc_h
#     EOR     acc_m, acc_m, acc_l                     // Finish off karatsuba processing
#     PMULL   t_mod, acc_h, mod_constant
#     EXT     acc_h, acc_h, acc_h, #8
#     EOR     acc_m, acc_m, acc_h
#     EOR     acc_m, acc_m, t_mod
#     PMULL   acc_h, acc_m, mod_constant
#     EXT     acc_m, acc_m, acc_m, #8
#     EOR     acc_l, acc_l, acc_h
#     EOR     acc_l, acc_l, acc_m

# Command line: [flavour] [output].
# $output is the last argument if it ends in an extension; $flavour is the
# first remaining argument if it contains no '.'.  Either may end up undef.
# The pop must happen before the shift so a lone "file.S" is taken as output.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate arm-xlate.pl: first beside this script, then in ../../perlasm/
# relative to it.  $dir keeps its trailing path separator.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

# Pipe everything this script prints through the translator.  The translator
# path and the output file name are quoted so that paths containing spaces
# survive the shell, and a failure to start the pipe is fatal instead of
# silently producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Scalar registers holding the kernel arguments.  x0..x3 are used as passed;
# the function prologue copies x4 into x16 ($counter) and x5 into x8 ($cc).
$input_ptr="x0";  #argument block
$bit_length="x1";
$output_ptr="x2";
$current_tag="x3";
$counter="x16";
$cc="x8";

{
# General-purpose registers.  The output_* names alias the same registers as
# the corresponding input_* names (x6/x7 and x19..x24).
my ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map("x$_",(4..7));
my ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map("x$_",(19..24));
my ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map("x$_",(19..24));
my ($output_l0,$output_h0)=map("x$_",(6..7));

# Counter pieces, last round key halves and byte length (x9..x15), plus the
# 32-bit views of x9, x11 and x12.
my $ctr32w="w9";
my ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk10_l,$rk10_h,$len)=map("x$_",(9..15));
my ($ctr96_t32w,$rctr32w)=map("w$_",(11..12));

# v0..v3: counter blocks, v4..v7: result blocks, each in .16b / plain / d / q
# spellings as needed by the instructions that use them.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map("v$_.16b",(0..7));
my ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map("v$_",(0..7));
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map("d$_",(0..7));
my ($res0q,$res1q,$res2q,$res3q)=map("q$_",(4..7));

# v9..v11: high / middle / low GHASH accumulators (acc_h, acc_m, acc_l in the
# description above).
my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(9..11));
my ($acc_h,$acc_m,$acc_l)=map("v$_",(9..11));
my ($acc_hd,$acc_md,$acc_ld)=map("d$_",(9..11));

# v12..v15: h1..h4, v16/v17: h12k/h34k (built with trn1/trn2/eor from the h
# registers in the function setup).
my ($h1,$h2,$h3,$h4,$h12k,$h34k)=map("v$_",(12..17));
my ($h1q,$h2q,$h3q,$h4q)=map("q$_",(12..15));
my ($h1b,$h2b,$h3b,$h4b)=map("v$_.16b",(12..15));

# Temporaries.  Several names deliberately share one register:
# $t0, $t4 and $mod_constant are all v8; $t1/$t5 are v28; $t2/$t8 are v29;
# $t3/$t9 are v30; $t6/$mod_t are v31; $t7 and $ctr_t0 are v4 (also $res0).
my $t0="v8";
my $t0d="d8";

my ($t1,$t2,$t3)=map("v$_",(28..30));
my ($t1d,$t2d,$t3d)=map("d$_",(28..30));

my $t4="v8";
my $t4d="d8";
my $t5="v28";
my $t5d="d28";
my $t6="v31";
my $t6d="d31";

my $t7="v4";
my $t7d="d4";
my $t8="v29";
my $t8d="d29";
my $t9="v30";
my $t9d="d30";

# v4..v7 under the ctr_t* names (same registers as res0..res3).
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map("v$_",(4..7));
my ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map("d$_",(4..7));
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map("v$_.16b",(4..7));

my $mod_constantd="d8";
my $mod_constant="v8";
my $mod_t="v31";

# v18..v27: round keys rk0..rk9 in .16b / .4s / q spellings, plus a few extra
# spellings of v20, v21 and v22.
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9)=map("v$_.16b",(18..27));
my ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s)=map("v$_.4s",(18..27));
my ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q)=map("q$_",(18..27));
my $rk2q1="v20.1q";
my $rk3q1="v21.1q";
my $rk4v="v22";
my $rk4d="d22";

# Common file header of the generated assembly.
$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=8
___
$code.=".arch armv8-a+crypto\n.text\n" if ($flavour =~ /64/);
# NOTE(review): $_byte is not assigned anywhere in the visible part of this
# script; unless it is set elsewhere it interpolates as an empty string in the
# INST() macros below.  Only reached for non-64-bit flavours - confirm.
$code.=<<___ if ($flavour !~ /64/);
.fpu neon
#ifdef __thumb2__
.syntax unified
.thumb
# define INST(a,b,c,d) $_byte c,0xef,a,b
#else
.code 32
# define INST(a,b,c,d) $_byte a,b,c,0xf2
#endif

.text
___

#########################################################################################
# size_t aes_gcm_enc_128_kernel(const unsigned char *in,      /* x0 */
#                               size_t bit_len,               /* x1 */
#                               unsigned char *out,           /* x2 */
#                               u64 *Xi,                      /* x3 */
#                               unsigned char ivec[16],       /* x4 */
#                               const void *key);             /* x5 */
#
# Argument order follows the register usage in the function body:
#   x1 is a length in bits (it is shifted right by 3 to get the byte length),
#   x3 is the base for loading the current tag and the h1..h4 values,
#   x4 is copied to x16 and used to load the initial counter block,
#   x5 is copied to x8 and used to load the round keys.
#
$code.=<<___;
.global aes_gcm_enc_128_kernel
.type   aes_gcm_enc_128_kernel,%function
.align  4
aes_gcm_enc_128_kernel:
	cbz     x1, .L128_enc_ret
	stp     x19, x20, [sp, #-112]!
262e1051a39Sopenharmony_ci mov x16, x4 263e1051a39Sopenharmony_ci mov x8, x5 264e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 265e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 266e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 267e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 268e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 269e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 270e1051a39Sopenharmony_ci 271e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 272e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 273e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 274e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 275e1051a39Sopenharmony_ci#endif 276e1051a39Sopenharmony_ci ldp $rk10_l, $rk10_h, [$cc, #160] @ load rk10 277e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 278e1051a39Sopenharmony_ci ror $rk10_l, $rk10_l, #32 279e1051a39Sopenharmony_ci ror $rk10_h, $rk10_h, #32 280e1051a39Sopenharmony_ci#endif 281e1051a39Sopenharmony_ci ld1 {$acc_lb}, [$current_tag] 282e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 283e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 284e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 285e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 286e1051a39Sopenharmony_ci 287e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 288e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 289e1051a39Sopenharmony_ci sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 290e1051a39Sopenharmony_ci 291e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 292e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 293e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 294e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 295e1051a39Sopenharmony_ci#endif 296e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 297e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 298e1051a39Sopenharmony_ci 
299e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 300e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 301e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 302e1051a39Sopenharmony_ci 303e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 304e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 305e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 308e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 309e1051a39Sopenharmony_ci 310e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 311e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 312e1051a39Sopenharmony_ci 313e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 314e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 315e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 316e1051a39Sopenharmony_ci 317e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 318e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 319e1051a39Sopenharmony_ci 320e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 321e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 322e1051a39Sopenharmony_ci 323e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 324e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 325e1051a39Sopenharmony_ci 326e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 327e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 328e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 329e1051a39Sopenharmony_ci#endif 330e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 331e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 332e1051a39Sopenharmony_ci 
333e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 334e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 335e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 336e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 337e1051a39Sopenharmony_ci#endif 338e1051a39Sopenharmony_ci 339e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 340e1051a39Sopenharmony_ci ld1 {$rk4s}, [$cc], #16 @ load rk4 341e1051a39Sopenharmony_ci 342e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 343e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 344e1051a39Sopenharmony_ci 345e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 346e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 347e1051a39Sopenharmony_ci 348e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 349e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 350e1051a39Sopenharmony_ci 351e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 352e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 353e1051a39Sopenharmony_ci 354e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 355e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 356e1051a39Sopenharmony_ci 357e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 358e1051a39Sopenharmony_ci ld1 {$rk8s}, [$cc], #16 @ load rk8 359e1051a39Sopenharmony_ci 360e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 361e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 362e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 363e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 364e1051a39Sopenharmony_ci#endif 365e1051a39Sopenharmony_ci 366e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc 
$ctr3b, $ctr3b @ AES block 3 - round 2 367e1051a39Sopenharmony_ci 368e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 369e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 370e1051a39Sopenharmony_ci 371e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 372e1051a39Sopenharmony_ci 373e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 374e1051a39Sopenharmony_ci 375e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 376e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 377e1051a39Sopenharmony_ci 378e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 379e1051a39Sopenharmony_ci 380e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 381e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 382e1051a39Sopenharmony_ci 383e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 384e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 385e1051a39Sopenharmony_ci 386e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 387e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 388e1051a39Sopenharmony_ci 389e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 390e1051a39Sopenharmony_ci 391e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 392e1051a39Sopenharmony_ci 393e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 394e1051a39Sopenharmony_ci 395e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 396e1051a39Sopenharmony_ci 
397e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 398e1051a39Sopenharmony_ci 399e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 400e1051a39Sopenharmony_ci 401e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 402e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 403e1051a39Sopenharmony_ci 404e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 405e1051a39Sopenharmony_ci 406e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 407e1051a39Sopenharmony_ci 408e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 409e1051a39Sopenharmony_ci 410e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 411e1051a39Sopenharmony_ci 412e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 413e1051a39Sopenharmony_ci 414e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 415e1051a39Sopenharmony_ci 416e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 417e1051a39Sopenharmony_ci 418e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 419e1051a39Sopenharmony_ci 420e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 421e1051a39Sopenharmony_ci 422e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 423e1051a39Sopenharmony_ci 424e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 425e1051a39Sopenharmony_ci 426e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 2 - round 9 427e1051a39Sopenharmony_ci 428e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 0 - round 9 429e1051a39Sopenharmony_ci 430e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b 
@ h2k | h1k 431e1051a39Sopenharmony_ci 432e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 1 - round 9 433e1051a39Sopenharmony_ci 434e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 3 - round 9 435e1051a39Sopenharmony_ci b.ge .L128_enc_tail @ handle tail 436e1051a39Sopenharmony_ci 437e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 0 - load plaintext 438e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 439e1051a39Sopenharmony_ci rev $input_l0, $input_l0 440e1051a39Sopenharmony_ci rev $input_h0, $input_h0 441e1051a39Sopenharmony_ci#endif 442e1051a39Sopenharmony_ci ldp $input_l2, $input_h2, [$input_ptr, #32] @ AES block 2 - load plaintext 443e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 444e1051a39Sopenharmony_ci rev $input_l2, $input_l2 445e1051a39Sopenharmony_ci rev $input_h2, $input_h2 446e1051a39Sopenharmony_ci#endif 447e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 1 - load plaintext 448e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 449e1051a39Sopenharmony_ci rev $input_l1, $input_l1 450e1051a39Sopenharmony_ci rev $input_h1, $input_h1 451e1051a39Sopenharmony_ci#endif 452e1051a39Sopenharmony_ci ldp $input_l3, $input_h3, [$input_ptr, #48] @ AES block 3 - load plaintext 453e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 454e1051a39Sopenharmony_ci rev $input_l3, $input_l3 455e1051a39Sopenharmony_ci rev $input_h3, $input_h3 456e1051a39Sopenharmony_ci#endif 457e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES block 0 - round 10 low 458e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES block 0 - round 10 high 459e1051a39Sopenharmony_ci 460e1051a39Sopenharmony_ci eor $input_l2, $input_l2, $rk10_l @ AES block 2 - round 10 low 461e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 0 - mov low 462e1051a39Sopenharmony_ci 463e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk10_l @ AES block 1 - round 10 low 464e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk10_h @ AES block 
2 - round 10 high 465e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 0 - mov high 466e1051a39Sopenharmony_ci 467e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 1 - mov low 468e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk10_h @ AES block 1 - round 10 high 469e1051a39Sopenharmony_ci 470e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk10_l @ AES block 3 - round 10 low 471e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 1 - mov high 472e1051a39Sopenharmony_ci 473e1051a39Sopenharmony_ci fmov $ctr_t2d, $input_l2 @ AES block 2 - mov low 474e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk10_h @ AES block 3 - round 10 high 475e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 476e1051a39Sopenharmony_ci 477e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 2 - mov high 478e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4 479e1051a39Sopenharmony_ci 480e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 0 - result 481e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 482e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 483e1051a39Sopenharmony_ci 484e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4 485e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 486e1051a39Sopenharmony_ci 487e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 1 - result 488e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 489e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 490e1051a39Sopenharmony_ci 491e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 492e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 493e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 494e1051a39Sopenharmony_ci 495e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES block 3 - mov low 496e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 
6 497e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 0 - store result 498e1051a39Sopenharmony_ci 499e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 3 - mov high 500e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 501e1051a39Sopenharmony_ci 502e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 6 503e1051a39Sopenharmony_ci eor $res2b, $ctr_t2b, $ctr2b @ AES block 2 - result 504e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 1 - store result 505e1051a39Sopenharmony_ci 506e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 6 507e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 508e1051a39Sopenharmony_ci 509e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 6 510e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 7 511e1051a39Sopenharmony_ci st1 { $res2b}, [$output_ptr], #16 @ AES block 2 - store result 512e1051a39Sopenharmony_ci 513e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 7 514e1051a39Sopenharmony_ci 515e1051a39Sopenharmony_ci eor $res3b, $ctr_t3b, $ctr3b @ AES block 3 - result 516e1051a39Sopenharmony_ci st1 { $res3b}, [$output_ptr], #16 @ AES block 3 - store result 517e1051a39Sopenharmony_ci b.ge .L128_enc_prepretail @ do prepretail 518e1051a39Sopenharmony_ci 519e1051a39Sopenharmony_ci .L128_enc_main_loop: @ main loop start 520e1051a39Sopenharmony_ci ldp $input_l3, $input_h3, [$input_ptr, #48] @ AES block 4k+3 - load plaintext 521e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 522e1051a39Sopenharmony_ci rev $input_l3, $input_l3 523e1051a39Sopenharmony_ci rev $input_h3, $input_h3 524e1051a39Sopenharmony_ci#endif 525e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k (only t0 is free) 526e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 (t0, t1, and t2 free) 527e1051a39Sopenharmony_ci 528e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, 
$ctr2b @ AES block 4k+6 - round 0 529e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+3 530e1051a39Sopenharmony_ci 531e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 532e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 533e1051a39Sopenharmony_ci 534e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 535e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 536e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+3 537e1051a39Sopenharmony_ci 538e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 539e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 540e1051a39Sopenharmony_ci 541e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 542e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 543e1051a39Sopenharmony_ci 544e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 545e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 546e1051a39Sopenharmony_ci 547e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 548e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk10_h @ AES block 4k+3 - round 10 high 549e1051a39Sopenharmony_ci 550e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 551e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 552e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 4k+4 - load plaintext 553e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 554e1051a39Sopenharmony_ci rev $input_l0, $input_l0 555e1051a39Sopenharmony_ci rev $input_h0, $input_h0 556e1051a39Sopenharmony_ci#endif 557e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 558e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+8 
559e1051a39Sopenharmony_ci 560e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 561e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 562e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 563e1051a39Sopenharmony_ci 564e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 565e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 566e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 567e1051a39Sopenharmony_ci 568e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 569e1051a39Sopenharmony_ci 570e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 571e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 572e1051a39Sopenharmony_ci 573e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 574e1051a39Sopenharmony_ci 575e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 576e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 577e1051a39Sopenharmony_ci 578e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 579e1051a39Sopenharmony_ci 580e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 581e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 582e1051a39Sopenharmony_ci 583e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 584e1051a39Sopenharmony_ci 585e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 586e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 587e1051a39Sopenharmony_ci 588e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 589e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES block 4k+4 - round 10 high 
590e1051a39Sopenharmony_ci 591e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 592e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 593e1051a39Sopenharmony_ci 594e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 595e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 596e1051a39Sopenharmony_ci 597e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 598e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES block 4k+4 - round 10 low 599e1051a39Sopenharmony_ci 600e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 601e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 602e1051a39Sopenharmony_ci 603e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 604e1051a39Sopenharmony_ci 605e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 606e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 607e1051a39Sopenharmony_ci 608e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 609e1051a39Sopenharmony_ci 610e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 611e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 612e1051a39Sopenharmony_ci 613e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 614e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 615e1051a39Sopenharmony_ci 616e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 617e1051a39Sopenharmony_ci 618e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 619e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 620e1051a39Sopenharmony_ci 621e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n 
aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 622e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 623e1051a39Sopenharmony_ci 624e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 625e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 4k+5 - load plaintext 626e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 627e1051a39Sopenharmony_ci rev $input_l1, $input_l1 628e1051a39Sopenharmony_ci rev $input_h1, $input_h1 629e1051a39Sopenharmony_ci#endif 630e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 631e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 632e1051a39Sopenharmony_ci 633e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 634e1051a39Sopenharmony_ci ldp $input_l2, $input_h2, [$input_ptr, #32] @ AES block 4k+6 - load plaintext 635e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 636e1051a39Sopenharmony_ci rev $input_l2, $input_l2 637e1051a39Sopenharmony_ci rev $input_h2, $input_h2 638e1051a39Sopenharmony_ci#endif 639e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 640e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 641e1051a39Sopenharmony_ci 642e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 643e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk10_l @ AES block 4k+5 - round 10 low 644e1051a39Sopenharmony_ci 645e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 646e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 647e1051a39Sopenharmony_ci 648e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 649e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk10_l @ AES block 4k+3 - round 10 low 650e1051a39Sopenharmony_ci 
651e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 652e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 653e1051a39Sopenharmony_ci 654e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 655e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 656e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 657e1051a39Sopenharmony_ci 658e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 659e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES block 4k+3 - mov low 660e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 661e1051a39Sopenharmony_ci 662e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 663e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 4k+5 - mov low 664e1051a39Sopenharmony_ci 665e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 666e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 667e1051a39Sopenharmony_ci 668e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 669e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk10_h @ AES block 4k+5 - round 10 high 670e1051a39Sopenharmony_ci 671e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 672e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 4k+5 - mov high 673e1051a39Sopenharmony_ci 674e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 675e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 4k+3 - mov high 676e1051a39Sopenharmony_ci 677e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 678e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 
679e1051a39Sopenharmony_ci 680e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 681e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 682e1051a39Sopenharmony_ci 683e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 4k+4 - round 9 684e1051a39Sopenharmony_ci eor $input_l2, $input_l2, $rk10_l @ AES block 4k+6 - round 10 low 685e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk10_h @ AES block 4k+6 - round 10 high 686e1051a39Sopenharmony_ci 687e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 688e1051a39Sopenharmony_ci fmov $ctr_t2d, $input_l2 @ AES block 4k+6 - mov low 689e1051a39Sopenharmony_ci 690e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 4k+5 - round 9 691e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 4k+6 - mov high 692e1051a39Sopenharmony_ci 693e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 694e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 695e1051a39Sopenharmony_ci 696e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 697e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 698e1051a39Sopenharmony_ci 699e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4k+8 700e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+9 701e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 702e1051a39Sopenharmony_ci 703e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 704e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 4k+5 - result 705e1051a39Sopenharmony_ci 706e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 707e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 708e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 
709e1051a39Sopenharmony_ci 710e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 711e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 712e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 713e1051a39Sopenharmony_ci 714e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 4k+6 - round 9 715e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 4k+4 - store result 716e1051a39Sopenharmony_ci eor $res2b, $ctr_t2b, $ctr2b @ AES block 4k+6 - result 717e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 718e1051a39Sopenharmony_ci 719e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 4k+7 - round 9 720e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+10 721e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 722e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+10 723e1051a39Sopenharmony_ci 724e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 725e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 4k+5 - store result 726e1051a39Sopenharmony_ci 727e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+10 728e1051a39Sopenharmony_ci st1 { $res2b}, [$output_ptr], #16 @ AES block 4k+6 - store result 729e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+11 730e1051a39Sopenharmony_ci 731e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+11 732e1051a39Sopenharmony_ci eor $res3b, $ctr_t3b, $ctr3b @ AES block 4k+3 - result 733e1051a39Sopenharmony_ci 734e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 735e1051a39Sopenharmony_ci st1 { $res3b}, [$output_ptr], #16 @ AES block 4k+3 - store result 736e1051a39Sopenharmony_ci b.lt .L128_enc_main_loop 737e1051a39Sopenharmony_ci 738e1051a39Sopenharmony_ci .L128_enc_prepretail: @ PREPRETAIL 739e1051a39Sopenharmony_ci rev64 
$res0b, $res0b @ GHASH block 4k (only t0 is free) 740e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+3 741e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 742e1051a39Sopenharmony_ci 743e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 744e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 745e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+3 746e1051a39Sopenharmony_ci 747e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 748e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 (t0, t1, and t2 free) 749e1051a39Sopenharmony_ci 750e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 751e1051a39Sopenharmony_ci 752e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 753e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 754e1051a39Sopenharmony_ci 755e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 756e1051a39Sopenharmony_ci 757e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 758e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 759e1051a39Sopenharmony_ci 760e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 761e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 762e1051a39Sopenharmony_ci 763e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 764e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 765e1051a39Sopenharmony_ci 766e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 767e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 768e1051a39Sopenharmony_ci 769e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 770e1051a39Sopenharmony_ci 771e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, 
$h4.2d @ GHASH block 4k - high 772e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 773e1051a39Sopenharmony_ci 774e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 775e1051a39Sopenharmony_ci 776e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 777e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 778e1051a39Sopenharmony_ci 779e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 780e1051a39Sopenharmony_ci 781e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 782e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 783e1051a39Sopenharmony_ci 784e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 785e1051a39Sopenharmony_ci 786e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 787e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 788e1051a39Sopenharmony_ci 789e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 790e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 791e1051a39Sopenharmony_ci 792e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 793e1051a39Sopenharmony_ci 794e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 795e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 796e1051a39Sopenharmony_ci 797e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 798e1051a39Sopenharmony_ci 799e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 800e1051a39Sopenharmony_ci 801e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 802e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 
803e1051a39Sopenharmony_ci 804e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 805e1051a39Sopenharmony_ci 806e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 807e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 808e1051a39Sopenharmony_ci 809e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 810e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 811e1051a39Sopenharmony_ci 812e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 813e1051a39Sopenharmony_ci 814e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 815e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 816e1051a39Sopenharmony_ci 817e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 818e1051a39Sopenharmony_ci 819e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 820e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 821e1051a39Sopenharmony_ci 822e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 823e1051a39Sopenharmony_ci 824e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 825e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 826e1051a39Sopenharmony_ci 827e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 828e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 829e1051a39Sopenharmony_ci 830e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 831e1051a39Sopenharmony_ci 832e1051a39Sopenharmony_ci pmull $t1.1q, $acc_h.1d, $mod_constant.1d 833e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ karatsuba tidy up 834e1051a39Sopenharmony_ci 
835e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 836e1051a39Sopenharmony_ci 837e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 838e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 839e1051a39Sopenharmony_ci 840e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 841e1051a39Sopenharmony_ci 842e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 843e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_lb 844e1051a39Sopenharmony_ci 845e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 846e1051a39Sopenharmony_ci 847e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 848e1051a39Sopenharmony_ci 849e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 850e1051a39Sopenharmony_ci 851e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 852e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t1.16b 853e1051a39Sopenharmony_ci 854e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 855e1051a39Sopenharmony_ci 856e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 857e1051a39Sopenharmony_ci 858e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 859e1051a39Sopenharmony_ci 860e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 861e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb 862e1051a39Sopenharmony_ci 863e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 864e1051a39Sopenharmony_ci 865e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 866e1051a39Sopenharmony_ci 867e1051a39Sopenharmony_ci aese $ctr3b, $rk7 
\n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 868e1051a39Sopenharmony_ci 869e1051a39Sopenharmony_ci pmull $t1.1q, $acc_m.1d, $mod_constant.1d 870e1051a39Sopenharmony_ci 871e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 872e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 873e1051a39Sopenharmony_ci 874e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 875e1051a39Sopenharmony_ci 876e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 877e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t1.16b 878e1051a39Sopenharmony_ci 879e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 880e1051a39Sopenharmony_ci 881e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 4k+7 - round 9 882e1051a39Sopenharmony_ci 883e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 884e1051a39Sopenharmony_ci 885e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 4k+4 - round 9 886e1051a39Sopenharmony_ci 887e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 4k+5 - round 9 888e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb 889e1051a39Sopenharmony_ci 890e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 4k+6 - round 9 891e1051a39Sopenharmony_ci .L128_enc_tail: @ TAIL 892e1051a39Sopenharmony_ci 893e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 894e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES block 4k+4 - load plaintext 895e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 896e1051a39Sopenharmony_ci rev $input_l0, $input_l0 897e1051a39Sopenharmony_ci rev $input_h0, $input_h0 898e1051a39Sopenharmony_ci#endif 899e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 900e1051a39Sopenharmony_ci 901e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final 
partial tag 902e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES block 4k+4 - round 10 low 903e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES block 4k+4 - round 10 high 904e1051a39Sopenharmony_ci 905e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 906e1051a39Sopenharmony_ci 907e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 908e1051a39Sopenharmony_ci 909e1051a39Sopenharmony_ci eor $res1b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 910e1051a39Sopenharmony_ci 911e1051a39Sopenharmony_ci b.gt .L128_enc_blocks_more_than_3 912e1051a39Sopenharmony_ci 913e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 914e1051a39Sopenharmony_ci movi $acc_l.8b, #0 915e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 916e1051a39Sopenharmony_ci 917e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #32 918e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 919e1051a39Sopenharmony_ci movi $acc_h.8b, #0 920e1051a39Sopenharmony_ci 921e1051a39Sopenharmony_ci movi $acc_m.8b, #0 922e1051a39Sopenharmony_ci b.gt .L128_enc_blocks_more_than_2 923e1051a39Sopenharmony_ci 924e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 925e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 926e1051a39Sopenharmony_ci 927e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 928e1051a39Sopenharmony_ci b.gt .L128_enc_blocks_more_than_1 929e1051a39Sopenharmony_ci 930e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 931e1051a39Sopenharmony_ci b .L128_enc_blocks_less_than_1 932e1051a39Sopenharmony_ci .L128_enc_blocks_more_than_3: @ blocks left > 3 933e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 934e1051a39Sopenharmony_ci 935e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-2 block - load input low & high 936e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 937e1051a39Sopenharmony_ci rev $input_l0, $input_l0 938e1051a39Sopenharmony_ci rev $input_h0, $input_h0 
939e1051a39Sopenharmony_ci#endif 940e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 941e1051a39Sopenharmony_ci 942e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 943e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES final-2 block - round 10 high 944e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES final-2 block - round 10 low 945e1051a39Sopenharmony_ci 946e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-2 block - mov low 947e1051a39Sopenharmony_ci 948e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 949e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final-2 block - mov high 950e1051a39Sopenharmony_ci 951e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 952e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 953e1051a39Sopenharmony_ci 954e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 955e1051a39Sopenharmony_ci 956e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 957e1051a39Sopenharmony_ci 958e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr1b @ AES final-2 block - result 959e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 960e1051a39Sopenharmony_ci 961e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 962e1051a39Sopenharmony_ci .L128_enc_blocks_more_than_2: @ blocks left > 2 963e1051a39Sopenharmony_ci 964e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 965e1051a39Sopenharmony_ci 966e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 967e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-1 block - load input low & high 968e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 969e1051a39Sopenharmony_ci rev $input_l0, $input_l0 970e1051a39Sopenharmony_ci rev $input_h0, $input_h0 
971e1051a39Sopenharmony_ci#endif 972e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 973e1051a39Sopenharmony_ci 974e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES final-1 block - round 10 low 975e1051a39Sopenharmony_ci 976e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-1 block - mov low 977e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES final-1 block - round 10 high 978e1051a39Sopenharmony_ci 979e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 980e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final-1 block - mov high 981e1051a39Sopenharmony_ci 982e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 983e1051a39Sopenharmony_ci 984e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 985e1051a39Sopenharmony_ci 986e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 987e1051a39Sopenharmony_ci 988e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 989e1051a39Sopenharmony_ci 990e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr2b @ AES final-1 block - result 991e1051a39Sopenharmony_ci 992e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 993e1051a39Sopenharmony_ci 994e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 995e1051a39Sopenharmony_ci 996e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 997e1051a39Sopenharmony_ci 998e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 999e1051a39Sopenharmony_ci .L128_enc_blocks_more_than_1: @ blocks left > 1 1000e1051a39Sopenharmony_ci 1001e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 1002e1051a39Sopenharmony_ci 1003e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 1004e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, 
[$input_ptr], #16 @ AES final block - load input low & high 1005e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1006e1051a39Sopenharmony_ci rev $input_l0, $input_l0 1007e1051a39Sopenharmony_ci rev $input_h0, $input_h0 1008e1051a39Sopenharmony_ci#endif 1009e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1010e1051a39Sopenharmony_ci 1011e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk10_h @ AES final block - round 10 high 1012e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk10_l @ AES final block - round 10 low 1013e1051a39Sopenharmony_ci 1014e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final block - mov low 1015e1051a39Sopenharmony_ci 1016e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 1017e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final block - mov high 1018e1051a39Sopenharmony_ci 1019e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 1020e1051a39Sopenharmony_ci 1021e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 1022e1051a39Sopenharmony_ci 1023e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 1024e1051a39Sopenharmony_ci 1025e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr3b @ AES final block - result 1026e1051a39Sopenharmony_ci 1027e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 1028e1051a39Sopenharmony_ci 1029e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 1030e1051a39Sopenharmony_ci 1031e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 1032e1051a39Sopenharmony_ci 1033e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 1034e1051a39Sopenharmony_ci 1035e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 1036e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 1037e1051a39Sopenharmony_ci 
.L128_enc_blocks_less_than_1: @ blocks left <= 1 1038e1051a39Sopenharmony_ci 1039e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 1040e1051a39Sopenharmony_ci mvn $rk10_l, xzr @ rk10_l = 0xffffffffffffffff 1041e1051a39Sopenharmony_ci 1042e1051a39Sopenharmony_ci mvn $rk10_h, xzr @ rk10_h = 0xffffffffffffffff 1043e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 1044e1051a39Sopenharmony_ci 1045e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 1046e1051a39Sopenharmony_ci 1047e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 1048e1051a39Sopenharmony_ci 1049e1051a39Sopenharmony_ci lsr $rk10_h, $rk10_h, $bit_length @ rk10_h is mask for top 64b of last block 1050e1051a39Sopenharmony_ci cmp $bit_length, #64 1051e1051a39Sopenharmony_ci 1052e1051a39Sopenharmony_ci csel $input_l0, $rk10_l, $rk10_h, lt 1053e1051a39Sopenharmony_ci csel $input_h0, $rk10_h, xzr, lt 1054e1051a39Sopenharmony_ci 1055e1051a39Sopenharmony_ci fmov $ctr0d, $input_l0 @ ctr0b is mask for last block 1056e1051a39Sopenharmony_ci 1057e1051a39Sopenharmony_ci fmov $ctr0.d[1], $input_h0 1058e1051a39Sopenharmony_ci 1059e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 1060e1051a39Sopenharmony_ci 1061e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 1062e1051a39Sopenharmony_ci 1063e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1064e1051a39Sopenharmony_ci 1065e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 1066e1051a39Sopenharmony_ci 1067e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 1068e1051a39Sopenharmony_ci ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 1069e1051a39Sopenharmony_ci 1070e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - 
mid 1071e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1072e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 1073e1051a39Sopenharmony_ci#else 1074e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 1075e1051a39Sopenharmony_ci#endif 1076e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 1077e1051a39Sopenharmony_ci 1078e1051a39Sopenharmony_ci pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 1079e1051a39Sopenharmony_ci 1080e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 1081e1051a39Sopenharmony_ci 1082e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 1083e1051a39Sopenharmony_ci 1084e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 1085e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 1086e1051a39Sopenharmony_ci 1087e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 1088e1051a39Sopenharmony_ci 1089e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 1090e1051a39Sopenharmony_ci 1091e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 1092e1051a39Sopenharmony_ci 1093e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1094e1051a39Sopenharmony_ci 1095e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1096e1051a39Sopenharmony_ci 1097e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 1098e1051a39Sopenharmony_ci 1099e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 1100e1051a39Sopenharmony_ci 1101e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1102e1051a39Sopenharmony_ci 1103e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1104e1051a39Sopenharmony_ci 1105e1051a39Sopenharmony_ci bif $res1b, $rk0, $ctr0b @ insert existing bytes in top 
end of result before storing 1106e1051a39Sopenharmony_ci 1107e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 1108e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr] @ store all 16B 1109e1051a39Sopenharmony_ci 1110e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 1111e1051a39Sopenharmony_ci 1112e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 1113e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 1114e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 1115e1051a39Sopenharmony_ci mov x0, $len 1116e1051a39Sopenharmony_ci st1 { $acc_l.16b }, [$current_tag] 1117e1051a39Sopenharmony_ci ldp x21, x22, [sp, #16] 1118e1051a39Sopenharmony_ci ldp x23, x24, [sp, #32] 1119e1051a39Sopenharmony_ci ldp d8, d9, [sp, #48] 1120e1051a39Sopenharmony_ci ldp d10, d11, [sp, #64] 1121e1051a39Sopenharmony_ci ldp d12, d13, [sp, #80] 1122e1051a39Sopenharmony_ci ldp d14, d15, [sp, #96] 1123e1051a39Sopenharmony_ci ldp x19, x20, [sp], #112 1124e1051a39Sopenharmony_ci ret 1125e1051a39Sopenharmony_ci 1126e1051a39Sopenharmony_ci.L128_enc_ret: 1127e1051a39Sopenharmony_ci mov w0, #0x0 1128e1051a39Sopenharmony_ci ret 1129e1051a39Sopenharmony_ci.size aes_gcm_enc_128_kernel,.-aes_gcm_enc_128_kernel 1130e1051a39Sopenharmony_ci___ 1131e1051a39Sopenharmony_ci 1132e1051a39Sopenharmony_ci######################################################################################### 1133e1051a39Sopenharmony_ci# size_t aes_gcm_dec_128_kernel(const unsigned char *in, 1134e1051a39Sopenharmony_ci# size_t len, 1135e1051a39Sopenharmony_ci# unsigned char *out, 1136e1051a39Sopenharmony_ci# const void *key, 1137e1051a39Sopenharmony_ci# unsigned char ivec[16], 1138e1051a39Sopenharmony_ci# u64 *Xi); 1139e1051a39Sopenharmony_ci# 1140e1051a39Sopenharmony_ci$code.=<<___; 1141e1051a39Sopenharmony_ci.global aes_gcm_dec_128_kernel 1142e1051a39Sopenharmony_ci.type aes_gcm_dec_128_kernel,%function 1143e1051a39Sopenharmony_ci.align 4 
1144e1051a39Sopenharmony_ciaes_gcm_dec_128_kernel: 1145e1051a39Sopenharmony_ci cbz x1, .L128_dec_ret 1146e1051a39Sopenharmony_ci stp x19, x20, [sp, #-112]! 1147e1051a39Sopenharmony_ci mov x16, x4 1148e1051a39Sopenharmony_ci mov x8, x5 1149e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 1150e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 1151e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 1152e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 1153e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 1154e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 1155e1051a39Sopenharmony_ci 1156e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 1157e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 1158e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 1159e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1160e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 1161e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 1162e1051a39Sopenharmony_ci#endif 1163e1051a39Sopenharmony_ci ldp $rk10_l, $rk10_h, [$cc, #160] @ load rk10 1164e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1165e1051a39Sopenharmony_ci ror $rk10_h, $rk10_h, 32 1166e1051a39Sopenharmony_ci ror $rk10_l, $rk10_l, 32 1167e1051a39Sopenharmony_ci#endif 1168e1051a39Sopenharmony_ci sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 1169e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 1170e1051a39Sopenharmony_ci 1171e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1172e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 1173e1051a39Sopenharmony_ci 1174e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1175e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1176e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 
1177e1051a39Sopenharmony_ci#endif 1178e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 1179e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 1180e1051a39Sopenharmony_ci 1181e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 1182e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 1183e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 1184e1051a39Sopenharmony_ci 1185e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 1186e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 1187e1051a39Sopenharmony_ci 1188e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 1189e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 1190e1051a39Sopenharmony_ci 1191e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 1192e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 1193e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 1194e1051a39Sopenharmony_ci 1195e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 1196e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 1197e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 1198e1051a39Sopenharmony_ci 1199e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 1200e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 1201e1051a39Sopenharmony_ci 1202e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 1203e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 1204e1051a39Sopenharmony_ci 1205e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 1206e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 1207e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 1208e1051a39Sopenharmony_ci 1209e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 1210e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, 
lsr #3 @ end_input_ptr 1211e1051a39Sopenharmony_ci 1212e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 1213e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 1214e1051a39Sopenharmony_ci 1215e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 1216e1051a39Sopenharmony_ci ld1 {$rk4s}, [$cc], #16 @ load rk4 1217e1051a39Sopenharmony_ci 1218e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 1219e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 1220e1051a39Sopenharmony_ci 1221e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 1222e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 1223e1051a39Sopenharmony_ci 1224e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 1225e1051a39Sopenharmony_ci 1226e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 1227e1051a39Sopenharmony_ci 1228e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 1229e1051a39Sopenharmony_ci 1230e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 1231e1051a39Sopenharmony_ci ld1 { $acc_lb}, [$current_tag] 1232e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 1233e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 1234e1051a39Sopenharmony_ci 1235e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 1236e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 1237e1051a39Sopenharmony_ci 1238e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 1239e1051a39Sopenharmony_ci 1240e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 1241e1051a39Sopenharmony_ci 1242e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 
1243e1051a39Sopenharmony_ci ld1 {$rk8s}, [$cc], #16 @ load rk8 1244e1051a39Sopenharmony_ci 1245e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 1246e1051a39Sopenharmony_ci 1247e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 1248e1051a39Sopenharmony_ci 1249e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 1250e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1251e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1252e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 1253e1051a39Sopenharmony_ci#endif 1254e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 1255e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 1256e1051a39Sopenharmony_ci 1257e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 1258e1051a39Sopenharmony_ci 1259e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 1260e1051a39Sopenharmony_ci 1261e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 1262e1051a39Sopenharmony_ci 1263e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 1264e1051a39Sopenharmony_ci 1265e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 1266e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1267e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1268e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 1269e1051a39Sopenharmony_ci#endif 1270e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 1271e1051a39Sopenharmony_ci 1272e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 1273e1051a39Sopenharmony_ci 1274e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 1275e1051a39Sopenharmony_ci 
1276e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 1277e1051a39Sopenharmony_ci 1278e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 1279e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 1280e1051a39Sopenharmony_ci 1281e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1282e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1283e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 1284e1051a39Sopenharmony_ci#endif 1285e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 1286e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 1287e1051a39Sopenharmony_ci 1288e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 1289e1051a39Sopenharmony_ci 1290e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 1291e1051a39Sopenharmony_ci 1292e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 1293e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b @ h2k | h1k 1294e1051a39Sopenharmony_ci 1295e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 1296e1051a39Sopenharmony_ci 1297e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 1298e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 1299e1051a39Sopenharmony_ci 1300e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 1301e1051a39Sopenharmony_ci 1302e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 1303e1051a39Sopenharmony_ci 1304e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 1305e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 1306e1051a39Sopenharmony_ci 1307e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 2 - round 9 1308e1051a39Sopenharmony_ci 
1309e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 3 - round 9 1310e1051a39Sopenharmony_ci 1311e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 0 - round 9 1312e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 1313e1051a39Sopenharmony_ci 1314e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 1 - round 9 1315e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 1316e1051a39Sopenharmony_ci b.ge .L128_dec_tail @ handle tail 1317e1051a39Sopenharmony_ci 1318e1051a39Sopenharmony_ci ld1 {$res0b, $res1b}, [$input_ptr], #32 @ AES block 0 - load ciphertext; AES block 1 - load ciphertext 1319e1051a39Sopenharmony_ci 1320e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 1 - result 1321e1051a39Sopenharmony_ci ld1 {$res2b}, [$input_ptr], #16 @ AES block 2 - load ciphertext 1322e1051a39Sopenharmony_ci 1323e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 0 - result 1324e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 0 1325e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 1326e1051a39Sopenharmony_ci 1327e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4 1328e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 1329e1051a39Sopenharmony_ci ld1 {$res3b}, [$input_ptr], #16 @ AES block 3 - load ciphertext 1330e1051a39Sopenharmony_ci 1331e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 1 1332e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 1 - mov low 1333e1051a39Sopenharmony_ci 1334e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 1 - mov high 1335e1051a39Sopenharmony_ci 1336e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 0 - mov low 1337e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 1338e1051a39Sopenharmony_ci 1339e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 0 - mov high 1340e1051a39Sopenharmony_ci 
1341e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 1342e1051a39Sopenharmony_ci 1343e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4 1344e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 1345e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk10_l @ AES block 1 - round 10 low 1346e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1347e1051a39Sopenharmony_ci rev $output_l1, $output_l1 1348e1051a39Sopenharmony_ci#endif 1349e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 1350e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 1351e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 1352e1051a39Sopenharmony_ci 1353e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 1354e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 6 1355e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 6 1356e1051a39Sopenharmony_ci 1357e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 1358e1051a39Sopenharmony_ci 1359e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk10_h @ AES block 1 - round 10 high 1360e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1361e1051a39Sopenharmony_ci rev $output_h1, $output_h1 1362e1051a39Sopenharmony_ci#endif 1363e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES block 0 - round 10 low 1364e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1365e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1366e1051a39Sopenharmony_ci#endif 1367e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 2 - result 1368e1051a39Sopenharmony_ci 1369e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES block 0 - round 10 high 1370e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1371e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1372e1051a39Sopenharmony_ci#endif 1373e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 0 - store result 1374e1051a39Sopenharmony_ci 
1375e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 1 - store result 1376e1051a39Sopenharmony_ci b.ge .L128_dec_prepretail @ do prepretail 1377e1051a39Sopenharmony_ci 1378e1051a39Sopenharmony_ci .L128_dec_main_loop: @ main loop start 1379e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 1380e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1381e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 1382e1051a39Sopenharmony_ci 1383e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 1384e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 1385e1051a39Sopenharmony_ci 1386e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 1387e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 1388e1051a39Sopenharmony_ci 1389e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 1390e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 1391e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 1392e1051a39Sopenharmony_ci 1393e1051a39Sopenharmony_ci mov $output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 1394e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 1395e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 1396e1051a39Sopenharmony_ci 1397e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 1398e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 1399e1051a39Sopenharmony_ci 1400e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 1401e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 1402e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 1403e1051a39Sopenharmony_ci 1404e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 1405e1051a39Sopenharmony_ci fmov 
$ctr3d, $ctr96_b64x @ CTR block 4k+7 1406e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 1407e1051a39Sopenharmony_ci 1408e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 1409e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 1410e1051a39Sopenharmony_ci 1411e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 1412e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 1413e1051a39Sopenharmony_ci 1414e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 1415e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 1416e1051a39Sopenharmony_ci 1417e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 1418e1051a39Sopenharmony_ci 1419e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 1420e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 1421e1051a39Sopenharmony_ci 1422e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 1423e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 1424e1051a39Sopenharmony_ci 1425e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 1426e1051a39Sopenharmony_ci 1427e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 1428e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 1429e1051a39Sopenharmony_ci 1430e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 1431e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk10_l @ AES block 4k+3 - round 10 low 1432e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1433e1051a39Sopenharmony_ci rev $output_l3, $output_l3 1434e1051a39Sopenharmony_ci#endif 1435e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 
1436e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk10_h @ AES block 4k+2 - round 10 high 1437e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1438e1051a39Sopenharmony_ci rev $output_h2, $output_h2 1439e1051a39Sopenharmony_ci#endif 1440e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 1441e1051a39Sopenharmony_ci 1442e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 1443e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 1444e1051a39Sopenharmony_ci 1445e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 1446e1051a39Sopenharmony_ci 1447e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 1448e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 1449e1051a39Sopenharmony_ci 1450e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 1451e1051a39Sopenharmony_ci 1452e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 1453e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 1454e1051a39Sopenharmony_ci 1455e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 1456e1051a39Sopenharmony_ci 1457e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 1458e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 1459e1051a39Sopenharmony_ci 1460e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 1461e1051a39Sopenharmony_ci 1462e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 1463e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 1464e1051a39Sopenharmony_ci 1465e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 1466e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 
- high 1467e1051a39Sopenharmony_ci 1468e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 1469e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk10_h @ AES block 4k+3 - round 10 high 1470e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1471e1051a39Sopenharmony_ci rev $output_h3, $output_h3 1472e1051a39Sopenharmony_ci#endif 1473e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 1474e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 1475e1051a39Sopenharmony_ci 1476e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 1477e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk10_l @ AES block 4k+2 - round 10 low 1478e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1479e1051a39Sopenharmony_ci rev $output_l2, $output_l2 1480e1051a39Sopenharmony_ci#endif 1481e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 1482e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 1483e1051a39Sopenharmony_ci 1484e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 1485e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 1486e1051a39Sopenharmony_ci 1487e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 1488e1051a39Sopenharmony_ci 1489e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 1490e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 1491e1051a39Sopenharmony_ci 1492e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 1493e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 - store result 1494e1051a39Sopenharmony_ci 1495e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 1496e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH 
block 4k+3 - high 1497e1051a39Sopenharmony_ci ld1 {$res0b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 1498e1051a39Sopenharmony_ci 1499e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 1500e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 1501e1051a39Sopenharmony_ci 1502e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 1503e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 1504e1051a39Sopenharmony_ci 1505e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 1506e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 1507e1051a39Sopenharmony_ci 1508e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 1509e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], #16 @ AES block 4k+3 - store result 1510e1051a39Sopenharmony_ci 1511e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 1512e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 1513e1051a39Sopenharmony_ci 1514e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 1515e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+8 1516e1051a39Sopenharmony_ci 1517e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1518e1051a39Sopenharmony_ci ld1 {$res1b}, [$input_ptr], #16 @ AES block 4k+5 - load ciphertext 1519e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1520e1051a39Sopenharmony_ci 1521e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 4k+4 - round 9 1522e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 1523e1051a39Sopenharmony_ci 1524e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4
1525e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 1526e1051a39Sopenharmony_ci 1527e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 4k+5 - round 9 1528e1051a39Sopenharmony_ci 1529e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 1530e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 4k+4 - result 1531e1051a39Sopenharmony_ci 1532e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 1533e1051a39Sopenharmony_ci ld1 {$res2b}, [$input_ptr], #16 @ AES block 4k+5 - load ciphertext 1534e1051a39Sopenharmony_ci 1535e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 1536e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 1537e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 4k+5 - result 1538e1051a39Sopenharmony_ci 1539e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 1540e1051a39Sopenharmony_ci ld1 {$res3b}, [$input_ptr], #16 @ AES block 4k+6 - load ciphertext 1541e1051a39Sopenharmony_ci 1542e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 1543e1051a39Sopenharmony_ci 1544e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+5 1545e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 1546e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 4k+4 - mov high 1547e1051a39Sopenharmony_ci 1548e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 1549e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 4k+4 - mov low 1550e1051a39Sopenharmony_ci 1551e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 1552e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 1553e1051a39Sopenharmony_ci 1554e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ 
MODULO - mid 64b align with low 1555e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4k+8 1556e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+9 1557e1051a39Sopenharmony_ci 1558e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 4k+6 - round 9 1559e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 1560e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1561e1051a39Sopenharmony_ci 1562e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 1563e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES block 4k+4 - round 10 high 1564e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1565e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1566e1051a39Sopenharmony_ci#endif 1567e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 1568e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 4k+5 - mov high 1569e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES block 4k+4 - round 10 low 1570e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1571e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1572e1051a39Sopenharmony_ci#endif 1573e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 4k+6 - result 1574e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 4k+5 - mov low 1575e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 1576e1051a39Sopenharmony_ci 1577e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 4k+7 - round 9 1578e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 1579e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 1580e1051a39Sopenharmony_ci 1581e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k+4 1582e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 1583e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 1584e1051a39Sopenharmony_ci 
1585e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 1586e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+10 1587e1051a39Sopenharmony_ci 1588e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk10_h @ AES block 4k+5 - round 10 high 1589e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1590e1051a39Sopenharmony_ci rev $output_h1, $output_h1 1591e1051a39Sopenharmony_ci#endif 1592e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 4k+4 - store result 1593e1051a39Sopenharmony_ci 1594e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk10_l @ AES block 4k+5 - round 10 low 1595e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1596e1051a39Sopenharmony_ci rev $output_l1, $output_l1 1597e1051a39Sopenharmony_ci#endif 1598e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 4k+5 - store result 1599e1051a39Sopenharmony_ci 1600e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 1601e1051a39Sopenharmony_ci b.lt L128_dec_main_loop 1602e1051a39Sopenharmony_ci 1603e1051a39Sopenharmony_ci .L128_dec_prepretail: @ PREPRETAIL 1604e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1605e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 1606e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 1607e1051a39Sopenharmony_ci 1608e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 1609e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 1610e1051a39Sopenharmony_ci 1611e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 1612e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 1613e1051a39Sopenharmony_ci 1614e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 1615e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 1616e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH 
block 4k+2 1617e1051a39Sopenharmony_ci 1618e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 1619e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 1620e1051a39Sopenharmony_ci 1621e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 1622e1051a39Sopenharmony_ci mov $output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 1623e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 1624e1051a39Sopenharmony_ci 1625e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 1626e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 1627e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 1628e1051a39Sopenharmony_ci 1629e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 1630e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 1631e1051a39Sopenharmony_ci 1632e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 1633e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 1634e1051a39Sopenharmony_ci 1635e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 1636e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 1637e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+7 1638e1051a39Sopenharmony_ci 1639e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 1640e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 1641e1051a39Sopenharmony_ci 1642e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 1643e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 1644e1051a39Sopenharmony_ci 1645e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 1646e1051a39Sopenharmony_ci 1647e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 
4k+6 - round 1 1648e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 1649e1051a39Sopenharmony_ci 1650e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 1651e1051a39Sopenharmony_ci 1652e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 1653e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 1654e1051a39Sopenharmony_ci 1655e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 1656e1051a39Sopenharmony_ci 1657e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 1658e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 1659e1051a39Sopenharmony_ci 1660e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 1661e1051a39Sopenharmony_ci 1662e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 1663e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 1664e1051a39Sopenharmony_ci 1665e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 1666e1051a39Sopenharmony_ci 1667e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 1668e1051a39Sopenharmony_ci 1669e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 1670e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 1671e1051a39Sopenharmony_ci 1672e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 1673e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 1674e1051a39Sopenharmony_ci 1675e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 1676e1051a39Sopenharmony_ci 1677e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 1678e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 1679e1051a39Sopenharmony_ci 1680e1051a39Sopenharmony_ci aese 
$ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 1681e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 1682e1051a39Sopenharmony_ci 1683e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 1684e1051a39Sopenharmony_ci 1685e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 1686e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 1687e1051a39Sopenharmony_ci 1688e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 1689e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk10_l @ AES block 4k+3 - round 10 low 1690e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1691e1051a39Sopenharmony_ci rev $output_l3, $output_l3 1692e1051a39Sopenharmony_ci#endif 1693e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 1694e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk10_l @ AES block 4k+2 - round 10 low 1695e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1696e1051a39Sopenharmony_ci rev $output_l2, $output_l2 1697e1051a39Sopenharmony_ci#endif 1698e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 1699e1051a39Sopenharmony_ci 1700e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 1701e1051a39Sopenharmony_ci 1702e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 1703e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 1704e1051a39Sopenharmony_ci 1705e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 1706e1051a39Sopenharmony_ci 1707e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 1708e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 1709e1051a39Sopenharmony_ci 1710e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 
4k+5 - round 4 1711e1051a39Sopenharmony_ci 1712e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 1713e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 1714e1051a39Sopenharmony_ci 1715e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 1716e1051a39Sopenharmony_ci 1717e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 1718e1051a39Sopenharmony_ci 1719e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 1720e1051a39Sopenharmony_ci 1721e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 1722e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 1723e1051a39Sopenharmony_ci 1724e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1725e1051a39Sopenharmony_ci 1726e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 1727e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1728e1051a39Sopenharmony_ci 1729e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 1730e1051a39Sopenharmony_ci 1731e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 1732e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 1733e1051a39Sopenharmony_ci 1734e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 1735e1051a39Sopenharmony_ci 1736e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 1737e1051a39Sopenharmony_ci 1738e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 1739e1051a39Sopenharmony_ci 1740e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - 
round 8 1741e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 1742e1051a39Sopenharmony_ci 1743e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 1744e1051a39Sopenharmony_ci 1745e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 1746e1051a39Sopenharmony_ci 1747e1051a39Sopenharmony_ci aese $ctr1b, $rk9 @ AES block 4k+5 - round 9 1748e1051a39Sopenharmony_ci 1749e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1750e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk10_h @ AES block 4k+3 - round 10 high 1751e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1752e1051a39Sopenharmony_ci rev $output_h3, $output_h3 1753e1051a39Sopenharmony_ci#endif 1754e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 1755e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1756e1051a39Sopenharmony_ci 1757e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 1758e1051a39Sopenharmony_ci 1759e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 1760e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 1761e1051a39Sopenharmony_ci 1762e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 1763e1051a39Sopenharmony_ci 1764e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 1765e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk10_h @ AES block 4k+2 - round 10 high 1766e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1767e1051a39Sopenharmony_ci rev $output_h2, $output_h2 1768e1051a39Sopenharmony_ci#endif 1769e1051a39Sopenharmony_ci aese $ctr0b, $rk9 @ AES block 4k+4 - round 9 1770e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 
- store result 1771e1051a39Sopenharmony_ci 1772e1051a39Sopenharmony_ci aese $ctr2b, $rk9 @ AES block 4k+6 - round 9 1773e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 1774e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], #16 @ AES block 4k+3 - store result 1775e1051a39Sopenharmony_ci 1776e1051a39Sopenharmony_ci aese $ctr3b, $rk9 @ AES block 4k+7 - round 9 1777e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 1778e1051a39Sopenharmony_ci .L128_dec_tail: @ TAIL 1779e1051a39Sopenharmony_ci 1780e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 1781e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 1782e1051a39Sopenharmony_ci 1783e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr0b @ AES block 4k+4 - result 1784e1051a39Sopenharmony_ci 1785e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 4k+4 - mov high 1786e1051a39Sopenharmony_ci 1787e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 4k+4 - mov low 1788e1051a39Sopenharmony_ci 1789e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 1790e1051a39Sopenharmony_ci 1791e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES block 4k+4 - round 10 high 1792e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1793e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1794e1051a39Sopenharmony_ci#endif 1795e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 1796e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES block 4k+4 - round 10 low 1797e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1798e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1799e1051a39Sopenharmony_ci#endif 1800e1051a39Sopenharmony_ci b.gt .L128_dec_blocks_more_than_3 1801e1051a39Sopenharmony_ci 1802e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 1803e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 
1804e1051a39Sopenharmony_ci movi $acc_l.8b, #0 1805e1051a39Sopenharmony_ci 1806e1051a39Sopenharmony_ci movi $acc_h.8b, #0 1807e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 1808e1051a39Sopenharmony_ci 1809e1051a39Sopenharmony_ci movi $acc_m.8b, #0 1810e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #32 1811e1051a39Sopenharmony_ci b.gt .L128_dec_blocks_more_than_2 1812e1051a39Sopenharmony_ci 1813e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 1814e1051a39Sopenharmony_ci 1815e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 1816e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 1817e1051a39Sopenharmony_ci b.gt .L128_dec_blocks_more_than_1 1818e1051a39Sopenharmony_ci 1819e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 1820e1051a39Sopenharmony_ci b .L128_dec_blocks_less_than_1 1821e1051a39Sopenharmony_ci .L128_dec_blocks_more_than_3: @ blocks left > 3 1822e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 1823e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-2 block - load ciphertext 1824e1051a39Sopenharmony_ci 1825e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1826e1051a39Sopenharmony_ci 1827e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 1828e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-3 block - store result 1829e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr1b @ AES final-2 block - result 1830e1051a39Sopenharmony_ci 1831e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 1832e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-2 block - mov high 1833e1051a39Sopenharmony_ci 1834e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 1835e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-2 block - mov low 1836e1051a39Sopenharmony_ci 1837e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 1838e1051a39Sopenharmony_ci 
1839e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 1840e1051a39Sopenharmony_ci 1841e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 1842e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES final-2 block - round 10 high 1843e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1844e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1845e1051a39Sopenharmony_ci#endif 1846e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 1847e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES final-2 block - round 10 low 1848e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1849e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1850e1051a39Sopenharmony_ci#endif 1851e1051a39Sopenharmony_ci .L128_dec_blocks_more_than_2: @ blocks left > 2 1852e1051a39Sopenharmony_ci 1853e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 1854e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-1 block - load ciphertext 1855e1051a39Sopenharmony_ci 1856e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1857e1051a39Sopenharmony_ci 1858e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr2b @ AES final-1 block - result 1859e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-2 block - store result 1860e1051a39Sopenharmony_ci 1861e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 1862e1051a39Sopenharmony_ci 1863e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 1864e1051a39Sopenharmony_ci 1865e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 1866e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-1 block - mov low 1867e1051a39Sopenharmony_ci 1868e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-1 block - mov high 1869e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 
1870e1051a39Sopenharmony_ci 1871e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 1872e1051a39Sopenharmony_ci 1873e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 1874e1051a39Sopenharmony_ci 1875e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES final-1 block - round 10 low 1876e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1877e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1878e1051a39Sopenharmony_ci#endif 1879e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 1880e1051a39Sopenharmony_ci 1881e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 1882e1051a39Sopenharmony_ci 1883e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 1884e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES final-1 block - round 10 high 1885e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1886e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1887e1051a39Sopenharmony_ci#endif 1888e1051a39Sopenharmony_ci .L128_dec_blocks_more_than_1: @ blocks left > 1 1889e1051a39Sopenharmony_ci 1890e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 1891e1051a39Sopenharmony_ci 1892e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final block - load ciphertext 1893e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1894e1051a39Sopenharmony_ci 1895e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 1896e1051a39Sopenharmony_ci 1897e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr3b @ AES final block - result 1898e1051a39Sopenharmony_ci 1899e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 1900e1051a39Sopenharmony_ci 1901e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-1 block - store result 1902e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final block - mov low 
1903e1051a39Sopenharmony_ci 1904e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final block - mov high 1905e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 1906e1051a39Sopenharmony_ci 1907e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 1908e1051a39Sopenharmony_ci 1909e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 1910e1051a39Sopenharmony_ci 1911e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 1912e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 1913e1051a39Sopenharmony_ci 1914e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 1915e1051a39Sopenharmony_ci 1916e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 1917e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk10_h @ AES final block - round 10 high 1918e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1919e1051a39Sopenharmony_ci rev $output_h0, $output_h0 1920e1051a39Sopenharmony_ci#endif 1921e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk10_l @ AES final block - round 10 low 1922e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 1923e1051a39Sopenharmony_ci rev $output_l0, $output_l0 1924e1051a39Sopenharmony_ci#endif 1925e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 1926e1051a39Sopenharmony_ci .L128_dec_blocks_less_than_1: @ blocks left <= 1 1927e1051a39Sopenharmony_ci 1928e1051a39Sopenharmony_ci mvn $rk10_h, xzr @ rk10_h = 0xffffffffffffffff 1929e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 1930e1051a39Sopenharmony_ci 1931e1051a39Sopenharmony_ci mvn $rk10_l, xzr @ rk10_l = 0xffffffffffffffff 1932e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 1933e1051a39Sopenharmony_ci 1934e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 
1935e1051a39Sopenharmony_ci 1936e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 1937e1051a39Sopenharmony_ci 1938e1051a39Sopenharmony_ci lsr $rk10_h, $rk10_h, $bit_length @ rk10_h is mask for top 64b of last block 1939e1051a39Sopenharmony_ci cmp $bit_length, #64 1940e1051a39Sopenharmony_ci 1941e1051a39Sopenharmony_ci csel $ctr96_b64x, $rk10_h, xzr, lt 1942e1051a39Sopenharmony_ci csel $ctr32x, $rk10_l, $rk10_h, lt 1943e1051a39Sopenharmony_ci 1944e1051a39Sopenharmony_ci fmov $ctr0d, $ctr32x @ ctr0b is mask for last block 1945e1051a39Sopenharmony_ci 1946e1051a39Sopenharmony_ci mov $ctr0.d[1], $ctr96_b64x 1947e1051a39Sopenharmony_ci 1948e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 1949e1051a39Sopenharmony_ci 1950e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 1951e1051a39Sopenharmony_ci 1952e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 1953e1051a39Sopenharmony_ci 1954e1051a39Sopenharmony_ci ldp $end_input_ptr, $main_end_input_ptr, [$output_ptr] @ load existing bytes we need to not overwrite 1955e1051a39Sopenharmony_ci 1956e1051a39Sopenharmony_ci and $output_h0, $output_h0, $ctr96_b64x 1957e1051a39Sopenharmony_ci 1958e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 1959e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 1960e1051a39Sopenharmony_ci 1961e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 1962e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 1963e1051a39Sopenharmony_ci 1964e1051a39Sopenharmony_ci pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 1965e1051a39Sopenharmony_ci 1966e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 1967e1051a39Sopenharmony_ci bic $end_input_ptr, $end_input_ptr, $ctr32x @ mask out low existing bytes 1968e1051a39Sopenharmony_ci and $output_l0, 
$output_l0, $ctr32x 1969e1051a39Sopenharmony_ci 1970e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 1971e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 1972e1051a39Sopenharmony_ci#else 1973e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 1974e1051a39Sopenharmony_ci#endif 1975e1051a39Sopenharmony_ci 1976e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 1977e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 1978e1051a39Sopenharmony_ci 1979e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 1980e1051a39Sopenharmony_ci 1981e1051a39Sopenharmony_ci bic $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x @ mask out high existing bytes 1982e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 1983e1051a39Sopenharmony_ci 1984e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 1985e1051a39Sopenharmony_ci 1986e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1987e1051a39Sopenharmony_ci 1988e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 1989e1051a39Sopenharmony_ci 1990e1051a39Sopenharmony_ci orr $output_l0, $output_l0, $end_input_ptr 1991e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 1992e1051a39Sopenharmony_ci 1993e1051a39Sopenharmony_ci orr $output_h0, $output_h0, $main_end_input_ptr 1994e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr] 1995e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1996e1051a39Sopenharmony_ci 1997e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 1998e1051a39Sopenharmony_ci 1999e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 2000e1051a39Sopenharmony_ci 2001e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2002e1051a39Sopenharmony_ci ext $acc_mb, 
$acc_mb, $acc_mb, #8 @ MODULO - other mid alignment

	eor	$acc_lb, $acc_lb, $mod_constant.16b     @ MODULO - fold into low

	eor	$acc_lb, $acc_lb, $acc_mb     @ MODULO - fold into low
	ext	$acc_lb, $acc_lb, $acc_lb, #8
	rev64	$acc_lb, $acc_lb
	mov	x0, $len
	st1	{ $acc_l.16b }, [$current_tag]

	ldp	x21, x22, [sp, #16]
	ldp	x23, x24, [sp, #32]
	ldp	d8, d9, [sp, #48]
	ldp	d10, d11, [sp, #64]
	ldp	d12, d13, [sp, #80]
	ldp	d14, d15, [sp, #96]
	ldp	x19, x20, [sp], #112
	ret

	.L128_dec_ret:
	mov	w0, #0x0
	ret
.size aes_gcm_dec_128_kernel,.-aes_gcm_dec_128_kernel
___
}

{
# ---------------------------------------------------------------------------
# Register aliases for the 192-bit kernels.
#
# Each lexical below holds the textual name of an AArch64 register; the names
# are interpolated into the assembly heredocs that follow.  Several aliases
# deliberately name the same physical register (noted per group), so two
# aliases of one register must never be live at the same time.
# ---------------------------------------------------------------------------

# General-purpose registers: input pointers and 64-bit data halves.
# x6/x7 are shared by input_l0/h0 and output_l0/h0; x19..x24 are shared by
# input_l1..h3 and output_l1..h3.
my ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map("x$_",(4..7));
my ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map("x$_",(19..24));
my ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map("x$_",(19..24));
my ($output_l0,$output_h0)=map("x$_",(6..7));

# Counter state (x9..x12), the final round key rk12 as two 64-bit halves
# (x13/x14) and the length (x15).  The w-names are the 32-bit views of the
# x-registers with the same number: w9/x9, w11/x11, w12/x12.
my $ctr32w="w9";
my ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk12_l,$rk12_h,$len)=map("x$_",(9..15));
my ($ctr96_t32w,$rctr32w)=map("w$_",(11..12));

# v0..v3 hold the four counter/AES blocks, v4..v7 the four result blocks.
# The b/(none)/d/q suffixes select the ".16b", bare "vN", "dN" and "qN"
# spellings of the same register.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map("v$_.16b",(0..7));
my ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map("v$_",(0..7));
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map("d$_",(0..7));
my ($res0q,$res1q,$res2q,$res3q)=map("q$_",(4..7));

# GHASH accumulators: high, mid and low parts in v9, v10 and v11.
my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(9..11));
my ($acc_h,$acc_m,$acc_l)=map("v$_",(9..11));
my ($acc_hd,$acc_md,$acc_ld)=map("d$_",(9..11));

# Hash-key registers: h1..h4 in v12..v15, h12k in v16, h34k in v17.
my ($h1,$h2,$h3,$h4,$h12k,$h34k)=map("v$_",(12..17));
my ($h1q,$h2q,$h3q,$h4q)=map("q$_",(12..15));
my ($h1b,$h2b,$h3b,$h4b)=map("v$_.16b",(12..15));

# GHASH temporaries.  These overlap each other and the result registers:
#   v8  : t0, t5, mod_constant
#   v30 : t1, t4, t9
#   v31 : t2, t6, mod_t
#   v4  : t3 (also res0 / ctr_t0)
#   v5  : t7 (also res1 / ctr_t1)
#   v6  : t8 (also res2 / ctr_t2)
my $t0="v8";
my $t0d="d8";
my $t3="v4";
my $t3d="d4";

my ($t1,$t2)=map("v$_",(30..31));
my ($t1d,$t2d)=map("d$_",(30..31));

my $t4="v30";
my $t4d="d30";
my $t5="v8";
my $t5d="d8";
my $t6="v31";
my $t6d="d31";

my $t7="v5";
my $t7d="d5";
my $t8="v6";
my $t8d="d6";
my $t9="v30";
my $t9d="d30";

# ctr_t0..ctr_t3 are further names for v4..v7 (the same registers as
# res0..res3).
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map("v$_",(4..7));
my ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map("d$_",(4..7));
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map("v$_.16b",(4..7));

# Reduction constant and its scratch register (v8 and v31, see table above).
my $mod_constantd="d8";
my $mod_constant="v8";
my $mod_t="v31";

# Round keys rk0..rk11 live in v18..v29 (".16b", "qN" and ".4s" spellings).
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9,$rk10,$rk11)=map("v$_.16b",(18..29));
my ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q,$rk10q,$rk11q)=map("q$_",(18..29));
my ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s,$rk10s,$rk11s)=map("v$_.4s",(18..29));
# Extra spellings of v20/v21/v22, i.e. the registers that hold rk2/rk3/rk4.
# NOTE(review): presumably used as GHASH scratch in the tail once those round
# keys are no longer needed - confirm against the kernel bodies.
my $rk2q1="v20.1q";
my $rk3q1="v21.1q";
my $rk4v="v22";
my $rk4d="d22";

#########################################################################################
# size_t aes_gcm_enc_192_kernel(const unsigned char *in,
#                               size_t len,
#                               unsigned char *out,
#                               const void *key,
#                               unsigned char ivec[16],
#                               u64 *Xi);
#
$code.=<<___;
.global aes_gcm_enc_192_kernel
.type aes_gcm_enc_192_kernel,%function
.align 4
aes_gcm_enc_192_kernel:
	cbz	x1, .L192_enc_ret
	stp	x19, x20, [sp, #-112]!
2104e1051a39Sopenharmony_ci mov x16, x4 2105e1051a39Sopenharmony_ci mov x8, x5 2106e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 2107e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 2108e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 2109e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 2110e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 2111e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 2112e1051a39Sopenharmony_ci 2113e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 2114e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2115e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 2116e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 2117e1051a39Sopenharmony_ci#endif 2118e1051a39Sopenharmony_ci ldp $rk12_l, $rk12_h, [$cc, #192] @ load rk12 2119e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2120e1051a39Sopenharmony_ci ror $rk12_l, $rk12_l, #32 2121e1051a39Sopenharmony_ci ror $rk12_h, $rk12_h, #32 2122e1051a39Sopenharmony_ci#endif 2123e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 2124e1051a39Sopenharmony_ci 2125e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 2126e1051a39Sopenharmony_ci 2127e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 2128e1051a39Sopenharmony_ci 2129e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 2130e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 2131e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 2132e1051a39Sopenharmony_ci 2133e1051a39Sopenharmony_ci ld1 {$rk4s}, [$cc], #16 @ load rk4 2134e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 2135e1051a39Sopenharmony_ci 2136e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 2137e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 2138e1051a39Sopenharmony_ci 2139e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 2140e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 2141e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 
2142e1051a39Sopenharmony_ci 2143e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 2144e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 2145e1051a39Sopenharmony_ci 2146e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 2147e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 2148e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 2149e1051a39Sopenharmony_ci 2150e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 2151e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 2152e1051a39Sopenharmony_ci 2153e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 2154e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 2155e1051a39Sopenharmony_ci 2156e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 2157e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 2158e1051a39Sopenharmony_ci 2159e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 2160e1051a39Sopenharmony_ci 2161e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 2162e1051a39Sopenharmony_ci 2163e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 2164e1051a39Sopenharmony_ci 2165e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 2166e1051a39Sopenharmony_ci ld1 { $acc_lb}, [$current_tag] 2167e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 2168e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 2169e1051a39Sopenharmony_ci 2170e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 2171e1051a39Sopenharmony_ci ld1 {$rk8s}, [$cc], #16 @ load rk8 2172e1051a39Sopenharmony_ci 2173e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 2174e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 2175e1051a39Sopenharmony_ci#ifndef 
__AARCH64EB__ 2176e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 2177e1051a39Sopenharmony_ci#endif 2178e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 2179e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 2180e1051a39Sopenharmony_ci 2181e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 2182e1051a39Sopenharmony_ci ld1 {$rk10s}, [$cc], #16 @ load rk10 2183e1051a39Sopenharmony_ci 2184e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 2185e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 2186e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 2187e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 2188e1051a39Sopenharmony_ci#endif 2189e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 2190e1051a39Sopenharmony_ci ld1 {$rk11s}, [$cc], #16 @ load rk11 2191e1051a39Sopenharmony_ci 2192e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 2193e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 2194e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 2195e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 2196e1051a39Sopenharmony_ci#endif 2197e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 2198e1051a39Sopenharmony_ci 2199e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 2200e1051a39Sopenharmony_ci 2201e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 2202e1051a39Sopenharmony_ci 2203e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 2204e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 2205e1051a39Sopenharmony_ci 2206e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 2207e1051a39Sopenharmony_ci 2208e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n 
aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 2209e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 2210e1051a39Sopenharmony_ci 2211e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 2212e1051a39Sopenharmony_ci 2213e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 2214e1051a39Sopenharmony_ci 2215e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 2216e1051a39Sopenharmony_ci 2217e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 2218e1051a39Sopenharmony_ci 2219e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 2220e1051a39Sopenharmony_ci 2221e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 2222e1051a39Sopenharmony_ci 2223e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 2224e1051a39Sopenharmony_ci 2225e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 2226e1051a39Sopenharmony_ci 2227e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 2228e1051a39Sopenharmony_ci 2229e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 2230e1051a39Sopenharmony_ci 2231e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 2232e1051a39Sopenharmony_ci 2233e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 2234e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 2235e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 2236e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 2237e1051a39Sopenharmony_ci#endif 2238e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 2239e1051a39Sopenharmony_ci 2240e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - 
round 6 2241e1051a39Sopenharmony_ci 2242e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 2243e1051a39Sopenharmony_ci 2244e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 2245e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 2246e1051a39Sopenharmony_ci 2247e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 2248e1051a39Sopenharmony_ci 2249e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 2250e1051a39Sopenharmony_ci 2251e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 2252e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 2253e1051a39Sopenharmony_ci 2254e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 2255e1051a39Sopenharmony_ci 2256e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 2257e1051a39Sopenharmony_ci 2258e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 2259e1051a39Sopenharmony_ci 2260e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 2261e1051a39Sopenharmony_ci 2262e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 2263e1051a39Sopenharmony_ci 2264e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 2265e1051a39Sopenharmony_ci 2266e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 2267e1051a39Sopenharmony_ci 2268e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 2269e1051a39Sopenharmony_ci 2270e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 2271e1051a39Sopenharmony_ci 2272e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 
2273e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 2274e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 2275e1051a39Sopenharmony_ci 2276e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 2277e1051a39Sopenharmony_ci sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 2278e1051a39Sopenharmony_ci 2279e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b @ h2k | h1k 2280e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2281e1051a39Sopenharmony_ci 2282e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 2283e1051a39Sopenharmony_ci 2284e1051a39Sopenharmony_ci aese $ctr2b, $rk11 @ AES block 2 - round 11 2285e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 2286e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 2287e1051a39Sopenharmony_ci 2288e1051a39Sopenharmony_ci aese $ctr1b, $rk11 @ AES block 1 - round 11 2289e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 2290e1051a39Sopenharmony_ci 2291e1051a39Sopenharmony_ci aese $ctr0b, $rk11 @ AES block 0 - round 11 2292e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 2293e1051a39Sopenharmony_ci 2294e1051a39Sopenharmony_ci aese $ctr3b, $rk11 @ AES block 3 - round 11 2295e1051a39Sopenharmony_ci b.ge .L192_enc_tail @ handle tail 2296e1051a39Sopenharmony_ci 2297e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 2298e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 0 - load plaintext 2299e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2300e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2301e1051a39Sopenharmony_ci rev $input_h0, $input_h0 2302e1051a39Sopenharmony_ci#endif 2303e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl 
#32 @ CTR block 4 2304e1051a39Sopenharmony_ci ldp $input_l2, $input_h2, [$input_ptr, #32] @ AES block 2 - load plaintext 2305e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2306e1051a39Sopenharmony_ci rev $input_l2, $input_l2 2307e1051a39Sopenharmony_ci rev $input_h2, $input_h2 2308e1051a39Sopenharmony_ci#endif 2309e1051a39Sopenharmony_ci ldp $input_l3, $input_h3, [$input_ptr, #48] @ AES block 3 - load plaintext 2310e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2311e1051a39Sopenharmony_ci rev $input_l3, $input_l3 2312e1051a39Sopenharmony_ci rev $input_h3, $input_h3 2313e1051a39Sopenharmony_ci#endif 2314e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 1 - load plaintext 2315e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2316e1051a39Sopenharmony_ci rev $input_l1, $input_l1 2317e1051a39Sopenharmony_ci rev $input_h1, $input_h1 2318e1051a39Sopenharmony_ci#endif 2319e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 2320e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 2321e1051a39Sopenharmony_ci 2322e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES block 0 - round 12 low 2323e1051a39Sopenharmony_ci 2324e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES block 0 - round 12 high 2325e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk12_h @ AES block 2 - round 12 high 2326e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 0 - mov low 2327e1051a39Sopenharmony_ci 2328e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk12_h @ AES block 3 - round 12 high 2329e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 0 - mov high 2330e1051a39Sopenharmony_ci 2331e1051a39Sopenharmony_ci eor $input_l2, $input_l2, $rk12_l @ AES block 2 - round 12 low 2332e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk12_l @ AES block 1 - round 12 low 2333e1051a39Sopenharmony_ci 2334e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 1 - mov low 
2335e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk12_h @ AES block 1 - round 12 high 2336e1051a39Sopenharmony_ci 2337e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 1 - mov high 2338e1051a39Sopenharmony_ci 2339e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk12_l @ AES block 3 - round 12 low 2340e1051a39Sopenharmony_ci fmov $ctr_t2d, $input_l2 @ AES block 2 - mov low 2341e1051a39Sopenharmony_ci 2342e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 2343e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 0 - result 2344e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 2345e1051a39Sopenharmony_ci 2346e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4 2347e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 2348e1051a39Sopenharmony_ci 2349e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 2350e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 2351e1051a39Sopenharmony_ci 2352e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES block 3 - mov low 2353e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 0 - store result 2354e1051a39Sopenharmony_ci 2355e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 2 - mov high 2356e1051a39Sopenharmony_ci 2357e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 1 - result 2358e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 2359e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 1 - store result 2360e1051a39Sopenharmony_ci 2361e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 3 - mov high 2362e1051a39Sopenharmony_ci 2363e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 2364e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 6 2365e1051a39Sopenharmony_ci 2366e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 2367e1051a39Sopenharmony_ci 2368e1051a39Sopenharmony_ci add 
$rctr32w, $rctr32w, #1 @ CTR block 6 2369e1051a39Sopenharmony_ci eor $res2b, $ctr_t2b, $ctr2b @ AES block 2 - result 2370e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 6 2371e1051a39Sopenharmony_ci 2372e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 6 2373e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 7 2374e1051a39Sopenharmony_ci 2375e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 7 2376e1051a39Sopenharmony_ci st1 { $res2b}, [$output_ptr], #16 @ AES block 2 - store result 2377e1051a39Sopenharmony_ci 2378e1051a39Sopenharmony_ci eor $res3b, $ctr_t3b, $ctr3b @ AES block 3 - result 2379e1051a39Sopenharmony_ci st1 { $res3b}, [$output_ptr], #16 @ AES block 3 - store result 2380e1051a39Sopenharmony_ci b.ge .L192_enc_prepretail @ do prepretail 2381e1051a39Sopenharmony_ci 2382e1051a39Sopenharmony_ci .L192_enc_main_loop: @ main loop start 2383e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 2384e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 2385e1051a39Sopenharmony_ci 2386e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 2387e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 4k+5 - load plaintext 2388e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2389e1051a39Sopenharmony_ci rev $input_l1, $input_l1 2390e1051a39Sopenharmony_ci rev $input_h1, $input_h1 2391e1051a39Sopenharmony_ci#endif 2392e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 2393e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+3 2394e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k (only t0 is free) 2395e1051a39Sopenharmony_ci 2396e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 2397e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+3 2398e1051a39Sopenharmony_ci 2399e1051a39Sopenharmony_ci pmull2 
$t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 2400e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 2401e1051a39Sopenharmony_ci ldp $input_l2, $input_h2, [$input_ptr, #32] @ AES block 4k+6 - load plaintext 2402e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2403e1051a39Sopenharmony_ci rev $input_l2, $input_l2 2404e1051a39Sopenharmony_ci rev $input_h2, $input_h2 2405e1051a39Sopenharmony_ci#endif 2406e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 2407e1051a39Sopenharmony_ci ldp $input_l3, $input_h3, [$input_ptr, #48] @ AES block 4k+3 - load plaintext 2408e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2409e1051a39Sopenharmony_ci rev $input_l3, $input_l3 2410e1051a39Sopenharmony_ci rev $input_h3, $input_h3 2411e1051a39Sopenharmony_ci#endif 2412e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 2413e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 2414e1051a39Sopenharmony_ci 2415e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 2416e1051a39Sopenharmony_ci 2417e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 2418e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 (t0, t1, and t2 free) 2419e1051a39Sopenharmony_ci 2420e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 2421e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk12_h @ AES block 4k+3 - round 12 high 2422e1051a39Sopenharmony_ci 2423e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 2424e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 2425e1051a39Sopenharmony_ci 2426e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 2427e1051a39Sopenharmony_ci 2428e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 2429e1051a39Sopenharmony_ci 
eor $input_l2, $input_l2, $rk12_l @ AES block 4k+6 - round 12 low 2430e1051a39Sopenharmony_ci 2431e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 2432e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 2433e1051a39Sopenharmony_ci 2434e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 2435e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk12_l @ AES block 4k+5 - round 12 low 2436e1051a39Sopenharmony_ci 2437e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 2438e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 2439e1051a39Sopenharmony_ci 2440e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 2441e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 2442e1051a39Sopenharmony_ci 2443e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 2444e1051a39Sopenharmony_ci 2445e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 2446e1051a39Sopenharmony_ci 2447e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 2448e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 2449e1051a39Sopenharmony_ci 2450e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 2451e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 2452e1051a39Sopenharmony_ci 2453e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 2454e1051a39Sopenharmony_ci 2455e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 2456e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 2457e1051a39Sopenharmony_ci 2458e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 2459e1051a39Sopenharmony_ci 
2460e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 2461e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk12_h @ AES block 4k+5 - round 12 high 2462e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 2463e1051a39Sopenharmony_ci 2464e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 2465e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 2466e1051a39Sopenharmony_ci 2467e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 2468e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 2469e1051a39Sopenharmony_ci 2470e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 2471e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk12_h @ AES block 4k+6 - round 12 high 2472e1051a39Sopenharmony_ci 2473e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 2474e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk12_l @ AES block 4k+3 - round 12 low 2475e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 2476e1051a39Sopenharmony_ci 2477e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 2478e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+8 2479e1051a39Sopenharmony_ci 2480e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 2481e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 2482e1051a39Sopenharmony_ci 2483e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 2484e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 2485e1051a39Sopenharmony_ci 2486e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 2487e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 4k+4 - load plaintext 
2488e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2489e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2490e1051a39Sopenharmony_ci rev $input_h0, $input_h0 2491e1051a39Sopenharmony_ci#endif 2492e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 2493e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 2494e1051a39Sopenharmony_ci 2495e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 2496e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 2497e1051a39Sopenharmony_ci 2498e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 2499e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 2500e1051a39Sopenharmony_ci 2501e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 2502e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES block 4k+4 - round 12 high 2503e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 2504e1051a39Sopenharmony_ci 2505e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 2506e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES block 4k+4 - round 12 low 2507e1051a39Sopenharmony_ci 2508e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 2509e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 2510e1051a39Sopenharmony_ci 2511e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 2512e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 2513e1051a39Sopenharmony_ci 2514e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 2515e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 4k+5 - mov low 2516e1051a39Sopenharmony_ci 2517e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 
4k+5 - round 7 2518e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 2519e1051a39Sopenharmony_ci 2520e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 2521e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 4k+5 - mov high 2522e1051a39Sopenharmony_ci 2523e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 2524e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 2525e1051a39Sopenharmony_ci 2526e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 2527e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 2528e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 2529e1051a39Sopenharmony_ci 2530e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 2531e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 2532e1051a39Sopenharmony_ci 2533e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 2534e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES block 4k+3 - mov low 2535e1051a39Sopenharmony_ci 2536e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 2537e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 2538e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 2539e1051a39Sopenharmony_ci 2540e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 2541e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 4k+3 - mov high 2542e1051a39Sopenharmony_ci 2543e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2544e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2545e1051a39Sopenharmony_ci fmov $ctr_t2d, $input_l2 @ AES block 4k+6 - mov low 
2546e1051a39Sopenharmony_ci 2547e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 2548e1051a39Sopenharmony_ci 2549e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 2550e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 2551e1051a39Sopenharmony_ci 2552e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 2553e1051a39Sopenharmony_ci 2554e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 2555e1051a39Sopenharmony_ci 2556e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 2557e1051a39Sopenharmony_ci 2558e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 2559e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 2560e1051a39Sopenharmony_ci 2561e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 2562e1051a39Sopenharmony_ci 2563e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 2564e1051a39Sopenharmony_ci 2565e1051a39Sopenharmony_ci aese $ctr0b, $rk11 @ AES block 4k+4 - round 11 2566e1051a39Sopenharmony_ci 2567e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 2568e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 2569e1051a39Sopenharmony_ci 2570e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 2571e1051a39Sopenharmony_ci 2572e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 2573e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 2574e1051a39Sopenharmony_ci 2575e1051a39Sopenharmony_ci aese $ctr1b, $rk11 @ AES block 4k+5 - round 11 2576e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4k+8 2577e1051a39Sopenharmony_ci 
rev $ctr32w, $rctr32w @ CTR block 4k+9 2578e1051a39Sopenharmony_ci 2579e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2580e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 4k+6 - mov high 2581e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 4k+4 - store result 2582e1051a39Sopenharmony_ci 2583e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 2584e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 2585e1051a39Sopenharmony_ci 2586e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 4k+5 - result 2587e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 2588e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 2589e1051a39Sopenharmony_ci 2590e1051a39Sopenharmony_ci aese $ctr2b, $rk11 @ AES block 4k+6 - round 11 2591e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 2592e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 2593e1051a39Sopenharmony_ci 2594e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+10 2595e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 2596e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 2597e1051a39Sopenharmony_ci 2598e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 4k+5 - store result 2599e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 2600e1051a39Sopenharmony_ci 2601e1051a39Sopenharmony_ci aese $ctr3b, $rk11 @ AES block 4k+7 - round 11 2602e1051a39Sopenharmony_ci eor $res2b, $ctr_t2b, $ctr2b @ AES block 4k+6 - result 2603e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+10 2604e1051a39Sopenharmony_ci 2605e1051a39Sopenharmony_ci st1 { $res2b}, [$output_ptr], #16 @ AES block 4k+6 - store result 2606e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+10 
2607e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+11 2608e1051a39Sopenharmony_ci 2609e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 2610e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+11 2611e1051a39Sopenharmony_ci 2612e1051a39Sopenharmony_ci eor $res3b, $ctr_t3b, $ctr3b @ AES block 4k+3 - result 2613e1051a39Sopenharmony_ci st1 { $res3b}, [$output_ptr], #16 @ AES block 4k+3 - store result 2614e1051a39Sopenharmony_ci b.lt .L192_enc_main_loop 2615e1051a39Sopenharmony_ci 2616e1051a39Sopenharmony_ci .L192_enc_prepretail: @ PREPRETAIL 2617e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 2618e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k (only t0 is free) 2619e1051a39Sopenharmony_ci 2620e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+3 2621e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 2622e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 2623e1051a39Sopenharmony_ci 2624e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 2625e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 2626e1051a39Sopenharmony_ci 2627e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 2628e1051a39Sopenharmony_ci 2629e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+3 2630e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 2631e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 2632e1051a39Sopenharmony_ci 2633e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 2634e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 (t0, t1, and t2 free) 2635e1051a39Sopenharmony_ci 2636e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 2637e1051a39Sopenharmony_ci 2638e1051a39Sopenharmony_ci 
pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 2639e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 2640e1051a39Sopenharmony_ci 2641e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 2642e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 2643e1051a39Sopenharmony_ci 2644e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 2645e1051a39Sopenharmony_ci 2646e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 2647e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 2648e1051a39Sopenharmony_ci 2649e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 2650e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 2651e1051a39Sopenharmony_ci 2652e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 2653e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 2654e1051a39Sopenharmony_ci 2655e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 2656e1051a39Sopenharmony_ci 2657e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 2658e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 2659e1051a39Sopenharmony_ci 2660e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 2661e1051a39Sopenharmony_ci 2662e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 2663e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 2664e1051a39Sopenharmony_ci 2665e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 2666e1051a39Sopenharmony_ci 2667e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 2668e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 
2669e1051a39Sopenharmony_ci 2670e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 2671e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 2672e1051a39Sopenharmony_ci 2673e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 2674e1051a39Sopenharmony_ci 2675e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 2676e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 2677e1051a39Sopenharmony_ci 2678e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 2679e1051a39Sopenharmony_ci 2680e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 2681e1051a39Sopenharmony_ci 2682e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 2683e1051a39Sopenharmony_ci 2684e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 2685e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 2686e1051a39Sopenharmony_ci 2687e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 2688e1051a39Sopenharmony_ci 2689e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 2690e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 2691e1051a39Sopenharmony_ci 2692e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 2693e1051a39Sopenharmony_ci 2694e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 2695e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 2696e1051a39Sopenharmony_ci 2697e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 2698e1051a39Sopenharmony_ci 2699e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 2700e1051a39Sopenharmony_ci eor 
$acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 2701e1051a39Sopenharmony_ci 2702e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 2703e1051a39Sopenharmony_ci 2704e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 2705e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 2706e1051a39Sopenharmony_ci 2707e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 2708e1051a39Sopenharmony_ci 2709e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 2710e1051a39Sopenharmony_ci 2711e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 2712e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 2713e1051a39Sopenharmony_ci 2714e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 2715e1051a39Sopenharmony_ci 2716e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 2717e1051a39Sopenharmony_ci 2718e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 2719e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 2720e1051a39Sopenharmony_ci 2721e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 2722e1051a39Sopenharmony_ci 2723e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 2724e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ karatsuba tidy up 2725e1051a39Sopenharmony_ci 2726e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 2727e1051a39Sopenharmony_ci 2728e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 2729e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 2730e1051a39Sopenharmony_ci 2731e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n 
aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 2732e1051a39Sopenharmony_ci 2733e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 2734e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_lb 2735e1051a39Sopenharmony_ci 2736e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 2737e1051a39Sopenharmony_ci 2738e1051a39Sopenharmony_ci pmull $t1.1q, $acc_h.1d, $mod_constant.1d 2739e1051a39Sopenharmony_ci 2740e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 2741e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 2742e1051a39Sopenharmony_ci 2743e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 2744e1051a39Sopenharmony_ci 2745e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 2746e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t1.16b 2747e1051a39Sopenharmony_ci 2748e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 2749e1051a39Sopenharmony_ci 2750e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 2751e1051a39Sopenharmony_ci 2752e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 2753e1051a39Sopenharmony_ci 2754e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 2755e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb 2756e1051a39Sopenharmony_ci 2757e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 2758e1051a39Sopenharmony_ci 2759e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 2760e1051a39Sopenharmony_ci 2761e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 2762e1051a39Sopenharmony_ci 2763e1051a39Sopenharmony_ci pmull $t1.1q, $acc_m.1d, $mod_constant.1d 
2764e1051a39Sopenharmony_ci 2765e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 2766e1051a39Sopenharmony_ci 2767e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 2768e1051a39Sopenharmony_ci 2769e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 2770e1051a39Sopenharmony_ci 2771e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 2772e1051a39Sopenharmony_ci 2773e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 2774e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t1.16b 2775e1051a39Sopenharmony_ci 2776e1051a39Sopenharmony_ci aese $ctr0b, $rk11 @ AES block 4k+4 - round 11 2777e1051a39Sopenharmony_ci 2778e1051a39Sopenharmony_ci aese $ctr3b, $rk11 @ AES block 4k+7 - round 11 2779e1051a39Sopenharmony_ci 2780e1051a39Sopenharmony_ci aese $ctr2b, $rk11 @ AES block 4k+6 - round 11 2781e1051a39Sopenharmony_ci 2782e1051a39Sopenharmony_ci aese $ctr1b, $rk11 @ AES block 4k+5 - round 11 2783e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb 2784e1051a39Sopenharmony_ci .L192_enc_tail: @ TAIL 2785e1051a39Sopenharmony_ci 2786e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 2787e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES block 4k+4 - load plaintext 2788e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2789e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2790e1051a39Sopenharmony_ci rev $input_h0, $input_h0 2791e1051a39Sopenharmony_ci#endif 2792e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES block 4k+4 - round 12 low 2793e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES block 4k+4 - round 12 high 2794e1051a39Sopenharmony_ci 2795e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 2796e1051a39Sopenharmony_ci 2797e1051a39Sopenharmony_ci fmov 
$ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 2798e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 2799e1051a39Sopenharmony_ci 2800e1051a39Sopenharmony_ci eor $res1b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 2801e1051a39Sopenharmony_ci 2802e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 2803e1051a39Sopenharmony_ci b.gt .L192_enc_blocks_more_than_3 2804e1051a39Sopenharmony_ci 2805e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 2806e1051a39Sopenharmony_ci movi $acc_m.8b, #0 2807e1051a39Sopenharmony_ci 2808e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 2809e1051a39Sopenharmony_ci movi $acc_h.8b, #0 2810e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #32 2811e1051a39Sopenharmony_ci 2812e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 2813e1051a39Sopenharmony_ci movi $acc_l.8b, #0 2814e1051a39Sopenharmony_ci b.gt .L192_enc_blocks_more_than_2 2815e1051a39Sopenharmony_ci 2816e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 2817e1051a39Sopenharmony_ci 2818e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 2819e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 2820e1051a39Sopenharmony_ci b.gt .L192_enc_blocks_more_than_1 2821e1051a39Sopenharmony_ci 2822e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 2823e1051a39Sopenharmony_ci b .L192_enc_blocks_less_than_1 2824e1051a39Sopenharmony_ci .L192_enc_blocks_more_than_3: @ blocks left > 3 2825e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 2826e1051a39Sopenharmony_ci 2827e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-2 block - load input low & high 2828e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2829e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2830e1051a39Sopenharmony_ci rev $input_h0, $input_h0 2831e1051a39Sopenharmony_ci#endif 2832e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 2833e1051a39Sopenharmony_ci 2834e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES final-2 block - 
round 12 low 2835e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 2836e1051a39Sopenharmony_ci 2837e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES final-2 block - round 12 high 2838e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-2 block - mov low 2839e1051a39Sopenharmony_ci 2840e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final-2 block - mov high 2841e1051a39Sopenharmony_ci 2842e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 2843e1051a39Sopenharmony_ci 2844e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 2845e1051a39Sopenharmony_ci 2846e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 2847e1051a39Sopenharmony_ci 2848e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 2849e1051a39Sopenharmony_ci 2850e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 2851e1051a39Sopenharmony_ci 2852e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 2853e1051a39Sopenharmony_ci 2854e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 2855e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr1b @ AES final-2 block - result 2856e1051a39Sopenharmony_ci .L192_enc_blocks_more_than_2: @ blocks left > 2 2857e1051a39Sopenharmony_ci 2858e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 2859e1051a39Sopenharmony_ci 2860e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 2861e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-1 block - load input low & high 2862e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2863e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2864e1051a39Sopenharmony_ci rev $input_h0, $input_h0 2865e1051a39Sopenharmony_ci#endif 2866e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 
2867e1051a39Sopenharmony_ci 2868e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES final-1 block - round 12 high 2869e1051a39Sopenharmony_ci 2870e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 2871e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 2872e1051a39Sopenharmony_ci 2873e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 2874e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES final-1 block - round 12 low 2875e1051a39Sopenharmony_ci 2876e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-1 block - mov low 2877e1051a39Sopenharmony_ci 2878e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final-1 block - mov high 2879e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 2880e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 2881e1051a39Sopenharmony_ci 2882e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 2883e1051a39Sopenharmony_ci 2884e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 2885e1051a39Sopenharmony_ci 2886e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 2887e1051a39Sopenharmony_ci 2888e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr2b @ AES final-1 block - result 2889e1051a39Sopenharmony_ci 2890e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 2891e1051a39Sopenharmony_ci .L192_enc_blocks_more_than_1: @ blocks left > 1 2892e1051a39Sopenharmony_ci 2893e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 2894e1051a39Sopenharmony_ci 2895e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final block - load input low & high 2896e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 2897e1051a39Sopenharmony_ci rev $input_l0, $input_l0 2898e1051a39Sopenharmony_ci rev $input_h0, $input_h0 
2899e1051a39Sopenharmony_ci#endif 2900e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 2901e1051a39Sopenharmony_ci 2902e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk12_l @ AES final block - round 12 low 2903e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 2904e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 2905e1051a39Sopenharmony_ci 2906e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 2907e1051a39Sopenharmony_ci 2908e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 2909e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk12_h @ AES final block - round 12 high 2910e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final block - mov low 2911e1051a39Sopenharmony_ci 2912e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 2913e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final block - mov high 2914e1051a39Sopenharmony_ci 2915e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 2916e1051a39Sopenharmony_ci 2917e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 2918e1051a39Sopenharmony_ci 2919e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 2920e1051a39Sopenharmony_ci 2921e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 2922e1051a39Sopenharmony_ci 2923e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr3b @ AES final block - result 2924e1051a39Sopenharmony_ci 2925e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 2926e1051a39Sopenharmony_ci 2927e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 2928e1051a39Sopenharmony_ci .L192_enc_blocks_less_than_1: @ blocks left <= 1 2929e1051a39Sopenharmony_ci 2930e1051a39Sopenharmony_ci ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last 
block is to be stored 2931e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 2932e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 2933e1051a39Sopenharmony_ci#else 2934e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 2935e1051a39Sopenharmony_ci#endif 2936e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 2937e1051a39Sopenharmony_ci 2938e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 2939e1051a39Sopenharmony_ci mvn $rk12_h, xzr @ rk12_h = 0xffffffffffffffff 2940e1051a39Sopenharmony_ci 2941e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 2942e1051a39Sopenharmony_ci mvn $rk12_l, xzr @ rk12_l = 0xffffffffffffffff 2943e1051a39Sopenharmony_ci 2944e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 2945e1051a39Sopenharmony_ci 2946e1051a39Sopenharmony_ci lsr $rk12_h, $rk12_h, $bit_length @ rk12_h is mask for top 64b of last block 2947e1051a39Sopenharmony_ci cmp $bit_length, #64 2948e1051a39Sopenharmony_ci 2949e1051a39Sopenharmony_ci csel $input_l0, $rk12_l, $rk12_h, lt 2950e1051a39Sopenharmony_ci csel $input_h0, $rk12_h, xzr, lt 2951e1051a39Sopenharmony_ci 2952e1051a39Sopenharmony_ci fmov $ctr0d, $input_l0 @ ctr0b is mask for last block 2953e1051a39Sopenharmony_ci 2954e1051a39Sopenharmony_ci fmov $ctr0.d[1], $input_h0 2955e1051a39Sopenharmony_ci 2956e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 2957e1051a39Sopenharmony_ci 2958e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 2959e1051a39Sopenharmony_ci 2960e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 2961e1051a39Sopenharmony_ci 2962e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 2963e1051a39Sopenharmony_ci 2964e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 2965e1051a39Sopenharmony_ci 2966e1051a39Sopenharmony_ci pmull2 $rk2q1, 
$res0.2d, $h1.2d @ GHASH final block - high 2967e1051a39Sopenharmony_ci 2968e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 2969e1051a39Sopenharmony_ci 2970e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 2971e1051a39Sopenharmony_ci 2972e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 2973e1051a39Sopenharmony_ci 2974e1051a39Sopenharmony_ci pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 2975e1051a39Sopenharmony_ci 2976e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 2977e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 2978e1051a39Sopenharmony_ci 2979e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 2980e1051a39Sopenharmony_ci 2981e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 2982e1051a39Sopenharmony_ci 2983e1051a39Sopenharmony_ci bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 2984e1051a39Sopenharmony_ci 2985e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 2986e1051a39Sopenharmony_ci 2987e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2988e1051a39Sopenharmony_ci 2989e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2990e1051a39Sopenharmony_ci 2991e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 2992e1051a39Sopenharmony_ci 2993e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 2994e1051a39Sopenharmony_ci 2995e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2996e1051a39Sopenharmony_ci 2997e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 2998e1051a39Sopenharmony_ci 2999e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 
3000e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 3001e1051a39Sopenharmony_ci 3002e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr] @ store all 16B 3003e1051a39Sopenharmony_ci 3004e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 3005e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 3006e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 3007e1051a39Sopenharmony_ci mov x0, $len 3008e1051a39Sopenharmony_ci st1 { $acc_l.16b }, [$current_tag] 3009e1051a39Sopenharmony_ci 3010e1051a39Sopenharmony_ci ldp x21, x22, [sp, #16] 3011e1051a39Sopenharmony_ci ldp x23, x24, [sp, #32] 3012e1051a39Sopenharmony_ci ldp d8, d9, [sp, #48] 3013e1051a39Sopenharmony_ci ldp d10, d11, [sp, #64] 3014e1051a39Sopenharmony_ci ldp d12, d13, [sp, #80] 3015e1051a39Sopenharmony_ci ldp d14, d15, [sp, #96] 3016e1051a39Sopenharmony_ci ldp x19, x20, [sp], #112 3017e1051a39Sopenharmony_ci ret 3018e1051a39Sopenharmony_ci 3019e1051a39Sopenharmony_ci.L192_enc_ret: 3020e1051a39Sopenharmony_ci mov w0, #0x0 3021e1051a39Sopenharmony_ci ret 3022e1051a39Sopenharmony_ci.size aes_gcm_enc_192_kernel,.-aes_gcm_enc_192_kernel 3023e1051a39Sopenharmony_ci___ 3024e1051a39Sopenharmony_ci 3025e1051a39Sopenharmony_ci######################################################################################### 3026e1051a39Sopenharmony_ci# size_t aes_gcm_dec_192_kernel(const unsigned char *in, 3027e1051a39Sopenharmony_ci# size_t len, 3028e1051a39Sopenharmony_ci# unsigned char *out, 3029e1051a39Sopenharmony_ci# const void *key, 3030e1051a39Sopenharmony_ci# unsigned char ivec[16], 3031e1051a39Sopenharmony_ci# u64 *Xi); 3032e1051a39Sopenharmony_ci# 3033e1051a39Sopenharmony_ci$code.=<<___; 3034e1051a39Sopenharmony_ci.global aes_gcm_dec_192_kernel 3035e1051a39Sopenharmony_ci.type aes_gcm_dec_192_kernel,%function 3036e1051a39Sopenharmony_ci.align 4 3037e1051a39Sopenharmony_ciaes_gcm_dec_192_kernel: 3038e1051a39Sopenharmony_ci cbz x1, .L192_dec_ret 
3039e1051a39Sopenharmony_ci stp x19, x20, [sp, #-112]! 3040e1051a39Sopenharmony_ci mov x16, x4 3041e1051a39Sopenharmony_ci mov x8, x5 3042e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 3043e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 3044e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 3045e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 3046e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 3047e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 3048e1051a39Sopenharmony_ci 3049e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 3050e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 3051e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3052e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 3053e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 3054e1051a39Sopenharmony_ci#endif 3055e1051a39Sopenharmony_ci ldp $rk12_l, $rk12_h, [$cc, #192] @ load rk12 3056e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3057e1051a39Sopenharmony_ci ror $rk12_l, $rk12_l, #32 3058e1051a39Sopenharmony_ci ror $rk12_h, $rk12_h, #32 3059e1051a39Sopenharmony_ci#endif 3060e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 3061e1051a39Sopenharmony_ci 3062e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 3063e1051a39Sopenharmony_ci 3064e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 3065e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 3066e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 3067e1051a39Sopenharmony_ci 3068e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 3069e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 3070e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 3071e1051a39Sopenharmony_ci 3072e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 3073e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 
3074e1051a39Sopenharmony_ci 3075e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 3076e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 3077e1051a39Sopenharmony_ci 3078e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 3079e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 3080e1051a39Sopenharmony_ci 3081e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 3082e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 3083e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 3084e1051a39Sopenharmony_ci 3085e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 3086e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 3087e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 3088e1051a39Sopenharmony_ci 3089e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 3090e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 3091e1051a39Sopenharmony_ci 3092e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 3093e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 3094e1051a39Sopenharmony_ci 3095e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 3096e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 3097e1051a39Sopenharmony_ci 3098e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 3099e1051a39Sopenharmony_ci 3100e1051a39Sopenharmony_ci ld1 {$rk4s}, [$cc], #16 @ load rk4 3101e1051a39Sopenharmony_ci 3102e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 3103e1051a39Sopenharmony_ci 3104e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 3105e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 3106e1051a39Sopenharmony_ci 3107e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 
3108e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 3109e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 3110e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 3111e1051a39Sopenharmony_ci#endif 3112e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 3113e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 3114e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 3115e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 3116e1051a39Sopenharmony_ci#endif 3117e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 3118e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 3119e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 3120e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 3121e1051a39Sopenharmony_ci#endif 3122e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 3123e1051a39Sopenharmony_ci 3124e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 3125e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 3126e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 3127e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 3128e1051a39Sopenharmony_ci#endif 3129e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 3130e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 3131e1051a39Sopenharmony_ci 3132e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 3133e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 3134e1051a39Sopenharmony_ci 3135e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 3136e1051a39Sopenharmony_ci ld1 {$rk8s}, [$cc], #16 @ load rk8 3137e1051a39Sopenharmony_ci 3138e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 3139e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 3140e1051a39Sopenharmony_ci 
3141e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 3142e1051a39Sopenharmony_ci ld1 { $acc_lb}, [$current_tag] 3143e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 3144e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 3145e1051a39Sopenharmony_ci 3146e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 3147e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 3148e1051a39Sopenharmony_ci 3149e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 3150e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 3151e1051a39Sopenharmony_ci 3152e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 3153e1051a39Sopenharmony_ci ld1 {$rk10s}, [$cc], #16 @ load rk10 3154e1051a39Sopenharmony_ci 3155e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 3156e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 3157e1051a39Sopenharmony_ci 3158e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 3159e1051a39Sopenharmony_ci 3160e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 3161e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 3162e1051a39Sopenharmony_ci 3163e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 3164e1051a39Sopenharmony_ci ld1 {$rk11s}, [$cc], #16 @ load rk11 3165e1051a39Sopenharmony_ci 3166e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 3167e1051a39Sopenharmony_ci 3168e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 3169e1051a39Sopenharmony_ci 3170e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 3171e1051a39Sopenharmony_ci 3172e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES 
block 0 - round 6 3173e1051a39Sopenharmony_ci 3174e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 3175e1051a39Sopenharmony_ci 3176e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 3177e1051a39Sopenharmony_ci 3178e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 3179e1051a39Sopenharmony_ci 3180e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 3181e1051a39Sopenharmony_ci 3182e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 3183e1051a39Sopenharmony_ci 3184e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 3185e1051a39Sopenharmony_ci 3186e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 3187e1051a39Sopenharmony_ci 3188e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 3189e1051a39Sopenharmony_ci 3190e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 3191e1051a39Sopenharmony_ci 3192e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 3193e1051a39Sopenharmony_ci 3194e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 3195e1051a39Sopenharmony_ci 3196e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 3197e1051a39Sopenharmony_ci sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 3198e1051a39Sopenharmony_ci 3199e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 3200e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 3201e1051a39Sopenharmony_ci 3202e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 
10 3203e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 3204e1051a39Sopenharmony_ci 3205e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 3206e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 3207e1051a39Sopenharmony_ci 3208e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 3209e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 3210e1051a39Sopenharmony_ci 3211e1051a39Sopenharmony_ci aese $ctr3b, $rk11 @ AES block 3 - round 11 3212e1051a39Sopenharmony_ci 3213e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 3214e1051a39Sopenharmony_ci 3215e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 3216e1051a39Sopenharmony_ci 3217e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 3218e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b @ h2k | h1k 3219e1051a39Sopenharmony_ci 3220e1051a39Sopenharmony_ci aese $ctr2b, $rk11 @ AES block 2 - round 11 3221e1051a39Sopenharmony_ci 3222e1051a39Sopenharmony_ci aese $ctr1b, $rk11 @ AES block 1 - round 11 3223e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 3224e1051a39Sopenharmony_ci 3225e1051a39Sopenharmony_ci aese $ctr0b, $rk11 @ AES block 0 - round 11 3226e1051a39Sopenharmony_ci b.ge .L192_dec_tail @ handle tail 3227e1051a39Sopenharmony_ci 3228e1051a39Sopenharmony_ci ld1 {$res0b, $res1b}, [$input_ptr], #32 @ AES block 0,1 - load ciphertext 3229e1051a39Sopenharmony_ci 3230e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 1 - result 3231e1051a39Sopenharmony_ci 3232e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 0 - result 3233e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 3234e1051a39Sopenharmony_ci ld1 {$res2b, $res3b}, [$input_ptr], #32 @ AES block 2,3 - load ciphertext 
3235e1051a39Sopenharmony_ci 3236e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 1 - mov low 3237e1051a39Sopenharmony_ci 3238e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 1 - mov high 3239e1051a39Sopenharmony_ci 3240e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 0 - mov low 3241e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4 3242e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 3243e1051a39Sopenharmony_ci 3244e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 0 - mov high 3245e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 0 3246e1051a39Sopenharmony_ci 3247e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 3248e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 1 3249e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 3250e1051a39Sopenharmony_ci 3251e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk12_l @ AES block 1 - round 12 low 3252e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3253e1051a39Sopenharmony_ci rev $output_l1, $output_l1 3254e1051a39Sopenharmony_ci#endif 3255e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4 3256e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 3257e1051a39Sopenharmony_ci 3258e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 3259e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 3260e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk12_h @ AES block 1 - round 12 high 3261e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3262e1051a39Sopenharmony_ci rev $output_h1, $output_h1 3263e1051a39Sopenharmony_ci#endif 3264e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 3265e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 3266e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES block 0 - round 12 low 3267e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 
3268e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3269e1051a39Sopenharmony_ci#endif 3270e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 6 3271e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES block 0 - round 12 high 3272e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3273e1051a39Sopenharmony_ci rev $output_h0, $output_h0 3274e1051a39Sopenharmony_ci#endif 3275e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 0 - store result 3276e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 3277e1051a39Sopenharmony_ci 3278e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 1 - store result 3279e1051a39Sopenharmony_ci 3280e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 6 3281e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 2 - result 3282e1051a39Sopenharmony_ci b.ge .L192_dec_prepretail @ do prepretail 3283e1051a39Sopenharmony_ci 3284e1051a39Sopenharmony_ci .L192_dec_main_loop: @ main loop start 3285e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 3286e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 3287e1051a39Sopenharmony_ci 3288e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 3289e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 3290e1051a39Sopenharmony_ci 3291e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 3292e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 3293e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 3294e1051a39Sopenharmony_ci 3295e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 3296e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 3297e1051a39Sopenharmony_ci 3298e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 
3299e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 3300e1051a39Sopenharmony_ci 3301e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 3302e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 3303e1051a39Sopenharmony_ci 3304e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 3305e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 3306e1051a39Sopenharmony_ci 3307e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 3308e1051a39Sopenharmony_ci mov $output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 3309e1051a39Sopenharmony_ci 3310e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 3311e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+7 3312e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 3313e1051a39Sopenharmony_ci 3314e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 3315e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 3316e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 3317e1051a39Sopenharmony_ci 3318e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 3319e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 3320e1051a39Sopenharmony_ci 3321e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 3322e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 3323e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 3324e1051a39Sopenharmony_ci 3325e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 3326e1051a39Sopenharmony_ci 3327e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 3328e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk12_h @ AES block 4k+2 - round 12 high 
3329e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3330e1051a39Sopenharmony_ci rev $output_h2, $output_h2 3331e1051a39Sopenharmony_ci#endif 3332e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 3333e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 3334e1051a39Sopenharmony_ci 3335e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 3336e1051a39Sopenharmony_ci 3337e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 3338e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 3339e1051a39Sopenharmony_ci 3340e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 3341e1051a39Sopenharmony_ci 3342e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 3343e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 3344e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk12_l @ AES block 4k+2 - round 12 low 3345e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3346e1051a39Sopenharmony_ci rev $output_l2, $output_l2 3347e1051a39Sopenharmony_ci#endif 3348e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 3349e1051a39Sopenharmony_ci 3350e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 3351e1051a39Sopenharmony_ci 3352e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 3353e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 3354e1051a39Sopenharmony_ci 3355e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 3356e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 3357e1051a39Sopenharmony_ci 3358e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 3359e1051a39Sopenharmony_ci 3360e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, 
$h2.2d @ GHASH block 4k+2 - high 3361e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 3362e1051a39Sopenharmony_ci 3363e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 3364e1051a39Sopenharmony_ci 3365e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 3366e1051a39Sopenharmony_ci 3367e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 3368e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 3369e1051a39Sopenharmony_ci 3370e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 3371e1051a39Sopenharmony_ci 3372e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 3373e1051a39Sopenharmony_ci 3374e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 3375e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 3376e1051a39Sopenharmony_ci 3377e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 3378e1051a39Sopenharmony_ci 3379e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 3380e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 3381e1051a39Sopenharmony_ci 3382e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 3383e1051a39Sopenharmony_ci 3384e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 3385e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 3386e1051a39Sopenharmony_ci 3387e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 3388e1051a39Sopenharmony_ci 3389e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 3390e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 3391e1051a39Sopenharmony_ci 
3392e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 3393e1051a39Sopenharmony_ci 3394e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 3395e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 3396e1051a39Sopenharmony_ci 3397e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 3398e1051a39Sopenharmony_ci 3399e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 3400e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 3401e1051a39Sopenharmony_ci 3402e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 3403e1051a39Sopenharmony_ci 3404e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 3405e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 3406e1051a39Sopenharmony_ci 3407e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 3408e1051a39Sopenharmony_ci 3409e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 3410e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 3411e1051a39Sopenharmony_ci 3412e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 3413e1051a39Sopenharmony_ci 3414e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 3415e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 3416e1051a39Sopenharmony_ci 3417e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 3418e1051a39Sopenharmony_ci 3419e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 3420e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 3421e1051a39Sopenharmony_ci 3422e1051a39Sopenharmony_ci aese 
$ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 3423e1051a39Sopenharmony_ci 3424e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 3425e1051a39Sopenharmony_ci ld1 {$res0b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 3426e1051a39Sopenharmony_ci 3427e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 3428e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 3429e1051a39Sopenharmony_ci 3430e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3431e1051a39Sopenharmony_ci ld1 {$res1b}, [$input_ptr], #16 @ AES block 4k+5 - load ciphertext 3432e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk12_l @ AES block 4k+3 - round 12 low 3433e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3434e1051a39Sopenharmony_ci rev $output_l3, $output_l3 3435e1051a39Sopenharmony_ci#endif 3436e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 3437e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3438e1051a39Sopenharmony_ci 3439e1051a39Sopenharmony_ci aese $ctr0b, $rk11 @ AES block 4k+4 - round 11 3440e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 3441e1051a39Sopenharmony_ci 3442e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 3443e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 3444e1051a39Sopenharmony_ci 3445e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 3446e1051a39Sopenharmony_ci ld1 {$res2b}, [$input_ptr], #16 @ AES block 4k+6 - load ciphertext 3447e1051a39Sopenharmony_ci 3448e1051a39Sopenharmony_ci aese $ctr1b, $rk11 @ AES block 4k+5 - round 11 3449e1051a39Sopenharmony_ci ld1 {$res3b}, [$input_ptr], #16 @ AES block 4k+7 - load ciphertext 3450e1051a39Sopenharmony_ci rev $ctr32w, 
$rctr32w @ CTR block 4k+8 3451e1051a39Sopenharmony_ci 3452e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 3453e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 - store result 3454e1051a39Sopenharmony_ci 3455e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 3456e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 3457e1051a39Sopenharmony_ci 3458e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 3459e1051a39Sopenharmony_ci 3460e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 4k+4 - result 3461e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk12_h @ AES block 4k+3 - round 12 high 3462e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3463e1051a39Sopenharmony_ci rev $output_h3, $output_h3 3464e1051a39Sopenharmony_ci#endif 3465e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 4k+5 - result 3466e1051a39Sopenharmony_ci 3467e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 3468e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 3469e1051a39Sopenharmony_ci 3470e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 3471e1051a39Sopenharmony_ci 3472e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3473e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 4k+5 - mov low 3474e1051a39Sopenharmony_ci 3475e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 4k+4 - mov low 3476e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], #16 @ AES block 4k+3 - store result 3477e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+5 3478e1051a39Sopenharmony_ci 3479e1051a39Sopenharmony_ci aese $ctr2b, $rk11 @ AES block 4k+6 - round 11 3480e1051a39Sopenharmony_ci mov $output_h0, 
$ctr0.d[1] @ AES block 4k+4 - mov high 3481e1051a39Sopenharmony_ci 3482e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 3483e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 4k+5 - mov high 3484e1051a39Sopenharmony_ci 3485e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 3486e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 3487e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3488e1051a39Sopenharmony_ci 3489e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 4k+6 - result 3490e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4k+8 3491e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+9 3492e1051a39Sopenharmony_ci 3493e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES block 4k+4 - round 12 low 3494e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3495e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3496e1051a39Sopenharmony_ci#endif 3497e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 3498e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 3499e1051a39Sopenharmony_ci 3500e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 3501e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 3502e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk12_l @ AES block 4k+5 - round 12 low 3503e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3504e1051a39Sopenharmony_ci rev $output_l1, $output_l1 3505e1051a39Sopenharmony_ci#endif 3506e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 3507e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 3508e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk12_h @ AES block 4k+5 - round 12 high 3509e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3510e1051a39Sopenharmony_ci rev $output_h1, $output_h1 3511e1051a39Sopenharmony_ci#endif 
3512e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES block 4k+4 - round 12 high 3513e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3514e1051a39Sopenharmony_ci rev $output_h0, $output_h0 3515e1051a39Sopenharmony_ci#endif 3516e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 4k+4 - store result 3517e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 3518e1051a39Sopenharmony_ci 3519e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+10 3520e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k+4 3521e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 3522e1051a39Sopenharmony_ci 3523e1051a39Sopenharmony_ci aese $ctr3b, $rk11 @ AES block 4k+7 - round 11 3524e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 4k+5 - store result 3525e1051a39Sopenharmony_ci b.lt .L192_dec_main_loop 3526e1051a39Sopenharmony_ci 3527e1051a39Sopenharmony_ci .L192_dec_prepretail: @ PREPRETAIL 3528e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 3529e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 3530e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 3531e1051a39Sopenharmony_ci 3532e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 3533e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 3534e1051a39Sopenharmony_ci 3535e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 3536e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 3537e1051a39Sopenharmony_ci 3538e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 3539e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 3540e1051a39Sopenharmony_ci 3541e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 3542e1051a39Sopenharmony_ci mov 
$output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 3543e1051a39Sopenharmony_ci 3544e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 3545e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 3546e1051a39Sopenharmony_ci 3547e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 3548e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 3549e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+7 3550e1051a39Sopenharmony_ci 3551e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 3552e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 3553e1051a39Sopenharmony_ci 3554e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 3555e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 3556e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 3557e1051a39Sopenharmony_ci 3558e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 3559e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 3560e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 3561e1051a39Sopenharmony_ci 3562e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 3563e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk12_h @ AES block 4k+3 - round 12 high 3564e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3565e1051a39Sopenharmony_ci rev $output_h3, $output_h3 3566e1051a39Sopenharmony_ci#endif 3567e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 3568e1051a39Sopenharmony_ci 3569e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 3570e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk12_l @ AES block 4k+2 - round 12 low 3571e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3572e1051a39Sopenharmony_ci rev $output_l2, $output_l2 
3573e1051a39Sopenharmony_ci#endif 3574e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 3575e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk12_h @ AES block 4k+2 - round 12 high 3576e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3577e1051a39Sopenharmony_ci rev $output_h2, $output_h2 3578e1051a39Sopenharmony_ci#endif 3579e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 3580e1051a39Sopenharmony_ci 3581e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 3582e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk12_l @ AES block 4k+3 - round 12 low 3583e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3584e1051a39Sopenharmony_ci rev $output_l3, $output_l3 3585e1051a39Sopenharmony_ci#endif 3586e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 - store result 3587e1051a39Sopenharmony_ci 3588e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 3589e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], #16 @ AES block 4k+3 - store result 3590e1051a39Sopenharmony_ci 3591e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 3592e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 3593e1051a39Sopenharmony_ci 3594e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 3595e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 3596e1051a39Sopenharmony_ci 3597e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 3598e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 3599e1051a39Sopenharmony_ci 3600e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 3601e1051a39Sopenharmony_ci 3602e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 3603e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 
3604e1051a39Sopenharmony_ci 3605e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 3606e1051a39Sopenharmony_ci 3607e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 3608e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 3609e1051a39Sopenharmony_ci 3610e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 3611e1051a39Sopenharmony_ci 3612e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 3613e1051a39Sopenharmony_ci 3614e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 3615e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 3616e1051a39Sopenharmony_ci 3617e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 3618e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 3619e1051a39Sopenharmony_ci 3620e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 3621e1051a39Sopenharmony_ci 3622e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 3623e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 3624e1051a39Sopenharmony_ci 3625e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 3626e1051a39Sopenharmony_ci 3627e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 3628e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 3629e1051a39Sopenharmony_ci 3630e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 3631e1051a39Sopenharmony_ci 3632e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 3633e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 3634e1051a39Sopenharmony_ci 3635e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - 
mid 3636e1051a39Sopenharmony_ci 3637e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 3638e1051a39Sopenharmony_ci 3639e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 3640e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 3641e1051a39Sopenharmony_ci 3642e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 3643e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 3644e1051a39Sopenharmony_ci 3645e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 3646e1051a39Sopenharmony_ci 3647e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3648e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 3649e1051a39Sopenharmony_ci 3650e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 3651e1051a39Sopenharmony_ci 3652e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 3653e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 3654e1051a39Sopenharmony_ci 3655e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 3656e1051a39Sopenharmony_ci 3657e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 3658e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 3659e1051a39Sopenharmony_ci 3660e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 3661e1051a39Sopenharmony_ci 3662e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 3663e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3664e1051a39Sopenharmony_ci 3665e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 
4k+4 - round 8 3666e1051a39Sopenharmony_ci 3667e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 3668e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 3669e1051a39Sopenharmony_ci 3670e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 3671e1051a39Sopenharmony_ci 3672e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 3673e1051a39Sopenharmony_ci 3674e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 3675e1051a39Sopenharmony_ci 3676e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 3677e1051a39Sopenharmony_ci 3678e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 3679e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 3680e1051a39Sopenharmony_ci 3681e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 3682e1051a39Sopenharmony_ci 3683e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 3684e1051a39Sopenharmony_ci 3685e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 3686e1051a39Sopenharmony_ci 3687e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 3688e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 3689e1051a39Sopenharmony_ci 3690e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 3691e1051a39Sopenharmony_ci 3692e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 3693e1051a39Sopenharmony_ci 3694e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 3695e1051a39Sopenharmony_ci 3696e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ 
AES block 4k+5 - round 8 3697e1051a39Sopenharmony_ci 3698e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 3699e1051a39Sopenharmony_ci 3700e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3701e1051a39Sopenharmony_ci 3702e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 3703e1051a39Sopenharmony_ci 3704e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 3705e1051a39Sopenharmony_ci 3706e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 3707e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3708e1051a39Sopenharmony_ci 3709e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 3710e1051a39Sopenharmony_ci 3711e1051a39Sopenharmony_ci aese $ctr0b, $rk11 3712e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 3713e1051a39Sopenharmony_ci 3714e1051a39Sopenharmony_ci aese $ctr2b, $rk11 3715e1051a39Sopenharmony_ci 3716e1051a39Sopenharmony_ci aese $ctr1b, $rk11 3717e1051a39Sopenharmony_ci 3718e1051a39Sopenharmony_ci aese $ctr3b, $rk11 3719e1051a39Sopenharmony_ci 3720e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 3721e1051a39Sopenharmony_ci .L192_dec_tail: @ TAIL 3722e1051a39Sopenharmony_ci 3723e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 3724e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 3725e1051a39Sopenharmony_ci 3726e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr0b @ AES block 4k+4 - result 3727e1051a39Sopenharmony_ci 3728e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 4k+4 - mov high 3729e1051a39Sopenharmony_ci 3730e1051a39Sopenharmony_ci mov $output_l0, 
$ctr0.d[0] @ AES block 4k+4 - mov low 3731e1051a39Sopenharmony_ci 3732e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 3733e1051a39Sopenharmony_ci 3734e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 3735e1051a39Sopenharmony_ci 3736e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES block 4k+4 - round 12 high 3737e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3738e1051a39Sopenharmony_ci rev $output_h0, $output_h0 3739e1051a39Sopenharmony_ci#endif 3740e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES block 4k+4 - round 12 low 3741e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3742e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3743e1051a39Sopenharmony_ci#endif 3744e1051a39Sopenharmony_ci b.gt .L192_dec_blocks_more_than_3 3745e1051a39Sopenharmony_ci 3746e1051a39Sopenharmony_ci movi $acc_l.8b, #0 3747e1051a39Sopenharmony_ci movi $acc_h.8b, #0 3748e1051a39Sopenharmony_ci 3749e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 3750e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 3751e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 3752e1051a39Sopenharmony_ci 3753e1051a39Sopenharmony_ci movi $acc_m.8b, #0 3754e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #32 3755e1051a39Sopenharmony_ci b.gt .L192_dec_blocks_more_than_2 3756e1051a39Sopenharmony_ci 3757e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 3758e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 3759e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 3760e1051a39Sopenharmony_ci 3761e1051a39Sopenharmony_ci b.gt .L192_dec_blocks_more_than_1 3762e1051a39Sopenharmony_ci 3763e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 3764e1051a39Sopenharmony_ci b .L192_dec_blocks_less_than_1 3765e1051a39Sopenharmony_ci .L192_dec_blocks_more_than_3: @ blocks left > 3 3766e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 3767e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-2 block - load ciphertext 3768e1051a39Sopenharmony_ci 
3769e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-3 block - store result 3770e1051a39Sopenharmony_ci 3771e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 3772e1051a39Sopenharmony_ci 3773e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr1b @ AES final-2 block - result 3774e1051a39Sopenharmony_ci 3775e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 3776e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-2 block - mov low 3777e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 3778e1051a39Sopenharmony_ci 3779e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-2 block - mov high 3780e1051a39Sopenharmony_ci 3781e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 3782e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 3783e1051a39Sopenharmony_ci 3784e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 3785e1051a39Sopenharmony_ci 3786e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES final-2 block - round 12 low 3787e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3788e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3789e1051a39Sopenharmony_ci#endif 3790e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 3791e1051a39Sopenharmony_ci 3792e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 3793e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES final-2 block - round 12 high 3794e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3795e1051a39Sopenharmony_ci rev $output_h0, $output_h0 3796e1051a39Sopenharmony_ci#endif 3797e1051a39Sopenharmony_ci .L192_dec_blocks_more_than_2: @ blocks left > 2 3798e1051a39Sopenharmony_ci 3799e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 3800e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-1 
block - load ciphertext 3801e1051a39Sopenharmony_ci 3802e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 3803e1051a39Sopenharmony_ci 3804e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 3805e1051a39Sopenharmony_ci 3806e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr2b @ AES final-1 block - result 3807e1051a39Sopenharmony_ci 3808e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 3809e1051a39Sopenharmony_ci 3810e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 3811e1051a39Sopenharmony_ci 3812e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-2 block - store result 3813e1051a39Sopenharmony_ci 3814e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 3815e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-1 block - mov high 3816e1051a39Sopenharmony_ci 3817e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 3818e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-1 block - mov low 3819e1051a39Sopenharmony_ci 3820e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 3821e1051a39Sopenharmony_ci 3822e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 3823e1051a39Sopenharmony_ci 3824e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 3825e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES final-1 block - round 12 high 3826e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3827e1051a39Sopenharmony_ci rev $output_h0, $output_h0 3828e1051a39Sopenharmony_ci#endif 3829e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES final-1 block - round 12 low 3830e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3831e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3832e1051a39Sopenharmony_ci#endif 3833e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH 
final-2 block - mid 3834e1051a39Sopenharmony_ci .L192_dec_blocks_more_than_1: @ blocks left > 1 3835e1051a39Sopenharmony_ci 3836e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 3837e1051a39Sopenharmony_ci 3838e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 3839e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final block - load ciphertext 3840e1051a39Sopenharmony_ci 3841e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 3842e1051a39Sopenharmony_ci 3843e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 3844e1051a39Sopenharmony_ci 3845e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr3b @ AES final block - result 3846e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-1 block - store result 3847e1051a39Sopenharmony_ci 3848e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 3849e1051a39Sopenharmony_ci 3850e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 3851e1051a39Sopenharmony_ci 3852e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 3853e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final block - mov high 3854e1051a39Sopenharmony_ci 3855e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 3856e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final block - mov low 3857e1051a39Sopenharmony_ci 3858e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 3859e1051a39Sopenharmony_ci 3860e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 3861e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 3862e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk12_h @ AES final block - round 12 high 3863e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3864e1051a39Sopenharmony_ci rev $output_h0, $output_h0 
3865e1051a39Sopenharmony_ci#endif 3866e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk12_l @ AES final block - round 12 low 3867e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 3868e1051a39Sopenharmony_ci rev $output_l0, $output_l0 3869e1051a39Sopenharmony_ci#endif 3870e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 3871e1051a39Sopenharmony_ci .L192_dec_blocks_less_than_1: @ blocks left <= 1 3872e1051a39Sopenharmony_ci 3873e1051a39Sopenharmony_ci mvn $rk12_l, xzr @ rk12_l = 0xffffffffffffffff 3874e1051a39Sopenharmony_ci ldp $end_input_ptr, $main_end_input_ptr, [$output_ptr] @ load existing bytes we need to not overwrite 3875e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 3876e1051a39Sopenharmony_ci 3877e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 3878e1051a39Sopenharmony_ci 3879e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 3880e1051a39Sopenharmony_ci 3881e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 3882e1051a39Sopenharmony_ci mvn $rk12_h, xzr @ rk12_h = 0xffffffffffffffff 3883e1051a39Sopenharmony_ci 3884e1051a39Sopenharmony_ci lsr $rk12_h, $rk12_h, $bit_length @ rk12_h is mask for top 64b of last block 3885e1051a39Sopenharmony_ci cmp $bit_length, #64 3886e1051a39Sopenharmony_ci 3887e1051a39Sopenharmony_ci csel $ctr32x, $rk12_l, $rk12_h, lt 3888e1051a39Sopenharmony_ci csel $ctr96_b64x, $rk12_h, xzr, lt 3889e1051a39Sopenharmony_ci 3890e1051a39Sopenharmony_ci fmov $ctr0d, $ctr32x @ ctr0b is mask for last block 3891e1051a39Sopenharmony_ci and $output_l0, $output_l0, $ctr32x 3892e1051a39Sopenharmony_ci bic $end_input_ptr, $end_input_ptr, $ctr32x @ mask out low existing bytes 3893e1051a39Sopenharmony_ci 3894e1051a39Sopenharmony_ci orr $output_l0, $output_l0, $end_input_ptr 3895e1051a39Sopenharmony_ci mov $ctr0.d[1], $ctr96_b64x 3896e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 
3897e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 3898e1051a39Sopenharmony_ci#else 3899e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 3900e1051a39Sopenharmony_ci#endif 3901e1051a39Sopenharmony_ci 3902e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 3903e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 3904e1051a39Sopenharmony_ci 3905e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 3906e1051a39Sopenharmony_ci 3907e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 3908e1051a39Sopenharmony_ci bic $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x @ mask out high existing bytes 3909e1051a39Sopenharmony_ci 3910e1051a39Sopenharmony_ci and $output_h0, $output_h0, $ctr96_b64x 3911e1051a39Sopenharmony_ci 3912e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 3913e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 3914e1051a39Sopenharmony_ci 3915e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 3916e1051a39Sopenharmony_ci 3917e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 3918e1051a39Sopenharmony_ci 3919e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 3920e1051a39Sopenharmony_ci 3921e1051a39Sopenharmony_ci pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 3922e1051a39Sopenharmony_ci 3923e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 3924e1051a39Sopenharmony_ci 3925e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 3926e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 3927e1051a39Sopenharmony_ci 3928e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 3929e1051a39Sopenharmony_ci 3930e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 3931e1051a39Sopenharmony_ci 
3932e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 3933e1051a39Sopenharmony_ci 3934e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3935e1051a39Sopenharmony_ci orr $output_h0, $output_h0, $main_end_input_ptr 3936e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr] 3937e1051a39Sopenharmony_ci 3938e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3939e1051a39Sopenharmony_ci 3940e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 3941e1051a39Sopenharmony_ci 3942e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 3943e1051a39Sopenharmony_ci 3944e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3945e1051a39Sopenharmony_ci 3946e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 3947e1051a39Sopenharmony_ci 3948e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3949e1051a39Sopenharmony_ci 3950e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 3951e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 3952e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 3953e1051a39Sopenharmony_ci mov x0, $len 3954e1051a39Sopenharmony_ci st1 { $acc_l.16b }, [$current_tag] 3955e1051a39Sopenharmony_ci 3956e1051a39Sopenharmony_ci ldp x21, x22, [sp, #16] 3957e1051a39Sopenharmony_ci ldp x23, x24, [sp, #32] 3958e1051a39Sopenharmony_ci ldp d8, d9, [sp, #48] 3959e1051a39Sopenharmony_ci ldp d10, d11, [sp, #64] 3960e1051a39Sopenharmony_ci ldp d12, d13, [sp, #80] 3961e1051a39Sopenharmony_ci ldp d14, d15, [sp, #96] 3962e1051a39Sopenharmony_ci ldp x19, x20, [sp], #112 3963e1051a39Sopenharmony_ci ret 3964e1051a39Sopenharmony_ci 3965e1051a39Sopenharmony_ci.L192_dec_ret: 3966e1051a39Sopenharmony_ci mov w0, #0x0 3967e1051a39Sopenharmony_ci ret 
3968e1051a39Sopenharmony_ci.size aes_gcm_dec_192_kernel,.-aes_gcm_dec_192_kernel 3969e1051a39Sopenharmony_ci___ 3970e1051a39Sopenharmony_ci} 3971e1051a39Sopenharmony_ci 3972e1051a39Sopenharmony_ci{ 3973e1051a39Sopenharmony_cimy ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map("x$_",(4..7)); 3974e1051a39Sopenharmony_cimy ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map("x$_",(19..24)); 3975e1051a39Sopenharmony_cimy ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map("x$_",(19..24)); 3976e1051a39Sopenharmony_cimy ($output_l0,$output_h0)=map("x$_",(6..7)); 3977e1051a39Sopenharmony_ci 3978e1051a39Sopenharmony_cimy $ctr32w="w9"; 3979e1051a39Sopenharmony_cimy ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk14_l,$rk14_h,$len)=map("x$_",(9..15)); 3980e1051a39Sopenharmony_cimy ($ctr96_t32w,$rctr32w)=map("w$_",(11..12)); 3981e1051a39Sopenharmony_ci 3982e1051a39Sopenharmony_cimy ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map("v$_.16b",(0..7)); 3983e1051a39Sopenharmony_cimy ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map("v$_",(0..7)); 3984e1051a39Sopenharmony_cimy ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map("d$_",(0..7)); 3985e1051a39Sopenharmony_cimy ($res0q,$res1q,$res2q,$res3q)=map("q$_",(4..7)); 3986e1051a39Sopenharmony_ci 3987e1051a39Sopenharmony_cimy ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(9..11)); 3988e1051a39Sopenharmony_cimy ($acc_h,$acc_m,$acc_l)=map("v$_",(9..11)); 3989e1051a39Sopenharmony_cimy ($acc_hd,$acc_md,$acc_ld)=map("d$_",(9..11)); 3990e1051a39Sopenharmony_ci 3991e1051a39Sopenharmony_cimy ($h1,$h2,$h3,$h4,$h12k,$h34k)=map("v$_",(12..17)); 3992e1051a39Sopenharmony_cimy ($h1q,$h2q,$h3q,$h4q)=map("q$_",(12..15)); 3993e1051a39Sopenharmony_cimy ($h1b,$h2b,$h3b,$h4b)=map("v$_.16b",(12..15)); 3994e1051a39Sopenharmony_ci 3995e1051a39Sopenharmony_cimy $t0="v8"; 3996e1051a39Sopenharmony_cimy $t0d="d8"; 3997e1051a39Sopenharmony_cimy $t1="v4"; 3998e1051a39Sopenharmony_cimy $t1d="d4"; 
3999e1051a39Sopenharmony_cimy $t2="v8"; 4000e1051a39Sopenharmony_cimy $t2d="d8"; 4001e1051a39Sopenharmony_cimy $t3="v4"; 4002e1051a39Sopenharmony_cimy $t3d="d4"; 4003e1051a39Sopenharmony_cimy $t4="v4"; 4004e1051a39Sopenharmony_cimy $t4d="d4"; 4005e1051a39Sopenharmony_cimy $t5="v5"; 4006e1051a39Sopenharmony_cimy $t5d="d5"; 4007e1051a39Sopenharmony_cimy $t6="v8"; 4008e1051a39Sopenharmony_cimy $t6d="d8"; 4009e1051a39Sopenharmony_cimy $t7="v5"; 4010e1051a39Sopenharmony_cimy $t7d="d5"; 4011e1051a39Sopenharmony_cimy $t8="v6"; 4012e1051a39Sopenharmony_cimy $t8d="d6"; 4013e1051a39Sopenharmony_cimy $t9="v4"; 4014e1051a39Sopenharmony_cimy $t9d="d4"; 4015e1051a39Sopenharmony_ci 4016e1051a39Sopenharmony_cimy ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map("v$_",(4..7)); 4017e1051a39Sopenharmony_cimy ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map("d$_",(4..7)); 4018e1051a39Sopenharmony_cimy ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map("v$_.16b",(4..7)); 4019e1051a39Sopenharmony_ci 4020e1051a39Sopenharmony_cimy $mod_constantd="d8"; 4021e1051a39Sopenharmony_cimy $mod_constant="v8"; 4022e1051a39Sopenharmony_cimy $mod_t="v7"; 4023e1051a39Sopenharmony_ci 4024e1051a39Sopenharmony_cimy ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9,$rk10,$rk11,$rk12,$rk13)=map("v$_.16b",(18..31)); 4025e1051a39Sopenharmony_cimy ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s,$rk10s,$rk11s,$rk12s,$rk13s)=map("v$_.4s",(18..31)); 4026e1051a39Sopenharmony_cimy ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q,$rk10q,$rk11q,$rk12q,$rk13q)=map("q$_",(18..31)); 4027e1051a39Sopenharmony_cimy $rk2q1="v20.1q"; 4028e1051a39Sopenharmony_cimy $rk3q1="v21.1q"; 4029e1051a39Sopenharmony_cimy $rk4v="v22"; 4030e1051a39Sopenharmony_cimy $rk4d="d22"; 4031e1051a39Sopenharmony_ci 4032e1051a39Sopenharmony_ci######################################################################################### 4033e1051a39Sopenharmony_ci# size_t aes_gcm_enc_256_kernel(const unsigned char *in, 4034e1051a39Sopenharmony_ci# size_t 
len, 4035e1051a39Sopenharmony_ci# unsigned char *out, 4036e1051a39Sopenharmony_ci# const void *key, 4037e1051a39Sopenharmony_ci# unsigned char ivec[16], 4038e1051a39Sopenharmony_ci# u64 *Xi); 4039e1051a39Sopenharmony_ci# 4040e1051a39Sopenharmony_ci$code.=<<___; 4041e1051a39Sopenharmony_ci.global aes_gcm_enc_256_kernel 4042e1051a39Sopenharmony_ci.type aes_gcm_enc_256_kernel,%function 4043e1051a39Sopenharmony_ci.align 4 4044e1051a39Sopenharmony_ciaes_gcm_enc_256_kernel: 4045e1051a39Sopenharmony_ci cbz x1, .L256_enc_ret 4046e1051a39Sopenharmony_ci stp x19, x20, [sp, #-112]! 4047e1051a39Sopenharmony_ci mov x16, x4 4048e1051a39Sopenharmony_ci mov x8, x5 4049e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 4050e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 4051e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 4052e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 4053e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 4054e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 4055e1051a39Sopenharmony_ci 4056e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 4057e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 4058e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 4059e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 4060e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4061e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 4062e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 4063e1051a39Sopenharmony_ci#endif 4064e1051a39Sopenharmony_ci ldp $rk14_l, $rk14_h, [$cc, #224] @ load rk14 4065e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4066e1051a39Sopenharmony_ci ror $rk14_l, $rk14_l, #32 4067e1051a39Sopenharmony_ci ror $rk14_h, $rk14_h, #32 4068e1051a39Sopenharmony_ci#endif 4069e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 4070e1051a39Sopenharmony_ci sub $main_end_input_ptr, 
$main_end_input_ptr, #1 @ byte_len - 1 4071e1051a39Sopenharmony_ci 4072e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 4073e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4074e1051a39Sopenharmony_ci 4075e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 4076e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 4077e1051a39Sopenharmony_ci 4078e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 4079e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 4080e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 4081e1051a39Sopenharmony_ci 4082e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 4083e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 4084e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 4085e1051a39Sopenharmony_ci 4086e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 4087e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 4088e1051a39Sopenharmony_ci 4089e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 4090e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 4091e1051a39Sopenharmony_ci 4092e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 4093e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 4094e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 4095e1051a39Sopenharmony_ci 4096e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 4097e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 4098e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 4099e1051a39Sopenharmony_ci 4100e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 4101e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 
4102e1051a39Sopenharmony_ci 4103e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 4104e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 4105e1051a39Sopenharmony_ci 4106e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 4107e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 4108e1051a39Sopenharmony_ci 4109e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 4110e1051a39Sopenharmony_ci 4111e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 4112e1051a39Sopenharmony_ci ld1 {$rk4s}, [$cc], #16 @ load rk4 4113e1051a39Sopenharmony_ci 4114e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 4115e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 4116e1051a39Sopenharmony_ci 4117e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 4118e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 4119e1051a39Sopenharmony_ci 4120e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 4121e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4122e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 4123e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 4124e1051a39Sopenharmony_ci#endif 4125e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 4126e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 4127e1051a39Sopenharmony_ci 4128e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 4129e1051a39Sopenharmony_ci ld1 {$rk8s}, [$cc], #16 @ load rk8 4130e1051a39Sopenharmony_ci 4131e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 4132e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 4133e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 4134e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 
4135e1051a39Sopenharmony_ci#endif 4136e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 4137e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 4138e1051a39Sopenharmony_ci 4139e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 4140e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4141e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 4142e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 4143e1051a39Sopenharmony_ci#endif 4144e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 4145e1051a39Sopenharmony_ci ld1 {$rk10s}, [$cc], #16 @ load rk10 4146e1051a39Sopenharmony_ci 4147e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 4148e1051a39Sopenharmony_ci ld1 {$rk11s}, [$cc], #16 @ load rk11 4149e1051a39Sopenharmony_ci 4150e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 4151e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 4152e1051a39Sopenharmony_ci 4153e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 4154e1051a39Sopenharmony_ci 4155e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 4156e1051a39Sopenharmony_ci ld1 { $acc_lb}, [$current_tag] 4157e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 4158e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 4159e1051a39Sopenharmony_ci 4160e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 4161e1051a39Sopenharmony_ci 4162e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 4163e1051a39Sopenharmony_ci 4164e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 4165e1051a39Sopenharmony_ci 4166e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 4167e1051a39Sopenharmony_ci 
4168e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 4169e1051a39Sopenharmony_ci 4170e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 4171e1051a39Sopenharmony_ci 4172e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 4173e1051a39Sopenharmony_ci 4174e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 4175e1051a39Sopenharmony_ci 4176e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 4177e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 4178e1051a39Sopenharmony_ci 4179e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 4180e1051a39Sopenharmony_ci ld1 {$rk12s}, [$cc], #16 @ load rk12 4181e1051a39Sopenharmony_ci 4182e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 4183e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4184e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 4185e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 4186e1051a39Sopenharmony_ci#endif 4187e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 4188e1051a39Sopenharmony_ci ld1 {$rk13s}, [$cc], #16 @ load rk13 4189e1051a39Sopenharmony_ci 4190e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 4191e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 4192e1051a39Sopenharmony_ci 4193e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 4194e1051a39Sopenharmony_ci 4195e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 4196e1051a39Sopenharmony_ci 4197e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 4198e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 4199e1051a39Sopenharmony_ci 
4200e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 4201e1051a39Sopenharmony_ci 4202e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 4203e1051a39Sopenharmony_ci 4204e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 4205e1051a39Sopenharmony_ci 4206e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 4207e1051a39Sopenharmony_ci 4208e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 4209e1051a39Sopenharmony_ci 4210e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 4211e1051a39Sopenharmony_ci 4212e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 4213e1051a39Sopenharmony_ci 4214e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 4215e1051a39Sopenharmony_ci 4216e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 4217e1051a39Sopenharmony_ci 4218e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 4219e1051a39Sopenharmony_ci 4220e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 4221e1051a39Sopenharmony_ci 4222e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 4223e1051a39Sopenharmony_ci 4224e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 4225e1051a39Sopenharmony_ci 4226e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 4227e1051a39Sopenharmony_ci 4228e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 4229e1051a39Sopenharmony_ci 4230e1051a39Sopenharmony_ci aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 4231e1051a39Sopenharmony_ci 4232e1051a39Sopenharmony_ci 
aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 4233e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 4234e1051a39Sopenharmony_ci 4235e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 4236e1051a39Sopenharmony_ci 4237e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 2 - round 13 4238e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 4239e1051a39Sopenharmony_ci 4240e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12 4241e1051a39Sopenharmony_ci 4242e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 4243e1051a39Sopenharmony_ci 4244e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 1 - round 13 4245e1051a39Sopenharmony_ci 4246e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 0 - round 13 4247e1051a39Sopenharmony_ci 4248e1051a39Sopenharmony_ci aese $ctr3b, $rk13 @ AES block 3 - round 13 4249e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b @ h2k | h1k 4250e1051a39Sopenharmony_ci b.ge .L256_enc_tail @ handle tail 4251e1051a39Sopenharmony_ci 4252e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 1 - load plaintext 4253e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4254e1051a39Sopenharmony_ci rev $input_l1, $input_l1 4255e1051a39Sopenharmony_ci rev $input_h1, $input_h1 4256e1051a39Sopenharmony_ci#endif 4257e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 4258e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 0 - load plaintext 4259e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4260e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4261e1051a39Sopenharmony_ci rev $input_h0, $input_h0 4262e1051a39Sopenharmony_ci#endif 4263e1051a39Sopenharmony_ci ldp $input_l3, $input_h3, [$input_ptr, #48] @ AES block 3 - load plaintext 4264e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4265e1051a39Sopenharmony_ci rev $input_l3, $input_l3 
4266e1051a39Sopenharmony_ci rev $input_h3, $input_h3 4267e1051a39Sopenharmony_ci#endif 4268e1051a39Sopenharmony_ci ldp $input_l2, $input_h2, [$input_ptr, #32] @ AES block 2 - load plaintext 4269e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4270e1051a39Sopenharmony_ci rev $input_l2, $input_l2 4271e1051a39Sopenharmony_ci rev $input_h2, $input_h2 4272e1051a39Sopenharmony_ci#endif 4273e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 4274e1051a39Sopenharmony_ci 4275e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk14_l @ AES block 1 - round 14 low 4276e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk14_h @ AES block 1 - round 14 high 4277e1051a39Sopenharmony_ci 4278e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 1 - mov low 4279e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES block 0 - round 14 low 4280e1051a39Sopenharmony_ci 4281e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES block 0 - round 14 high 4282e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk14_h @ AES block 3 - round 14 high 4283e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 0 - mov low 4284e1051a39Sopenharmony_ci 4285e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 4286e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 0 - mov high 4287e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk14_l @ AES block 3 - round 14 low 4288e1051a39Sopenharmony_ci 4289e1051a39Sopenharmony_ci eor $input_l2, $input_l2, $rk14_l @ AES block 2 - round 14 low 4290e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 1 - mov high 4291e1051a39Sopenharmony_ci 4292e1051a39Sopenharmony_ci fmov $ctr_t2d, $input_l2 @ AES block 2 - mov low 4293e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 4294e1051a39Sopenharmony_ci 4295e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4 4296e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES 
block 3 - mov low 4297e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk14_h @ AES block 2 - round 14 high 4298e1051a39Sopenharmony_ci 4299e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 2 - mov high 4300e1051a39Sopenharmony_ci 4301e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 0 - result 4302e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 4303e1051a39Sopenharmony_ci 4304e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4 4305e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 4306e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 4307e1051a39Sopenharmony_ci 4308e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 1 - result 4309e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 4310e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 4311e1051a39Sopenharmony_ci 4312e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 4313e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 6 4314e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 0 - store result 4315e1051a39Sopenharmony_ci 4316e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 3 - mov high 4317e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 4318e1051a39Sopenharmony_ci eor $res2b, $ctr_t2b, $ctr2b @ AES block 2 - result 4319e1051a39Sopenharmony_ci 4320e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 1 - store result 4321e1051a39Sopenharmony_ci 4322e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 6 4323e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 6 4324e1051a39Sopenharmony_ci 4325e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 6 4326e1051a39Sopenharmony_ci st1 { $res2b}, [$output_ptr], #16 @ AES block 2 - store result 4327e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 7 4328e1051a39Sopenharmony_ci 4329e1051a39Sopenharmony_ci 
@ Finish the first group of four blocks (block 3 result and store), then choose
@ between the steady-state main loop and the prepretail.
    orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32          @ CTR block 7

    eor     $res3b, $ctr_t3b, $ctr3b                        @ AES block 3 - result
    st1     { $res3b}, [$output_ptr], #16                   @ AES block 3 - store result
@ The flags still come from the earlier cmp of the input pointer against the
@ main-loop end pointer; nothing in between writes the condition flags.
@ The target needs the .L prefix so that it names the local label defined
@ further down as .L256_enc_prepretail, like the other branches of this kernel.
    b.ge    .L256_enc_prepretail                            @ do prepretail

    .L256_enc_main_loop:                                    @ main loop start
@ Each iteration interleaves three streams: AES rounds for blocks 4k+4..4k+7,
@ GHASH of the four previous results, and loading of the next plaintext.
    aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b        @ AES block 4k+4 - round 0
    rev64   $res0b, $res0b                                  @ GHASH block 4k (only t0 is free)

    aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b        @ AES block 4k+5 - round 0
    fmov    $ctr3d, $ctr96_b64x                             @ CTR block 4k+3

    aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b        @ AES block 4k+6 - round 0
    ext     $acc_lb, $acc_lb, $acc_lb, #8                   @ PRE 0

    aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b        @ AES block 4k+4 - round 1
    fmov    $ctr3.d[1], $ctr32x                             @ CTR block 4k+3

    aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b        @ AES block 4k+5 - round 1
    ldp     $input_l3, $input_h3, [$input_ptr, #48]         @ AES block 4k+7 - load plaintext
#ifdef __AARCH64EB__
    rev     $input_l3, $input_l3
    rev     $input_h3, $input_h3
#endif
    aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b        @ AES block 4k+6 - round 1
    ldp     $input_l2, $input_h2, [$input_ptr, #32]         @ AES block 4k+6 - load plaintext
#ifdef __AARCH64EB__
    rev     $input_l2, $input_l2
    rev     $input_h2, $input_h2
#endif
4360e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 4361e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 4362e1051a39Sopenharmony_ci 4363e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 4364e1051a39Sopenharmony_ci 4365e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 4366e1051a39Sopenharmony_ci eor $input_l3, $input_l3, $rk14_l @ AES block 4k+7 - round 14 low 4367e1051a39Sopenharmony_ci 4368e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 4369e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 4370e1051a39Sopenharmony_ci 4371e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 4372e1051a39Sopenharmony_ci eor $input_h2, $input_h2, $rk14_h @ AES block 4k+6 - round 14 high 4373e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 4374e1051a39Sopenharmony_ci 4375e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 4376e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 4377e1051a39Sopenharmony_ci 4378e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 4379e1051a39Sopenharmony_ci 4380e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 4381e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 4382e1051a39Sopenharmony_ci 4383e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 4384e1051a39Sopenharmony_ci 4385e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 4386e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 4387e1051a39Sopenharmony_ci 4388e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 4389e1051a39Sopenharmony_ci 
4390e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 4391e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 (t0, t1, and t2 free) 4392e1051a39Sopenharmony_ci 4393e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 4394e1051a39Sopenharmony_ci 4395e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 4396e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 4397e1051a39Sopenharmony_ci 4398e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 4399e1051a39Sopenharmony_ci 4400e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 4401e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 4402e1051a39Sopenharmony_ci 4403e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 4404e1051a39Sopenharmony_ci 4405e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 4406e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 4407e1051a39Sopenharmony_ci 4408e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 4409e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 4410e1051a39Sopenharmony_ci 4411e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 4412e1051a39Sopenharmony_ci 4413e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 4414e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 4415e1051a39Sopenharmony_ci 4416e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 4417e1051a39Sopenharmony_ci 4418e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 4419e1051a39Sopenharmony_ci 4420e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, 
$ctr0b @ AES block 4k+4 - round 7 4421e1051a39Sopenharmony_ci 4422e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 4423e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 4424e1051a39Sopenharmony_ci 4425e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 4426e1051a39Sopenharmony_ci 4427e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 4428e1051a39Sopenharmony_ci 4429e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 4430e1051a39Sopenharmony_ci 4431e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 4432e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 4433e1051a39Sopenharmony_ci 4434e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 4435e1051a39Sopenharmony_ci 4436e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 4437e1051a39Sopenharmony_ci 4438e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 4439e1051a39Sopenharmony_ci 4440e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 4441e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 4442e1051a39Sopenharmony_ci 4443e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 4444e1051a39Sopenharmony_ci ldp $input_l1, $input_h1, [$input_ptr, #16] @ AES block 4k+5 - load plaintext 4445e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4446e1051a39Sopenharmony_ci rev $input_l1, $input_l1 4447e1051a39Sopenharmony_ci rev $input_h1, $input_h1 4448e1051a39Sopenharmony_ci#endif 4449e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 4450e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 4451e1051a39Sopenharmony_ci 
4452e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 4453e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 4454e1051a39Sopenharmony_ci 4455e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 4456e1051a39Sopenharmony_ci 4457e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 4458e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 4459e1051a39Sopenharmony_ci 4460e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 4461e1051a39Sopenharmony_ci eor $input_l1, $input_l1, $rk14_l @ AES block 4k+5 - round 14 low 4462e1051a39Sopenharmony_ci 4463e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 4464e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 4465e1051a39Sopenharmony_ci 4466e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 4467e1051a39Sopenharmony_ci eor $input_l2, $input_l2, $rk14_l @ AES block 4k+6 - round 14 low 4468e1051a39Sopenharmony_ci 4469e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 4470e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 4471e1051a39Sopenharmony_ci 4472e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 4473e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 4474e1051a39Sopenharmony_ci fmov $ctr_t1d, $input_l1 @ AES block 4k+5 - mov low 4475e1051a39Sopenharmony_ci 4476e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 4477e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr, #0] @ AES block 4k+4 - load plaintext 4478e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4479e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4480e1051a39Sopenharmony_ci rev $input_h0, $input_h0 
4481e1051a39Sopenharmony_ci#endif 4482e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 4483e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 4484e1051a39Sopenharmony_ci 4485e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 4486e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 4487e1051a39Sopenharmony_ci 4488e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 4489e1051a39Sopenharmony_ci 4490e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 4491e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 4492e1051a39Sopenharmony_ci 4493e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 4494e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 4495e1051a39Sopenharmony_ci 4496e1051a39Sopenharmony_ci aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 11 4497e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 4498e1051a39Sopenharmony_ci 4499e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 11 4500e1051a39Sopenharmony_ci add $input_ptr, $input_ptr, #64 @ AES input_ptr update 4501e1051a39Sopenharmony_ci 4502e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4503e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+8 4504e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4505e1051a39Sopenharmony_ci 4506e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 4507e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES block 4k+4 - round 14 low 4508e1051a39Sopenharmony_ci 4509e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ 
AES block 4k+5 - round 12 4510e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 4511e1051a39Sopenharmony_ci 4512e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 4513e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES block 4k+4 - round 14 high 4514e1051a39Sopenharmony_ci 4515e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 4516e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 4517e1051a39Sopenharmony_ci eor $mod_t.16b, $acc_hb, $mod_t.16b @ MODULO - fold into mid 4518e1051a39Sopenharmony_ci 4519e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 12 4520e1051a39Sopenharmony_ci eor $input_h1, $input_h1, $rk14_h @ AES block 4k+5 - round 14 high 4521e1051a39Sopenharmony_ci 4522e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 11 4523e1051a39Sopenharmony_ci eor $input_h3, $input_h3, $rk14_h @ AES block 4k+7 - round 14 high 4524e1051a39Sopenharmony_ci 4525e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 11 4526e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 4527e1051a39Sopenharmony_ci 4528e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 4k+4 - round 13 4529e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 4530e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 4531e1051a39Sopenharmony_ci 4532e1051a39Sopenharmony_ci aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 12 4533e1051a39Sopenharmony_ci fmov $ctr_t3d, $input_l3 @ AES block 4k+7 - mov low 4534e1051a39Sopenharmony_ci 4535e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 4k+5 - round 13 4536e1051a39Sopenharmony_ci fmov $ctr_t1.d[1], $input_h1 @ AES block 4k+5 - mov high 4537e1051a39Sopenharmony_ci 4538e1051a39Sopenharmony_ci fmov 
$ctr_t2d, $input_l2 @ AES block 4k+6 - mov low 4539e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 4540e1051a39Sopenharmony_ci 4541e1051a39Sopenharmony_ci fmov $ctr_t2.d[1], $input_h2 @ AES block 4k+6 - mov high 4542e1051a39Sopenharmony_ci 4543e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4544e1051a39Sopenharmony_ci eor $res0b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 4545e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 4546e1051a39Sopenharmony_ci 4547e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x @ CTR block 4k+8 4548e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+9 4549e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 4550e1051a39Sopenharmony_ci 4551e1051a39Sopenharmony_ci eor $res1b, $ctr_t1b, $ctr1b @ AES block 4k+5 - result 4552e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 4553e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 4554e1051a39Sopenharmony_ci 4555e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 12 4556e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 4557e1051a39Sopenharmony_ci 4558e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 4k+6 - round 13 4559e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 4560e1051a39Sopenharmony_ci st1 { $res0b}, [$output_ptr], #16 @ AES block 4k+4 - store result 4561e1051a39Sopenharmony_ci 4562e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 4563e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 4564e1051a39Sopenharmony_ci fmov $ctr_t3.d[1], $input_h3 @ AES block 4k+7 - mov high 4565e1051a39Sopenharmony_ci 4566e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4567e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES block 4k+5 - store result 
@ Tail end of one main-loop iteration: finish blocks 4k+6 and 4k+7, build the
@ counter for block 4k+11 and fold the reduced GHASH mid part into the low part.
    add     $rctr32w, $rctr32w, #1                          @ CTR block 4k+10

    aese    $ctr3b, $rk13                                   @ AES block 4k+7 - round 13
    eor     $res2b, $ctr_t2b, $ctr2b                        @ AES block 4k+6 - result
    fmov    $ctr2d, $ctr96_b64x                             @ CTR block 4k+10

    st1     { $res2b}, [$output_ptr], #16                   @ AES block 4k+6 - store result
    fmov    $ctr2.d[1], $ctr32x                             @ CTR block 4k+10
    rev     $ctr32w, $rctr32w                               @ CTR block 4k+11

    eor     $acc_lb, $acc_lb, $acc_mb                       @ MODULO - fold into low
    orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32          @ CTR block 4k+11

    eor     $res3b, $ctr_t3b, $ctr3b                        @ AES block 4k+7 - result
    st1     { $res3b}, [$output_ptr], #16                   @ AES block 4k+7 - store result
@ Loop back while the input pointer is still below the main-loop end pointer.
@ The flags come from the LOOP CONTROL cmp earlier in the iteration; nothing
@ after it writes the condition flags. The target needs the .L prefix so that
@ it names the local label .L256_enc_main_loop defined at the top of the loop.
    b.lt    .L256_enc_main_loop

    .L256_enc_prepretail:                                   @ PREPRETAIL
@ Same AES and GHASH interleave as the main loop; no plaintext loads appear
@ in this opening part.
    aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b        @ AES block 4k+5 - round 0
    rev64   $res2b, $res2b                                  @ GHASH block 4k+2 (t0, t1, and t2 free)

    aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b        @ AES block 4k+6 - round 0
    fmov    $ctr3d, $ctr96_b64x                             @ CTR block 4k+3

    aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b        @ AES block 4k+4 - round 0
    rev64   $res0b, $res0b                                  @ GHASH block 4k (only t0 is free)

    fmov    $ctr3.d[1], $ctr32x                             @ CTR block 4k+3
    ext     $acc_lb, $acc_lb, $acc_lb, #8                   @ PRE 0

    aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b        @
AES block 4k+6 - round 1 4599e1051a39Sopenharmony_ci 4600e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 4601e1051a39Sopenharmony_ci 4602e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 4603e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+1 (t0 and t1 free) 4604e1051a39Sopenharmony_ci 4605e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 4606e1051a39Sopenharmony_ci 4607e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 4608e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 4609e1051a39Sopenharmony_ci 4610e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 4611e1051a39Sopenharmony_ci 4612e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 4613e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 4614e1051a39Sopenharmony_ci 4615e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 4616e1051a39Sopenharmony_ci 4617e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 4618e1051a39Sopenharmony_ci 4619e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 4620e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 4621e1051a39Sopenharmony_ci 4622e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 4623e1051a39Sopenharmony_ci 4624e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 4625e1051a39Sopenharmony_ci 4626e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 4627e1051a39Sopenharmony_ci 4628e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 4629e1051a39Sopenharmony_ci 4630e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 
4k+1 - high 4631e1051a39Sopenharmony_ci 4632e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 4633e1051a39Sopenharmony_ci 4634e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 4635e1051a39Sopenharmony_ci 4636e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 4637e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 4638e1051a39Sopenharmony_ci 4639e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 4640e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 4641e1051a39Sopenharmony_ci 4642e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 4643e1051a39Sopenharmony_ci 4644e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 4645e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 4646e1051a39Sopenharmony_ci 4647e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 4648e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 (t0, t1, t2 and t3 free) 4649e1051a39Sopenharmony_ci 4650e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 4651e1051a39Sopenharmony_ci 4652e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 4653e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 4654e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+3 4655e1051a39Sopenharmony_ci 4656e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 4657e1051a39Sopenharmony_ci 4658e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 4659e1051a39Sopenharmony_ci 4660e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 4661e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 
- mid 4662e1051a39Sopenharmony_ci 4663e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 4664e1051a39Sopenharmony_ci 4665e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 4666e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 4667e1051a39Sopenharmony_ci 4668e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 4669e1051a39Sopenharmony_ci 4670e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 4671e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 4672e1051a39Sopenharmony_ci 4673e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 4674e1051a39Sopenharmony_ci 4675e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 4676e1051a39Sopenharmony_ci 4677e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 4678e1051a39Sopenharmony_ci 4679e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 4680e1051a39Sopenharmony_ci 4681e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 4682e1051a39Sopenharmony_ci 4683e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 4684e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 4685e1051a39Sopenharmony_ci 4686e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 4687e1051a39Sopenharmony_ci 4688e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 4689e1051a39Sopenharmony_ci 4690e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 4691e1051a39Sopenharmony_ci 4692e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 4693e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 4694e1051a39Sopenharmony_ci 
4695e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 4696e1051a39Sopenharmony_ci 4697e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 4698e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 4699e1051a39Sopenharmony_ci 4700e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 4701e1051a39Sopenharmony_ci 4702e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 4703e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 4704e1051a39Sopenharmony_ci 4705e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 4706e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 4707e1051a39Sopenharmony_ci 4708e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 4709e1051a39Sopenharmony_ci 4710e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 4711e1051a39Sopenharmony_ci 4712e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 4713e1051a39Sopenharmony_ci 4714e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 4715e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 4716e1051a39Sopenharmony_ci 4717e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 4718e1051a39Sopenharmony_ci 4719e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ karatsuba tidy up 4720e1051a39Sopenharmony_ci 4721e1051a39Sopenharmony_ci pmull $t1.1q, $acc_h.1d, $mod_constant.1d 4722e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 4723e1051a39Sopenharmony_ci 4724e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 4725e1051a39Sopenharmony_ci 4726e1051a39Sopenharmony_ci aese 
$ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 4727e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_lb 4728e1051a39Sopenharmony_ci 4729e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 4730e1051a39Sopenharmony_ci 4731e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 4732e1051a39Sopenharmony_ci 4733e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 4734e1051a39Sopenharmony_ci 4735e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 11 4736e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t1.16b 4737e1051a39Sopenharmony_ci 4738e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 4739e1051a39Sopenharmony_ci 4740e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 4741e1051a39Sopenharmony_ci 4742e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 12 4743e1051a39Sopenharmony_ci 4744e1051a39Sopenharmony_ci aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 11 4745e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb 4746e1051a39Sopenharmony_ci 4747e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 11 4748e1051a39Sopenharmony_ci 4749e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 4750e1051a39Sopenharmony_ci 4751e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 12 4752e1051a39Sopenharmony_ci 4753e1051a39Sopenharmony_ci pmull $t1.1q, $acc_m.1d, $mod_constant.1d 4754e1051a39Sopenharmony_ci 4755e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 11 4756e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 4757e1051a39Sopenharmony_ci 4758e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc 
$ctr3b, $ctr3b @ AES block 4k+7 - round 12 4759e1051a39Sopenharmony_ci 4760e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 4k+5 - round 13 4761e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t1.16b 4762e1051a39Sopenharmony_ci 4763e1051a39Sopenharmony_ci aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 12 4764e1051a39Sopenharmony_ci 4765e1051a39Sopenharmony_ci aese $ctr3b, $rk13 @ AES block 4k+7 - round 13 4766e1051a39Sopenharmony_ci 4767e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 4k+4 - round 13 4768e1051a39Sopenharmony_ci 4769e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 4k+6 - round 13 4770e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb 4771e1051a39Sopenharmony_ci .L256_enc_tail: @ TAIL 4772e1051a39Sopenharmony_ci 4773e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 4774e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 4775e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES block 4k+4 - load plaintext 4776e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4777e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4778e1051a39Sopenharmony_ci rev $input_h0, $input_h0 4779e1051a39Sopenharmony_ci#endif 4780e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES block 4k+4 - round 14 low 4781e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES block 4k+4 - round 14 high 4782e1051a39Sopenharmony_ci 4783e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 4784e1051a39Sopenharmony_ci fmov $ctr_t0d, $input_l0 @ AES block 4k+4 - mov low 4785e1051a39Sopenharmony_ci 4786e1051a39Sopenharmony_ci fmov $ctr_t0.d[1], $input_h0 @ AES block 4k+4 - mov high 4787e1051a39Sopenharmony_ci 4788e1051a39Sopenharmony_ci eor $res1b, $ctr_t0b, $ctr0b @ AES block 4k+4 - result 4789e1051a39Sopenharmony_ci b.gt .L256_enc_blocks_more_than_3 4790e1051a39Sopenharmony_ci 4791e1051a39Sopenharmony_ci cmp 
$main_end_input_ptr, #32 4792e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 4793e1051a39Sopenharmony_ci movi $acc_l.8b, #0 4794e1051a39Sopenharmony_ci 4795e1051a39Sopenharmony_ci movi $acc_h.8b, #0 4796e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 4797e1051a39Sopenharmony_ci 4798e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 4799e1051a39Sopenharmony_ci movi $acc_m.8b, #0 4800e1051a39Sopenharmony_ci b.gt .L256_enc_blocks_more_than_2 4801e1051a39Sopenharmony_ci 4802e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 4803e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 4804e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 4805e1051a39Sopenharmony_ci 4806e1051a39Sopenharmony_ci b.gt .L256_enc_blocks_more_than_1 4807e1051a39Sopenharmony_ci 4808e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 4809e1051a39Sopenharmony_ci b .L256_enc_blocks_less_than_1 4810e1051a39Sopenharmony_ci .L256_enc_blocks_more_than_3: @ blocks left > 3 4811e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 4812e1051a39Sopenharmony_ci 4813e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-2 block - load input low & high 4814e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4815e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4816e1051a39Sopenharmony_ci rev $input_h0, $input_h0 4817e1051a39Sopenharmony_ci#endif 4818e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 4819e1051a39Sopenharmony_ci 4820e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES final-2 block - round 14 low 4821e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 4822e1051a39Sopenharmony_ci 4823e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES final-2 block - round 14 high 4824e1051a39Sopenharmony_ci 4825e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 4826e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-2 block - mov low 4827e1051a39Sopenharmony_ci 4828e1051a39Sopenharmony_ci 
fmov $res1.d[1], $input_h0 @ AES final-2 block - mov high 4829e1051a39Sopenharmony_ci 4830e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 4831e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 4832e1051a39Sopenharmony_ci 4833e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 4834e1051a39Sopenharmony_ci 4835e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 4836e1051a39Sopenharmony_ci 4837e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 4838e1051a39Sopenharmony_ci 4839e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 4840e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr1b @ AES final-2 block - result 4841e1051a39Sopenharmony_ci .L256_enc_blocks_more_than_2: @ blocks left > 2 4842e1051a39Sopenharmony_ci 4843e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 4844e1051a39Sopenharmony_ci 4845e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final-1 block - load input low & high 4846e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4847e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4848e1051a39Sopenharmony_ci rev $input_h0, $input_h0 4849e1051a39Sopenharmony_ci#endif 4850e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 4851e1051a39Sopenharmony_ci 4852e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES final-1 block - round 14 low 4853e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 4854e1051a39Sopenharmony_ci 4855e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final-1 block - mov low 4856e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES final-1 block - round 14 high 4857e1051a39Sopenharmony_ci 4858e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final-1 block - mov high 4859e1051a39Sopenharmony_ci 4860e1051a39Sopenharmony_ci movi 
$t0.8b, #0 @ suppress further partial tag feed in 4861e1051a39Sopenharmony_ci 4862e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 4863e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 4864e1051a39Sopenharmony_ci 4865e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 4866e1051a39Sopenharmony_ci 4867e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 4868e1051a39Sopenharmony_ci 4869e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr2b @ AES final-1 block - result 4870e1051a39Sopenharmony_ci 4871e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 4872e1051a39Sopenharmony_ci 4873e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 4874e1051a39Sopenharmony_ci 4875e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 4876e1051a39Sopenharmony_ci 4877e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 4878e1051a39Sopenharmony_ci .L256_enc_blocks_more_than_1: @ blocks left > 1 4879e1051a39Sopenharmony_ci 4880e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 4881e1051a39Sopenharmony_ci 4882e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 4883e1051a39Sopenharmony_ci 4884e1051a39Sopenharmony_ci ldp $input_l0, $input_h0, [$input_ptr], #16 @ AES final block - load input low & high 4885e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 4886e1051a39Sopenharmony_ci rev $input_l0, $input_l0 4887e1051a39Sopenharmony_ci rev $input_h0, $input_h0 4888e1051a39Sopenharmony_ci#endif 4889e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 4890e1051a39Sopenharmony_ci 4891e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 4892e1051a39Sopenharmony_ci 4893e1051a39Sopenharmony_ci eor $input_l0, $input_l0, $rk14_l @ AES final block - round 14 low 
4894e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 4895e1051a39Sopenharmony_ci 4896e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 4897e1051a39Sopenharmony_ci eor $input_h0, $input_h0, $rk14_h @ AES final block - round 14 high 4898e1051a39Sopenharmony_ci 4899e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 4900e1051a39Sopenharmony_ci 4901e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 4902e1051a39Sopenharmony_ci 4903e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 4904e1051a39Sopenharmony_ci fmov $res1d, $input_l0 @ AES final block - mov low 4905e1051a39Sopenharmony_ci 4906e1051a39Sopenharmony_ci fmov $res1.d[1], $input_h0 @ AES final block - mov high 4907e1051a39Sopenharmony_ci 4908e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 4909e1051a39Sopenharmony_ci 4910e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 4911e1051a39Sopenharmony_ci 4912e1051a39Sopenharmony_ci eor $res1b, $res1b, $ctr3b @ AES final block - result 4913e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 4914e1051a39Sopenharmony_ci 4915e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 4916e1051a39Sopenharmony_ci .L256_enc_blocks_less_than_1: @ blocks left <= 1 4917e1051a39Sopenharmony_ci 4918e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 4919e1051a39Sopenharmony_ci 4920e1051a39Sopenharmony_ci mvn $rk14_l, xzr @ rk14_l = 0xffffffffffffffff 4921e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 4922e1051a39Sopenharmony_ci 4923e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 4924e1051a39Sopenharmony_ci ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is 
to be stored 4925e1051a39Sopenharmony_ci 4926e1051a39Sopenharmony_ci mvn $rk14_h, xzr @ rk14_h = 0xffffffffffffffff 4927e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 4928e1051a39Sopenharmony_ci 4929e1051a39Sopenharmony_ci lsr $rk14_h, $rk14_h, $bit_length @ rk14_h is mask for top 64b of last block 4930e1051a39Sopenharmony_ci cmp $bit_length, #64 4931e1051a39Sopenharmony_ci 4932e1051a39Sopenharmony_ci csel $input_l0, $rk14_l, $rk14_h, lt 4933e1051a39Sopenharmony_ci csel $input_h0, $rk14_h, xzr, lt 4934e1051a39Sopenharmony_ci 4935e1051a39Sopenharmony_ci fmov $ctr0d, $input_l0 @ ctr0b is mask for last block 4936e1051a39Sopenharmony_ci 4937e1051a39Sopenharmony_ci fmov $ctr0.d[1], $input_h0 4938e1051a39Sopenharmony_ci 4939e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 4940e1051a39Sopenharmony_ci 4941e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 4942e1051a39Sopenharmony_ci 4943e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 4944e1051a39Sopenharmony_ci 4945e1051a39Sopenharmony_ci bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 4946e1051a39Sopenharmony_ci 4947e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 4948e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 4949e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 4950e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 4951e1051a39Sopenharmony_ci#else 4952e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 4953e1051a39Sopenharmony_ci#endif 4954e1051a39Sopenharmony_ci 4955e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 4956e1051a39Sopenharmony_ci 4957e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 4958e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 4959e1051a39Sopenharmony_ci 4960e1051a39Sopenharmony_ci pmull $t0.1q, 
$t0.1d, $h12k.1d @ GHASH final block - mid 4961e1051a39Sopenharmony_ci 4962e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 4963e1051a39Sopenharmony_ci 4964e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 4965e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 4966e1051a39Sopenharmony_ci 4967e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 4968e1051a39Sopenharmony_ci 4969e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 4970e1051a39Sopenharmony_ci 4971e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 4972e1051a39Sopenharmony_ci 4973e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4974e1051a39Sopenharmony_ci 4975e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4976e1051a39Sopenharmony_ci 4977e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 4978e1051a39Sopenharmony_ci 4979e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 4980e1051a39Sopenharmony_ci 4981e1051a39Sopenharmony_ci pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4982e1051a39Sopenharmony_ci 4983e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4984e1051a39Sopenharmony_ci 4985e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 4986e1051a39Sopenharmony_ci 4987e1051a39Sopenharmony_ci st1 { $res1b}, [$output_ptr] @ store all 16B 4988e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low 4989e1051a39Sopenharmony_ci 4990e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 4991e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 4992e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 4993e1051a39Sopenharmony_ci mov x0, $len 4994e1051a39Sopenharmony_ci st1 { 
$acc_l.16b }, [$current_tag] 4995e1051a39Sopenharmony_ci 4996e1051a39Sopenharmony_ci ldp x21, x22, [sp, #16] 4997e1051a39Sopenharmony_ci ldp x23, x24, [sp, #32] 4998e1051a39Sopenharmony_ci ldp d8, d9, [sp, #48] 4999e1051a39Sopenharmony_ci ldp d10, d11, [sp, #64] 5000e1051a39Sopenharmony_ci ldp d12, d13, [sp, #80] 5001e1051a39Sopenharmony_ci ldp d14, d15, [sp, #96] 5002e1051a39Sopenharmony_ci ldp x19, x20, [sp], #112 5003e1051a39Sopenharmony_ci ret 5004e1051a39Sopenharmony_ci 5005e1051a39Sopenharmony_ci.L256_enc_ret: 5006e1051a39Sopenharmony_ci mov w0, #0x0 5007e1051a39Sopenharmony_ci ret 5008e1051a39Sopenharmony_ci.size aes_gcm_enc_256_kernel,.-aes_gcm_enc_256_kernel 5009e1051a39Sopenharmony_ci___ 5010e1051a39Sopenharmony_ci 5011e1051a39Sopenharmony_ci{ 5012e1051a39Sopenharmony_cimy $t8="v4"; 5013e1051a39Sopenharmony_cimy $t8d="d4"; 5014e1051a39Sopenharmony_cimy $t9="v6"; 5015e1051a39Sopenharmony_cimy $t9d="d6"; 5016e1051a39Sopenharmony_ci######################################################################################### 5017e1051a39Sopenharmony_ci# size_t aes_gcm_dec_256_kernel(const unsigned char *in, 5018e1051a39Sopenharmony_ci# size_t len, 5019e1051a39Sopenharmony_ci# unsigned char *out, 5020e1051a39Sopenharmony_ci# const void *key, 5021e1051a39Sopenharmony_ci# unsigned char ivec[16], 5022e1051a39Sopenharmony_ci# u64 *Xi); 5023e1051a39Sopenharmony_ci# 5024e1051a39Sopenharmony_ci$code.=<<___; 5025e1051a39Sopenharmony_ci.global aes_gcm_dec_256_kernel 5026e1051a39Sopenharmony_ci.type aes_gcm_dec_256_kernel,%function 5027e1051a39Sopenharmony_ci.align 4 5028e1051a39Sopenharmony_ciaes_gcm_dec_256_kernel: 5029e1051a39Sopenharmony_ci cbz x1, .L256_dec_ret 5030e1051a39Sopenharmony_ci stp x19, x20, [sp, #-112]! 
5031e1051a39Sopenharmony_ci mov x16, x4 5032e1051a39Sopenharmony_ci mov x8, x5 5033e1051a39Sopenharmony_ci stp x21, x22, [sp, #16] 5034e1051a39Sopenharmony_ci stp x23, x24, [sp, #32] 5035e1051a39Sopenharmony_ci stp d8, d9, [sp, #48] 5036e1051a39Sopenharmony_ci stp d10, d11, [sp, #64] 5037e1051a39Sopenharmony_ci stp d12, d13, [sp, #80] 5038e1051a39Sopenharmony_ci stp d14, d15, [sp, #96] 5039e1051a39Sopenharmony_ci 5040e1051a39Sopenharmony_ci lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 5041e1051a39Sopenharmony_ci mov $len, $main_end_input_ptr 5042e1051a39Sopenharmony_ci ldp $ctr96_b64x, $ctr96_t32x, [$counter] @ ctr96_b64, ctr96_t32 5043e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5044e1051a39Sopenharmony_ci rev $ctr96_b64x, $ctr96_b64x 5045e1051a39Sopenharmony_ci rev $ctr96_t32x, $ctr96_t32x 5046e1051a39Sopenharmony_ci#endif 5047e1051a39Sopenharmony_ci ldp $rk14_l, $rk14_h, [$cc, #224] @ load rk14 5048e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5049e1051a39Sopenharmony_ci ror $rk14_h, $rk14_h, #32 5050e1051a39Sopenharmony_ci ror $rk14_l, $rk14_l, #32 5051e1051a39Sopenharmony_ci#endif 5052e1051a39Sopenharmony_ci ld1 {$rk0s}, [$cc], #16 @ load rk0 5053e1051a39Sopenharmony_ci sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 5054e1051a39Sopenharmony_ci 5055e1051a39Sopenharmony_ci ld1 {$rk1s}, [$cc], #16 @ load rk1 5056e1051a39Sopenharmony_ci and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 5057e1051a39Sopenharmony_ci 5058e1051a39Sopenharmony_ci add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 5059e1051a39Sopenharmony_ci ld1 {$rk2s}, [$cc], #16 @ load rk2 5060e1051a39Sopenharmony_ci 5061e1051a39Sopenharmony_ci lsr $rctr32x, $ctr96_t32x, #32 5062e1051a39Sopenharmony_ci ld1 {$rk3s}, [$cc], #16 @ load rk3 5063e1051a39Sopenharmony_ci orr $ctr96_t32w, $ctr96_t32w, $ctr96_t32w 5064e1051a39Sopenharmony_ci 5065e1051a39Sopenharmony_ci 
ld1 {$rk4s}, [$cc], #16 @ load rk4 5066e1051a39Sopenharmony_ci add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 5067e1051a39Sopenharmony_ci rev $rctr32w, $rctr32w @ rev_ctr32 5068e1051a39Sopenharmony_ci 5069e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ increment rev_ctr32 5070e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 3 5071e1051a39Sopenharmony_ci 5072e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 1 5073e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 1 5074e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 1 5075e1051a39Sopenharmony_ci 5076e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 1 5077e1051a39Sopenharmony_ci ld1 { $ctr0b}, [$counter] @ special case vector load initial counter so we can start first AES block as quickly as possible 5078e1051a39Sopenharmony_ci 5079e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 1 5080e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 2 5081e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 2 5082e1051a39Sopenharmony_ci 5083e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 2 5084e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 2 5085e1051a39Sopenharmony_ci 5086e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 2 5087e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 3 5088e1051a39Sopenharmony_ci 5089e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 3 5090e1051a39Sopenharmony_ci ld1 {$rk5s}, [$cc], #16 @ load rk5 5091e1051a39Sopenharmony_ci 5092e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 3 5093e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 3 5094e1051a39Sopenharmony_ci 5095e1051a39Sopenharmony_ci ld1 {$rk6s}, [$cc], #16 @ load rk6 5096e1051a39Sopenharmony_ci 5097e1051a39Sopenharmony_ci ld1 {$rk7s}, [$cc], #16 @ load rk7 5098e1051a39Sopenharmony_ci 5099e1051a39Sopenharmony_ci ld1 
{$rk8s}, [$cc], #16 @ load rk8 5100e1051a39Sopenharmony_ci 5101e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 5102e1051a39Sopenharmony_ci ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5103e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 5104e1051a39Sopenharmony_ci ext $h3b, $h3b, $h3b, #8 5105e1051a39Sopenharmony_ci#endif 5106e1051a39Sopenharmony_ci 5107e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 5108e1051a39Sopenharmony_ci ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5109e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 5110e1051a39Sopenharmony_ci ext $h4b, $h4b, $h4b, #8 5111e1051a39Sopenharmony_ci#endif 5112e1051a39Sopenharmony_ci 5113e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 5114e1051a39Sopenharmony_ci ldr $h2q, [$current_tag, #64] @ load h2l | h2h 5115e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 5116e1051a39Sopenharmony_ci ext $h2b, $h2b, $h2b, #8 5117e1051a39Sopenharmony_ci#endif 5118e1051a39Sopenharmony_ci 5119e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 5120e1051a39Sopenharmony_ci ld1 {$rk9s}, [$cc], #16 @ load rk9 5121e1051a39Sopenharmony_ci 5122e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 5123e1051a39Sopenharmony_ci 5124e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 5125e1051a39Sopenharmony_ci ld1 { $acc_lb}, [$current_tag] 5126e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 5127e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 5128e1051a39Sopenharmony_ci 5129e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 5130e1051a39Sopenharmony_ci ld1 {$rk10s}, [$cc], #16 @ load rk10 5131e1051a39Sopenharmony_ci 5132e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 5133e1051a39Sopenharmony_ci ld1 {$rk11s}, [$cc], #16 @ 
load rk11 5134e1051a39Sopenharmony_ci 5135e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 5136e1051a39Sopenharmony_ci ldr $h1q, [$current_tag, #32] @ load h1l | h1h 5137e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 5138e1051a39Sopenharmony_ci ext $h1b, $h1b, $h1b, #8 5139e1051a39Sopenharmony_ci#endif 5140e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 5141e1051a39Sopenharmony_ci ld1 {$rk12s}, [$cc], #16 @ load rk12 5142e1051a39Sopenharmony_ci 5143e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 5144e1051a39Sopenharmony_ci 5145e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 5146e1051a39Sopenharmony_ci 5147e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 5148e1051a39Sopenharmony_ci 5149e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 5150e1051a39Sopenharmony_ci 5151e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 5152e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 4 blocks 5153e1051a39Sopenharmony_ci 5154e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 5155e1051a39Sopenharmony_ci 5156e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 5157e1051a39Sopenharmony_ci 5158e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 5159e1051a39Sopenharmony_ci 5160e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 5161e1051a39Sopenharmony_ci 5162e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 5163e1051a39Sopenharmony_ci 5164e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 5165e1051a39Sopenharmony_ci 5166e1051a39Sopenharmony_ci aese 
$ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 5167e1051a39Sopenharmony_ci 5168e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 5169e1051a39Sopenharmony_ci 5170e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 5171e1051a39Sopenharmony_ci 5172e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 5173e1051a39Sopenharmony_ci 5174e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 5175e1051a39Sopenharmony_ci 5176e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 5177e1051a39Sopenharmony_ci 5178e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 5179e1051a39Sopenharmony_ci 5180e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 5181e1051a39Sopenharmony_ci 5182e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 5183e1051a39Sopenharmony_ci 5184e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 5185e1051a39Sopenharmony_ci 5186e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 5187e1051a39Sopenharmony_ci 5188e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 5189e1051a39Sopenharmony_ci 5190e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 5191e1051a39Sopenharmony_ci 5192e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 5193e1051a39Sopenharmony_ci 5194e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 5195e1051a39Sopenharmony_ci 5196e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 5197e1051a39Sopenharmony_ci ld1 {$rk13s}, [$cc], #16 @ load rk13 5198e1051a39Sopenharmony_ci 
5199e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 5200e1051a39Sopenharmony_ci 5201e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 5202e1051a39Sopenharmony_ci 5203e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 5204e1051a39Sopenharmony_ci 5205e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 5206e1051a39Sopenharmony_ci 5207e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 5208e1051a39Sopenharmony_ci 5209e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 5210e1051a39Sopenharmony_ci 5211e1051a39Sopenharmony_ci aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 5212e1051a39Sopenharmony_ci 5213e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 5214e1051a39Sopenharmony_ci 5215e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 5216e1051a39Sopenharmony_ci 5217e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 5218e1051a39Sopenharmony_ci 5219e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 5220e1051a39Sopenharmony_ci 5221e1051a39Sopenharmony_ci trn1 $acc_h.2d, $h3.2d, $h4.2d @ h4h | h3h 5222e1051a39Sopenharmony_ci 5223e1051a39Sopenharmony_ci trn2 $h34k.2d, $h3.2d, $h4.2d @ h4l | h3l 5224e1051a39Sopenharmony_ci 5225e1051a39Sopenharmony_ci trn1 $t0.2d, $h1.2d, $h2.2d @ h2h | h1h 5226e1051a39Sopenharmony_ci trn2 $h12k.2d, $h1.2d, $h2.2d @ h2l | h1l 5227e1051a39Sopenharmony_ci 5228e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 5229e1051a39Sopenharmony_ci 5230e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12 5231e1051a39Sopenharmony_ci 5232e1051a39Sopenharmony_ci 
aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 5233e1051a39Sopenharmony_ci 5234e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 5235e1051a39Sopenharmony_ci eor $h34k.16b, $h34k.16b, $acc_h.16b @ h4k | h3k 5236e1051a39Sopenharmony_ci 5237e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 1 - round 13 5238e1051a39Sopenharmony_ci 5239e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 2 - round 13 5240e1051a39Sopenharmony_ci eor $h12k.16b, $h12k.16b, $t0.16b @ h2k | h1k 5241e1051a39Sopenharmony_ci 5242e1051a39Sopenharmony_ci aese $ctr3b, $rk13 @ AES block 3 - round 13 5243e1051a39Sopenharmony_ci 5244e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 0 - round 13 5245e1051a39Sopenharmony_ci b.ge .L256_dec_tail @ handle tail 5246e1051a39Sopenharmony_ci 5247e1051a39Sopenharmony_ci ld1 {$res0b, $res1b}, [$input_ptr], #32 @ AES block 0,1 - load ciphertext 5248e1051a39Sopenharmony_ci 5249e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4 5250e1051a39Sopenharmony_ci 5251e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 0 - result 5252e1051a39Sopenharmony_ci 5253e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 1 - result 5254e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 1 5255e1051a39Sopenharmony_ci ld1 {$res2b}, [$input_ptr], #16 @ AES block 2 - load ciphertext 5256e1051a39Sopenharmony_ci 5257e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 0 - mov high 5258e1051a39Sopenharmony_ci 5259e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 0 - mov low 5260e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 0 5261e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4 5262e1051a39Sopenharmony_ci 5263e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4 5264e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4 5265e1051a39Sopenharmony_ci 5266e1051a39Sopenharmony_ci fmov $ctr0.d[1], 
$ctr32x @ CTR block 4 5267e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 5 5268e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 5 5269e1051a39Sopenharmony_ci 5270e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 1 - mov low 5271e1051a39Sopenharmony_ci 5272e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 5 5273e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 1 - mov high 5274e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES block 0 - round 14 high 5275e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5276e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5277e1051a39Sopenharmony_ci#endif 5278e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES block 0 - round 14 low 5279e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5280e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5281e1051a39Sopenharmony_ci#endif 5282e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 0 - store result 5283e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 5 5284e1051a39Sopenharmony_ci 5285e1051a39Sopenharmony_ci ld1 {$res3b}, [$input_ptr], #16 @ AES block 3 - load ciphertext 5286e1051a39Sopenharmony_ci 5287e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 5 5288e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 6 5289e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 6 5290e1051a39Sopenharmony_ci 5291e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk14_l @ AES block 1 - round 14 low 5292e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5293e1051a39Sopenharmony_ci rev $output_l1, $output_l1 5294e1051a39Sopenharmony_ci#endif 5295e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 6 5296e1051a39Sopenharmony_ci 5297e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk14_h @ AES block 1 - round 14 high 5298e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5299e1051a39Sopenharmony_ci rev $output_h1, 
$output_h1 5300e1051a39Sopenharmony_ci#endif 5301e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 1 - store result 5302e1051a39Sopenharmony_ci 5303e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 2 - result 5304e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 5305e1051a39Sopenharmony_ci b.ge .L256_dec_prepretail @ do prepretail 5306e1051a39Sopenharmony_ci 5307e1051a39Sopenharmony_ci .L256_dec_main_loop: @ main loop start 5308e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 5309e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5310e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 5311e1051a39Sopenharmony_ci 5312e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 5313e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 5314e1051a39Sopenharmony_ci 5315e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 5316e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 5317e1051a39Sopenharmony_ci 5318e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 5319e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 5320e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 5321e1051a39Sopenharmony_ci 5322e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 5323e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 5324e1051a39Sopenharmony_ci 5325e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 5326e1051a39Sopenharmony_ci mov $output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 5327e1051a39Sopenharmony_ci 5328e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 5329e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 
5330e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+7 5331e1051a39Sopenharmony_ci 5332e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 5333e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 5334e1051a39Sopenharmony_ci 5335e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 5336e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 5337e1051a39Sopenharmony_ci 5338e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 5339e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 5340e1051a39Sopenharmony_ci 5341e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 5342e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk14_h @ AES block 4k+2 - round 14 high 5343e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5344e1051a39Sopenharmony_ci rev $output_h2, $output_h2 5345e1051a39Sopenharmony_ci#endif 5346e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 5347e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 5348e1051a39Sopenharmony_ci 5349e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 5350e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 5351e1051a39Sopenharmony_ci 5352e1051a39Sopenharmony_ci aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 5353e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk14_l @ AES block 4k+2 - round 14 low 5354e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5355e1051a39Sopenharmony_ci rev $output_l2, $output_l2 5356e1051a39Sopenharmony_ci#endif 5357e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 5358e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 - store result 5359e1051a39Sopenharmony_ci 
5360e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 5361e1051a39Sopenharmony_ci 5362e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 5363e1051a39Sopenharmony_ci 5364e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 5365e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 5366e1051a39Sopenharmony_ci 5367e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 5368e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk14_l @ AES block 4k+3 - round 14 low 5369e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5370e1051a39Sopenharmony_ci rev $output_l3, $output_l3 5371e1051a39Sopenharmony_ci#endif 5372e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 5373e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk14_h @ AES block 4k+3 - round 14 high 5374e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5375e1051a39Sopenharmony_ci rev $output_h3, $output_h3 5376e1051a39Sopenharmony_ci#endif 5377e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 5378e1051a39Sopenharmony_ci 5379e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 5380e1051a39Sopenharmony_ci 5381e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 5382e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 5383e1051a39Sopenharmony_ci 5384e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 5385e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 5386e1051a39Sopenharmony_ci 5387e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 5388e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 5389e1051a39Sopenharmony_ci 5390e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 
5391e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 5392e1051a39Sopenharmony_ci 5393e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 5394e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 5395e1051a39Sopenharmony_ci 5396e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 5397e1051a39Sopenharmony_ci 5398e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 5399e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 5400e1051a39Sopenharmony_ci 5401e1051a39Sopenharmony_ci aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 5402e1051a39Sopenharmony_ci 5403e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 5404e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 5405e1051a39Sopenharmony_ci 5406e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 5407e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+8 5408e1051a39Sopenharmony_ci 5409e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 5410e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 5411e1051a39Sopenharmony_ci 5412e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 5413e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+8 5414e1051a39Sopenharmony_ci 5415e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 5416e1051a39Sopenharmony_ci 5417e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 5418e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 5419e1051a39Sopenharmony_ci 5420e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 5421e1051a39Sopenharmony_ci 
5422e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 5423e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 5424e1051a39Sopenharmony_ci 5425e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 5426e1051a39Sopenharmony_ci 5427e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 5428e1051a39Sopenharmony_ci 5429e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 8 5430e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 5431e1051a39Sopenharmony_ci 5432e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 5433e1051a39Sopenharmony_ci 5434e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 5435e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+8 5436e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 5437e1051a39Sopenharmony_ci 5438e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 5439e1051a39Sopenharmony_ci 5440e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 5441e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 5442e1051a39Sopenharmony_ci 5443e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 5444e1051a39Sopenharmony_ci 5445e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 5446e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 5447e1051a39Sopenharmony_ci 5448e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 5449e1051a39Sopenharmony_ci 5450e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 5451e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 5452e1051a39Sopenharmony_ci 
5453e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 5454e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 5455e1051a39Sopenharmony_ci 5456e1051a39Sopenharmony_ci aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 11 5457e1051a39Sopenharmony_ci 5458e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 5459e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 5460e1051a39Sopenharmony_ci 5461e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 5462e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 5463e1051a39Sopenharmony_ci 5464e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 12 5465e1051a39Sopenharmony_ci 5466e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5467e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 5468e1051a39Sopenharmony_ci 5469e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 5470e1051a39Sopenharmony_ci ld1 {$res0b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 5471e1051a39Sopenharmony_ci 5472e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 4k+4 - round 13 5473e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5474e1051a39Sopenharmony_ci 5475e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 5476e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 5477e1051a39Sopenharmony_ci 5478e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 5479e1051a39Sopenharmony_ci ld1 {$res1b}, [$input_ptr], #16 @ AES block 4k+5 - load ciphertext 5480e1051a39Sopenharmony_ci 5481e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n 
aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 5482e1051a39Sopenharmony_ci eor $ctr0b, $res0b, $ctr0b @ AES block 4k+4 - result 5483e1051a39Sopenharmony_ci 5484e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 11 5485e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], #16 @ AES block 4k+3 - store result 5486e1051a39Sopenharmony_ci 5487e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 5488e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 5489e1051a39Sopenharmony_ci 5490e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 5491e1051a39Sopenharmony_ci ld1 {$res2b}, [$input_ptr], #16 @ AES block 4k+6 - load ciphertext 5492e1051a39Sopenharmony_ci 5493e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 12 5494e1051a39Sopenharmony_ci ld1 {$res3b}, [$input_ptr], #16 @ AES block 4k+7 - load ciphertext 5495e1051a39Sopenharmony_ci 5496e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 11 5497e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 4k+4 - mov high 5498e1051a39Sopenharmony_ci 5499e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 5500e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 5501e1051a39Sopenharmony_ci 5502e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 4k+5 - round 13 5503e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 4k+4 - mov low 5504e1051a39Sopenharmony_ci 5505e1051a39Sopenharmony_ci aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 12 5506e1051a39Sopenharmony_ci fmov $ctr0d, $ctr96_b64x @ CTR block 4k+8 5507e1051a39Sopenharmony_ci 5508e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 11 5509e1051a39Sopenharmony_ci fmov $ctr0.d[1], $ctr32x 
@ CTR block 4k+8 5510e1051a39Sopenharmony_ci 5511e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5512e1051a39Sopenharmony_ci eor $ctr1b, $res1b, $ctr1b @ AES block 4k+5 - result 5513e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+9 5514e1051a39Sopenharmony_ci 5515e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 4k+6 - round 13 5516e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+9 5517e1051a39Sopenharmony_ci cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 5518e1051a39Sopenharmony_ci 5519e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+9 5520e1051a39Sopenharmony_ci 5521e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES block 4k+4 - round 14 low 5522e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5523e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5524e1051a39Sopenharmony_ci#endif 5525e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES block 4k+4 - round 14 high 5526e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5527e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5528e1051a39Sopenharmony_ci#endif 5529e1051a39Sopenharmony_ci mov $output_h1, $ctr1.d[1] @ AES block 4k+5 - mov high 5530e1051a39Sopenharmony_ci eor $ctr2b, $res2b, $ctr2b @ AES block 4k+6 - result 5531e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 5532e1051a39Sopenharmony_ci 5533e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 12 5534e1051a39Sopenharmony_ci mov $output_l1, $ctr1.d[0] @ AES block 4k+5 - mov low 5535e1051a39Sopenharmony_ci 5536e1051a39Sopenharmony_ci fmov $ctr1d, $ctr96_b64x @ CTR block 4k+9 5537e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5538e1051a39Sopenharmony_ci 5539e1051a39Sopenharmony_ci fmov $ctr1.d[1], $ctr32x @ CTR block 4k+9 5540e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+10 
5541e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+10 5542e1051a39Sopenharmony_ci 5543e1051a39Sopenharmony_ci aese $ctr3b, $rk13 @ AES block 4k+7 - round 13 5544e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+10 5545e1051a39Sopenharmony_ci 5546e1051a39Sopenharmony_ci rev64 $res1b, $res1b @ GHASH block 4k+5 5547e1051a39Sopenharmony_ci eor $output_h1, $output_h1, $rk14_h @ AES block 4k+5 - round 14 high 5548e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5549e1051a39Sopenharmony_ci rev $output_h1, $output_h1 5550e1051a39Sopenharmony_ci#endif 5551e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES block 4k+4 - store result 5552e1051a39Sopenharmony_ci 5553e1051a39Sopenharmony_ci eor $output_l1, $output_l1, $rk14_l @ AES block 4k+5 - round 14 low 5554e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5555e1051a39Sopenharmony_ci rev $output_l1, $output_l1 5556e1051a39Sopenharmony_ci#endif 5557e1051a39Sopenharmony_ci stp $output_l1, $output_h1, [$output_ptr], #16 @ AES block 4k+5 - store result 5558e1051a39Sopenharmony_ci 5559e1051a39Sopenharmony_ci rev64 $res0b, $res0b @ GHASH block 4k+4 5560e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 5561e1051a39Sopenharmony_ci b.lt .L256_dec_main_loop 5562e1051a39Sopenharmony_ci 5563e1051a39Sopenharmony_ci 5564e1051a39Sopenharmony_ci .L256_dec_prepretail: @ PREPRETAIL 5565e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5566e1051a39Sopenharmony_ci mov $output_l2, $ctr2.d[0] @ AES block 4k+2 - mov low 5567e1051a39Sopenharmony_ci eor $ctr3b, $res3b, $ctr3b @ AES block 4k+3 - result 5568e1051a39Sopenharmony_ci 5569e1051a39Sopenharmony_ci aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 0 5570e1051a39Sopenharmony_ci mov $output_h2, $ctr2.d[1] @ AES block 4k+2 - mov high 5571e1051a39Sopenharmony_ci 5572e1051a39Sopenharmony_ci aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 0 
5573e1051a39Sopenharmony_ci fmov $ctr2d, $ctr96_b64x @ CTR block 4k+6 5574e1051a39Sopenharmony_ci 5575e1051a39Sopenharmony_ci fmov $ctr2.d[1], $ctr32x @ CTR block 4k+6 5576e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w @ CTR block 4k+7 5577e1051a39Sopenharmony_ci eor $res0b, $res0b, $acc_lb @ PRE 1 5578e1051a39Sopenharmony_ci 5579e1051a39Sopenharmony_ci rev64 $res2b, $res2b @ GHASH block 4k+2 5580e1051a39Sopenharmony_ci orr $ctr32x, $ctr96_t32x, $ctr32x, lsl #32 @ CTR block 4k+7 5581e1051a39Sopenharmony_ci mov $output_l3, $ctr3.d[0] @ AES block 4k+3 - mov low 5582e1051a39Sopenharmony_ci 5583e1051a39Sopenharmony_ci aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 1 5584e1051a39Sopenharmony_ci mov $output_h3, $ctr3.d[1] @ AES block 4k+3 - mov high 5585e1051a39Sopenharmony_ci 5586e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH block 4k - low 5587e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH block 4k - mid 5588e1051a39Sopenharmony_ci fmov $ctr3d, $ctr96_b64x @ CTR block 4k+7 5589e1051a39Sopenharmony_ci 5590e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH block 4k - high 5591e1051a39Sopenharmony_ci fmov $ctr3.d[1], $ctr32x @ CTR block 4k+7 5592e1051a39Sopenharmony_ci 5593e1051a39Sopenharmony_ci aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 0 5594e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH block 4k - mid 5595e1051a39Sopenharmony_ci 5596e1051a39Sopenharmony_ci aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 1 5597e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH block 4k - mid 5598e1051a39Sopenharmony_ci 5599e1051a39Sopenharmony_ci pmull2 $t1.1q, $res1.2d, $h3.2d @ GHASH block 4k+1 - high 5600e1051a39Sopenharmony_ci 5601e1051a39Sopenharmony_ci aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 1 5602e1051a39Sopenharmony_ci rev64 $res3b, $res3b @ GHASH block 4k+3 5603e1051a39Sopenharmony_ci 5604e1051a39Sopenharmony_ci aese $ctr3b, $rk0 
\n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 0 5605e1051a39Sopenharmony_ci 5606e1051a39Sopenharmony_ci pmull $acc_m.1q, $t0.1d, $acc_m.1d @ GHASH block 4k - mid 5607e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t1.16b @ GHASH block 4k+1 - high 5608e1051a39Sopenharmony_ci 5609e1051a39Sopenharmony_ci pmull $t2.1q, $res1.1d, $h3.1d @ GHASH block 4k+1 - low 5610e1051a39Sopenharmony_ci 5611e1051a39Sopenharmony_ci aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 1 5612e1051a39Sopenharmony_ci mov $t3d, $res1.d[1] @ GHASH block 4k+1 - mid 5613e1051a39Sopenharmony_ci 5614e1051a39Sopenharmony_ci aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 2 5615e1051a39Sopenharmony_ci 5616e1051a39Sopenharmony_ci aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 2 5617e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t2.16b @ GHASH block 4k+1 - low 5618e1051a39Sopenharmony_ci 5619e1051a39Sopenharmony_ci aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 2 5620e1051a39Sopenharmony_ci 5621e1051a39Sopenharmony_ci aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 3 5622e1051a39Sopenharmony_ci mov $t6d, $res2.d[1] @ GHASH block 4k+2 - mid 5623e1051a39Sopenharmony_ci 5624e1051a39Sopenharmony_ci aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 2 5625e1051a39Sopenharmony_ci eor $t3.8b, $t3.8b, $res1.8b @ GHASH block 4k+1 - mid 5626e1051a39Sopenharmony_ci 5627e1051a39Sopenharmony_ci pmull $t5.1q, $res2.1d, $h2.1d @ GHASH block 4k+2 - low 5628e1051a39Sopenharmony_ci 5629e1051a39Sopenharmony_ci aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 4 5630e1051a39Sopenharmony_ci 5631e1051a39Sopenharmony_ci aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 3 5632e1051a39Sopenharmony_ci eor $t6.8b, $t6.8b, $res2.8b @ GHASH block 4k+2 - mid 5633e1051a39Sopenharmony_ci 5634e1051a39Sopenharmony_ci pmull $t3.1q, $t3.1d, $h34k.1d @ GHASH block 4k+1 - mid 
5635e1051a39Sopenharmony_ci 5636e1051a39Sopenharmony_ci aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 5 5637e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t5.16b @ GHASH block 4k+2 - low 5638e1051a39Sopenharmony_ci 5639e1051a39Sopenharmony_ci aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 4 5640e1051a39Sopenharmony_ci 5641e1051a39Sopenharmony_ci pmull2 $t7.1q, $res3.2d, $h1.2d @ GHASH block 4k+3 - high 5642e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t3.16b @ GHASH block 4k+1 - mid 5643e1051a39Sopenharmony_ci 5644e1051a39Sopenharmony_ci pmull2 $t4.1q, $res2.2d, $h2.2d @ GHASH block 4k+2 - high 5645e1051a39Sopenharmony_ci 5646e1051a39Sopenharmony_ci aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 5 5647e1051a39Sopenharmony_ci ins $t6.d[1], $t6.d[0] @ GHASH block 4k+2 - mid 5648e1051a39Sopenharmony_ci 5649e1051a39Sopenharmony_ci aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 3 5650e1051a39Sopenharmony_ci 5651e1051a39Sopenharmony_ci aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 3 5652e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t4.16b @ GHASH block 4k+2 - high 5653e1051a39Sopenharmony_ci 5654e1051a39Sopenharmony_ci pmull $t8.1q, $res3.1d, $h1.1d @ GHASH block 4k+3 - low 5655e1051a39Sopenharmony_ci 5656e1051a39Sopenharmony_ci aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 4 5657e1051a39Sopenharmony_ci mov $t9d, $res3.d[1] @ GHASH block 4k+3 - mid 5658e1051a39Sopenharmony_ci 5659e1051a39Sopenharmony_ci aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 4 5660e1051a39Sopenharmony_ci 5661e1051a39Sopenharmony_ci pmull2 $t6.1q, $t6.2d, $h12k.2d @ GHASH block 4k+2 - mid 5662e1051a39Sopenharmony_ci 5663e1051a39Sopenharmony_ci aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 5 5664e1051a39Sopenharmony_ci eor $t9.8b, $t9.8b, $res3.8b @ GHASH block 4k+3 - mid 5665e1051a39Sopenharmony_ci 5666e1051a39Sopenharmony_ci aese 
$ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 5 5667e1051a39Sopenharmony_ci 5668e1051a39Sopenharmony_ci aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 6 5669e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t6.16b @ GHASH block 4k+2 - mid 5670e1051a39Sopenharmony_ci 5671e1051a39Sopenharmony_ci aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 6 5672e1051a39Sopenharmony_ci 5673e1051a39Sopenharmony_ci aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 6 5674e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 5675e1051a39Sopenharmony_ci 5676e1051a39Sopenharmony_ci aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 6 5677e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $t8.16b @ GHASH block 4k+3 - low 5678e1051a39Sopenharmony_ci 5679e1051a39Sopenharmony_ci pmull $t9.1q, $t9.1d, $h12k.1d @ GHASH block 4k+3 - mid 5680e1051a39Sopenharmony_ci 5681e1051a39Sopenharmony_ci aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 7 5682e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $t7.16b @ GHASH block 4k+3 - high 5683e1051a39Sopenharmony_ci 5684e1051a39Sopenharmony_ci aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 7 5685e1051a39Sopenharmony_ci 5686e1051a39Sopenharmony_ci aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 7 5687e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ GHASH block 4k+3 - mid 5688e1051a39Sopenharmony_ci 5689e1051a39Sopenharmony_ci aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 8 5690e1051a39Sopenharmony_ci 5691e1051a39Sopenharmony_ci aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 7 5692e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 5693e1051a39Sopenharmony_ci 5694e1051a39Sopenharmony_ci aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 8 5695e1051a39Sopenharmony_ci 5696e1051a39Sopenharmony_ci aese $ctr0b, $rk8 \n aesmc $ctr0b, 
$ctr0b @ AES block 4k+4 - round 8 5697e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 5698e1051a39Sopenharmony_ci 5699e1051a39Sopenharmony_ci aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 8 5700e1051a39Sopenharmony_ci 5701e1051a39Sopenharmony_ci aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 9 5702e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 5703e1051a39Sopenharmony_ci 5704e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5705e1051a39Sopenharmony_ci 5706e1051a39Sopenharmony_ci aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 9 5707e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5708e1051a39Sopenharmony_ci 5709e1051a39Sopenharmony_ci aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 9 5710e1051a39Sopenharmony_ci 5711e1051a39Sopenharmony_ci aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 9 5712e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $mod_t.16b @ MODULO - fold into mid 5713e1051a39Sopenharmony_ci 5714e1051a39Sopenharmony_ci aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 10 5715e1051a39Sopenharmony_ci 5716e1051a39Sopenharmony_ci aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 10 5717e1051a39Sopenharmony_ci 5718e1051a39Sopenharmony_ci aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 10 5719e1051a39Sopenharmony_ci eor $output_h2, $output_h2, $rk14_h @ AES block 4k+2 - round 14 high 5720e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5721e1051a39Sopenharmony_ci rev $output_h2, $output_h2 5722e1051a39Sopenharmony_ci#endif 5723e1051a39Sopenharmony_ci aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 10 5724e1051a39Sopenharmony_ci eor $output_l3, $output_l3, $rk14_l @ AES block 4k+3 - round 14 low 5725e1051a39Sopenharmony_ci#ifdef 
__AARCH64EB__ 5726e1051a39Sopenharmony_ci rev $output_l3, $output_l3 5727e1051a39Sopenharmony_ci#endif 5728e1051a39Sopenharmony_ci aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 11 5729e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 5730e1051a39Sopenharmony_ci 5731e1051a39Sopenharmony_ci aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 11 5732e1051a39Sopenharmony_ci add $rctr32w, $rctr32w, #1 @ CTR block 4k+7 5733e1051a39Sopenharmony_ci 5734e1051a39Sopenharmony_ci aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 11 5735e1051a39Sopenharmony_ci eor $output_l2, $output_l2, $rk14_l @ AES block 4k+2 - round 14 low 5736e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5737e1051a39Sopenharmony_ci rev $output_l2, $output_l2 5738e1051a39Sopenharmony_ci#endif 5739e1051a39Sopenharmony_ci 5740e1051a39Sopenharmony_ci aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 4k+6 - round 12 5741e1051a39Sopenharmony_ci 5742e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5743e1051a39Sopenharmony_ci eor $output_h3, $output_h3, $rk14_h @ AES block 4k+3 - round 14 high 5744e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5745e1051a39Sopenharmony_ci rev $output_h3, $output_h3 5746e1051a39Sopenharmony_ci#endif 5747e1051a39Sopenharmony_ci 5748e1051a39Sopenharmony_ci aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 11 5749e1051a39Sopenharmony_ci stp $output_l2, $output_h2, [$output_ptr], #16 @ AES block 4k+2 - store result 5750e1051a39Sopenharmony_ci 5751e1051a39Sopenharmony_ci aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 4k+5 - round 12 5752e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5753e1051a39Sopenharmony_ci 5754e1051a39Sopenharmony_ci aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 4k+4 - round 12 5755e1051a39Sopenharmony_ci stp $output_l3, $output_h3, [$output_ptr], 
#16 @ AES block 4k+3 - store result 5756e1051a39Sopenharmony_ci 5757e1051a39Sopenharmony_ci aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 4k+7 - round 12 5758e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 5759e1051a39Sopenharmony_ci 5760e1051a39Sopenharmony_ci aese $ctr1b, $rk13 @ AES block 4k+5 - round 13 5761e1051a39Sopenharmony_ci 5762e1051a39Sopenharmony_ci aese $ctr0b, $rk13 @ AES block 4k+4 - round 13 5763e1051a39Sopenharmony_ci 5764e1051a39Sopenharmony_ci aese $ctr3b, $rk13 @ AES block 4k+7 - round 13 5765e1051a39Sopenharmony_ci 5766e1051a39Sopenharmony_ci aese $ctr2b, $rk13 @ AES block 4k+6 - round 13 5767e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 5768e1051a39Sopenharmony_ci .L256_dec_tail: @ TAIL 5769e1051a39Sopenharmony_ci 5770e1051a39Sopenharmony_ci sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 5771e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES block 4k+4 - load ciphertext 5772e1051a39Sopenharmony_ci 5773e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr0b @ AES block 4k+4 - result 5774e1051a39Sopenharmony_ci 5775e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES block 4k+4 - mov low 5776e1051a39Sopenharmony_ci 5777e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES block 4k+4 - mov high 5778e1051a39Sopenharmony_ci ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 5779e1051a39Sopenharmony_ci 5780e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #48 5781e1051a39Sopenharmony_ci 5782e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES block 4k+4 - round 14 low 5783e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5784e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5785e1051a39Sopenharmony_ci#endif 5786e1051a39Sopenharmony_ci 5787e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES block 4k+4 - round 14 high 5788e1051a39Sopenharmony_ci#ifdef 
__AARCH64EB__ 5789e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5790e1051a39Sopenharmony_ci#endif 5791e1051a39Sopenharmony_ci b.gt .L256_dec_blocks_more_than_3 5792e1051a39Sopenharmony_ci 5793e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 5794e1051a39Sopenharmony_ci mov $ctr3b, $ctr2b 5795e1051a39Sopenharmony_ci movi $acc_m.8b, #0 5796e1051a39Sopenharmony_ci 5797e1051a39Sopenharmony_ci movi $acc_l.8b, #0 5798e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #32 5799e1051a39Sopenharmony_ci 5800e1051a39Sopenharmony_ci movi $acc_h.8b, #0 5801e1051a39Sopenharmony_ci mov $ctr2b, $ctr1b 5802e1051a39Sopenharmony_ci b.gt .L256_dec_blocks_more_than_2 5803e1051a39Sopenharmony_ci 5804e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 5805e1051a39Sopenharmony_ci 5806e1051a39Sopenharmony_ci mov $ctr3b, $ctr1b 5807e1051a39Sopenharmony_ci cmp $main_end_input_ptr, #16 5808e1051a39Sopenharmony_ci b.gt .L256_dec_blocks_more_than_1 5809e1051a39Sopenharmony_ci 5810e1051a39Sopenharmony_ci sub $rctr32w, $rctr32w, #1 5811e1051a39Sopenharmony_ci b .L256_dec_blocks_less_than_1 5812e1051a39Sopenharmony_ci .L256_dec_blocks_more_than_3: @ blocks left > 3 5813e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-3 block 5814e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-2 block - load ciphertext 5815e1051a39Sopenharmony_ci 5816e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-3 block - store result 5817e1051a39Sopenharmony_ci 5818e1051a39Sopenharmony_ci mov $acc_md, $h34k.d[1] @ GHASH final-3 block - mid 5819e1051a39Sopenharmony_ci 5820e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 5821e1051a39Sopenharmony_ci 5822e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr1b @ AES final-2 block - result 5823e1051a39Sopenharmony_ci 5824e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-3 block - mid 5825e1051a39Sopenharmony_ci 5826e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-2 block - 
mov low 5827e1051a39Sopenharmony_ci 5828e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-2 block - mov high 5829e1051a39Sopenharmony_ci 5830e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 5831e1051a39Sopenharmony_ci 5832e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 5833e1051a39Sopenharmony_ci 5834e1051a39Sopenharmony_ci pmull2 $acc_h.1q, $res0.2d, $h4.2d @ GHASH final-3 block - high 5835e1051a39Sopenharmony_ci 5836e1051a39Sopenharmony_ci pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-3 block - mid 5837e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES final-2 block - round 14 low 5838e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5839e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5840e1051a39Sopenharmony_ci#endif 5841e1051a39Sopenharmony_ci 5842e1051a39Sopenharmony_ci pmull $acc_l.1q, $res0.1d, $h4.1d @ GHASH final-3 block - low 5843e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES final-2 block - round 14 high 5844e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5845e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5846e1051a39Sopenharmony_ci#endif 5847e1051a39Sopenharmony_ci .L256_dec_blocks_more_than_2: @ blocks left > 2 5848e1051a39Sopenharmony_ci 5849e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-2 block 5850e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final-1 block - load ciphertext 5851e1051a39Sopenharmony_ci 5852e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 5853e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-2 block - store result 5854e1051a39Sopenharmony_ci 5855e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr2b @ AES final-1 block - result 5856e1051a39Sopenharmony_ci 5857e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-2 block - mid 5858e1051a39Sopenharmony_ci 5859e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block 
- low 5860e1051a39Sopenharmony_ci 5861e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 5862e1051a39Sopenharmony_ci 5863e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 5864e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final-1 block - mov low 5865e1051a39Sopenharmony_ci 5866e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final-1 block - mov high 5867e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 5868e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress further partial tag feed in 5869e1051a39Sopenharmony_ci 5870e1051a39Sopenharmony_ci pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 5871e1051a39Sopenharmony_ci 5872e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 5873e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES final-1 block - round 14 low 5874e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5875e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5876e1051a39Sopenharmony_ci#endif 5877e1051a39Sopenharmony_ci 5878e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 5879e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES final-1 block - round 14 high 5880e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5881e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5882e1051a39Sopenharmony_ci#endif 5883e1051a39Sopenharmony_ci .L256_dec_blocks_more_than_1: @ blocks left > 1 5884e1051a39Sopenharmony_ci 5885e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr], #16 @ AES final-1 block - store result 5886e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final-1 block 5887e1051a39Sopenharmony_ci 5888e1051a39Sopenharmony_ci ld1 { $res1b}, [$input_ptr], #16 @ AES final block - load ciphertext 5889e1051a39Sopenharmony_ci 5890e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 5891e1051a39Sopenharmony_ci movi $t0.8b, #0 @ suppress 
further partial tag feed in 5892e1051a39Sopenharmony_ci 5893e1051a39Sopenharmony_ci mov $rk4d, $res0.d[1] @ GHASH final-1 block - mid 5894e1051a39Sopenharmony_ci 5895e1051a39Sopenharmony_ci eor $ctr0b, $res1b, $ctr3b @ AES final block - result 5896e1051a39Sopenharmony_ci 5897e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 5898e1051a39Sopenharmony_ci 5899e1051a39Sopenharmony_ci eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 5900e1051a39Sopenharmony_ci 5901e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 5902e1051a39Sopenharmony_ci mov $output_l0, $ctr0.d[0] @ AES final block - mov low 5903e1051a39Sopenharmony_ci 5904e1051a39Sopenharmony_ci ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 5905e1051a39Sopenharmony_ci 5906e1051a39Sopenharmony_ci mov $output_h0, $ctr0.d[1] @ AES final block - mov high 5907e1051a39Sopenharmony_ci 5908e1051a39Sopenharmony_ci pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 5909e1051a39Sopenharmony_ci eor $output_l0, $output_l0, $rk14_l @ AES final block - round 14 low 5910e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5911e1051a39Sopenharmony_ci rev $output_l0, $output_l0 5912e1051a39Sopenharmony_ci#endif 5913e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 5914e1051a39Sopenharmony_ci 5915e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 5916e1051a39Sopenharmony_ci 5917e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 5918e1051a39Sopenharmony_ci eor $output_h0, $output_h0, $rk14_h @ AES final block - round 14 high 5919e1051a39Sopenharmony_ci#ifdef __AARCH64EB__ 5920e1051a39Sopenharmony_ci rev $output_h0, $output_h0 5921e1051a39Sopenharmony_ci#endif 5922e1051a39Sopenharmony_ci .L256_dec_blocks_less_than_1: @ blocks left <= 1 5923e1051a39Sopenharmony_ci 5924e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 
5925e1051a39Sopenharmony_ci mvn $rk14_h, xzr @ rk14_h = 0xffffffffffffffff 5926e1051a39Sopenharmony_ci 5927e1051a39Sopenharmony_ci sub $bit_length, $bit_length, #128 @ bit_length -= 128 5928e1051a39Sopenharmony_ci mvn $rk14_l, xzr @ rk14_l = 0xffffffffffffffff 5929e1051a39Sopenharmony_ci 5930e1051a39Sopenharmony_ci ldp $end_input_ptr, $main_end_input_ptr, [$output_ptr] @ load existing bytes we need to not overwrite 5931e1051a39Sopenharmony_ci neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 5932e1051a39Sopenharmony_ci 5933e1051a39Sopenharmony_ci and $bit_length, $bit_length, #127 @ bit_length %= 128 5934e1051a39Sopenharmony_ci 5935e1051a39Sopenharmony_ci lsr $rk14_h, $rk14_h, $bit_length @ rk14_h is mask for top 64b of last block 5936e1051a39Sopenharmony_ci cmp $bit_length, #64 5937e1051a39Sopenharmony_ci 5938e1051a39Sopenharmony_ci csel $ctr32x, $rk14_l, $rk14_h, lt 5939e1051a39Sopenharmony_ci csel $ctr96_b64x, $rk14_h, xzr, lt 5940e1051a39Sopenharmony_ci 5941e1051a39Sopenharmony_ci fmov $ctr0d, $ctr32x @ ctr0b is mask for last block 5942e1051a39Sopenharmony_ci and $output_l0, $output_l0, $ctr32x 5943e1051a39Sopenharmony_ci 5944e1051a39Sopenharmony_ci mov $ctr0.d[1], $ctr96_b64x 5945e1051a39Sopenharmony_ci bic $end_input_ptr, $end_input_ptr, $ctr32x @ mask out low existing bytes 5946e1051a39Sopenharmony_ci 5947e1051a39Sopenharmony_ci#ifndef __AARCH64EB__ 5948e1051a39Sopenharmony_ci rev $ctr32w, $rctr32w 5949e1051a39Sopenharmony_ci#else 5950e1051a39Sopenharmony_ci mov $ctr32w, $rctr32w 5951e1051a39Sopenharmony_ci#endif 5952e1051a39Sopenharmony_ci 5953e1051a39Sopenharmony_ci bic $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x @ mask out high existing bytes 5954e1051a39Sopenharmony_ci 5955e1051a39Sopenharmony_ci orr $output_l0, $output_l0, $end_input_ptr 5956e1051a39Sopenharmony_ci 5957e1051a39Sopenharmony_ci and $output_h0, $output_h0, $ctr96_b64x 5958e1051a39Sopenharmony_ci 5959e1051a39Sopenharmony_ci orr $output_h0, 
$output_h0, $main_end_input_ptr 5960e1051a39Sopenharmony_ci 5961e1051a39Sopenharmony_ci and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 5962e1051a39Sopenharmony_ci 5963e1051a39Sopenharmony_ci rev64 $res0b, $res1b @ GHASH final block 5964e1051a39Sopenharmony_ci 5965e1051a39Sopenharmony_ci eor $res0b, $res0b, $t0.16b @ feed in partial tag 5966e1051a39Sopenharmony_ci 5967e1051a39Sopenharmony_ci pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 5968e1051a39Sopenharmony_ci 5969e1051a39Sopenharmony_ci mov $t0d, $res0.d[1] @ GHASH final block - mid 5970e1051a39Sopenharmony_ci 5971e1051a39Sopenharmony_ci eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 5972e1051a39Sopenharmony_ci 5973e1051a39Sopenharmony_ci pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 5974e1051a39Sopenharmony_ci 5975e1051a39Sopenharmony_ci pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 5976e1051a39Sopenharmony_ci 5977e1051a39Sopenharmony_ci eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 5978e1051a39Sopenharmony_ci 5979e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 5980e1051a39Sopenharmony_ci 5981e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 5982e1051a39Sopenharmony_ci movi $mod_constant.8b, #0xc2 5983e1051a39Sopenharmony_ci 5984e1051a39Sopenharmony_ci eor $t9.16b, $acc_lb, $acc_hb @ MODULO - karatsuba tidy up 5985e1051a39Sopenharmony_ci 5986e1051a39Sopenharmony_ci shl $mod_constantd, $mod_constantd, #56 @ mod_constant 5987e1051a39Sopenharmony_ci 5988e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $t9.16b @ MODULO - karatsuba tidy up 5989e1051a39Sopenharmony_ci 5990e1051a39Sopenharmony_ci pmull $mod_t.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5991e1051a39Sopenharmony_ci 5992e1051a39Sopenharmony_ci ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5993e1051a39Sopenharmony_ci 5994e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, 
$mod_t.16b @ MODULO - fold into mid 5995e1051a39Sopenharmony_ci 5996e1051a39Sopenharmony_ci eor $acc_mb, $acc_mb, $acc_hb @ MODULO - fold into mid 5997e1051a39Sopenharmony_ci 5998e1051a39Sopenharmony_ci pmull $mod_constant.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5999e1051a39Sopenharmony_ci 6000e1051a39Sopenharmony_ci ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6001e1051a39Sopenharmony_ci 6002e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $mod_constant.16b @ MODULO - fold into low 6003e1051a39Sopenharmony_ci 6004e1051a39Sopenharmony_ci stp $output_l0, $output_h0, [$output_ptr] 6005e1051a39Sopenharmony_ci 6006e1051a39Sopenharmony_ci str $ctr32w, [$counter, #12] @ store the updated counter 6007e1051a39Sopenharmony_ci 6008e1051a39Sopenharmony_ci eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low 6009e1051a39Sopenharmony_ci ext $acc_lb, $acc_lb, $acc_lb, #8 6010e1051a39Sopenharmony_ci rev64 $acc_lb, $acc_lb 6011e1051a39Sopenharmony_ci mov x0, $len 6012e1051a39Sopenharmony_ci st1 { $acc_l.16b }, [$current_tag] 6013e1051a39Sopenharmony_ci 6014e1051a39Sopenharmony_ci ldp x21, x22, [sp, #16] 6015e1051a39Sopenharmony_ci ldp x23, x24, [sp, #32] 6016e1051a39Sopenharmony_ci ldp d8, d9, [sp, #48] 6017e1051a39Sopenharmony_ci ldp d10, d11, [sp, #64] 6018e1051a39Sopenharmony_ci ldp d12, d13, [sp, #80] 6019e1051a39Sopenharmony_ci ldp d14, d15, [sp, #96] 6020e1051a39Sopenharmony_ci ldp x19, x20, [sp], #112 6021e1051a39Sopenharmony_ci ret 6022e1051a39Sopenharmony_ci 6023e1051a39Sopenharmony_ci.L256_dec_ret: 6024e1051a39Sopenharmony_ci mov w0, #0x0 6025e1051a39Sopenharmony_ci ret 6026e1051a39Sopenharmony_ci.size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel 6027e1051a39Sopenharmony_ci___ 6028e1051a39Sopenharmony_ci} 6029e1051a39Sopenharmony_ci} 6030e1051a39Sopenharmony_ci 6031e1051a39Sopenharmony_ci$code.=<<___; 6032e1051a39Sopenharmony_ci.rodata 6033e1051a39Sopenharmony_ci.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" 
.align 2
#endif
___

# Post-process the accumulated assembly text in $code and print it, one line
# at a time.  $flavour selects which dialect is emitted: anything matching
# /64/ takes the first branch, everything else the second.
if ($flavour =~ /64/) {			######## 64-bit code
    # Turn an operand pair of the form "qA#lo|hi, qB#lo|hi" into
    # "ins vX.d[i],vY.d[j]".  Register numbers of 8 and above are shifted
    # up by 8; "lo" selects lane 0 and "hi" lane 1.  Yields the false result
    # of the failed match when the operands do not have that shape.
    # NOTE(review): no call site is visible in this part of the file.
    sub unvmov {
        my ($arg) = @_;

        return $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o
            && sprintf("ins v%d.d[%d],v%d.d[%d]",
                       $1 < 8 ? $1 : $1 + 8, ($2 eq "lo") ? 0 : 1,
                       $3 < 8 ? $3 : $3 + 8, ($4 eq "lo") ? 0 : 1);
    }

    for my $line (split("\n", $code)) {
        $line =~ s/@\s/\/\//o;		# old->new style commentary
        print $line, "\n";
    }
} else {				######## 32-bit code
    # Rewrite the operands "qD, qM[i]" as "vdup.32 qD,dN[j]", where
    # N = 2*M + (i >> 1) and j = i & 1.  Yields the false result of the
    # failed match when the operands do not have that shape.
    sub unvdup32 {
        my ($arg) = @_;

        return $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o
            && sprintf("vdup.32 q%d,d%d[%d]", $1, 2 * $2 + ($3 >> 1), $3 & 1);
    }

    # Hand-encode a "pmull"/"pmull2" .p64 instruction whose operands are
    # three q registers, and return it as an INST(b0,b1,b2,b3) macro call
    # followed by the original mnemonic and operands as a comment.
    sub unvpmullp64 {
        my ($mnemonic, $arg) = @_;

        if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {
            # Copy the captures before any further pattern match runs.
            my ($first, $second, $third) = ($1, $2, $3);

            my $word = 0xf2a00e00;
            $word |= (($first  & 7) << 13) | (($first  & 8) << 19);
            $word |= (($second & 7) << 17) | (($second & 8) << 4);
            $word |= (($third  & 7) << 1)  | (($third  & 8) << 2);
            $word |= 0x00010001 if ($mnemonic =~ "2");

            # ARMv7 instructions are always encoded little-endian, so the
            # bytes are listed least-significant first.  The proper fix
            # would be the .inst directive, but older assemblers do not
            # implement it.
            my @bytes = map { ($word >> $_) & 0xff } (0, 8, 16, 24);

            return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
                           @bytes, $mnemonic, $arg;
        }
    }

    for my $line (split("\n", $code)) {
        $line =~ s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
        $line =~ s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
        $line =~ s/\/\/\s?/@ /o;			# new->old style commentary

        # fix up remaining new-style suffixes
        $line =~ s/\],#[0-9]+/]!/o;

        # At most one of these rewrites is applied to a line: the chain
        # stops at the first substitution that succeeds.
        $line =~ s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2 $1,#0/o
            or $line =~ s/vdup\.32\s+(.*)/unvdup32($1)/geo
            or $line =~ s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo
            or $line =~ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo
            or $line =~ s/^(\s+)b\./$1b/o
            or $line =~ s/^(\s+)ret/$1bx\tlr/o;

        # A conditional "mov.<cc>" becomes "mov<cc>", preceded by an
        # "it <cc>" line printed ahead of it.
        if ($line =~ s/^(\s+)mov\.([a-z]+)/$1mov$2/) {
            print " it $2\n";
        }

        $line =~ s/__AARCH64E([BL])__/__ARME$1__/go;
        print $line, "\n";
    }
}

close STDOUT or die "error closing STDOUT: $!"; # enforce flush