1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci#
10e1051a39Sopenharmony_ci#========================================================================
11e1051a39Sopenharmony_ci# Written by Fangming Fang <fangming.fang@arm.com> for the OpenSSL project,
12e1051a39Sopenharmony_ci# derived from https://github.com/ARM-software/AArch64cryptolib, original
13e1051a39Sopenharmony_ci# author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual
14e1051a39Sopenharmony_ci# licensed under OpenSSL and CRYPTOGAMS licenses depending on where you
15e1051a39Sopenharmony_ci# obtain it. For further details see http://www.openssl.org/~appro/cryptogams/.
16e1051a39Sopenharmony_ci#========================================================================
17e1051a39Sopenharmony_ci#
18e1051a39Sopenharmony_ci# Approach - assume we don't want to reload constants, so reserve ~half of vector register file for constants
19e1051a39Sopenharmony_ci#
20e1051a39Sopenharmony_ci# main loop to act on 4 16B blocks per iteration, and then do modulo of the accumulated intermediate hashes from the 4 blocks
21e1051a39Sopenharmony_ci#
22e1051a39Sopenharmony_ci#  ____________________________________________________
23e1051a39Sopenharmony_ci# |                                                    |
24e1051a39Sopenharmony_ci# | PRE                                                |
25e1051a39Sopenharmony_ci# |____________________________________________________|
26e1051a39Sopenharmony_ci# |                |                |                  |
27e1051a39Sopenharmony_ci# | CTR block 4k+8 | AES block 4k+4 | GHASH block 4k+0 |
28e1051a39Sopenharmony_ci# |________________|________________|__________________|
29e1051a39Sopenharmony_ci# |                |                |                  |
30e1051a39Sopenharmony_ci# | CTR block 4k+9 | AES block 4k+5 | GHASH block 4k+1 |
31e1051a39Sopenharmony_ci# |________________|________________|__________________|
32e1051a39Sopenharmony_ci# |                |                |                  |
33e1051a39Sopenharmony_ci# | CTR block 4k+10| AES block 4k+6 | GHASH block 4k+2 |
34e1051a39Sopenharmony_ci# |________________|________________|__________________|
35e1051a39Sopenharmony_ci# |                |                |                  |
36e1051a39Sopenharmony_ci# | CTR block 4k+11| AES block 4k+7 | GHASH block 4k+3 |
37e1051a39Sopenharmony_ci# |________________|____(mostly)____|__________________|
38e1051a39Sopenharmony_ci# |                                                    |
39e1051a39Sopenharmony_ci# | MODULO                                             |
40e1051a39Sopenharmony_ci# |____________________________________________________|
41e1051a39Sopenharmony_ci#
42e1051a39Sopenharmony_ci# PRE:
#     Ensure the previously generated intermediate hash is aligned and merged with the result for GHASH 4k+0
44e1051a39Sopenharmony_ci# EXT low_acc, low_acc, low_acc, #8
45e1051a39Sopenharmony_ci# EOR res_curr (4k+0), res_curr (4k+0), low_acc
46e1051a39Sopenharmony_ci#
47e1051a39Sopenharmony_ci# CTR block:
48e1051a39Sopenharmony_ci#     Increment and byte reverse counter in scalar registers and transfer to SIMD registers
49e1051a39Sopenharmony_ci# REV     ctr32, rev_ctr32
50e1051a39Sopenharmony_ci# ORR     ctr64, constctr96_top32, ctr32, LSL #32
51e1051a39Sopenharmony_ci# INS     ctr_next.d[0], constctr96_bottom64      // Keeping this in scalar registers to free up space in SIMD RF
52e1051a39Sopenharmony_ci# INS     ctr_next.d[1], ctr64X
53e1051a39Sopenharmony_ci# ADD     rev_ctr32, #1
54e1051a39Sopenharmony_ci#
55e1051a39Sopenharmony_ci# AES block:
#     Do AES encryption/decryption on CTR block X and EOR it with input block X. A 256-bit key (round keys k0..k14) is used as the example below.
#     A small trick is used here: the input is loaded into scalar registers, EORed with the last round key there, and only then transferred to a SIMD register.
#     Given how constrained we are in ASIMD registers, this is quite important.
59e1051a39Sopenharmony_ci#
60e1051a39Sopenharmony_ci#     Encrypt:
61e1051a39Sopenharmony_ci# LDR     input_low, [ input_ptr  ], #8
62e1051a39Sopenharmony_ci# LDR     input_high, [ input_ptr  ], #8
63e1051a39Sopenharmony_ci# EOR     input_low, k14_low
64e1051a39Sopenharmony_ci# EOR     input_high, k14_high
65e1051a39Sopenharmony_ci# INS     res_curr.d[0], input_low
66e1051a39Sopenharmony_ci# INS     res_curr.d[1], input_high
67e1051a39Sopenharmony_ci# AESE    ctr_curr, k0; AESMC ctr_curr, ctr_curr
68e1051a39Sopenharmony_ci# AESE    ctr_curr, k1; AESMC ctr_curr, ctr_curr
69e1051a39Sopenharmony_ci# AESE    ctr_curr, k2; AESMC ctr_curr, ctr_curr
70e1051a39Sopenharmony_ci# AESE    ctr_curr, k3; AESMC ctr_curr, ctr_curr
71e1051a39Sopenharmony_ci# AESE    ctr_curr, k4; AESMC ctr_curr, ctr_curr
72e1051a39Sopenharmony_ci# AESE    ctr_curr, k5; AESMC ctr_curr, ctr_curr
73e1051a39Sopenharmony_ci# AESE    ctr_curr, k6; AESMC ctr_curr, ctr_curr
74e1051a39Sopenharmony_ci# AESE    ctr_curr, k7; AESMC ctr_curr, ctr_curr
75e1051a39Sopenharmony_ci# AESE    ctr_curr, k8; AESMC ctr_curr, ctr_curr
76e1051a39Sopenharmony_ci# AESE    ctr_curr, k9; AESMC ctr_curr, ctr_curr
77e1051a39Sopenharmony_ci# AESE    ctr_curr, k10; AESMC ctr_curr, ctr_curr
78e1051a39Sopenharmony_ci# AESE    ctr_curr, k11; AESMC ctr_curr, ctr_curr
79e1051a39Sopenharmony_ci# AESE    ctr_curr, k12; AESMC ctr_curr, ctr_curr
80e1051a39Sopenharmony_ci# AESE    ctr_curr, k13
81e1051a39Sopenharmony_ci# EOR     res_curr, res_curr, ctr_curr
82e1051a39Sopenharmony_ci# ST1     { res_curr.16b  }, [ output_ptr  ], #16
83e1051a39Sopenharmony_ci#
84e1051a39Sopenharmony_ci#     Decrypt:
85e1051a39Sopenharmony_ci# AESE    ctr_curr, k0; AESMC ctr_curr, ctr_curr
86e1051a39Sopenharmony_ci# AESE    ctr_curr, k1; AESMC ctr_curr, ctr_curr
87e1051a39Sopenharmony_ci# AESE    ctr_curr, k2; AESMC ctr_curr, ctr_curr
88e1051a39Sopenharmony_ci# AESE    ctr_curr, k3; AESMC ctr_curr, ctr_curr
89e1051a39Sopenharmony_ci# AESE    ctr_curr, k4; AESMC ctr_curr, ctr_curr
90e1051a39Sopenharmony_ci# AESE    ctr_curr, k5; AESMC ctr_curr, ctr_curr
91e1051a39Sopenharmony_ci# AESE    ctr_curr, k6; AESMC ctr_curr, ctr_curr
92e1051a39Sopenharmony_ci# AESE    ctr_curr, k7; AESMC ctr_curr, ctr_curr
93e1051a39Sopenharmony_ci# AESE    ctr_curr, k8; AESMC ctr_curr, ctr_curr
94e1051a39Sopenharmony_ci# AESE    ctr_curr, k9; AESMC ctr_curr, ctr_curr
95e1051a39Sopenharmony_ci# AESE    ctr_curr, k10; AESMC ctr_curr, ctr_curr
96e1051a39Sopenharmony_ci# AESE    ctr_curr, k11; AESMC ctr_curr, ctr_curr
97e1051a39Sopenharmony_ci# AESE    ctr_curr, k12; AESMC ctr_curr, ctr_curr
98e1051a39Sopenharmony_ci# AESE    ctr_curr, k13
99e1051a39Sopenharmony_ci# LDR     res_curr, [ input_ptr  ], #16
100e1051a39Sopenharmony_ci# EOR     res_curr, res_curr, ctr_curr
101e1051a39Sopenharmony_ci# MOV     output_low, res_curr.d[0]
102e1051a39Sopenharmony_ci# MOV     output_high, res_curr.d[1]
103e1051a39Sopenharmony_ci# EOR     output_low, k14_low
104e1051a39Sopenharmony_ci# EOR     output_high, k14_high
105e1051a39Sopenharmony_ci# STP     output_low, output_high, [ output_ptr  ], #16
106e1051a39Sopenharmony_ci#
107e1051a39Sopenharmony_ci# GHASH block X:
108e1051a39Sopenharmony_ci#     do 128b karatsuba polynomial multiplication on block
#     We only have 64b->128b polynomial multipliers, so naively we need to do four 64b multiplies to generate one
#     128b multiplication:
112e1051a39Sopenharmony_ci#     Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64
113e1051a39Sopenharmony_ci#
114e1051a39Sopenharmony_ci#     The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies:
115e1051a39Sopenharmony_ci#     Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64
116e1051a39Sopenharmony_ci#
117e1051a39Sopenharmony_ci#     There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are
118e1051a39Sopenharmony_ci#     multiplying with "twisted" powers of H
119e1051a39Sopenharmony_ci#
120e1051a39Sopenharmony_ci# Note: We can PMULL directly into the acc_x in first GHASH of the loop
121e1051a39Sopenharmony_ci# Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical
122e1051a39Sopenharmony_ci#       path latency dominates the performance
123e1051a39Sopenharmony_ci#
124e1051a39Sopenharmony_ci#       This has a knock on effect on register pressure, so we have to be a bit more clever with our temporary registers
125e1051a39Sopenharmony_ci#       than indicated here
126e1051a39Sopenharmony_ci# REV64   res_curr, res_curr
127e1051a39Sopenharmony_ci# INS     t_m.d[0], res_curr.d[1]
128e1051a39Sopenharmony_ci# EOR     t_m.8B, t_m.8B, res_curr.8B
129e1051a39Sopenharmony_ci# PMULL2  t_h, res_curr, HX
130e1051a39Sopenharmony_ci# PMULL   t_l, res_curr, HX
131e1051a39Sopenharmony_ci# PMULL   t_m, t_m, HX_k
132e1051a39Sopenharmony_ci# EOR     acc_h, acc_h, t_h
133e1051a39Sopenharmony_ci# EOR     acc_l, acc_l, t_l
134e1051a39Sopenharmony_ci# EOR     acc_m, acc_m, t_m
135e1051a39Sopenharmony_ci#
136e1051a39Sopenharmony_ci# MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them
137e1051a39Sopenharmony_ci#         There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo
138e1051a39Sopenharmony_ci#         with a reversed constant
139e1051a39Sopenharmony_ci# EOR     acc_m, acc_m, acc_h
140e1051a39Sopenharmony_ci# EOR     acc_m, acc_m, acc_l                     // Finish off karatsuba processing
141e1051a39Sopenharmony_ci# PMULL   t_mod, acc_h, mod_constant
142e1051a39Sopenharmony_ci# EXT     acc_h, acc_h, acc_h, #8
143e1051a39Sopenharmony_ci# EOR     acc_m, acc_m, acc_h
144e1051a39Sopenharmony_ci# EOR     acc_m, acc_m, t_mod
145e1051a39Sopenharmony_ci# PMULL   acc_h, acc_m, mod_constant
146e1051a39Sopenharmony_ci# EXT     acc_m, acc_m, acc_m, #8
147e1051a39Sopenharmony_ci# EOR     acc_l, acc_l, acc_h
148e1051a39Sopenharmony_ci# EOR     acc_l, acc_l, acc_m
149e1051a39Sopenharmony_ci
# Command line: [flavour] [output-file]
#   $output  - the last argument, taken only if it looks like a file name
#              (i.e. it ends in an extension); otherwise undef.
#   $flavour - the first argument, taken only if it does not look like a
#              file name (contains no '.'); otherwise undef.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first in the directory this script was
# started from, then in ../../perlasm relative to that directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate  ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

# From here on everything printed to STDOUT is piped through the translator,
# run with the same perl interpreter ($^X) that is running this script.
# Check the result of open: if the pipe cannot be created we must stop
# instead of silently generating nothing.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
160e1051a39Sopenharmony_ci
# Register names for the argument block. The first four names are x0-x3
# exactly as they arrive in the call; $counter and $cc name x16 and x8, which
# the aes_gcm_enc_128_kernel prologue fills from x4 and x5 respectively.
($input_ptr,$bit_length,$output_ptr,$current_tag)=map { "x$_" } 0..3;
($counter,$cc)=qw(x16 x8);
167e1051a39Sopenharmony_ci
168e1051a39Sopenharmony_ci{
# ---------------------------------------------------------------------------
# General-purpose (scalar) registers
# ---------------------------------------------------------------------------
# Input limits plus the low/high 64-bit halves of block 0 (x4-x7).
my ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map { "x$_" } 4..7;
# Low/high halves of blocks 1-3. The input_* and output_* names below refer
# to the same registers (x19-x24); likewise output_l0/h0 share x6/x7 with
# input_l0/h0.
my ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map { "x$_" } 19..24;
my ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map { "x$_" } 19..24;
my ($output_l0,$output_h0)=map { "x$_" } 6..7;

# Counter bookkeeping, the last round key (rk10) kept in scalar registers,
# and the byte length. The *w names are the 32-bit views of the x registers
# with the same number (w9/x9, w11/x11, w12/x12).
my ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk10_l,$rk10_h,$len)=map { "x$_" } 9..15;
my ($ctr32w,$ctr96_t32w,$rctr32w)=qw(w9 w11 w12);

# ---------------------------------------------------------------------------
# SIMD registers
# ---------------------------------------------------------------------------
# v0-v3: counter blocks, v4-v7: result blocks; each in .16b, plain vN, dN
# and (results only) qN spelling.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map { "v$_.16b" } 0..7;
my ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map { "v$_" } 0..7;
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map { "d$_" } 0..7;
my ($res0q,$res1q,$res2q,$res3q)=map { "q$_" } 4..7;

# v9-v11: GHASH accumulators (high, middle, low).
my ($acc_hb,$acc_mb,$acc_lb)=map { "v$_.16b" } 9..11;
my ($acc_h,$acc_m,$acc_l)=map { "v$_" } 9..11;
my ($acc_hd,$acc_md,$acc_ld)=map { "d$_" } 9..11;

# v12-v15: H1..H4, v16/v17: the h12k / h34k values.
my ($h1,$h2,$h3,$h4,$h12k,$h34k)=map { "v$_" } 12..17;
my ($h1q,$h2q,$h3q,$h4q)=map { "q$_" } 12..15;
my ($h1b,$h2b,$h3b,$h4b)=map { "v$_.16b" } 12..15;

# Temporaries. Several names share one physical register:
#   v8  : t0, t4 (and mod_constant below)
#   v28 : t1, t5      v29 : t2, t8      v30 : t3, t9
#   v31 : t6 (and mod_t below)          v4  : t7 (also res0 / ctr_t0)
my ($t0,$t0d)=qw(v8 d8);

my ($t1,$t2,$t3)=map { "v$_" } 28..30;
my ($t1d,$t2d,$t3d)=map { "d$_" } 28..30;

my ($t4,$t4d,$t5,$t5d,$t6,$t6d)=qw(v8 d8 v28 d28 v31 d31);

my ($t7,$t7d,$t8,$t8d,$t9,$t9d)=qw(v4 d4 v29 d29 v30 d30);

# ctr_t0..ctr_t3 are further names for v4-v7 (the same registers as
# res0..res3 above).
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map { "v$_" } 4..7;
my ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map { "d$_" } 4..7;
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map { "v$_.16b" } 4..7;

# Modulo-reduction constant and its scratch register (v8 / v31).
my ($mod_constantd,$mod_constant,$mod_t)=qw(d8 v8 v31);

# v18-v27: round keys rk0..rk9 in .16b, .4s and qN spelling, plus a few
# extra spellings of rk2, rk3 and rk4.
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9)=map { "v$_.16b" } 18..27;
my ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s)=map { "v$_.4s" } 18..27;
my ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q)=map { "q$_" } 18..27;
my ($rk2q1,$rk3q1,$rk4v,$rk4d)=qw(v20.1q v21.1q v22 d22);

# Start the generated file: include arm_arch.h and open an
# "#if __ARM_MAX_ARCH__>=8" guard.
$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=8
___

# Flavour-specific directives. A flavour containing "64" gets the AArch64
# .arch line; any other flavour (including none) gets the NEON/Thumb-2
# preamble with the INST() byte-emitting macros.
if ($flavour =~ /64/) {
$code.=".arch   armv8-a+crypto\n.text\n";
} else {
$code.=<<___;
.fpu    neon
#ifdef __thumb2__
.syntax        unified
.thumb
# define INST(a,b,c,d) $_byte  c,0xef,a,b
#else
.code  32
# define INST(a,b,c,d) $_byte  a,b,c,0xf2
#endif

.text
___
}
246e1051a39Sopenharmony_ci
247e1051a39Sopenharmony_ci#########################################################################################
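# size_t aes_gcm_enc_128_kernel(const unsigned char *in,    // x0
#                               size_t len,                 // x1: length in bits (shifted right by 3 to get bytes)
#                               unsigned char *out,         // x2
#                               u64 *Xi,                    // x3: current tag, with the H values loaded from offsets past it
#                               unsigned char ivec[16],     // x4: counter block
#                               const void *key);           // x5: AES round keys
# NOTE: the argument order above follows the register usage in the code below
# (x3 = tag/hash table, x4 = counter, x5 = key schedule).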
254e1051a39Sopenharmony_ci#
255e1051a39Sopenharmony_ci$code.=<<___;
256e1051a39Sopenharmony_ci.global aes_gcm_enc_128_kernel
257e1051a39Sopenharmony_ci.type   aes_gcm_enc_128_kernel,%function
258e1051a39Sopenharmony_ci.align  4
259e1051a39Sopenharmony_ciaes_gcm_enc_128_kernel:
260e1051a39Sopenharmony_ci	cbz     x1, .L128_enc_ret
261e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
262e1051a39Sopenharmony_ci	mov     x16, x4
263e1051a39Sopenharmony_ci	mov     x8, x5
264e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
265e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
266e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
267e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
268e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
269e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
270e1051a39Sopenharmony_ci
271e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]              @ ctr96_b64, ctr96_t32
272e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
273e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
274e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
275e1051a39Sopenharmony_ci#endif
276e1051a39Sopenharmony_ci	ldp     $rk10_l, $rk10_h, [$cc, #160]                     @ load rk10
277e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
278e1051a39Sopenharmony_ci	ror     $rk10_l, $rk10_l, #32
279e1051a39Sopenharmony_ci	ror     $rk10_h, $rk10_h, #32
280e1051a39Sopenharmony_ci#endif
281e1051a39Sopenharmony_ci	ld1     {$acc_lb}, [$current_tag]
282e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
283e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
284e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3              @ byte_len
285e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
286e1051a39Sopenharmony_ci
287e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16								  @ load rk0
288e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3   @ end_input_ptr
289e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1      @ byte_len - 1
290e1051a39Sopenharmony_ci
291e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
292e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                        @ load h4l | h4h
293e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
294e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
295e1051a39Sopenharmony_ci#endif
296e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 1
297e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                                @ rev_ctr32
298e1051a39Sopenharmony_ci
299e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ increment rev_ctr32
300e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
301e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16								  @ load rk1
302e1051a39Sopenharmony_ci
303e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 1
304e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 1
305e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 3
306e1051a39Sopenharmony_ci
307e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 1
308e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
309e1051a39Sopenharmony_ci
310e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 1
311e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 2
312e1051a39Sopenharmony_ci
313e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 2
314e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 2
315e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 2
316e1051a39Sopenharmony_ci
317e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 2
318e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 3
319e1051a39Sopenharmony_ci
320e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 3
321e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16								  @ load rk2
322e1051a39Sopenharmony_ci
323e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 3
324e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 3
325e1051a39Sopenharmony_ci
326e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                         @ load h3l | h3h
327e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
328e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
329e1051a39Sopenharmony_ci#endif
330e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 0
331e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16								  @ load rk3
332e1051a39Sopenharmony_ci
333e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 0
334e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                         @ load h1l | h1h
335e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
336e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
337e1051a39Sopenharmony_ci#endif
338e1051a39Sopenharmony_ci
339e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 0
340e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16								  @ load rk4
341e1051a39Sopenharmony_ci
342e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 0
343e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16								  @ load rk5
344e1051a39Sopenharmony_ci
345e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 1
346e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                      @ h4l | h3l
347e1051a39Sopenharmony_ci
348e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 1
349e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16								  @ load rk6
350e1051a39Sopenharmony_ci
351e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 1
352e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16								  @ load rk7
353e1051a39Sopenharmony_ci
354e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 1
355e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                      @ h4h | h3h
356e1051a39Sopenharmony_ci
357e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 2
358e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16								  @ load rk8
359e1051a39Sopenharmony_ci
360e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 2
361e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                         @ load h2l | h2h
362e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
363e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
364e1051a39Sopenharmony_ci#endif
365e1051a39Sopenharmony_ci
366e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 2
367e1051a39Sopenharmony_ci
368e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 2
369e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                  @ h4k | h3k
370e1051a39Sopenharmony_ci
371e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 3
372e1051a39Sopenharmony_ci
373e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 3
374e1051a39Sopenharmony_ci
375e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 3
376e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16								  @ load rk9
377e1051a39Sopenharmony_ci
378e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 3
379e1051a39Sopenharmony_ci
380e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0    @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
381e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                      @ h2l | h1l
382e1051a39Sopenharmony_ci
383e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 4
384e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
385e1051a39Sopenharmony_ci
386e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 4
387e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 4 blocks
388e1051a39Sopenharmony_ci
389e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 4
390e1051a39Sopenharmony_ci
391e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 5
392e1051a39Sopenharmony_ci
393e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 5
394e1051a39Sopenharmony_ci
395e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 5
396e1051a39Sopenharmony_ci
397e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 6
398e1051a39Sopenharmony_ci
399e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 4
400e1051a39Sopenharmony_ci
401e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 6
402e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                      @ h2h | h1h
403e1051a39Sopenharmony_ci
404e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 6
405e1051a39Sopenharmony_ci
406e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 5
407e1051a39Sopenharmony_ci
408e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 7
409e1051a39Sopenharmony_ci
410e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 7
411e1051a39Sopenharmony_ci
412e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 6
413e1051a39Sopenharmony_ci
414e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 7
415e1051a39Sopenharmony_ci
416e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 8
417e1051a39Sopenharmony_ci
418e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 7
419e1051a39Sopenharmony_ci
420e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 8
421e1051a39Sopenharmony_ci
422e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 8
423e1051a39Sopenharmony_ci
424e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 8
425e1051a39Sopenharmony_ci
426e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 2 - round 9
427e1051a39Sopenharmony_ci
428e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 0 - round 9
429e1051a39Sopenharmony_ci
430e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                     @ h2k | h1k
431e1051a39Sopenharmony_ci
432e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 1 - round 9
433e1051a39Sopenharmony_ci
434e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 3 - round 9
435e1051a39Sopenharmony_ci	b.ge    .L128_enc_tail                                    @ handle tail
436e1051a39Sopenharmony_ci
437e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]            @ AES block 0 - load plaintext
438e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
439e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
440e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
441e1051a39Sopenharmony_ci#endif
442e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]           @ AES block 2 - load plaintext
443e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
444e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2
445e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2
446e1051a39Sopenharmony_ci#endif
447e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]           @ AES block 1 - load plaintext
448e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
449e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1
450e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1
451e1051a39Sopenharmony_ci#endif
452e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]           @ AES block 3 - load plaintext
453e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
454e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3
455e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3
456e1051a39Sopenharmony_ci#endif
457e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES block 0 - round 10 low
458e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES block 0 - round 10 high
459e1051a39Sopenharmony_ci
460e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk10_l                     @ AES block 2 - round 10 low
461e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 0 - mov low
462e1051a39Sopenharmony_ci
463e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk10_l                     @ AES block 1 - round 10 low
464e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk10_h                     @ AES block 2 - round 10 high
465e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 0 - mov high
466e1051a39Sopenharmony_ci
467e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                               @ AES block 1 - mov low
468e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk10_h                     @ AES block 1 - round 10 high
469e1051a39Sopenharmony_ci
470e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk10_l                     @ AES block 3 - round 10 low
471e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                           @ AES block 1 - mov high
472e1051a39Sopenharmony_ci
473e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                               @ AES block 2 - mov low
474e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk10_h                     @ AES block 3 - round 10 high
475e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4
476e1051a39Sopenharmony_ci
477e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                           @ AES block 2 - mov high
478e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4
479e1051a39Sopenharmony_ci
480e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                          @ AES block 0 - result
481e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4
482e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4
483e1051a39Sopenharmony_ci
484e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4
485e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 5
486e1051a39Sopenharmony_ci
487e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                          @ AES block 1 - result
488e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 5
489e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 5
490e1051a39Sopenharmony_ci
491e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 5
492e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                       @ AES input_ptr update
493e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 5
494e1051a39Sopenharmony_ci
495e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                               @ AES block 3 - mov low
496e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 6
497e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                     @ AES block 0 - store result
498e1051a39Sopenharmony_ci
499e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                           @ AES block 3 - mov high
500e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 6
501e1051a39Sopenharmony_ci
502e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 6
503e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                          @ AES block 2 - result
504e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES block 1 - store result
505e1051a39Sopenharmony_ci
506e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 6
507e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 8 blocks
508e1051a39Sopenharmony_ci
509e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 6
510e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 7
511e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                     @ AES block 2 - store result
512e1051a39Sopenharmony_ci
513e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 7
514e1051a39Sopenharmony_ci
515e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                          @ AES block 3 - result
516e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                     @ AES block 3 - store result
517e1051a39Sopenharmony_ci	b.ge    .L128_enc_prepretail                              @ do prepretail
518e1051a39Sopenharmony_ci
519e1051a39Sopenharmony_ci	.L128_enc_main_loop:                                      @ main loop start - each pass encrypts 4 blocks and GHASHes the 4 previous result blocks
520e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]           @ AES block 4k+7 - load plaintext
521e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
522e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3
523e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3
524e1051a39Sopenharmony_ci#endif
525e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k (only t0 is free)
526e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2 (t0, t1, and t2 free)
527e1051a39Sopenharmony_ci
528e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
529e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
530e1051a39Sopenharmony_ci
531e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
532e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+1 (t0 and t1 free)
533e1051a39Sopenharmony_ci
534e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
535e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
536e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
537e1051a39Sopenharmony_ci
538e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
539e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
540e1051a39Sopenharmony_ci
541e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
542e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
543e1051a39Sopenharmony_ci
544e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
545e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
546e1051a39Sopenharmony_ci
547e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
548e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk10_h                     @ AES block 4k+7 - round 10 high
549e1051a39Sopenharmony_ci
550e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
551e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
552e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]            @ AES block 4k+4 - load plaintext
553e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
554e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
555e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
556e1051a39Sopenharmony_ci#endif
557e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
558e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+8
559e1051a39Sopenharmony_ci
560e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
561e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
562e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+8
563e1051a39Sopenharmony_ci
564e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
565e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+8
566e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
567e1051a39Sopenharmony_ci
568e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
569e1051a39Sopenharmony_ci
570e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
571e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
572e1051a39Sopenharmony_ci
573e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
574e1051a39Sopenharmony_ci
575e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
576e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
577e1051a39Sopenharmony_ci
578e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
579e1051a39Sopenharmony_ci
580e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
581e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
582e1051a39Sopenharmony_ci
583e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
584e1051a39Sopenharmony_ci
585e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
586e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
587e1051a39Sopenharmony_ci
588e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
589e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES block 4k+4 - round 10 high
590e1051a39Sopenharmony_ci
591e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
592e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
593e1051a39Sopenharmony_ci
594e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
595e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
596e1051a39Sopenharmony_ci
597e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
598e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES block 4k+4 - round 10 low
599e1051a39Sopenharmony_ci
600e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
601e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
602e1051a39Sopenharmony_ci
603e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
604e1051a39Sopenharmony_ci
605e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
606e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
607e1051a39Sopenharmony_ci
608e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
609e1051a39Sopenharmony_ci
610e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
611e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2                           @ mod_constant - 0xc2 in each byte (the shl by 56 below leaves 0xc2 in the top byte)
612e1051a39Sopenharmony_ci
613e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
614e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
615e1051a39Sopenharmony_ci
616e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
617e1051a39Sopenharmony_ci
618e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
619e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
620e1051a39Sopenharmony_ci
621e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
622e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
623e1051a39Sopenharmony_ci
624e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
625e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]           @ AES block 4k+5 - load plaintext
626e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
627e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1
628e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1
629e1051a39Sopenharmony_ci#endif
630e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
631e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
632e1051a39Sopenharmony_ci
633e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
634e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]           @ AES block 4k+6 - load plaintext
635e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
636e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2
637e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2
638e1051a39Sopenharmony_ci#endif
639e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
640e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
641e1051a39Sopenharmony_ci
642e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
643e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk10_l                     @ AES block 4k+5 - round 10 low
644e1051a39Sopenharmony_ci
645e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
646e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
647e1051a39Sopenharmony_ci
648e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
649e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk10_l                     @ AES block 4k+7 - round 10 low
650e1051a39Sopenharmony_ci
651e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
652e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
653e1051a39Sopenharmony_ci
654e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 4k+4 - mov low
655e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
656e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 4k+4 - mov high
657e1051a39Sopenharmony_ci
658e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                       @ AES input_ptr update
659e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                               @ AES block 4k+7 - mov low
660e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
661e1051a39Sopenharmony_ci
662e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
663e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                               @ AES block 4k+5 - mov low
664e1051a39Sopenharmony_ci
665e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
666e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
667e1051a39Sopenharmony_ci
668e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
669e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk10_h                     @ AES block 4k+5 - round 10 high
670e1051a39Sopenharmony_ci
671e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
672e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                           @ AES block 4k+5 - mov high
673e1051a39Sopenharmony_ci
674e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
675e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                           @ AES block 4k+7 - mov high
676e1051a39Sopenharmony_ci
677e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
678e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ LOOP CONTROL
679e1051a39Sopenharmony_ci
680e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
681e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
682e1051a39Sopenharmony_ci
683e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 4k+4 - round 9
684e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk10_l                     @ AES block 4k+6 - round 10 low
685e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk10_h                     @ AES block 4k+6 - round 10 high
686e1051a39Sopenharmony_ci
687e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
688e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                               @ AES block 4k+6 - mov low
689e1051a39Sopenharmony_ci
690e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 4k+5 - round 9
691e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                           @ AES block 4k+6 - mov high
692e1051a39Sopenharmony_ci
693e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
694e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                          @ AES block 4k+4 - result
695e1051a39Sopenharmony_ci
696e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4k+8
697e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
698e1051a39Sopenharmony_ci
699e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4k+8
700e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+9
701e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
702e1051a39Sopenharmony_ci
703e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
704e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                          @ AES block 4k+5 - result
705e1051a39Sopenharmony_ci
706e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+9
707e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+9
708e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 4k+9
709e1051a39Sopenharmony_ci
710e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d            @ MODULO - mid 64b align with low
711e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 4k+9
712e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+10
713e1051a39Sopenharmony_ci
714e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 4k+6 - round 9
715e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                     @ AES block 4k+4 - store result
716e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                          @ AES block 4k+6 - result
717e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+10
718e1051a39Sopenharmony_ci
719e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 4k+7 - round 9
720e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+10
721e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
722e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+10
723e1051a39Sopenharmony_ci
724e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                         @ MODULO - fold into low
725e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES block 4k+5 - store result
726e1051a39Sopenharmony_ci
727e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+10
728e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                     @ AES block 4k+6 - store result
729e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+11
730e1051a39Sopenharmony_ci
731e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+11
732e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                          @ AES block 4k+7 - result
733e1051a39Sopenharmony_ci
734e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
735e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                     @ AES block 4k+7 - store result
736e1051a39Sopenharmony_ci	b.lt    .L128_enc_main_loop                               @ loop again while the LOOP CONTROL cmp found input_ptr below main_end_input_ptr
737e1051a39Sopenharmony_ci
738e1051a39Sopenharmony_ci	.L128_enc_prepretail:                                     @ PREPRETAIL - GHASH the 4 previous result blocks and run AES rounds 0-9 on the next 4 counters (no loads or stores here)
739e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k (only t0 is free)
740e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
741e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+1 (t0 and t1 free)
742e1051a39Sopenharmony_ci
743e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
744e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
745e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
746e1051a39Sopenharmony_ci
747e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
748e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2 (t0, t1, and t2 free)
749e1051a39Sopenharmony_ci
750e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
751e1051a39Sopenharmony_ci
752e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
753e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
754e1051a39Sopenharmony_ci
755e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
756e1051a39Sopenharmony_ci
757e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
758e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
759e1051a39Sopenharmony_ci
760e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
761e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
762e1051a39Sopenharmony_ci
763e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
764e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
765e1051a39Sopenharmony_ci
766e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
767e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
768e1051a39Sopenharmony_ci
769e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
770e1051a39Sopenharmony_ci
771e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
772e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
773e1051a39Sopenharmony_ci
774e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
775e1051a39Sopenharmony_ci
776e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
777e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
778e1051a39Sopenharmony_ci
779e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
780e1051a39Sopenharmony_ci
781e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
782e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
783e1051a39Sopenharmony_ci
784e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
785e1051a39Sopenharmony_ci
786e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
787e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
788e1051a39Sopenharmony_ci
789e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
790e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
791e1051a39Sopenharmony_ci
792e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
793e1051a39Sopenharmony_ci
794e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
795e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
796e1051a39Sopenharmony_ci
797e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
798e1051a39Sopenharmony_ci
799e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
800e1051a39Sopenharmony_ci
801e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
802e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
803e1051a39Sopenharmony_ci
804e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
805e1051a39Sopenharmony_ci
806e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
807e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2                           @ mod_constant - 0xc2 in each byte (the shl by 56 below leaves 0xc2 in the top byte)
808e1051a39Sopenharmony_ci
809e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
810e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
811e1051a39Sopenharmony_ci
812e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
813e1051a39Sopenharmony_ci
814e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
815e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
816e1051a39Sopenharmony_ci
817e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
818e1051a39Sopenharmony_ci
819e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
820e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
821e1051a39Sopenharmony_ci
822e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
823e1051a39Sopenharmony_ci
824e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
825e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
826e1051a39Sopenharmony_ci
827e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
828e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
829e1051a39Sopenharmony_ci
830e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
831e1051a39Sopenharmony_ci
832e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_h.1d, $mod_constant.1d               @ MODULO - top 64b align with mid
833e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - karatsuba tidy up
834e1051a39Sopenharmony_ci
835e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
836e1051a39Sopenharmony_ci
837e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
838e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
839e1051a39Sopenharmony_ci
840e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
841e1051a39Sopenharmony_ci
842e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
843e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_lb                         @ MODULO - karatsuba tidy up
844e1051a39Sopenharmony_ci
845e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
846e1051a39Sopenharmony_ci
847e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
848e1051a39Sopenharmony_ci
849e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
850e1051a39Sopenharmony_ci
851e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
852e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t1.16b                         @ MODULO - fold into mid
853e1051a39Sopenharmony_ci
854e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
855e1051a39Sopenharmony_ci
856e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
857e1051a39Sopenharmony_ci
858e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
859e1051a39Sopenharmony_ci
860e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
861e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
862e1051a39Sopenharmony_ci
863e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
864e1051a39Sopenharmony_ci
865e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
866e1051a39Sopenharmony_ci
867e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
868e1051a39Sopenharmony_ci
869e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_m.1d, $mod_constant.1d               @ MODULO - mid 64b align with low
870e1051a39Sopenharmony_ci
871e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
872e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
873e1051a39Sopenharmony_ci
874e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
875e1051a39Sopenharmony_ci
876e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
877e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t1.16b                         @ MODULO - fold into low
878e1051a39Sopenharmony_ci
879e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
880e1051a39Sopenharmony_ci
881e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 4k+7 - round 9
882e1051a39Sopenharmony_ci
883e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
884e1051a39Sopenharmony_ci
885e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 4k+4 - round 9
886e1051a39Sopenharmony_ci
887e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 4k+5 - round 9
888e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
889e1051a39Sopenharmony_ci
890e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 4k+6 - round 9
891e1051a39Sopenharmony_ci	.L128_enc_tail:                                           @ TAIL
892e1051a39Sopenharmony_ci
893e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr   @ main_end_input_ptr is number of bytes left to process
894e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16           @ AES block 4k+4 - load plaintext
895e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
896e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
897e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
898e1051a39Sopenharmony_ci#endif
899e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
900e1051a39Sopenharmony_ci
901e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                     @ prepare final partial tag
902e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES block 4k+4 - round 10 low
903e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES block 4k+4 - round 10 high
904e1051a39Sopenharmony_ci
905e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 4k+4 - mov low
906e1051a39Sopenharmony_ci
907e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 4k+4 - mov high
908e1051a39Sopenharmony_ci
909e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t0b, $ctr0b                          @ AES block 4k+4 - result
910e1051a39Sopenharmony_ci
911e1051a39Sopenharmony_ci	b.gt    .L128_enc_blocks_more_than_3
912e1051a39Sopenharmony_ci
913e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
914e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
915e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
916e1051a39Sopenharmony_ci
917e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
918e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
919e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
920e1051a39Sopenharmony_ci
921e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
922e1051a39Sopenharmony_ci	b.gt    .L128_enc_blocks_more_than_2
923e1051a39Sopenharmony_ci
924e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
925e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
926e1051a39Sopenharmony_ci
927e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
928e1051a39Sopenharmony_ci	b.gt    .L128_enc_blocks_more_than_1
929e1051a39Sopenharmony_ci
930e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
931e1051a39Sopenharmony_ci	b       .L128_enc_blocks_less_than_1
932e1051a39Sopenharmony_ci	.L128_enc_blocks_more_than_3:                             @ blocks left >  3
933e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES final-3 block  - store result
934e1051a39Sopenharmony_ci
935e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16           @ AES final-2 block - load input low & high
936e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
937e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
938e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
939e1051a39Sopenharmony_ci#endif
940e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-3 block
941e1051a39Sopenharmony_ci
942e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
943e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES final-2 block - round 10 high
944e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES final-2 block - round 10 low
945e1051a39Sopenharmony_ci
946e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                 @ AES final-2 block - mov low
947e1051a39Sopenharmony_ci
948e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
949e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                             @ AES final-2 block - mov high
950e1051a39Sopenharmony_ci
951e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH final-3 block - low
952e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-3 block - mid
953e1051a39Sopenharmony_ci
954e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH final-3 block - high
955e1051a39Sopenharmony_ci
956e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH final-3 block - mid
957e1051a39Sopenharmony_ci
958e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr1b                            @ AES final-2 block - result
959e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-3 block - mid
960e1051a39Sopenharmony_ci
961e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                    @ GHASH final-3 block - mid
962e1051a39Sopenharmony_ci	.L128_enc_blocks_more_than_2:                             @ blocks left >  2
963e1051a39Sopenharmony_ci
964e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES final-2 block - store result
965e1051a39Sopenharmony_ci
966e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-2 block
967e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16           @ AES final-1 block - load input low & high
968e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
969e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
970e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
971e1051a39Sopenharmony_ci#endif
972e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
973e1051a39Sopenharmony_ci
974e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES final-1 block - round 10 low
975e1051a39Sopenharmony_ci
976e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                 @ AES final-1 block - mov low
977e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES final-1 block - round 10 high
978e1051a39Sopenharmony_ci
979e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                          @ GHASH final-2 block - high
980e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                             @ AES final-1 block - mov high
981e1051a39Sopenharmony_ci
982e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-2 block - mid
983e1051a39Sopenharmony_ci
984e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                          @ GHASH final-2 block - low
985e1051a39Sopenharmony_ci
986e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-2 block - high
987e1051a39Sopenharmony_ci
988e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-2 block - mid
989e1051a39Sopenharmony_ci
990e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr2b                            @ AES final-1 block - result
991e1051a39Sopenharmony_ci
992e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-2 block - low
993e1051a39Sopenharmony_ci
994e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                      @ GHASH final-2 block - mid
995e1051a39Sopenharmony_ci
996e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
997e1051a39Sopenharmony_ci
998e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-2 block - mid
999e1051a39Sopenharmony_ci	.L128_enc_blocks_more_than_1:                             @ blocks left >  1
1000e1051a39Sopenharmony_ci
1001e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES final-1 block - store result
1002e1051a39Sopenharmony_ci
1003e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-1 block
1004e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16           @ AES final block - load input low & high
1005e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1006e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
1007e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
1008e1051a39Sopenharmony_ci#endif
1009e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1010e1051a39Sopenharmony_ci
1011e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk10_h                     @ AES final block - round 10 high
1012e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk10_l                     @ AES final block - round 10 low
1013e1051a39Sopenharmony_ci
1014e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                 @ AES final block - mov low
1015e1051a39Sopenharmony_ci
1016e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                          @ GHASH final-1 block - high
1017e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                             @ AES final block - mov high
1018e1051a39Sopenharmony_ci
1019e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-1 block - mid
1020e1051a39Sopenharmony_ci
1021e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                          @ GHASH final-1 block - low
1022e1051a39Sopenharmony_ci
1023e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-1 block - mid
1024e1051a39Sopenharmony_ci
1025e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr3b                            @ AES final block - result
1026e1051a39Sopenharmony_ci
1027e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                            @ GHASH final-1 block - mid
1028e1051a39Sopenharmony_ci
1029e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                      @ GHASH final-1 block - mid
1030e1051a39Sopenharmony_ci
1031e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-1 block - low
1032e1051a39Sopenharmony_ci
1033e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-1 block - high
1034e1051a39Sopenharmony_ci
1035e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-1 block - mid
1036e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
1037e1051a39Sopenharmony_ci	.L128_enc_blocks_less_than_1:                             @ blocks left <= 1
1038e1051a39Sopenharmony_ci
1039e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
1040e1051a39Sopenharmony_ci	mvn     $rk10_l, xzr                                      @ rk10_l = 0xffffffffffffffff
1041e1051a39Sopenharmony_ci
1042e1051a39Sopenharmony_ci	mvn     $rk10_h, xzr                                      @ rk10_h = 0xffffffffffffffff
1043e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                    @ bit_length -= 128
1044e1051a39Sopenharmony_ci
1045e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                          @ bit_length = 128 - #bits in input (in range [1,128])
1046e1051a39Sopenharmony_ci
1047e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
1048e1051a39Sopenharmony_ci
1049e1051a39Sopenharmony_ci	lsr     $rk10_h, $rk10_h, $bit_length                     @ rk10_h is mask for top 64b of last block
1050e1051a39Sopenharmony_ci	cmp     $bit_length, #64
1051e1051a39Sopenharmony_ci
1052e1051a39Sopenharmony_ci	csel    $input_l0, $rk10_l, $rk10_h, lt
1053e1051a39Sopenharmony_ci	csel    $input_h0, $rk10_h, xzr, lt
1054e1051a39Sopenharmony_ci
1055e1051a39Sopenharmony_ci	fmov    $ctr0d, $input_l0                                 @ ctr0b is mask for last block
1056e1051a39Sopenharmony_ci
1057e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $input_h0
1058e1051a39Sopenharmony_ci
1059e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                            @ possibly partial last block has zeroes in highest bits
1060e1051a39Sopenharmony_ci
1061e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final block
1062e1051a39Sopenharmony_ci
1063e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1064e1051a39Sopenharmony_ci
1065e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH final block - mid
1066e1051a39Sopenharmony_ci
1067e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                          @ GHASH final block - low
1068e1051a39Sopenharmony_ci	ld1     { $rk0}, [$output_ptr]                            @ load existing bytes where the possibly partial last block is to be stored
1069e1051a39Sopenharmony_ci
1070e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH final block - mid
1071e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1072e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
1073e1051a39Sopenharmony_ci#else
1074e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
1075e1051a39Sopenharmony_ci#endif
1076e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                          @ GHASH final block - high
1077e1051a39Sopenharmony_ci
1078e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                          @ GHASH final block - mid
1079e1051a39Sopenharmony_ci
1080e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final block - low
1081e1051a39Sopenharmony_ci
1082e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final block - high
1083e1051a39Sopenharmony_ci
1084e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                         @ GHASH final block - mid
1085e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
1086e1051a39Sopenharmony_ci
1087e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
1088e1051a39Sopenharmony_ci
1089e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
1090e1051a39Sopenharmony_ci
1091e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
1092e1051a39Sopenharmony_ci
1093e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
1094e1051a39Sopenharmony_ci
1095e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
1096e1051a39Sopenharmony_ci
1097e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
1098e1051a39Sopenharmony_ci
1099e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
1100e1051a39Sopenharmony_ci
1101e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d            @ MODULO - mid 64b align with low
1102e1051a39Sopenharmony_ci
1103e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
1104e1051a39Sopenharmony_ci
1105e1051a39Sopenharmony_ci	bif     $res1b, $rk0, $ctr0b                              @ insert existing bytes in top end of result before storing
1106e1051a39Sopenharmony_ci
1107e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                         @ MODULO - fold into low
1108e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr]                          @ store all 16B
1109e1051a39Sopenharmony_ci
1110e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                          @ store the updated counter
1111e1051a39Sopenharmony_ci
1112e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
1113e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
1114e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
1115e1051a39Sopenharmony_ci	mov     x0, $len
1116e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
1117e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
1118e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
1119e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
1120e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
1121e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
1122e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
1123e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
1124e1051a39Sopenharmony_ci	ret
1125e1051a39Sopenharmony_ci
1126e1051a39Sopenharmony_ci.L128_enc_ret:
1127e1051a39Sopenharmony_ci	mov w0, #0x0
1128e1051a39Sopenharmony_ci	ret
1129e1051a39Sopenharmony_ci.size aes_gcm_enc_128_kernel,.-aes_gcm_enc_128_kernel
1130e1051a39Sopenharmony_ci___
1131e1051a39Sopenharmony_ci
1132e1051a39Sopenharmony_ci#########################################################################################
1133e1051a39Sopenharmony_ci# size_t aes_gcm_dec_128_kernel(const unsigned char *in,
1134e1051a39Sopenharmony_ci#                               size_t len,                /* length in bits, not bytes */
1135e1051a39Sopenharmony_ci#                               unsigned char *out,
1136e1051a39Sopenharmony_ci#                               u64 *Xi,
1137e1051a39Sopenharmony_ci#                               unsigned char ivec[16],
1138e1051a39Sopenharmony_ci#                               const void *key);
1139e1051a39Sopenharmony_ci#
1140e1051a39Sopenharmony_ci$code.=<<___;
1141e1051a39Sopenharmony_ci.global aes_gcm_dec_128_kernel
1142e1051a39Sopenharmony_ci.type   aes_gcm_dec_128_kernel,%function
1143e1051a39Sopenharmony_ci.align  4
1144e1051a39Sopenharmony_ciaes_gcm_dec_128_kernel:
1145e1051a39Sopenharmony_ci	cbz     x1, .L128_dec_ret
1146e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
1147e1051a39Sopenharmony_ci	mov     x16, x4
1148e1051a39Sopenharmony_ci	mov     x8, x5
1149e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
1150e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
1151e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
1152e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
1153e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
1154e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
1155e1051a39Sopenharmony_ci
1156e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3              @ byte_len
1157e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
1158e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]              @ ctr96_b64, ctr96_t32
1159e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1160e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
1161e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
1162e1051a39Sopenharmony_ci#endif
1163e1051a39Sopenharmony_ci	ldp     $rk10_l, $rk10_h, [$cc, #160]                     @ load rk10
1164e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1165e1051a39Sopenharmony_ci	ror     $rk10_h, $rk10_h, 32
1166e1051a39Sopenharmony_ci	ror     $rk10_l, $rk10_l, 32
1167e1051a39Sopenharmony_ci#endif
1168e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1      @ byte_len - 1
1169e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16                                @ load rk0
1170e1051a39Sopenharmony_ci
1171e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
1172e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
1173e1051a39Sopenharmony_ci
1174e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                         @ load h2l | h2h
1175e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1176e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
1177e1051a39Sopenharmony_ci#endif
1178e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
1179e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 2
1180e1051a39Sopenharmony_ci
1181e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16                                @ load rk1
1182e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
1183e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                                @ rev_ctr32
1184e1051a39Sopenharmony_ci
1185e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 1
1186e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ increment rev_ctr32
1187e1051a39Sopenharmony_ci
1188e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 0
1189e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 1
1190e1051a39Sopenharmony_ci
1191e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 1
1192e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16                                @ load rk2
1193e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 1
1194e1051a39Sopenharmony_ci
1195e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 1
1196e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 2
1197e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 2
1198e1051a39Sopenharmony_ci
1199e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 1
1200e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 2
1201e1051a39Sopenharmony_ci
1202e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 2
1203e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 3
1204e1051a39Sopenharmony_ci
1205e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 3
1206e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 3
1207e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 3
1208e1051a39Sopenharmony_ci
1209e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 3
1210e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3   @ end_input_ptr
1211e1051a39Sopenharmony_ci
1212e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 0
1213e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16                                @ load rk3
1214e1051a39Sopenharmony_ci
1215e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 2
1216e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16                                @ load rk4
1217e1051a39Sopenharmony_ci
1218e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 0
1219e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16                                @ load rk5
1220e1051a39Sopenharmony_ci
1221e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 1
1222e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16                                @ load rk6
1223e1051a39Sopenharmony_ci
1224e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 0
1225e1051a39Sopenharmony_ci
1226e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 1
1227e1051a39Sopenharmony_ci
1228e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 2
1229e1051a39Sopenharmony_ci
1230e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 1
1231e1051a39Sopenharmony_ci	ld1     { $acc_lb}, [$current_tag]
1232e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
1233e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
1234e1051a39Sopenharmony_ci
1235e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 3
1236e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16                                @ load rk7
1237e1051a39Sopenharmony_ci
1238e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 3
1239e1051a39Sopenharmony_ci
1240e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 2
1241e1051a39Sopenharmony_ci
1242e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 2
1243e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16                                @ load rk8
1244e1051a39Sopenharmony_ci
1245e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 4
1246e1051a39Sopenharmony_ci
1247e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 3
1248e1051a39Sopenharmony_ci
1249e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 3
1250e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                         @ load h3l | h3h
1251e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1252e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
1253e1051a39Sopenharmony_ci#endif
1254e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 4
1255e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16                                @ load rk9
1256e1051a39Sopenharmony_ci
1257e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 5
1258e1051a39Sopenharmony_ci
1259e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 4
1260e1051a39Sopenharmony_ci
1261e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 4
1262e1051a39Sopenharmony_ci
1263e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 5
1264e1051a39Sopenharmony_ci
1265e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 5
1266e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                         @ load h1l | h1h
1267e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1268e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
1269e1051a39Sopenharmony_ci#endif
1270e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 5
1271e1051a39Sopenharmony_ci
1272e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 6
1273e1051a39Sopenharmony_ci
1274e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 6
1275e1051a39Sopenharmony_ci
1276e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 6
1277e1051a39Sopenharmony_ci
1278e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 6
1279e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                      @ h2h | h1h
1280e1051a39Sopenharmony_ci
1281e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                        @ load h4l | h4h
1282e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1283e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
1284e1051a39Sopenharmony_ci#endif
1285e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                      @ h2l | h1l
1286e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
1287e1051a39Sopenharmony_ci
1288e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 7
1289e1051a39Sopenharmony_ci
1290e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 7
1291e1051a39Sopenharmony_ci
1292e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 7
1293e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                     @ h2k | h1k
1294e1051a39Sopenharmony_ci
1295e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 7
1296e1051a39Sopenharmony_ci
1297e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 8
1298e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                      @ h4l | h3l
1299e1051a39Sopenharmony_ci
1300e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 8
1301e1051a39Sopenharmony_ci
1302e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 8
1303e1051a39Sopenharmony_ci
1304e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 8
1305e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                      @ h4h | h3h
1306e1051a39Sopenharmony_ci
1307e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 2 - round 9
1308e1051a39Sopenharmony_ci
1309e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 3 - round 9
1310e1051a39Sopenharmony_ci
1311e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 0 - round 9
1312e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 4 blocks
1313e1051a39Sopenharmony_ci
1314e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 1 - round 9
1315e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                  @ h4k | h3k
1316e1051a39Sopenharmony_ci	b.ge    .L128_dec_tail                                    @ handle tail
1317e1051a39Sopenharmony_ci
1318e1051a39Sopenharmony_ci	ld1     {$res0b, $res1b}, [$input_ptr], #32               @ AES block 0 - load ciphertext; AES block 1 - load ciphertext
1319e1051a39Sopenharmony_ci
1320e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 1 - result
1321e1051a39Sopenharmony_ci	ld1     {$res2b}, [$input_ptr], #16                       @ AES block 2 - load ciphertext
1322e1051a39Sopenharmony_ci
1323e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 0 - result
1324e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 0
1325e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4
1326e1051a39Sopenharmony_ci
1327e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4
1328e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4
1329e1051a39Sopenharmony_ci	ld1     {$res3b}, [$input_ptr], #16                       @ AES block 3 - load ciphertext
1330e1051a39Sopenharmony_ci
1331e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 1
1332e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 1 - mov low
1333e1051a39Sopenharmony_ci
1334e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 1 - mov high
1335e1051a39Sopenharmony_ci
1336e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 0 - mov low
1337e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 8 blocks
1338e1051a39Sopenharmony_ci
1339e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 0 - mov high
1340e1051a39Sopenharmony_ci
1341e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4
1342e1051a39Sopenharmony_ci
1343e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4
1344e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 5
1345e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk10_l                   @ AES block 1 - round 10 low
1346e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1347e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
1348e1051a39Sopenharmony_ci#endif
1349e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 5
1350e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 5
1351e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 5
1352e1051a39Sopenharmony_ci
1353e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 5
1354e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 6
1355e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 6
1356e1051a39Sopenharmony_ci
1357e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 6
1358e1051a39Sopenharmony_ci
1359e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk10_h                   @ AES block 1 - round 10 high
1360e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1361e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
1362e1051a39Sopenharmony_ci#endif
1363e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES block 0 - round 10 low
1364e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1365e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1366e1051a39Sopenharmony_ci#endif
1367e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 2 - result
1368e1051a39Sopenharmony_ci
1369e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES block 0 - round 10 high
1370e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1371e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1372e1051a39Sopenharmony_ci#endif
1373e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 0 - store result
1374e1051a39Sopenharmony_ci
1375e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 1 - store result
1376e1051a39Sopenharmony_ci	b.ge    .L128_dec_prepretail                              @ do prepretail
1377e1051a39Sopenharmony_ci
1378e1051a39Sopenharmony_ci	.L128_dec_main_loop:                                      @ main loop start - 4 blocks of ciphertext per iteration
1379e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
1380e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
1381e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
1382e1051a39Sopenharmony_ci
1383e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
1384e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
1385e1051a39Sopenharmony_ci
1386e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
1387e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
1388e1051a39Sopenharmony_ci
1389e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
1390e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
1391e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
1392e1051a39Sopenharmony_ci
1393e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
1394e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
1395e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
1396e1051a39Sopenharmony_ci
1397e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
1398e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
1399e1051a39Sopenharmony_ci
1400e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
1401e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
1402e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
1403e1051a39Sopenharmony_ci
1404e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
1405e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
1406e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
1407e1051a39Sopenharmony_ci
1408e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
1409e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
1410e1051a39Sopenharmony_ci
1411e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
1412e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
1413e1051a39Sopenharmony_ci
1414e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
1415e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
1416e1051a39Sopenharmony_ci
1417e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
1418e1051a39Sopenharmony_ci
1419e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
1420e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
1421e1051a39Sopenharmony_ci
1422e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
1423e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
1424e1051a39Sopenharmony_ci
1425e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
1426e1051a39Sopenharmony_ci
1427e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
1428e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
1429e1051a39Sopenharmony_ci
1430e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
1431e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk10_l                   @ AES block 4k+3 - round 10 low
1432e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1433e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
1434e1051a39Sopenharmony_ci#endif
1435e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
1436e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk10_h                   @ AES block 4k+2 - round 10 high
1437e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1438e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
1439e1051a39Sopenharmony_ci#endif
1440e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
1441e1051a39Sopenharmony_ci
1442e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
1443e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
1444e1051a39Sopenharmony_ci
1445e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
1446e1051a39Sopenharmony_ci
1447e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
1448e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
1449e1051a39Sopenharmony_ci
1450e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
1451e1051a39Sopenharmony_ci
1452e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
1453e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
1454e1051a39Sopenharmony_ci
1455e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
1456e1051a39Sopenharmony_ci
1457e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
1458e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
1459e1051a39Sopenharmony_ci
1460e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
1461e1051a39Sopenharmony_ci
1462e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
1463e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
1464e1051a39Sopenharmony_ci
1465e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
1466e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
1467e1051a39Sopenharmony_ci
1468e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
1469e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk10_h                   @ AES block 4k+3 - round 10 high
1470e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1471e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
1472e1051a39Sopenharmony_ci#endif
1473e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
1474e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
1475e1051a39Sopenharmony_ci
1476e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
1477e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk10_l                   @ AES block 4k+2 - round 10 low
1478e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1479e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
1480e1051a39Sopenharmony_ci#endif
1481e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
1482e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2                           @ mod_constant - 0xc2 in each low byte, shifted to the top byte by the shl below
1483e1051a39Sopenharmony_ci
1484e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
1485e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
1486e1051a39Sopenharmony_ci
1487e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
1488e1051a39Sopenharmony_ci
1489e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
1490e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
1491e1051a39Sopenharmony_ci
1492e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
1493e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
1494e1051a39Sopenharmony_ci
1495e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
1496e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
1497e1051a39Sopenharmony_ci	ld1     {$res0b}, [$input_ptr], #16                       @ AES block 4k+4 - load ciphertext
1498e1051a39Sopenharmony_ci
1499e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
1500e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
1501e1051a39Sopenharmony_ci
1502e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
1503e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
1504e1051a39Sopenharmony_ci
1505e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
1506e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
1507e1051a39Sopenharmony_ci
1508e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
1509e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
1510e1051a39Sopenharmony_ci
1511e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
1512e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
1513e1051a39Sopenharmony_ci
1514e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
1515e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+8
1516e1051a39Sopenharmony_ci
1517e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
1518e1051a39Sopenharmony_ci	ld1     {$res1b}, [$input_ptr], #16                       @ AES block 4k+5 - load ciphertext
1519e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
1520e1051a39Sopenharmony_ci
1521e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 4k+4 - round 9
1522e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+8
1523e1051a39Sopenharmony_ci
1524e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
1525e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
1526e1051a39Sopenharmony_ci
1527e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 4k+5 - round 9
1528e1051a39Sopenharmony_ci
1529e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
1530e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 4k+4 - result
1531e1051a39Sopenharmony_ci
1532e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
1533e1051a39Sopenharmony_ci	ld1     {$res2b}, [$input_ptr], #16                       @ AES block 4k+6 - load ciphertext
1534e1051a39Sopenharmony_ci
1535e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+8
1536e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
1537e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 4k+5 - result
1538e1051a39Sopenharmony_ci
1539e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
1540e1051a39Sopenharmony_ci	ld1     {$res3b}, [$input_ptr], #16                       @ AES block 4k+7 - load ciphertext
1541e1051a39Sopenharmony_ci
1542e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
1543e1051a39Sopenharmony_ci
1544e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+5
1545e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
1546e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
1547e1051a39Sopenharmony_ci
1548e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
1549e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
1550e1051a39Sopenharmony_ci
1551e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
1552e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4k+8
1553e1051a39Sopenharmony_ci
1554e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
1555e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4k+8
1556e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+9
1557e1051a39Sopenharmony_ci
1558e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 4k+6 - round 9
1559e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+9
1560e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
1561e1051a39Sopenharmony_ci
1562e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
1563e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES block 4k+4 - round 10 high
1564e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1565e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1566e1051a39Sopenharmony_ci#endif
1567e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
1568e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 4k+5 - mov high
1569e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES block 4k+4 - round 10 low
1570e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1571e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1572e1051a39Sopenharmony_ci#endif
1573e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 4k+6 - result
1574e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 4k+5 - mov low
1575e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+9
1576e1051a39Sopenharmony_ci
1577e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 4k+7 - round 9
1578e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 4k+9
1579e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ LOOP CONTROL
1580e1051a39Sopenharmony_ci
1581e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k+4
1582e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
1583e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 4k+9
1584e1051a39Sopenharmony_ci
1585e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+10
1586e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+10
1587e1051a39Sopenharmony_ci
1588e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk10_h                   @ AES block 4k+5 - round 10 high
1589e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1590e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
1591e1051a39Sopenharmony_ci#endif
1592e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 4k+4 - store result
1593e1051a39Sopenharmony_ci
1594e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk10_l                   @ AES block 4k+5 - round 10 low
1595e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1596e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
1597e1051a39Sopenharmony_ci#endif
1598e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 4k+5 - store result
1599e1051a39Sopenharmony_ci
1600e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+10
1601e1051a39Sopenharmony_ci	b.lt    .L128_dec_main_loop                               @ LOOP CONTROL - repeat while input_ptr < main_end_input_ptr (flags from the cmp above)
1602e1051a39Sopenharmony_ci
1603e1051a39Sopenharmony_ci	.L128_dec_prepretail:                                     @ PREPRETAIL
1604e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
1605e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
1606e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
1607e1051a39Sopenharmony_ci
1608e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
1609e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
1610e1051a39Sopenharmony_ci
1611e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
1612e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
1613e1051a39Sopenharmony_ci
1614e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
1615e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
1616e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
1617e1051a39Sopenharmony_ci
1618e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
1619e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
1620e1051a39Sopenharmony_ci
1621e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
1622e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
1623e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
1624e1051a39Sopenharmony_ci
1625e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
1626e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
1627e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
1628e1051a39Sopenharmony_ci
1629e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
1630e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
1631e1051a39Sopenharmony_ci
1632e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
1633e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
1634e1051a39Sopenharmony_ci
1635e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
1636e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
1637e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
1638e1051a39Sopenharmony_ci
1639e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
1640e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
1641e1051a39Sopenharmony_ci
1642e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
1643e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
1644e1051a39Sopenharmony_ci
1645e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
1646e1051a39Sopenharmony_ci
1647e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
1648e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
1649e1051a39Sopenharmony_ci
1650e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
1651e1051a39Sopenharmony_ci
1652e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
1653e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
1654e1051a39Sopenharmony_ci
1655e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
1656e1051a39Sopenharmony_ci
1657e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
1658e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
1659e1051a39Sopenharmony_ci
1660e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
1661e1051a39Sopenharmony_ci
1662e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
1663e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
1664e1051a39Sopenharmony_ci
1665e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
1666e1051a39Sopenharmony_ci
1667e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
1668e1051a39Sopenharmony_ci
1669e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
1670e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
1671e1051a39Sopenharmony_ci
1672e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
1673e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
1674e1051a39Sopenharmony_ci
1675e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
1676e1051a39Sopenharmony_ci
1677e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
1678e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
1679e1051a39Sopenharmony_ci
1680e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
1681e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
1682e1051a39Sopenharmony_ci
1683e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
1684e1051a39Sopenharmony_ci
1685e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
1686e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
1687e1051a39Sopenharmony_ci
1688e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
1689e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk10_l                   @ AES block 4k+3 - round 10 low
1690e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1691e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
1692e1051a39Sopenharmony_ci#endif
1693e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
1694e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk10_l                   @ AES block 4k+2 - round 10 low
1695e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1696e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
1697e1051a39Sopenharmony_ci#endif
1698e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
1699e1051a39Sopenharmony_ci
1700e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
1701e1051a39Sopenharmony_ci
1702e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
1703e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
1704e1051a39Sopenharmony_ci
1705e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
1706e1051a39Sopenharmony_ci
1707e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
1708e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
1709e1051a39Sopenharmony_ci
1710e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
1711e1051a39Sopenharmony_ci
1712e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
1713e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
1714e1051a39Sopenharmony_ci
1715e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
1716e1051a39Sopenharmony_ci
1717e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
1718e1051a39Sopenharmony_ci
1719e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
1720e1051a39Sopenharmony_ci
1721e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
1722e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
1723e1051a39Sopenharmony_ci
1724e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
1725e1051a39Sopenharmony_ci
1726e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
1727e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
1728e1051a39Sopenharmony_ci
1729e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
1730e1051a39Sopenharmony_ci
1731e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
1732e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
1733e1051a39Sopenharmony_ci
1734e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
1735e1051a39Sopenharmony_ci
1736e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
1737e1051a39Sopenharmony_ci
1738e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
1739e1051a39Sopenharmony_ci
1740e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
1741e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
1742e1051a39Sopenharmony_ci
1743e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
1744e1051a39Sopenharmony_ci
1745e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
1746e1051a39Sopenharmony_ci
1747e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9                                      @ AES block 4k+5 - round 9
1748e1051a39Sopenharmony_ci
1749e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
1750e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk10_h                   @ AES block 4k+3 - round 10 high
1751e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1752e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
1753e1051a39Sopenharmony_ci#endif
1754e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
1755e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
1756e1051a39Sopenharmony_ci
1757e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
1758e1051a39Sopenharmony_ci
1759e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
1760e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
1761e1051a39Sopenharmony_ci
1762e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
1763e1051a39Sopenharmony_ci
1764e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
1765e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk10_h                   @ AES block 4k+2 - round 10 high
1766e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1767e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
1768e1051a39Sopenharmony_ci#endif
1769e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9                                      @ AES block 4k+4 - round 9
1770e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
1771e1051a39Sopenharmony_ci
1772e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9                                      @ AES block 4k+6 - round 9
1773e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
1774e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
1775e1051a39Sopenharmony_ci
1776e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9                                      @ AES block 4k+7 - round 9
1777e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
1778e1051a39Sopenharmony_ci	.L128_dec_tail:                                           @ TAIL
1779e1051a39Sopenharmony_ci
1780e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr   @ main_end_input_ptr is number of bytes left to process
1781e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES block 4k+4 - load ciphertext
1782e1051a39Sopenharmony_ci
1783e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr0b                            @ AES block 4k+4 - result
1784e1051a39Sopenharmony_ci
1785e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
1786e1051a39Sopenharmony_ci
1787e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
1788e1051a39Sopenharmony_ci
1789e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
1790e1051a39Sopenharmony_ci
1791e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES block 4k+4 - round 10 high
1792e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1793e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1794e1051a39Sopenharmony_ci#endif
1795e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                     @ prepare final partial tag
1796e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES block 4k+4 - round 10 low
1797e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1798e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1799e1051a39Sopenharmony_ci#endif
1800e1051a39Sopenharmony_ci	b.gt    .L128_dec_blocks_more_than_3
1801e1051a39Sopenharmony_ci
1802e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
1803e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
1804e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
1805e1051a39Sopenharmony_ci
1806e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
1807e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
1808e1051a39Sopenharmony_ci
1809e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
1810e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
1811e1051a39Sopenharmony_ci	b.gt     .L128_dec_blocks_more_than_2
1812e1051a39Sopenharmony_ci
1813e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
1814e1051a39Sopenharmony_ci
1815e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
1816e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
1817e1051a39Sopenharmony_ci	b.gt    .L128_dec_blocks_more_than_1
1818e1051a39Sopenharmony_ci
1819e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
1820e1051a39Sopenharmony_ci	b       .L128_dec_blocks_less_than_1
1821e1051a39Sopenharmony_ci	.L128_dec_blocks_more_than_3:                             @ blocks left >  3
1822e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-3 block
1823e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final-2 block - load ciphertext
1824e1051a39Sopenharmony_ci
1825e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1826e1051a39Sopenharmony_ci
1827e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH final-3 block - mid
1828e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-3 block  - store result
1829e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr1b                            @ AES final-2 block - result
1830e1051a39Sopenharmony_ci
1831e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-3 block - mid
1832e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final-2 block - mov high
1833e1051a39Sopenharmony_ci
1834e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH final-3 block - low
1835e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final-2 block - mov low
1836e1051a39Sopenharmony_ci
1837e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH final-3 block - high
1838e1051a39Sopenharmony_ci
1839e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-3 block - mid
1840e1051a39Sopenharmony_ci
1841e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
1842e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES final-2 block - round 10 high
1843e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1844e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1845e1051a39Sopenharmony_ci#endif
1846e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                    @ GHASH final-3 block - mid
1847e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES final-2 block - round 10 low
1848e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1849e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1850e1051a39Sopenharmony_ci#endif
1851e1051a39Sopenharmony_ci	.L128_dec_blocks_more_than_2:                             @ blocks left >  2
1852e1051a39Sopenharmony_ci
1853e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-2 block
1854e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final-1 block - load ciphertext
1855e1051a39Sopenharmony_ci
1856e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1857e1051a39Sopenharmony_ci
1858e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr2b                            @ AES final-1 block - result
1859e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-2 block  - store result
1860e1051a39Sopenharmony_ci
1861e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-2 block - mid
1862e1051a39Sopenharmony_ci
1863e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                          @ GHASH final-2 block - low
1864e1051a39Sopenharmony_ci
1865e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                          @ GHASH final-2 block - high
1866e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final-1 block - mov low
1867e1051a39Sopenharmony_ci
1868e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final-1 block - mov high
1869e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-2 block - mid
1870e1051a39Sopenharmony_ci
1871e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
1872e1051a39Sopenharmony_ci
1873e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                      @ GHASH final-2 block - mid
1874e1051a39Sopenharmony_ci
1875e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES final-1 block - round 10 low
1876e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1877e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1878e1051a39Sopenharmony_ci#endif
1879e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-2 block - low
1880e1051a39Sopenharmony_ci
1881e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-2 block - high
1882e1051a39Sopenharmony_ci
1883e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-2 block - mid
1884e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES final-1 block - round 10 high
1885e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1886e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1887e1051a39Sopenharmony_ci#endif
1888e1051a39Sopenharmony_ci	.L128_dec_blocks_more_than_1:                             @ blocks left >  1
1889e1051a39Sopenharmony_ci
1890e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-1 block
1891e1051a39Sopenharmony_ci
1892e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final block - load ciphertext
1893e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1894e1051a39Sopenharmony_ci
1895e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-1 block - mid
1896e1051a39Sopenharmony_ci
1897e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr3b                            @ AES final block - result
1898e1051a39Sopenharmony_ci
1899e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-1 block - mid
1900e1051a39Sopenharmony_ci
1901e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-1 block  - store result
1902e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final block - mov low
1903e1051a39Sopenharmony_ci
1904e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final block - mov high
1905e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                            @ GHASH final-1 block - mid
1906e1051a39Sopenharmony_ci
1907e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                          @ GHASH final-1 block - low
1908e1051a39Sopenharmony_ci
1909e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                          @ GHASH final-1 block - high
1910e1051a39Sopenharmony_ci
1911e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                      @ GHASH final-1 block - mid
1912e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
1913e1051a39Sopenharmony_ci
1914e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-1 block - low
1915e1051a39Sopenharmony_ci
1916e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-1 block - high
1917e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk10_h                   @ AES final block - round 10 high
1918e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1919e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
1920e1051a39Sopenharmony_ci#endif
1921e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk10_l                   @ AES final block - round 10 low
1922e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
1923e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
1924e1051a39Sopenharmony_ci#endif
1925e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-1 block - mid
1926e1051a39Sopenharmony_ci	.L128_dec_blocks_less_than_1:                                            @ blocks left <= 1
1927e1051a39Sopenharmony_ci
1928e1051a39Sopenharmony_ci	mvn     $rk10_h, xzr                                      @ rk10_h = 0xffffffffffffffff
1929e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
1930e1051a39Sopenharmony_ci
1931e1051a39Sopenharmony_ci	mvn     $rk10_l, xzr                                      @ rk10_l = 0xffffffffffffffff
1932e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                    @ bit_length -= 128
1933e1051a39Sopenharmony_ci
1934e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                          @ bit_length = 128 - #bits in input (in range [1,128])
1935e1051a39Sopenharmony_ci
1936e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
1937e1051a39Sopenharmony_ci
1938e1051a39Sopenharmony_ci	lsr     $rk10_h, $rk10_h, $bit_length                     @ rk10_h is mask for top 64b of last block
1939e1051a39Sopenharmony_ci	cmp     $bit_length, #64
1940e1051a39Sopenharmony_ci
1941e1051a39Sopenharmony_ci	csel    $ctr96_b64x, $rk10_h, xzr, lt
1942e1051a39Sopenharmony_ci	csel    $ctr32x, $rk10_l, $rk10_h, lt
1943e1051a39Sopenharmony_ci
1944e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr32x                                   @ ctr0b is mask for last block
1945e1051a39Sopenharmony_ci
1946e1051a39Sopenharmony_ci	mov     $ctr0.d[1], $ctr96_b64x
1947e1051a39Sopenharmony_ci
1948e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                            @ possibly partial last block has zeroes in highest bits
1949e1051a39Sopenharmony_ci
1950e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final block
1951e1051a39Sopenharmony_ci
1952e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
1953e1051a39Sopenharmony_ci
1954e1051a39Sopenharmony_ci	ldp     $end_input_ptr, $main_end_input_ptr, [$output_ptr] @ load existing bytes we need to not overwrite
1955e1051a39Sopenharmony_ci
1956e1051a39Sopenharmony_ci	and     $output_h0, $output_h0, $ctr96_b64x
1957e1051a39Sopenharmony_ci
1958e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                          @ GHASH final block - high
1959e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH final block - mid
1960e1051a39Sopenharmony_ci
1961e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH final block - mid
1962e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final block - high
1963e1051a39Sopenharmony_ci
1964e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                          @ GHASH final block - mid
1965e1051a39Sopenharmony_ci
1966e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                          @ GHASH final block - low
1967e1051a39Sopenharmony_ci	bic     $end_input_ptr, $end_input_ptr, $ctr32x           @ mask out low existing bytes
1968e1051a39Sopenharmony_ci	and     $output_l0, $output_l0, $ctr32x
1969e1051a39Sopenharmony_ci
1970e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
1971e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
1972e1051a39Sopenharmony_ci#else
1973e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
1974e1051a39Sopenharmony_ci#endif
1975e1051a39Sopenharmony_ci
1976e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                         @ GHASH final block - mid
1977e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
1978e1051a39Sopenharmony_ci
1979e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final block - low
1980e1051a39Sopenharmony_ci
1981e1051a39Sopenharmony_ci	bic     $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x   @ mask out high existing bytes
1982e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
1983e1051a39Sopenharmony_ci
1984e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
1985e1051a39Sopenharmony_ci
1986e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
1987e1051a39Sopenharmony_ci
1988e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
1989e1051a39Sopenharmony_ci
1990e1051a39Sopenharmony_ci	orr     $output_l0, $output_l0, $end_input_ptr
1991e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                          @ store the updated counter
1992e1051a39Sopenharmony_ci
1993e1051a39Sopenharmony_ci	orr     $output_h0, $output_h0, $main_end_input_ptr
1994e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr]
1995e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
1996e1051a39Sopenharmony_ci
1997e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
1998e1051a39Sopenharmony_ci
1999e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
2000e1051a39Sopenharmony_ci
2001e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
2002e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
2003e1051a39Sopenharmony_ci
2004e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
2005e1051a39Sopenharmony_ci
2006e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
2007e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
2008e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
2009e1051a39Sopenharmony_ci	mov     x0, $len
2010e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
2011e1051a39Sopenharmony_ci
2012e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
2013e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
2014e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
2015e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
2016e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
2017e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
2018e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
2019e1051a39Sopenharmony_ci	ret
2020e1051a39Sopenharmony_ci
2021e1051a39Sopenharmony_ci	.L128_dec_ret:
2022e1051a39Sopenharmony_ci	mov w0, #0x0
2023e1051a39Sopenharmony_ci	ret
2024e1051a39Sopenharmony_ci.size aes_gcm_dec_128_kernel,.-aes_gcm_dec_128_kernel
2025e1051a39Sopenharmony_ci___
2026e1051a39Sopenharmony_ci}
2027e1051a39Sopenharmony_ci
2028e1051a39Sopenharmony_ci{
# Symbolic register names for the assembly templates generated in this scope
# (the first one that follows is aes_gcm_enc_192_kernel).  Each variable holds
# the textual name of an AArch64 register and is interpolated into the
# heredoc.  Several names intentionally map to the SAME register, so two
# aliases of one register can never hold live values at the same time.

# General-purpose registers: x4/x5 are the input end pointers, x6/x7 carry
# block 0 (named as input or output), x19..x24 carry blocks 1..3 (again named
# both as input and as output).  The kernel prologue that follows spills
# x19..x24 to the stack before they are used.
2029e1051a39Sopenharmony_cimy ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map("x$_",(4..7));
2030e1051a39Sopenharmony_cimy ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map("x$_",(19..24));
2031e1051a39Sopenharmony_cimy ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map("x$_",(19..24));
2032e1051a39Sopenharmony_cimy ($output_l0,$output_h0)=map("x$_",(6..7));
2033e1051a39Sopenharmony_ci
# Counter bookkeeping and the last round key in general-purpose registers:
# x9=$ctr32x, x10=$ctr96_b64x, x11=$ctr96_t32x, x12=$rctr32x,
# x13/x14=$rk12_l/$rk12_h, x15=$len.  The w-names are the 32-bit views of
# x9, x11 and x12.
2034e1051a39Sopenharmony_cimy $ctr32w="w9";
2035e1051a39Sopenharmony_cimy ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk12_l,$rk12_h,$len)=map("x$_",(9..15));
2036e1051a39Sopenharmony_cimy ($ctr96_t32w,$rctr32w)=map("w$_",(11..12));
2037e1051a39Sopenharmony_ci
# SIMD registers v0..v3 are the four counter blocks, v4..v7 the four result
# blocks; the b/d/q suffixed names are different views of the same registers.
2038e1051a39Sopenharmony_cimy ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map("v$_.16b",(0..7));
2039e1051a39Sopenharmony_cimy ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map("v$_",(0..7));
2040e1051a39Sopenharmony_cimy ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map("d$_",(0..7));
2041e1051a39Sopenharmony_cimy ($res0q,$res1q,$res2q,$res3q)=map("q$_",(4..7));
2042e1051a39Sopenharmony_ci
# GHASH accumulators: high, middle and low parts live in v9, v10 and v11.
2043e1051a39Sopenharmony_cimy ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(9..11));
2044e1051a39Sopenharmony_cimy ($acc_h,$acc_m,$acc_l)=map("v$_",(9..11));
2045e1051a39Sopenharmony_cimy ($acc_hd,$acc_md,$acc_ld)=map("d$_",(9..11));
2046e1051a39Sopenharmony_ci
# Hash key values: $h1..$h4 in v12..v15, plus $h12k (v16) and $h34k (v17).
2047e1051a39Sopenharmony_cimy ($h1,$h2,$h3,$h4,$h12k,$h34k)=map("v$_",(12..17));
2048e1051a39Sopenharmony_cimy ($h1q,$h2q,$h3q,$h4q)=map("q$_",(12..15));
2049e1051a39Sopenharmony_cimy ($h1b,$h2b,$h3b,$h4b)=map("v$_.16b",(12..15));
2050e1051a39Sopenharmony_ci
# Temporaries.  Aliasing to keep in mind when reading the templates:
#   v8  : $t0, $t5, $mod_constant
#   v30 : $t1, $t4, $t9
#   v31 : $t2, $t6, $mod_t
#   v4  : $t3 (also $res0 / $ctr_t0)
#   v5  : $t7 (also $res1 / $ctr_t1)
#   v6  : $t8 (also $res2 / $ctr_t2)
2051e1051a39Sopenharmony_cimy $t0="v8";
2052e1051a39Sopenharmony_cimy $t0d="d8";
2053e1051a39Sopenharmony_cimy $t3="v4";
2054e1051a39Sopenharmony_cimy $t3d="d4";
2055e1051a39Sopenharmony_ci
2056e1051a39Sopenharmony_cimy ($t1,$t2)=map("v$_",(30..31));
2057e1051a39Sopenharmony_cimy ($t1d,$t2d)=map("d$_",(30..31));
2058e1051a39Sopenharmony_ci
2059e1051a39Sopenharmony_cimy $t4="v30";
2060e1051a39Sopenharmony_cimy $t4d="d30";
2061e1051a39Sopenharmony_cimy $t5="v8";
2062e1051a39Sopenharmony_cimy $t5d="d8";
2063e1051a39Sopenharmony_cimy $t6="v31";
2064e1051a39Sopenharmony_cimy $t6d="d31";
2065e1051a39Sopenharmony_ci
2066e1051a39Sopenharmony_cimy $t7="v5";
2067e1051a39Sopenharmony_cimy $t7d="d5";
2068e1051a39Sopenharmony_cimy $t8="v6";
2069e1051a39Sopenharmony_cimy $t8d="d6";
2070e1051a39Sopenharmony_cimy $t9="v30";
2071e1051a39Sopenharmony_cimy $t9d="d30";
2072e1051a39Sopenharmony_ci
# Counter-block temporaries reuse v4..v7, i.e. the same registers as
# $res0..$res3.
2073e1051a39Sopenharmony_cimy ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map("v$_",(4..7));
2074e1051a39Sopenharmony_cimy ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map("d$_",(4..7));
2075e1051a39Sopenharmony_cimy ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map("v$_.16b",(4..7));
2076e1051a39Sopenharmony_ci
# Reduction ("MODULO") helpers: the constant lives in v8 and the scratch
# product in v31, so both overlap the temporaries listed above.
2077e1051a39Sopenharmony_cimy $mod_constantd="d8";
2078e1051a39Sopenharmony_cimy $mod_constant="v8";
2079e1051a39Sopenharmony_cimy $mod_t="v31";
2080e1051a39Sopenharmony_ci
# AES round keys rk0..rk11 occupy v18..v29 (three views: .16b, q and .4s).
2081e1051a39Sopenharmony_cimy ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9,$rk10,$rk11)=map("v$_.16b",(18..29));
2082e1051a39Sopenharmony_cimy ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q,$rk10q,$rk11q)=map("q$_",(18..29));
2083e1051a39Sopenharmony_cimy ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s,$rk10s,$rk11s)=map("v$_.4s",(18..29));
# Extra views of the registers that hold rk2, rk3 and rk4 (v20, v21, v22).
# Anything written through these names overwrites the corresponding round key.
2084e1051a39Sopenharmony_cimy $rk2q1="v20.1q";
2085e1051a39Sopenharmony_cimy $rk3q1="v21.1q";
2086e1051a39Sopenharmony_cimy $rk4v="v22";
2087e1051a39Sopenharmony_cimy $rk4d="d22";
2088e1051a39Sopenharmony_ci
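2089e1051a39Sopenharmony_ci#########################################################################################
2090e1051a39Sopenharmony_ci# size_t aes_gcm_enc_192_kernel(const unsigned char *in,
2091e1051a39Sopenharmony_ci#                               size_t len,
2092e1051a39Sopenharmony_ci#                               unsigned char *out,
2093e1051a39Sopenharmony_ci#                               const void *key,
2094e1051a39Sopenharmony_ci#                               unsigned char ivec[16],
2095e1051a39Sopenharmony_ci#                               u64 *Xi);
2096e1051a39Sopenharmony_ci#
# NOTE(review): the prologue below copies x4 into x16 and x5 into x8, then
# loads the counter block through $counter and the round keys through $cc,
# while the hash state is read through $current_tag.  If $counter/$cc are
# x16/x8 and $current_tag is x3 (they are defined outside this section), the
# register order is (in, len, out, Xi, ivec, key) rather than the order
# listed above -- confirm against the C prototype used by the callers.
#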
2097e1051a39Sopenharmony_ci$code.=<<___;
2098e1051a39Sopenharmony_ci.global aes_gcm_enc_192_kernel
2099e1051a39Sopenharmony_ci.type   aes_gcm_enc_192_kernel,%function
2100e1051a39Sopenharmony_ci.align  4
2101e1051a39Sopenharmony_ciaes_gcm_enc_192_kernel:
2102e1051a39Sopenharmony_ci	cbz     x1, .L192_enc_ret
2103e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
2104e1051a39Sopenharmony_ci	mov     x16, x4
2105e1051a39Sopenharmony_ci	mov     x8, x5
2106e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
2107e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
2108e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
2109e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
2110e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
2111e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
2112e1051a39Sopenharmony_ci
2113e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]             @ ctr96_b64, ctr96_t32
2114e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2115e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
2116e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
2117e1051a39Sopenharmony_ci#endif
2118e1051a39Sopenharmony_ci	ldp     $rk12_l, $rk12_h, [$cc, #192]                     @ load rk12
2119e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2120e1051a39Sopenharmony_ci	ror     $rk12_l, $rk12_l, #32
2121e1051a39Sopenharmony_ci	ror     $rk12_h, $rk12_h, #32
2122e1051a39Sopenharmony_ci#endif
2123e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16	                             @ load rk0
2124e1051a39Sopenharmony_ci
2125e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16	                             @ load rk1
2126e1051a39Sopenharmony_ci
2127e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16	                             @ load rk2
2128e1051a39Sopenharmony_ci
2129e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
2130e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16	                             @ load rk3
2131e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
2132e1051a39Sopenharmony_ci
2133e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16	                             @ load rk4
2134e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                               @ rev_ctr32
2135e1051a39Sopenharmony_ci
2136e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ increment rev_ctr32
2137e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                              @ CTR block 3
2138e1051a39Sopenharmony_ci
2139e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 1
2140e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 1
2141e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                              @ CTR block 1
2142e1051a39Sopenharmony_ci
2143e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 1
2144e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                            @ special case vector load initial counter so we can start first AES block as quickly as possible
2145e1051a39Sopenharmony_ci
2146e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                              @ CTR block 1
2147e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 2
2148e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 2
2149e1051a39Sopenharmony_ci
2150e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                              @ CTR block 2
2151e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 2
2152e1051a39Sopenharmony_ci
2153e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                              @ CTR block 2
2154e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 3
2155e1051a39Sopenharmony_ci
2156e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 3
2157e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16	                             @ load rk5
2158e1051a39Sopenharmony_ci
2159e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                              @ CTR block 3
2160e1051a39Sopenharmony_ci
2161e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16	                             @ load rk6
2162e1051a39Sopenharmony_ci
2163e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16	                             @ load rk7
2164e1051a39Sopenharmony_ci
2165e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 0
2166e1051a39Sopenharmony_ci	ld1     { $acc_lb}, [$current_tag]
2167e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
2168e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
2169e1051a39Sopenharmony_ci
2170e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 0
2171e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16	                             @ load rk8
2172e1051a39Sopenharmony_ci
2173e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 0
2174e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                       @ load h4l | h4h
2175e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
2176e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
2177e1051a39Sopenharmony_ci#endif
2178e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 0
2179e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16	                             @ load rk9
2180e1051a39Sopenharmony_ci
2181e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 1
2182e1051a39Sopenharmony_ci	ld1     {$rk10s}, [$cc], #16	                         @ load rk10
2183e1051a39Sopenharmony_ci
2184e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 1
2185e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                        @ load h1l | h1h
2186e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
2187e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
2188e1051a39Sopenharmony_ci#endif
2189e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 1
2190e1051a39Sopenharmony_ci	ld1     {$rk11s}, [$cc], #16	                         @ load rk11
2191e1051a39Sopenharmony_ci
2192e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 1
2193e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                        @ load h3l | h3h
2194e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
2195e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
2196e1051a39Sopenharmony_ci#endif
2197e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 2
2198e1051a39Sopenharmony_ci
2199e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 2
2200e1051a39Sopenharmony_ci
2201e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 2
2202e1051a39Sopenharmony_ci
2203e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 3
2204e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                     @ h4h | h3h
2205e1051a39Sopenharmony_ci
2206e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 3
2207e1051a39Sopenharmony_ci
2208e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 2
2209e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                     @ h4l | h3l
2210e1051a39Sopenharmony_ci
2211e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 4
2212e1051a39Sopenharmony_ci
2213e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 3
2214e1051a39Sopenharmony_ci
2215e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 3
2216e1051a39Sopenharmony_ci
2217e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 5
2218e1051a39Sopenharmony_ci
2219e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 4
2220e1051a39Sopenharmony_ci
2221e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 4
2222e1051a39Sopenharmony_ci
2223e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 6
2224e1051a39Sopenharmony_ci
2225e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 4
2226e1051a39Sopenharmony_ci
2227e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 5
2228e1051a39Sopenharmony_ci
2229e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 5
2230e1051a39Sopenharmony_ci
2231e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 5
2232e1051a39Sopenharmony_ci
2233e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 6
2234e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                        @ load h2l | h2h
2235e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
2236e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
2237e1051a39Sopenharmony_ci#endif
2238e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 6
2239e1051a39Sopenharmony_ci
2240e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 6
2241e1051a39Sopenharmony_ci
2242e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 7
2243e1051a39Sopenharmony_ci
2244e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 7
2245e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                     @ h2l | h1l
2246e1051a39Sopenharmony_ci
2247e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 7
2248e1051a39Sopenharmony_ci
2249e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 8
2250e1051a39Sopenharmony_ci
2251e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 7
2252e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                     @ h2h | h1h
2253e1051a39Sopenharmony_ci
2254e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 8
2255e1051a39Sopenharmony_ci
2256e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 8
2257e1051a39Sopenharmony_ci
2258e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 8
2259e1051a39Sopenharmony_ci
2260e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 9
2261e1051a39Sopenharmony_ci
2262e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 9
2263e1051a39Sopenharmony_ci
2264e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 9
2265e1051a39Sopenharmony_ci
2266e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 9
2267e1051a39Sopenharmony_ci
2268e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b         @ AES block 0 - round 10
2269e1051a39Sopenharmony_ci
2270e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b         @ AES block 2 - round 10
2271e1051a39Sopenharmony_ci
2272e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b         @ AES block 1 - round 10
2273e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3             @ byte_len
2274e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
2275e1051a39Sopenharmony_ci
2276e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b         @ AES block 3 - round 10
2277e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1     @ byte_len - 1
2278e1051a39Sopenharmony_ci
2279e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                    @ h2k | h1k
2280e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0   @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
2281e1051a39Sopenharmony_ci
2282e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                 @ h4k | h3k
2283e1051a39Sopenharmony_ci
2284e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11                                    @ AES block 2 - round 11
2285e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3  @ end_input_ptr
2286e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
2287e1051a39Sopenharmony_ci
2288e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11                                    @ AES block 1 - round 11
2289e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                  @ check if we have <= 4 blocks
2290e1051a39Sopenharmony_ci
2291e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11                                    @ AES block 0 - round 11
2292e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 3
2293e1051a39Sopenharmony_ci
2294e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11                                    @ AES block 3 - round 11
2295e1051a39Sopenharmony_ci	b.ge    .L192_enc_tail                                   @ handle tail
2296e1051a39Sopenharmony_ci
2297e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 4
2298e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]           @ AES block 0 - load plaintext
2299e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2300e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2301e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2302e1051a39Sopenharmony_ci#endif
2303e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 4
2304e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]          @ AES block 2 - load plaintext
2305e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2306e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2
2307e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2
2308e1051a39Sopenharmony_ci#endif
2309e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]          @ AES block 3 - load plaintext
2310e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2311e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3
2312e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3
2313e1051a39Sopenharmony_ci#endif
2314e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]          @ AES block 1 - load plaintext
2315e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2316e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1
2317e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1
2318e1051a39Sopenharmony_ci#endif
2319e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                      @ AES input_ptr update
2320e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                  @ check if we have <= 8 blocks
2321e1051a39Sopenharmony_ci
2322e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES block 0 - round 12 low
2323e1051a39Sopenharmony_ci
2324e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES block 0 - round 12 high
2325e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk12_h                    @ AES block 2 - round 12 high
2326e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                              @ AES block 0 - mov low
2327e1051a39Sopenharmony_ci
2328e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk12_h                    @ AES block 3 - round 12 high
2329e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                          @ AES block 0 - mov high
2330e1051a39Sopenharmony_ci
2331e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk12_l                    @ AES block 2 - round 12 low
2332e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk12_l                    @ AES block 1 - round 12 low
2333e1051a39Sopenharmony_ci
2334e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                              @ AES block 1 - mov low
2335e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk12_h                    @ AES block 1 - round 12 high
2336e1051a39Sopenharmony_ci
2337e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                          @ AES block 1 - mov high
2338e1051a39Sopenharmony_ci
2339e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk12_l                    @ AES block 3 - round 12 low
2340e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                              @ AES block 2 - mov low
2341e1051a39Sopenharmony_ci
2342e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4
2343e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                         @ AES block 0 - result
2344e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                              @ CTR block 4
2345e1051a39Sopenharmony_ci
2346e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                              @ CTR block 4
2347e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 5
2348e1051a39Sopenharmony_ci
2349e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 5
2350e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 5
2351e1051a39Sopenharmony_ci
2352e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                              @ AES block 3 - mov low
2353e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                    @ AES block 0 - store result
2354e1051a39Sopenharmony_ci
2355e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                          @ AES block 2 - mov high
2356e1051a39Sopenharmony_ci
2357e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                         @ AES block 1 - result
2358e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                              @ CTR block 5
2359e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES block 1 - store result
2360e1051a39Sopenharmony_ci
2361e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                          @ AES block 3 - mov high
2362e1051a39Sopenharmony_ci
2363e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                              @ CTR block 5
2364e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 6
2365e1051a39Sopenharmony_ci
2366e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 6
2367e1051a39Sopenharmony_ci
2368e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 6
2369e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                         @ AES block 2 - result
2370e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                              @ CTR block 6
2371e1051a39Sopenharmony_ci
2372e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                              @ CTR block 6
2373e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 7
2374e1051a39Sopenharmony_ci
2375e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 7
2376e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                    @ AES block 2 - store result
2377e1051a39Sopenharmony_ci
2378e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                         @ AES block 3 - result
2379e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                    @ AES block 3 - store result
2380e1051a39Sopenharmony_ci	b.ge    .L192_enc_prepretail                             @ do prepretail
2381e1051a39Sopenharmony_ci
2382e1051a39Sopenharmony_ci	.L192_enc_main_loop:                                     @ main loop start
2383e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 0
2384e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                   @ GHASH block 4k+1 (t0 and t1 free)
2385e1051a39Sopenharmony_ci
2386e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 0
2387e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]          @ AES block 4k+5 - load plaintext
2388e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2389e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1
2390e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1
2391e1051a39Sopenharmony_ci#endif
2392e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                    @ PRE 0
2393e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                              @ CTR block 4k+3
2394e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                   @ GHASH block 4k (only t0 is free)
2395e1051a39Sopenharmony_ci
2396e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 1
2397e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                              @ CTR block 4k+3
2398e1051a39Sopenharmony_ci
2399e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                         @ GHASH block 4k+1 - high
2400e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                   @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
2401e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]          @ AES block 4k+6 - load plaintext
2402e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2403e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2
2404e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2
2405e1051a39Sopenharmony_ci#endif
2406e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 0
2407e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]          @ AES block 4k+3 - load plaintext
2408e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2409e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3
2410e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3
2411e1051a39Sopenharmony_ci#endif
2412e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                         @ GHASH block 4k+1 - low
2413e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                          @ PRE 1
2414e1051a39Sopenharmony_ci
2415e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 1
2416e1051a39Sopenharmony_ci
2417e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 1
2418e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                   @ GHASH block 4k+2 (t0, t1, and t2 free)
2419e1051a39Sopenharmony_ci
2420e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 0
2421e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk12_h                    @ AES block 4k+3 - round 12 high
2422e1051a39Sopenharmony_ci
2423e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                      @ GHASH block 4k - low
2424e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                 @ GHASH block 4k - mid
2425e1051a39Sopenharmony_ci
2426e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 2
2427e1051a39Sopenharmony_ci
2428e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 1
2429e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk12_l                    @ AES block 4k+6 - round 12 low
2430e1051a39Sopenharmony_ci
2431e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                         @ GHASH block 4k - mid
2432e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                        @ GHASH block 4k+1 - low
2433e1051a39Sopenharmony_ci
2434e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 3
2435e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk12_l                    @ AES block 4k+5 - round 12 low
2436e1051a39Sopenharmony_ci
2437e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 2
2438e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                 @ GHASH block 4k+2 - mid
2439e1051a39Sopenharmony_ci
2440e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                      @ GHASH block 4k - high
2441e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                 @ GHASH block 4k+1 - mid
2442e1051a39Sopenharmony_ci
2443e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 2
2444e1051a39Sopenharmony_ci
2445e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 3
2446e1051a39Sopenharmony_ci
2447e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                              @ GHASH block 4k - mid
2448e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                        @ GHASH block 4k+1 - high
2449e1051a39Sopenharmony_ci
2450e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 2
2451e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                         @ GHASH block 4k+2 - mid
2452e1051a39Sopenharmony_ci
2453e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                         @ GHASH block 4k+2 - high
2454e1051a39Sopenharmony_ci
2455e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 4
2456e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                         @ GHASH block 4k+1 - mid
2457e1051a39Sopenharmony_ci
2458e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 3
2459e1051a39Sopenharmony_ci
2460e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                         @ GHASH block 4k+3 - high
2461e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk12_h                    @ AES block 4k+5 - round 12 high
2462e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                               @ GHASH block 4k+2 - mid
2463e1051a39Sopenharmony_ci
2464e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 5
2465e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4k+3
2466e1051a39Sopenharmony_ci
2467e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 4
2468e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                        @ GHASH block 4k+2 - high
2469e1051a39Sopenharmony_ci
2470e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                         @ GHASH block 4k+1 - mid
2471e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk12_h                    @ AES block 4k+6 - round 12 high
2472e1051a39Sopenharmony_ci
2473e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                         @ GHASH block 4k+2 - mid
2474e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk12_l                    @ AES block 4k+3 - round 12 low
2475e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                 @ GHASH block 4k+3 - mid
2476e1051a39Sopenharmony_ci
2477e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                     @ GHASH block 4k - mid
2478e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 4k+8
2479e1051a39Sopenharmony_ci
2480e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                         @ GHASH block 4k+2 - low
2481e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 4k+8
2482e1051a39Sopenharmony_ci
2483e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 3
2484e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                         @ GHASH block 4k+3 - mid
2485e1051a39Sopenharmony_ci
2486e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 4
2487e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]           @ AES block 4k+4 - load plaintext
2488e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2489e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2490e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2491e1051a39Sopenharmony_ci#endif
2492e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 6
2493e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                        @ GHASH block 4k+2 - low
2494e1051a39Sopenharmony_ci
2495e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 4
2496e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                      @ AES input_ptr update
2497e1051a39Sopenharmony_ci
2498e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 5
2499e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
2500e1051a39Sopenharmony_ci
2501e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                         @ GHASH block 4k+3 - low
2502e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES block 4k+4 - round 12 high
2503e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                        @ GHASH block 4k+1 - mid
2504e1051a39Sopenharmony_ci
2505e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 5
2506e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES block 4k+4 - round 12 low
2507e1051a39Sopenharmony_ci
2508e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 6
2509e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56              @ mod_constant
2510e1051a39Sopenharmony_ci
2511e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 5
2512e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                        @ GHASH block 4k+3 - high
2513e1051a39Sopenharmony_ci
2514e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 7
2515e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                              @ AES block 4k+5 - mov low
2516e1051a39Sopenharmony_ci
2517e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 7
2518e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                        @ GHASH block 4k+2 - mid
2519e1051a39Sopenharmony_ci
2520e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 6
2521e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                          @ AES block 4k+5 - mov high
2522e1051a39Sopenharmony_ci
2523e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 8
2524e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                        @ GHASH block 4k+3 - low
2525e1051a39Sopenharmony_ci
2526e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                         @ GHASH block 4k+3 - mid
2527e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                  @ LOOP CONTROL
2528e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                              @ AES block 4k+4 - mov low
2529e1051a39Sopenharmony_ci
2530e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 6
2531e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                          @ AES block 4k+4 - mov high
2532e1051a39Sopenharmony_ci
2533e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 8
2534e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                              @ AES block 4k+3 - mov low
2535e1051a39Sopenharmony_ci
2536e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                        @ GHASH block 4k+3 - mid
2537e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                        @ MODULO - karatsuba tidy up
2538e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4k+8
2539e1051a39Sopenharmony_ci
2540e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 7
2541e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                          @ AES block 4k+3 - mov high
2542e1051a39Sopenharmony_ci
2543e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d           @ MODULO - top 64b align with mid
2544e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                    @ MODULO - other top alignment
2545e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                              @ AES block 4k+6 - mov low
2546e1051a39Sopenharmony_ci
2547e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 7
2548e1051a39Sopenharmony_ci
2549e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 9
2550e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                        @ MODULO - karatsuba tidy up
2551e1051a39Sopenharmony_ci
2552e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 8
2553e1051a39Sopenharmony_ci
2554e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 8
2555e1051a39Sopenharmony_ci
2556e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 9
2557e1051a39Sopenharmony_ci
2558e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 10
2559e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                     @ MODULO - fold into mid
2560e1051a39Sopenharmony_ci
2561e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 9
2562e1051a39Sopenharmony_ci
2563e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 9
2564e1051a39Sopenharmony_ci
2565e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11                                    @ AES block 4k+4 - round 11
2566e1051a39Sopenharmony_ci
2567e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 10
2568e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                        @ MODULO - fold into mid
2569e1051a39Sopenharmony_ci
2570e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 10
2571e1051a39Sopenharmony_ci
2572e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                         @ AES block 4k+4 - result
2573e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                              @ CTR block 4k+8
2574e1051a39Sopenharmony_ci
2575e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11                                    @ AES block 4k+5 - round 11
2576e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                              @ CTR block 4k+8
2577e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 4k+9
2578e1051a39Sopenharmony_ci
2579e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d           @ MODULO - mid 64b align with low
2580e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                          @ AES block 4k+6 - mov high
2581e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                    @ AES block 4k+4 - store result
2582e1051a39Sopenharmony_ci
2583e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 10
2584e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 4k+9
2585e1051a39Sopenharmony_ci
2586e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                         @ AES block 4k+5 - result
2587e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4k+9
2588e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                              @ CTR block 4k+9
2589e1051a39Sopenharmony_ci
2590e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11                                    @ AES block 4k+6 - round 11
2591e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                              @ CTR block 4k+9
2592e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 4k+10
2593e1051a39Sopenharmony_ci
2594e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4k+10
2595e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                    @ MODULO - other mid alignment
2596e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 4k+10
2597e1051a39Sopenharmony_ci
2598e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES block 4k+5 - store result
2599e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                        @ MODULO - fold into low
2600e1051a39Sopenharmony_ci
2601e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11                                    @ AES block 4k+7 - round 11
2602e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                         @ AES block 4k+6 - result
2603e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                              @ CTR block 4k+10
2604e1051a39Sopenharmony_ci
2605e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                    @ AES block 4k+6 - store result
2606e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                              @ CTR block 4k+10
2607e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                @ CTR block 4k+11
2608e1051a39Sopenharmony_ci
2609e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                        @ MODULO - fold into low
2610e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32           @ CTR block 4k+11
2611e1051a39Sopenharmony_ci
2612e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                         @ AES block 4k+3 - result
2613e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                    @ AES block 4k+3 - store result
2614e1051a39Sopenharmony_ci	b.lt    .L192_enc_main_loop
2615e1051a39Sopenharmony_ci
2616e1051a39Sopenharmony_ci	.L192_enc_prepretail:                                    @ PREPRETAIL
2617e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 0
2618e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                   @ GHASH block 4k (only t0 is free)
2619e1051a39Sopenharmony_ci
2620e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                              @ CTR block 4k+3
2621e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                    @ PRE 0
2622e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                           @ CTR block 4k+3
2623e1051a39Sopenharmony_ci
2624e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 0
2625e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                   @ GHASH block 4k+1 (t0 and t1 free)
2626e1051a39Sopenharmony_ci
2627e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 0
2628e1051a39Sopenharmony_ci
2629e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                              @ CTR block 4k+3
2630e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                          @ PRE 1
2631e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                              @ GHASH block 4k - mid
2632e1051a39Sopenharmony_ci
2633e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 1
2634e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                   @ GHASH block 4k+2 (t0, t1, and t2 free)
2635e1051a39Sopenharmony_ci
2636e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                         @ GHASH block 4k+1 - high
2637e1051a39Sopenharmony_ci
2638e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                      @ GHASH block 4k - low
2639e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                 @ GHASH block 4k - mid
2640e1051a39Sopenharmony_ci
2641e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                         @ GHASH block 4k+1 - low
2642e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                   @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
2643e1051a39Sopenharmony_ci
2644e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                      @ GHASH block 4k - high
2645e1051a39Sopenharmony_ci
2646e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                         @ GHASH block 4k - mid
2647e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                 @ GHASH block 4k+1 - mid
2648e1051a39Sopenharmony_ci
2649e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                        @ GHASH block 4k+1 - low
2650e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                 @ GHASH block 4k+2 - mid
2651e1051a39Sopenharmony_ci
2652e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 0
2653e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                        @ GHASH block 4k+1 - high
2654e1051a39Sopenharmony_ci
2655e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                         @ GHASH block 4k+2 - high
2656e1051a39Sopenharmony_ci
2657e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                         @ GHASH block 4k+1 - mid
2658e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                         @ GHASH block 4k+2 - mid
2659e1051a39Sopenharmony_ci
2660e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 1
2661e1051a39Sopenharmony_ci
2662e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 1
2663e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                        @ GHASH block 4k+2 - high
2664e1051a39Sopenharmony_ci
2665e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 1
2666e1051a39Sopenharmony_ci
2667e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 2
2668e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                 @ GHASH block 4k+3 - mid
2669e1051a39Sopenharmony_ci
2670e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                         @ GHASH block 4k+3 - high
2671e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                               @ GHASH block 4k+2 - mid
2672e1051a39Sopenharmony_ci
2673e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 2
2674e1051a39Sopenharmony_ci
2675e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                     @ GHASH block 4k - mid
2676e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                         @ GHASH block 4k+3 - mid
2677e1051a39Sopenharmony_ci
2678e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 3
2679e1051a39Sopenharmony_ci
2680e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                         @ GHASH block 4k+2 - mid
2681e1051a39Sopenharmony_ci
2682e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                         @ GHASH block 4k+1 - mid
2683e1051a39Sopenharmony_ci
2684e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                         @ GHASH block 4k+3 - mid
2685e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                        @ GHASH block 4k+3 - high
2686e1051a39Sopenharmony_ci
2687e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                         @ GHASH block 4k+2 - low
2688e1051a39Sopenharmony_ci
2689e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 3
2690e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                        @ GHASH block 4k+1 - mid
2691e1051a39Sopenharmony_ci
2692e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 2
2693e1051a39Sopenharmony_ci
2694e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 2
2695e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                        @ GHASH block 4k+2 - low
2696e1051a39Sopenharmony_ci
2697e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 4
2698e1051a39Sopenharmony_ci
2699e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 3
2700e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                        @ GHASH block 4k+2 - mid
2701e1051a39Sopenharmony_ci
2702e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 3
2703e1051a39Sopenharmony_ci
2704e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                         @ GHASH block 4k+3 - low
2705e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
2706e1051a39Sopenharmony_ci
2707e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 4
2708e1051a39Sopenharmony_ci
2709e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 4
2710e1051a39Sopenharmony_ci
2711e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 4
2712e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                        @ GHASH block 4k+3 - mid
2713e1051a39Sopenharmony_ci
2714e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 5
2715e1051a39Sopenharmony_ci
2716e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 5
2717e1051a39Sopenharmony_ci
2718e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 5
2719e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                        @ GHASH block 4k+3 - low
2720e1051a39Sopenharmony_ci
2721e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 5
2722e1051a39Sopenharmony_ci
2723e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 6
2724e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                        @ karatsuba tidy up
2725e1051a39Sopenharmony_ci
2726e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 6
2727e1051a39Sopenharmony_ci
2728e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 6
2729e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56              @ mod_constant
2730e1051a39Sopenharmony_ci
2731e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 7
2732e1051a39Sopenharmony_ci
2733e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 7
2734e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_lb
2735e1051a39Sopenharmony_ci
2736e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 7
2737e1051a39Sopenharmony_ci
2738e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_h.1d, $mod_constant.1d
2739e1051a39Sopenharmony_ci
2740e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 6
2741e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8
2742e1051a39Sopenharmony_ci
2743e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 8
2744e1051a39Sopenharmony_ci
2745e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 8
2746e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t1.16b
2747e1051a39Sopenharmony_ci
2748e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 7
2749e1051a39Sopenharmony_ci
2750e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 8
2751e1051a39Sopenharmony_ci
2752e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 9
2753e1051a39Sopenharmony_ci
2754e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 8
2755e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb
2756e1051a39Sopenharmony_ci
2757e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 9
2758e1051a39Sopenharmony_ci
2759e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 9
2760e1051a39Sopenharmony_ci
2761e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 9
2762e1051a39Sopenharmony_ci
2763e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_m.1d, $mod_constant.1d
2764e1051a39Sopenharmony_ci
2765e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8
2766e1051a39Sopenharmony_ci
2767e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b         @ AES block 4k+7 - round 10
2768e1051a39Sopenharmony_ci
2769e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b         @ AES block 4k+4 - round 10
2770e1051a39Sopenharmony_ci
2771e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b         @ AES block 4k+6 - round 10
2772e1051a39Sopenharmony_ci
2773e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b         @ AES block 4k+5 - round 10
2774e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t1.16b
2775e1051a39Sopenharmony_ci
2776e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11                                    @ AES block 4k+4 - round 11
2777e1051a39Sopenharmony_ci
2778e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11                                    @ AES block 4k+7 - round 11
2779e1051a39Sopenharmony_ci
2780e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11                                    @ AES block 4k+6 - round 11
2781e1051a39Sopenharmony_ci
2782e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11                                    @ AES block 4k+5 - round 11
2783e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb
2784e1051a39Sopenharmony_ci	.L192_enc_tail:                                          @ TAIL
2785e1051a39Sopenharmony_ci
2786e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr  @ main_end_input_ptr is number of bytes left to process
2787e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES block 4k+4 - load plaintext
2788e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2789e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2790e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2791e1051a39Sopenharmony_ci#endif
2792e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES block 4k+4 - round 12 low
2793e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES block 4k+4 - round 12 high
2794e1051a39Sopenharmony_ci
2795e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                              @ AES block 4k+4 - mov low
2796e1051a39Sopenharmony_ci
2797e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                          @ AES block 4k+4 - mov high
2798e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
2799e1051a39Sopenharmony_ci
2800e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t0b, $ctr0b                         @ AES block 4k+4 - result
2801e1051a39Sopenharmony_ci
2802e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                    @ prepare final partial tag
2803e1051a39Sopenharmony_ci	b.gt    .L192_enc_blocks_more_than_3
2804e1051a39Sopenharmony_ci
2805e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
2806e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
2807e1051a39Sopenharmony_ci
2808e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
2809e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
2810e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
2811e1051a39Sopenharmony_ci
2812e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
2813e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
2814e1051a39Sopenharmony_ci	b.gt    .L192_enc_blocks_more_than_2
2815e1051a39Sopenharmony_ci
2816e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
2817e1051a39Sopenharmony_ci
2818e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
2819e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
2820e1051a39Sopenharmony_ci	b.gt    .L192_enc_blocks_more_than_1
2821e1051a39Sopenharmony_ci
2822e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
2823e1051a39Sopenharmony_ci	b       .L192_enc_blocks_less_than_1
2824e1051a39Sopenharmony_ci	.L192_enc_blocks_more_than_3:                            @ blocks left >  3
2825e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-3 block  - store result
2826e1051a39Sopenharmony_ci
2827e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final-2 block - load input low & high
2828e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2829e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2830e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2831e1051a39Sopenharmony_ci#endif
2832e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-3 block
2833e1051a39Sopenharmony_ci
2834e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES final-2 block - round 12 low
2835e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
2836e1051a39Sopenharmony_ci
2837e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES final-2 block - round 12 high
2838e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final-2 block - mov low
2839e1051a39Sopenharmony_ci
2840e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final-2 block - mov high
2841e1051a39Sopenharmony_ci
2842e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-3 block - mid
2843e1051a39Sopenharmony_ci
2844e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                      @ GHASH final-3 block - low
2845e1051a39Sopenharmony_ci
2846e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                              @ GHASH final-3 block - mid
2847e1051a39Sopenharmony_ci
2848e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-3 block - mid
2849e1051a39Sopenharmony_ci
2850e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
2851e1051a39Sopenharmony_ci
2852e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                      @ GHASH final-3 block - high
2853e1051a39Sopenharmony_ci
2854e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                   @ GHASH final-3 block - mid
2855e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr1b                           @ AES final-2 block - result
2856e1051a39Sopenharmony_ci	.L192_enc_blocks_more_than_2:                            @ blocks left >  2
2857e1051a39Sopenharmony_ci
2858e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-2 block - store result
2859e1051a39Sopenharmony_ci
2860e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-2 block
2861e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final-1 block - load input low & high
2862e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2863e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2864e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2865e1051a39Sopenharmony_ci#endif
2866e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
2867e1051a39Sopenharmony_ci
2868e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES final-1 block - round 12 high
2869e1051a39Sopenharmony_ci
2870e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                         @ GHASH final-2 block - high
2871e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-2 block - mid
2872e1051a39Sopenharmony_ci
2873e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                         @ GHASH final-2 block - low
2874e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES final-1 block - round 12 low
2875e1051a39Sopenharmony_ci
2876e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final-1 block - mov low
2877e1051a39Sopenharmony_ci
2878e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final-1 block - mov high
2879e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-2 block - high
2880e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-2 block - mid
2881e1051a39Sopenharmony_ci
2882e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-2 block - low
2883e1051a39Sopenharmony_ci
2884e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                     @ GHASH final-2 block - mid
2885e1051a39Sopenharmony_ci
2886e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
2887e1051a39Sopenharmony_ci
2888e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr2b                           @ AES final-1 block - result
2889e1051a39Sopenharmony_ci
2890e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-2 block - mid
2891e1051a39Sopenharmony_ci	.L192_enc_blocks_more_than_1:                            @ blocks left >  1
2892e1051a39Sopenharmony_ci
2893e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-1 block - store result
2894e1051a39Sopenharmony_ci
2895e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final block - load input low & high
2896e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
2897e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
2898e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
2899e1051a39Sopenharmony_ci#endif
2900e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-1 block
2901e1051a39Sopenharmony_ci
2902e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk12_l                    @ AES final block - round 12 low
2903e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
2904e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
2905e1051a39Sopenharmony_ci
2906e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-1 block - mid
2907e1051a39Sopenharmony_ci
2908e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-1 block - mid
2909e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk12_h                    @ AES final block - round 12 high
2910e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final block - mov low
2911e1051a39Sopenharmony_ci
2912e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                         @ GHASH final-1 block - high
2913e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final block - mov high
2914e1051a39Sopenharmony_ci
2915e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                           @ GHASH final-1 block - mid
2916e1051a39Sopenharmony_ci
2917e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-1 block - high
2918e1051a39Sopenharmony_ci
2919e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                         @ GHASH final-1 block - low
2920e1051a39Sopenharmony_ci
2921e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                     @ GHASH final-1 block - mid
2922e1051a39Sopenharmony_ci
2923e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr3b                           @ AES final block - result
2924e1051a39Sopenharmony_ci
2925e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-1 block - low
2926e1051a39Sopenharmony_ci
2927e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-1 block - mid
2928e1051a39Sopenharmony_ci	.L192_enc_blocks_less_than_1:                            @ blocks left <= 1
2929e1051a39Sopenharmony_ci
2930e1051a39Sopenharmony_ci	ld1     { $rk0}, [$output_ptr]                           @ load existing bytes where the possibly partial last block is to be stored
2931e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
2932e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
2933e1051a39Sopenharmony_ci#else
2934e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
2935e1051a39Sopenharmony_ci#endif
2936e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
2937e1051a39Sopenharmony_ci
2938e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                   @ bit_length -= 128
2939e1051a39Sopenharmony_ci	mvn     $rk12_h, xzr                                     @ rk12_h = 0xffffffffffffffff
2940e1051a39Sopenharmony_ci
2941e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                         @ bit_length = 128 - #bits in input (in range [1,128])
2942e1051a39Sopenharmony_ci	mvn     $rk12_l, xzr                                     @ rk12_l = 0xffffffffffffffff
2943e1051a39Sopenharmony_ci
2944e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
2945e1051a39Sopenharmony_ci
2946e1051a39Sopenharmony_ci	lsr     $rk12_h, $rk12_h, $bit_length                    @ rk12_h is mask for top 64b of last block
2947e1051a39Sopenharmony_ci	cmp     $bit_length, #64
2948e1051a39Sopenharmony_ci
2949e1051a39Sopenharmony_ci	csel    $input_l0, $rk12_l, $rk12_h, lt
2950e1051a39Sopenharmony_ci	csel    $input_h0, $rk12_h, xzr, lt
2951e1051a39Sopenharmony_ci
2952e1051a39Sopenharmony_ci	fmov    $ctr0d, $input_l0                                @ ctr0b is mask for last block
2953e1051a39Sopenharmony_ci
2954e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $input_h0
2955e1051a39Sopenharmony_ci
2956e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                           @ possibly partial last block has zeroes in highest bits
2957e1051a39Sopenharmony_ci
2958e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final block
2959e1051a39Sopenharmony_ci
2960e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
2961e1051a39Sopenharmony_ci
2962e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                 @ GHASH final block - mid
2963e1051a39Sopenharmony_ci
2964e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                         @ GHASH final block - low
2965e1051a39Sopenharmony_ci
2966e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                         @ GHASH final block - high
2967e1051a39Sopenharmony_ci
2968e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                         @ GHASH final block - mid
2969e1051a39Sopenharmony_ci
2970e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final block - low
2971e1051a39Sopenharmony_ci
2972e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final block - high
2973e1051a39Sopenharmony_ci
2974e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                         @ GHASH final block - mid
2975e1051a39Sopenharmony_ci
2976e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                        @ GHASH final block - mid
2977e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
2978e1051a39Sopenharmony_ci
2979e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                        @ MODULO - karatsuba tidy up
2980e1051a39Sopenharmony_ci
2981e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56              @ mod_constant
2982e1051a39Sopenharmony_ci
2983e1051a39Sopenharmony_ci	bif     $res1b, $rk0, $ctr0b                             @ insert existing bytes in top end of result before storing
2984e1051a39Sopenharmony_ci
2985e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                        @ MODULO - karatsuba tidy up
2986e1051a39Sopenharmony_ci
2987e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d           @ MODULO - top 64b align with mid
2988e1051a39Sopenharmony_ci
2989e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                    @ MODULO - other top alignment
2990e1051a39Sopenharmony_ci
2991e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                     @ MODULO - fold into mid
2992e1051a39Sopenharmony_ci
2993e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                        @ MODULO - fold into mid
2994e1051a39Sopenharmony_ci
2995e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d           @ MODULO - mid 64b align with low
2996e1051a39Sopenharmony_ci
2997e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                    @ MODULO - other mid alignment
2998e1051a39Sopenharmony_ci
2999e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                        @ MODULO - fold into low
3000e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                         @ store the updated counter
3001e1051a39Sopenharmony_ci
3002e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr]                         @ store all 16B
3003e1051a39Sopenharmony_ci
3004e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                        @ MODULO - fold into low
3005e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
3006e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
3007e1051a39Sopenharmony_ci	mov     x0, $len
3008e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
3009e1051a39Sopenharmony_ci
3010e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
3011e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
3012e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
3013e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
3014e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
3015e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
3016e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
3017e1051a39Sopenharmony_ci	ret
3018e1051a39Sopenharmony_ci
3019e1051a39Sopenharmony_ci.L192_enc_ret:
3020e1051a39Sopenharmony_ci	mov w0, #0x0
3021e1051a39Sopenharmony_ci	ret
3022e1051a39Sopenharmony_ci.size aes_gcm_enc_192_kernel,.-aes_gcm_enc_192_kernel
3023e1051a39Sopenharmony_ci___
3024e1051a39Sopenharmony_ci
3025e1051a39Sopenharmony_ci#########################################################################################
3026e1051a39Sopenharmony_ci# size_t aes_gcm_dec_192_kernel(const unsigned char *in,
3027e1051a39Sopenharmony_ci#                               size_t len,   /* NOTE: consumed as a bit count - the prologue derives the byte length with `lsr #3` */
3028e1051a39Sopenharmony_ci#                               unsigned char *out,
3029e1051a39Sopenharmony_ci#                               const void *key,
3030e1051a39Sopenharmony_ci#                               unsigned char ivec[16],
3031e1051a39Sopenharmony_ci#                               u64 *Xi);
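3032e1051a39Sopenharmony_ci# NOTE(review): the prologue copies x4 into x16 and x5 into x8; confirm that the key/ivec/Xi order listed above matches the register-variable assignments and the C declaration.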
3033e1051a39Sopenharmony_ci$code.=<<___;
3034e1051a39Sopenharmony_ci.global aes_gcm_dec_192_kernel
3035e1051a39Sopenharmony_ci.type   aes_gcm_dec_192_kernel,%function
3036e1051a39Sopenharmony_ci.align  4
3037e1051a39Sopenharmony_ciaes_gcm_dec_192_kernel:
3038e1051a39Sopenharmony_ci	cbz     x1, .L192_dec_ret
3039e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
3040e1051a39Sopenharmony_ci	mov     x16, x4
3041e1051a39Sopenharmony_ci	mov     x8, x5
3042e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
3043e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
3044e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
3045e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
3046e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
3047e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
3048e1051a39Sopenharmony_ci
3049e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3   @ end_input_ptr
3050e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]              @ ctr96_b64, ctr96_t32
3051e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3052e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
3053e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
3054e1051a39Sopenharmony_ci#endif
3055e1051a39Sopenharmony_ci	ldp     $rk12_l, $rk12_h, [$cc, #192]                     @ load rk12
3056e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3057e1051a39Sopenharmony_ci	ror     $rk12_l, $rk12_l, #32
3058e1051a39Sopenharmony_ci	ror     $rk12_h, $rk12_h, #32
3059e1051a39Sopenharmony_ci#endif
3060e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
3061e1051a39Sopenharmony_ci
3062e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16                                  @ load rk0
3063e1051a39Sopenharmony_ci
3064e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3              @ byte_len
3065e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
3066e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16                               @ load rk1
3067e1051a39Sopenharmony_ci
3068e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
3069e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
3070e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 3
3071e1051a39Sopenharmony_ci
3072e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                                @ rev_ctr32
3073e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 1
3074e1051a39Sopenharmony_ci
3075e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ increment rev_ctr32
3076e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16                               @ load rk2
3077e1051a39Sopenharmony_ci
3078e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 0
3079e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 1
3080e1051a39Sopenharmony_ci
3081e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 1
3082e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 1
3083e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16                               @ load rk3
3084e1051a39Sopenharmony_ci
3085e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 1
3086e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 2
3087e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 2
3088e1051a39Sopenharmony_ci
3089e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 2
3090e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 2
3091e1051a39Sopenharmony_ci
3092e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 2
3093e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 3
3094e1051a39Sopenharmony_ci
3095e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 1
3096e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 3
3097e1051a39Sopenharmony_ci
3098e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 3
3099e1051a39Sopenharmony_ci
3100e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16                               @ load rk4
3101e1051a39Sopenharmony_ci
3102e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 2
3103e1051a39Sopenharmony_ci
3104e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 0
3105e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16                               @ load rk5
3106e1051a39Sopenharmony_ci
3107e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 0
3108e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                        @ load h4l | h4h
3109e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
3110e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
3111e1051a39Sopenharmony_ci#endif
3112e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 0
3113e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                         @ load h2l | h2h
3114e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
3115e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
3116e1051a39Sopenharmony_ci#endif
3117e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 1
3118e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                         @ load h3l | h3h
3119e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
3120e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
3121e1051a39Sopenharmony_ci#endif
3122e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 1
3123e1051a39Sopenharmony_ci
3124e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 1
3125e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                         @ load h1l | h1h
3126e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
3127e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
3128e1051a39Sopenharmony_ci#endif
3129e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 2
3130e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16                               @ load rk6
3131e1051a39Sopenharmony_ci
3132e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 3
3133e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16                               @ load rk7
3134e1051a39Sopenharmony_ci
3135e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 2
3136e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16                               @ load rk8
3137e1051a39Sopenharmony_ci
3138e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 2
3139e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16                               @ load rk9
3140e1051a39Sopenharmony_ci
3141e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 3
3142e1051a39Sopenharmony_ci	ld1     { $acc_lb}, [$current_tag]
3143e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
3144e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
3145e1051a39Sopenharmony_ci
3146e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 3
3147e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 3
3148e1051a39Sopenharmony_ci
3149e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 3
3150e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                      @ h4h | h3h
3151e1051a39Sopenharmony_ci
3152e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 4
3153e1051a39Sopenharmony_ci	ld1     {$rk10s}, [$cc], #16                              @ load rk10
3154e1051a39Sopenharmony_ci
3155e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 4
3156e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                      @ h4l | h3l
3157e1051a39Sopenharmony_ci
3158e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 4
3159e1051a39Sopenharmony_ci
3160e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 4
3161e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                      @ h2l | h1l
3162e1051a39Sopenharmony_ci
3163e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 5
3164e1051a39Sopenharmony_ci	ld1     {$rk11s}, [$cc], #16                              @ load rk11
3165e1051a39Sopenharmony_ci
3166e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 5
3167e1051a39Sopenharmony_ci
3168e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 5
3169e1051a39Sopenharmony_ci
3170e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 5
3171e1051a39Sopenharmony_ci
3172e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 6
3173e1051a39Sopenharmony_ci
3174e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 6
3175e1051a39Sopenharmony_ci
3176e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 6
3177e1051a39Sopenharmony_ci
3178e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 7
3179e1051a39Sopenharmony_ci
3180e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 7
3181e1051a39Sopenharmony_ci
3182e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 7
3183e1051a39Sopenharmony_ci
3184e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 6
3185e1051a39Sopenharmony_ci
3186e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 8
3187e1051a39Sopenharmony_ci
3188e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 8
3189e1051a39Sopenharmony_ci
3190e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 7
3191e1051a39Sopenharmony_ci
3192e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 9
3193e1051a39Sopenharmony_ci
3194e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 9
3195e1051a39Sopenharmony_ci
3196e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 8
3197e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1      @ byte_len - 1
3198e1051a39Sopenharmony_ci
3199e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 8
3200e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0    @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
3201e1051a39Sopenharmony_ci
3202e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 10
3203e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
3204e1051a39Sopenharmony_ci
3205e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 9
3206e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 4 blocks
3207e1051a39Sopenharmony_ci
3208e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 9
3209e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                      @ h2h | h1h
3210e1051a39Sopenharmony_ci
3211e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11                                     @ AES block 3 - round 11
3212e1051a39Sopenharmony_ci
3213e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 10
3214e1051a39Sopenharmony_ci
3215e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 10
3216e1051a39Sopenharmony_ci
3217e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 10
3218e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                     @ h2k | h1k
3219e1051a39Sopenharmony_ci
3220e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11                                     @ AES block 2 - round 11
3221e1051a39Sopenharmony_ci
3222e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11                                     @ AES block 1 - round 11
3223e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                  @ h4k | h3k
3224e1051a39Sopenharmony_ci
3225e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11                                     @ AES block 0 - round 11
3226e1051a39Sopenharmony_ci	b.ge    .L192_dec_tail                                    @ handle tail
3227e1051a39Sopenharmony_ci
3228e1051a39Sopenharmony_ci	ld1     {$res0b, $res1b}, [$input_ptr], #32               @ AES block 0,1 - load ciphertext
3229e1051a39Sopenharmony_ci
3230e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 1 - result
3231e1051a39Sopenharmony_ci
3232e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 0 - result
3233e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4
3234e1051a39Sopenharmony_ci	ld1     {$res2b, $res3b}, [$input_ptr], #32               @ AES block 2,3 - load ciphertext
3235e1051a39Sopenharmony_ci
3236e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 1 - mov low
3237e1051a39Sopenharmony_ci
3238e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 1 - mov high
3239e1051a39Sopenharmony_ci
3240e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 0 - mov low
3241e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4
3242e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4
3243e1051a39Sopenharmony_ci
3244e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 0 - mov high
3245e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 0
3246e1051a39Sopenharmony_ci
3247e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4
3248e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 1
3249e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 8 blocks
3250e1051a39Sopenharmony_ci
3251e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk12_l                   @ AES block 1 - round 12 low
3252e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3253e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
3254e1051a39Sopenharmony_ci#endif
3255e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4
3256e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 5
3257e1051a39Sopenharmony_ci
3258e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 5
3259e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 5
3260e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk12_h                   @ AES block 1 - round 12 high
3261e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3262e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
3263e1051a39Sopenharmony_ci#endif
3264e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 5
3265e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 5
3266e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES block 0 - round 12 low
3267e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3268e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3269e1051a39Sopenharmony_ci#endif
3270e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 6
3271e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES block 0 - round 12 high
3272e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3273e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3274e1051a39Sopenharmony_ci#endif
3275e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 0 - store result
3276e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 6
3277e1051a39Sopenharmony_ci
3278e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 1 - store result
3279e1051a39Sopenharmony_ci
3280e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 6
3281e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 2 - result
3282e1051a39Sopenharmony_ci	b.ge    .L192_dec_prepretail                              @ do prepretail
3283e1051a39Sopenharmony_ci
3284e1051a39Sopenharmony_ci	.L192_dec_main_loop:                                      @ main loop start
3285e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
3286e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
3287e1051a39Sopenharmony_ci
3288e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
3289e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
3290e1051a39Sopenharmony_ci
3291e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
3292e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
3293e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
3294e1051a39Sopenharmony_ci
3295e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
3296e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
3297e1051a39Sopenharmony_ci
3298e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
3299e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
3300e1051a39Sopenharmony_ci
3301e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
3302e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
3303e1051a39Sopenharmony_ci
3304e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
3305e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
3306e1051a39Sopenharmony_ci
3307e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
3308e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
3309e1051a39Sopenharmony_ci
3310e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
3311e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
3312e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
3313e1051a39Sopenharmony_ci
3314e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
3315e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
3316e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
3317e1051a39Sopenharmony_ci
3318e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
3319e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
3320e1051a39Sopenharmony_ci
3321e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
3322e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
3323e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
3324e1051a39Sopenharmony_ci
3325e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
3326e1051a39Sopenharmony_ci
3327e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
3328e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk12_h                   @ AES block 4k+2 - round 12 high
3329e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3330e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
3331e1051a39Sopenharmony_ci#endif
3332e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
3333e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
3334e1051a39Sopenharmony_ci
3335e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
3336e1051a39Sopenharmony_ci
3337e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
3338e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
3339e1051a39Sopenharmony_ci
3340e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
3341e1051a39Sopenharmony_ci
3342e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
3343e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
3344e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk12_l                   @ AES block 4k+2 - round 12 low
3345e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3346e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
3347e1051a39Sopenharmony_ci#endif
3348e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
3349e1051a39Sopenharmony_ci
3350e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
3351e1051a39Sopenharmony_ci
3352e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
3353e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
3354e1051a39Sopenharmony_ci
3355e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
3356e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
3357e1051a39Sopenharmony_ci
3358e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
3359e1051a39Sopenharmony_ci
3360e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
3361e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
3362e1051a39Sopenharmony_ci
3363e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
3364e1051a39Sopenharmony_ci
3365e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
3366e1051a39Sopenharmony_ci
3367e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
3368e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
3369e1051a39Sopenharmony_ci
3370e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
3371e1051a39Sopenharmony_ci
3372e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
3373e1051a39Sopenharmony_ci
3374e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
3375e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
3376e1051a39Sopenharmony_ci
3377e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
3378e1051a39Sopenharmony_ci
3379e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
3380e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
3381e1051a39Sopenharmony_ci
3382e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
3383e1051a39Sopenharmony_ci
3384e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
3385e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
3386e1051a39Sopenharmony_ci
3387e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
3388e1051a39Sopenharmony_ci
3389e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
3390e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
3391e1051a39Sopenharmony_ci
3392e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
3393e1051a39Sopenharmony_ci
3394e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
3395e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
3396e1051a39Sopenharmony_ci
3397e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
3398e1051a39Sopenharmony_ci
3399e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
3400e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
3401e1051a39Sopenharmony_ci
3402e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
3403e1051a39Sopenharmony_ci
3404e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
3405e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
3406e1051a39Sopenharmony_ci
3407e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
3408e1051a39Sopenharmony_ci
3409e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
3410e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
3411e1051a39Sopenharmony_ci
3412e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
3413e1051a39Sopenharmony_ci
3414e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
3415e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
3416e1051a39Sopenharmony_ci
3417e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
3418e1051a39Sopenharmony_ci
3419e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
3420e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
3421e1051a39Sopenharmony_ci
3422e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
3423e1051a39Sopenharmony_ci
3424e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
3425e1051a39Sopenharmony_ci	ld1     {$res0b}, [$input_ptr], #16                       @ AES block 4k+4 - load ciphertext
3426e1051a39Sopenharmony_ci
3427e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
3428e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
3429e1051a39Sopenharmony_ci
3430e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
3431e1051a39Sopenharmony_ci	ld1     {$res1b}, [$input_ptr], #16                       @ AES block 4k+5 - load ciphertext
3432e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk12_l                   @ AES block 4k+3 - round 12 low
3433e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3434e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
3435e1051a39Sopenharmony_ci#endif
3436e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
3437e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
3438e1051a39Sopenharmony_ci
3439e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11                                     @ AES block 4k+4 - round 11
3440e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
3441e1051a39Sopenharmony_ci
3442e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
3443e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
3444e1051a39Sopenharmony_ci
3445e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
3446e1051a39Sopenharmony_ci	ld1     {$res2b}, [$input_ptr], #16                       @ AES block 4k+6 - load ciphertext
3447e1051a39Sopenharmony_ci
3448e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11                                     @ AES block 4k+5 - round 11
3449e1051a39Sopenharmony_ci	ld1     {$res3b}, [$input_ptr], #16                       @ AES block 4k+7 - load ciphertext
3450e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+8
3451e1051a39Sopenharmony_ci
3452e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
3453e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
3454e1051a39Sopenharmony_ci
3455e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
3456e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
3457e1051a39Sopenharmony_ci
3458e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ LOOP CONTROL
3459e1051a39Sopenharmony_ci
3460e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 4k+4 - result
3461e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk12_h                   @ AES block 4k+3 - round 12 high
3462e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3463e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
3464e1051a39Sopenharmony_ci#endif
3465e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 4k+5 - result
3466e1051a39Sopenharmony_ci
3467e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
3468e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+8
3469e1051a39Sopenharmony_ci
3470e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
3471e1051a39Sopenharmony_ci
3472e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
3473e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 4k+5 - mov low
3474e1051a39Sopenharmony_ci
3475e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
3476e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
3477e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+5
3478e1051a39Sopenharmony_ci
3479e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11                                     @ AES block 4k+6 - round 11
3480e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
3481e1051a39Sopenharmony_ci
3482e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
3483e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 4k+5 - mov high
3484e1051a39Sopenharmony_ci
3485e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4k+8
3486e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+8
3487e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
3488e1051a39Sopenharmony_ci
3489e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 4k+6 - result
3490e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4k+8
3491e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+9
3492e1051a39Sopenharmony_ci
3493e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES block 4k+4 - round 12 low
3494e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3495e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3496e1051a39Sopenharmony_ci#endif
3497e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+9
3498e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
3499e1051a39Sopenharmony_ci
3500e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 4k+9
3501e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+9
3502e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk12_l                   @ AES block 4k+5 - round 12 low
3503e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3504e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
3505e1051a39Sopenharmony_ci#endif
3506e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 4k+9
3507e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+10
3508e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk12_h                   @ AES block 4k+5 - round 12 high
3509e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3510e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
3511e1051a39Sopenharmony_ci#endif
3512e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES block 4k+4 - round 12 high
3513e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3514e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3515e1051a39Sopenharmony_ci#endif
3516e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 4k+4 - store result
3517e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
3518e1051a39Sopenharmony_ci
3519e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+10
3520e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k+4
3521e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+10
3522e1051a39Sopenharmony_ci
3523e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11                                     @ AES block 4k+7 - round 11
3524e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 4k+5 - store result
3525e1051a39Sopenharmony_ci	b.lt    .L192_dec_main_loop
3526e1051a39Sopenharmony_ci
3527e1051a39Sopenharmony_ci	.L192_dec_prepretail:                                     @ PREPRETAIL
3528e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
3529e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
3530e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
3531e1051a39Sopenharmony_ci
3532e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
3533e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
3534e1051a39Sopenharmony_ci
3535e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
3536e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
3537e1051a39Sopenharmony_ci
3538e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
3539e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
3540e1051a39Sopenharmony_ci
3541e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
3542e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
3543e1051a39Sopenharmony_ci
3544e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
3545e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
3546e1051a39Sopenharmony_ci
3547e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
3548e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
3549e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
3550e1051a39Sopenharmony_ci
3551e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
3552e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
3553e1051a39Sopenharmony_ci
3554e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
3555e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
3556e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
3557e1051a39Sopenharmony_ci
3558e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
3559e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
3560e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
3561e1051a39Sopenharmony_ci
3562e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
3563e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk12_h                   @ AES block 4k+3 - round 12 high
3564e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3565e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
3566e1051a39Sopenharmony_ci#endif
3567e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
3568e1051a39Sopenharmony_ci
3569e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
3570e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk12_l                   @ AES block 4k+2 - round 12 low
3571e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3572e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
3573e1051a39Sopenharmony_ci#endif
3574e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
3575e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk12_h                   @ AES block 4k+2 - round 12 high
3576e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3577e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
3578e1051a39Sopenharmony_ci#endif
3579e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
3580e1051a39Sopenharmony_ci
3581e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
3582e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk12_l                   @ AES block 4k+3 - round 12 low
3583e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3584e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
3585e1051a39Sopenharmony_ci#endif
3586e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
3587e1051a39Sopenharmony_ci
3588e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
3589e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
3590e1051a39Sopenharmony_ci
3591e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
3592e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
3593e1051a39Sopenharmony_ci
3594e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
3595e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
3596e1051a39Sopenharmony_ci
3597e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
3598e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
3599e1051a39Sopenharmony_ci
3600e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
3601e1051a39Sopenharmony_ci
3602e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
3603e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
3604e1051a39Sopenharmony_ci
3605e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
3606e1051a39Sopenharmony_ci
3607e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
3608e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
3609e1051a39Sopenharmony_ci
3610e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
3611e1051a39Sopenharmony_ci
3612e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
3613e1051a39Sopenharmony_ci
3614e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
3615e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
3616e1051a39Sopenharmony_ci
3617e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
3618e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
3619e1051a39Sopenharmony_ci
3620e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
3621e1051a39Sopenharmony_ci
3622e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
3623e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
3624e1051a39Sopenharmony_ci
3625e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
3626e1051a39Sopenharmony_ci
3627e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
3628e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
3629e1051a39Sopenharmony_ci
3630e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
3631e1051a39Sopenharmony_ci
3632e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
3633e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
3634e1051a39Sopenharmony_ci
3635e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
3636e1051a39Sopenharmony_ci
3637e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
3638e1051a39Sopenharmony_ci
3639e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
3640e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
3641e1051a39Sopenharmony_ci
3642e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
3643e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
3644e1051a39Sopenharmony_ci
3645e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
3646e1051a39Sopenharmony_ci
3647e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
3648e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
3649e1051a39Sopenharmony_ci
3650e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
3651e1051a39Sopenharmony_ci
3652e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
3653e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
3654e1051a39Sopenharmony_ci
3655e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
3656e1051a39Sopenharmony_ci
3657e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
3658e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
3659e1051a39Sopenharmony_ci
3660e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
3661e1051a39Sopenharmony_ci
3662e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
3663e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
3664e1051a39Sopenharmony_ci
3665e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
3666e1051a39Sopenharmony_ci
3667e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
3668e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
3669e1051a39Sopenharmony_ci
3670e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
3671e1051a39Sopenharmony_ci
3672e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
3673e1051a39Sopenharmony_ci
3674e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
3675e1051a39Sopenharmony_ci
3676e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
3677e1051a39Sopenharmony_ci
3678e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
3679e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
3680e1051a39Sopenharmony_ci
3681e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
3682e1051a39Sopenharmony_ci
3683e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
3684e1051a39Sopenharmony_ci
3685e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
3686e1051a39Sopenharmony_ci
3687e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
3688e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
3689e1051a39Sopenharmony_ci
3690e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
3691e1051a39Sopenharmony_ci
3692e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
3693e1051a39Sopenharmony_ci
3694e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
3695e1051a39Sopenharmony_ci
3696e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
3697e1051a39Sopenharmony_ci
3698e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
3699e1051a39Sopenharmony_ci
3700e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
3701e1051a39Sopenharmony_ci
3702e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
3703e1051a39Sopenharmony_ci
3704e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
3705e1051a39Sopenharmony_ci
3706e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
3707e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
3708e1051a39Sopenharmony_ci
3709e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
3710e1051a39Sopenharmony_ci
3711e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11
3712e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
3713e1051a39Sopenharmony_ci
3714e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11
3715e1051a39Sopenharmony_ci
3716e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11
3717e1051a39Sopenharmony_ci
3718e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11
3719e1051a39Sopenharmony_ci
3720e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
3721e1051a39Sopenharmony_ci	.L192_dec_tail:                                           @ TAIL
3722e1051a39Sopenharmony_ci
3723e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr   @ main_end_input_ptr is number of bytes left to process
3724e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES block 4k+4 - load ciphertext
3725e1051a39Sopenharmony_ci
3726e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr0b                            @ AES block 4k+4 - result
3727e1051a39Sopenharmony_ci
3728e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
3729e1051a39Sopenharmony_ci
3730e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
3731e1051a39Sopenharmony_ci
3732e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                     @ prepare final partial tag
3733e1051a39Sopenharmony_ci
3734e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
3735e1051a39Sopenharmony_ci
3736e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES block 4k+4 - round 12 high
3737e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3738e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3739e1051a39Sopenharmony_ci#endif
3740e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES block 4k+4 - round 12 low
3741e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3742e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3743e1051a39Sopenharmony_ci#endif
3744e1051a39Sopenharmony_ci	b.gt    .L192_dec_blocks_more_than_3
3745e1051a39Sopenharmony_ci
3746e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
3747e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
3748e1051a39Sopenharmony_ci
3749e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
3750e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
3751e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
3752e1051a39Sopenharmony_ci
3753e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
3754e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
3755e1051a39Sopenharmony_ci	b.gt    .L192_dec_blocks_more_than_2
3756e1051a39Sopenharmony_ci
3757e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
3758e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
3759e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
3760e1051a39Sopenharmony_ci
3761e1051a39Sopenharmony_ci	b.gt    .L192_dec_blocks_more_than_1
3762e1051a39Sopenharmony_ci
3763e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
3764e1051a39Sopenharmony_ci	b       .L192_dec_blocks_less_than_1
3765e1051a39Sopenharmony_ci	.L192_dec_blocks_more_than_3:                             @ blocks left >  3
3766e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-3 block
3767e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final-2 block - load ciphertext
3768e1051a39Sopenharmony_ci
3769e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-3 block  - store result
3770e1051a39Sopenharmony_ci
3771e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
3772e1051a39Sopenharmony_ci
3773e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr1b                            @ AES final-2 block - result
3774e1051a39Sopenharmony_ci
3775e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH final-3 block - low
3776e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final-2 block - mov low
3777e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-3 block - mid
3778e1051a39Sopenharmony_ci
3779e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final-2 block - mov high
3780e1051a39Sopenharmony_ci
3781e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH final-3 block - mid
3782e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-3 block - mid
3783e1051a39Sopenharmony_ci
3784e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH final-3 block - high
3785e1051a39Sopenharmony_ci
3786e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES final-2 block - round 12 low
3787e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3788e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3789e1051a39Sopenharmony_ci#endif
3790e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
3791e1051a39Sopenharmony_ci
3792e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                    @ GHASH final-3 block - mid
3793e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES final-2 block - round 12 high
3794e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3795e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3796e1051a39Sopenharmony_ci#endif
3797e1051a39Sopenharmony_ci	.L192_dec_blocks_more_than_2:                             @ blocks left >  2
3798e1051a39Sopenharmony_ci
3799e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-2 block
3800e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final-1 block - load ciphertext
3801e1051a39Sopenharmony_ci
3802e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
3803e1051a39Sopenharmony_ci
3804e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
3805e1051a39Sopenharmony_ci
3806e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr2b                            @ AES final-1 block - result
3807e1051a39Sopenharmony_ci
3808e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-2 block - mid
3809e1051a39Sopenharmony_ci
3810e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                          @ GHASH final-2 block - low
3811e1051a39Sopenharmony_ci
3812e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-2 block  - store result
3813e1051a39Sopenharmony_ci
3814e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-2 block - mid
3815e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final-1 block - mov high
3816e1051a39Sopenharmony_ci
3817e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-2 block - low
3818e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final-1 block - mov low
3819e1051a39Sopenharmony_ci
3820e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                          @ GHASH final-2 block - high
3821e1051a39Sopenharmony_ci
3822e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                      @ GHASH final-2 block - mid
3823e1051a39Sopenharmony_ci
3824e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-2 block - high
3825e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES final-1 block - round 12 high
3826e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3827e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3828e1051a39Sopenharmony_ci#endif
3829e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES final-1 block - round 12 low
3830e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3831e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3832e1051a39Sopenharmony_ci#endif
3833e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-2 block - mid
3834e1051a39Sopenharmony_ci	.L192_dec_blocks_more_than_1:                             @ blocks left >  1
3835e1051a39Sopenharmony_ci
3836e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final-1 block
3837e1051a39Sopenharmony_ci
3838e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
3839e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES final block - load ciphertext
3840e1051a39Sopenharmony_ci
3841e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                 @ GHASH final-1 block - mid
3842e1051a39Sopenharmony_ci
3843e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                          @ GHASH final-1 block - high
3844e1051a39Sopenharmony_ci
3845e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr3b                            @ AES final block - result
3846e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES final-1 block  - store result
3847e1051a39Sopenharmony_ci
3848e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                      @ GHASH final-1 block - mid
3849e1051a39Sopenharmony_ci
3850e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final-1 block - high
3851e1051a39Sopenharmony_ci
3852e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                          @ GHASH final-1 block - low
3853e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES final block - mov high
3854e1051a39Sopenharmony_ci
3855e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                            @ GHASH final-1 block - mid
3856e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES final block - mov low
3857e1051a39Sopenharmony_ci
3858e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                      @ GHASH final-1 block - mid
3859e1051a39Sopenharmony_ci
3860e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                        @ suppress further partial tag feed in
3861e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final-1 block - low
3862e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk12_h                   @ AES final block - round 12 high
3863e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3864e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
3865e1051a39Sopenharmony_ci#endif
3866e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk12_l                   @ AES final block - round 12 low
3867e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
3868e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
3869e1051a39Sopenharmony_ci#endif
3870e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                       @ GHASH final-1 block - mid
3871e1051a39Sopenharmony_ci	.L192_dec_blocks_less_than_1:                             @ blocks left <= 1
3872e1051a39Sopenharmony_ci
3873e1051a39Sopenharmony_ci	mvn     $rk12_l, xzr                                      @ rk12_l = 0xffffffffffffffff
3874e1051a39Sopenharmony_ci	ldp     $end_input_ptr, $main_end_input_ptr, [$output_ptr]  @ load existing bytes we need to not overwrite
3875e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
3876e1051a39Sopenharmony_ci
3877e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                    @ bit_length -= 128
3878e1051a39Sopenharmony_ci
3879e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                          @ bit_length = 128 - #bits in input (in range [1,128])
3880e1051a39Sopenharmony_ci
3881e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                    @ bit_length %= 128
3882e1051a39Sopenharmony_ci	mvn     $rk12_h, xzr                                      @ rk12_h = 0xffffffffffffffff
3883e1051a39Sopenharmony_ci
3884e1051a39Sopenharmony_ci	lsr     $rk12_h, $rk12_h, $bit_length                     @ rk12_h is mask for top 64b of last block
3885e1051a39Sopenharmony_ci	cmp     $bit_length, #64
3886e1051a39Sopenharmony_ci
3887e1051a39Sopenharmony_ci	csel    $ctr32x, $rk12_l, $rk12_h, lt
3888e1051a39Sopenharmony_ci	csel    $ctr96_b64x, $rk12_h, xzr, lt
3889e1051a39Sopenharmony_ci
3890e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr32x                                   @ ctr0b is mask for last block
3891e1051a39Sopenharmony_ci	and     $output_l0, $output_l0, $ctr32x
3892e1051a39Sopenharmony_ci	bic     $end_input_ptr, $end_input_ptr, $ctr32x           @ mask out low existing bytes
3893e1051a39Sopenharmony_ci
3894e1051a39Sopenharmony_ci	orr     $output_l0, $output_l0, $end_input_ptr
3895e1051a39Sopenharmony_ci	mov     $ctr0.d[1], $ctr96_b64x
3896e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
3897e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
3898e1051a39Sopenharmony_ci#else
3899e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
3900e1051a39Sopenharmony_ci#endif
3901e1051a39Sopenharmony_ci
3902e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                            @ possibly partial last block has zeroes in highest bits
3903e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                          @ store the updated counter
3904e1051a39Sopenharmony_ci
3905e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final block
3906e1051a39Sopenharmony_ci
3907e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
3908e1051a39Sopenharmony_ci	bic     $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x @ mask out high existing bytes
3909e1051a39Sopenharmony_ci
3910e1051a39Sopenharmony_ci	and     $output_h0, $output_h0, $ctr96_b64x
3911e1051a39Sopenharmony_ci
3912e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                          @ GHASH final block - high
3913e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH final block - mid
3914e1051a39Sopenharmony_ci
3915e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                          @ GHASH final block - low
3916e1051a39Sopenharmony_ci
3917e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH final block - mid
3918e1051a39Sopenharmony_ci
3919e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final block - high
3920e1051a39Sopenharmony_ci
3921e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                          @ GHASH final block - mid
3922e1051a39Sopenharmony_ci
3923e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final block - low
3924e1051a39Sopenharmony_ci
3925e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                         @ GHASH final block - mid
3926e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
3927e1051a39Sopenharmony_ci
3928e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
3929e1051a39Sopenharmony_ci
3930e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
3931e1051a39Sopenharmony_ci
3932e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
3933e1051a39Sopenharmony_ci
3934e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
3935e1051a39Sopenharmony_ci	orr     $output_h0, $output_h0, $main_end_input_ptr
3936e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr]
3937e1051a39Sopenharmony_ci
3938e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
3939e1051a39Sopenharmony_ci
3940e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
3941e1051a39Sopenharmony_ci
3942e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
3943e1051a39Sopenharmony_ci
3944e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
3945e1051a39Sopenharmony_ci
3946e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
3947e1051a39Sopenharmony_ci
3948e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
3949e1051a39Sopenharmony_ci
3950e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
3951e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
3952e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
3953e1051a39Sopenharmony_ci	mov     x0, $len
3954e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
3955e1051a39Sopenharmony_ci
3956e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
3957e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
3958e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
3959e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
3960e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
3961e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
3962e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
3963e1051a39Sopenharmony_ci	ret
3964e1051a39Sopenharmony_ci
3965e1051a39Sopenharmony_ci.L192_dec_ret:
3966e1051a39Sopenharmony_ci	mov w0, #0x0
3967e1051a39Sopenharmony_ci	ret
3968e1051a39Sopenharmony_ci.size aes_gcm_dec_192_kernel,.-aes_gcm_dec_192_kernel
3969e1051a39Sopenharmony_ci___
3970e1051a39Sopenharmony_ci}
3971e1051a39Sopenharmony_ci
3972e1051a39Sopenharmony_ci{
3973e1051a39Sopenharmony_cimy ($end_input_ptr,$main_end_input_ptr,$input_l0,$input_h0)=map("x$_",(4..7));
3974e1051a39Sopenharmony_cimy ($input_l1,$input_h1,$input_l2,$input_h2,$input_l3,$input_h3)=map("x$_",(19..24));
3975e1051a39Sopenharmony_cimy ($output_l1,$output_h1,$output_l2,$output_h2,$output_l3,$output_h3)=map("x$_",(19..24));
3976e1051a39Sopenharmony_cimy ($output_l0,$output_h0)=map("x$_",(6..7));
3977e1051a39Sopenharmony_ci
3978e1051a39Sopenharmony_cimy $ctr32w="w9";
3979e1051a39Sopenharmony_cimy ($ctr32x,$ctr96_b64x,$ctr96_t32x,$rctr32x,$rk14_l,$rk14_h,$len)=map("x$_",(9..15));
3980e1051a39Sopenharmony_cimy ($ctr96_t32w,$rctr32w)=map("w$_",(11..12));
3981e1051a39Sopenharmony_ci
3982e1051a39Sopenharmony_cimy ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$res0b,$res1b,$res2b,$res3b)=map("v$_.16b",(0..7));
3983e1051a39Sopenharmony_cimy ($ctr0,$ctr1,$ctr2,$ctr3,$res0,$res1,$res2,$res3)=map("v$_",(0..7));
3984e1051a39Sopenharmony_cimy ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$res0d,$res1d,$res2d,$res3d)=map("d$_",(0..7));
3985e1051a39Sopenharmony_cimy ($res0q,$res1q,$res2q,$res3q)=map("q$_",(4..7));
3986e1051a39Sopenharmony_ci
3987e1051a39Sopenharmony_cimy ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(9..11));
3988e1051a39Sopenharmony_cimy ($acc_h,$acc_m,$acc_l)=map("v$_",(9..11));
3989e1051a39Sopenharmony_cimy ($acc_hd,$acc_md,$acc_ld)=map("d$_",(9..11));
3990e1051a39Sopenharmony_ci
3991e1051a39Sopenharmony_cimy ($h1,$h2,$h3,$h4,$h12k,$h34k)=map("v$_",(12..17));
3992e1051a39Sopenharmony_cimy ($h1q,$h2q,$h3q,$h4q)=map("q$_",(12..15));
3993e1051a39Sopenharmony_cimy ($h1b,$h2b,$h3b,$h4b)=map("v$_.16b",(12..15));
3994e1051a39Sopenharmony_ci
3995e1051a39Sopenharmony_cimy $t0="v8";
3996e1051a39Sopenharmony_cimy $t0d="d8";
3997e1051a39Sopenharmony_cimy $t1="v4";
3998e1051a39Sopenharmony_cimy $t1d="d4";
3999e1051a39Sopenharmony_cimy $t2="v8";
4000e1051a39Sopenharmony_cimy $t2d="d8";
4001e1051a39Sopenharmony_cimy $t3="v4";
4002e1051a39Sopenharmony_cimy $t3d="d4";
4003e1051a39Sopenharmony_cimy $t4="v4";
4004e1051a39Sopenharmony_cimy $t4d="d4";
4005e1051a39Sopenharmony_cimy $t5="v5";
4006e1051a39Sopenharmony_cimy $t5d="d5";
4007e1051a39Sopenharmony_cimy $t6="v8";
4008e1051a39Sopenharmony_cimy $t6d="d8";
4009e1051a39Sopenharmony_cimy $t7="v5";
4010e1051a39Sopenharmony_cimy $t7d="d5";
4011e1051a39Sopenharmony_cimy $t8="v6";
4012e1051a39Sopenharmony_cimy $t8d="d6";
4013e1051a39Sopenharmony_cimy $t9="v4";
4014e1051a39Sopenharmony_cimy $t9d="d4";
4015e1051a39Sopenharmony_ci
4016e1051a39Sopenharmony_cimy ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3)=map("v$_",(4..7));
4017e1051a39Sopenharmony_cimy ($ctr_t0d,$ctr_t1d,$ctr_t2d,$ctr_t3d)=map("d$_",(4..7));
4018e1051a39Sopenharmony_cimy ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b)=map("v$_.16b",(4..7));
4019e1051a39Sopenharmony_ci
4020e1051a39Sopenharmony_cimy $mod_constantd="d8";
4021e1051a39Sopenharmony_cimy $mod_constant="v8";
4022e1051a39Sopenharmony_cimy $mod_t="v7";
4023e1051a39Sopenharmony_ci
4024e1051a39Sopenharmony_cimy ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7,$rk8,$rk9,$rk10,$rk11,$rk12,$rk13)=map("v$_.16b",(18..31));
4025e1051a39Sopenharmony_cimy ($rk0s,$rk1s,$rk2s,$rk3s,$rk4s,$rk5s,$rk6s,$rk7s,$rk8s,$rk9s,$rk10s,$rk11s,$rk12s,$rk13s)=map("v$_.4s",(18..31));
4026e1051a39Sopenharmony_cimy ($rk0q,$rk1q,$rk2q,$rk3q,$rk4q,$rk5q,$rk6q,$rk7q,$rk8q,$rk9q,$rk10q,$rk11q,$rk12q,$rk13q)=map("q$_",(18..31));
4027e1051a39Sopenharmony_cimy $rk2q1="v20.1q";
4028e1051a39Sopenharmony_cimy $rk3q1="v21.1q";
4029e1051a39Sopenharmony_cimy $rk4v="v22";
4030e1051a39Sopenharmony_cimy $rk4d="d22";
4031e1051a39Sopenharmony_ci
4032e1051a39Sopenharmony_ci#########################################################################################
4033e1051a39Sopenharmony_ci# size_t aes_gcm_enc_256_kernel(const unsigned char *in,
4034e1051a39Sopenharmony_ci#                               size_t len,
4035e1051a39Sopenharmony_ci#                               unsigned char *out,
4036e1051a39Sopenharmony_ci#                               const void *key,
4037e1051a39Sopenharmony_ci#                               unsigned char ivec[16],
4038e1051a39Sopenharmony_ci#                               u64 *Xi);
4039e1051a39Sopenharmony_ci#
4040e1051a39Sopenharmony_ci$code.=<<___;
4041e1051a39Sopenharmony_ci.global aes_gcm_enc_256_kernel
4042e1051a39Sopenharmony_ci.type   aes_gcm_enc_256_kernel,%function
4043e1051a39Sopenharmony_ci.align  4
4044e1051a39Sopenharmony_ciaes_gcm_enc_256_kernel:
4045e1051a39Sopenharmony_ci	cbz     x1, .L256_enc_ret
4046e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
4047e1051a39Sopenharmony_ci	mov     x16, x4
4048e1051a39Sopenharmony_ci	mov     x8, x5
4049e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
4050e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
4051e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
4052e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
4053e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
4054e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
4055e1051a39Sopenharmony_ci
4056e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3   @ end_input_ptr
4057e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3              @ byte_len
4058e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
4059e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]              @ ctr96_b64, ctr96_t32
4060e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4061e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
4062e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
4063e1051a39Sopenharmony_ci#endif
4064e1051a39Sopenharmony_ci	ldp     $rk14_l, $rk14_h, [$cc, #224]                     @ load rk14
4065e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4066e1051a39Sopenharmony_ci	ror     $rk14_l, $rk14_l, #32
4067e1051a39Sopenharmony_ci	ror     $rk14_h, $rk14_h, #32
4068e1051a39Sopenharmony_ci#endif
4069e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
4070e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1      @ byte_len - 1
4071e1051a39Sopenharmony_ci
4072e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16                               @ load rk0
4073e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
4074e1051a39Sopenharmony_ci
4075e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16                               @ load rk1
4076e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
4077e1051a39Sopenharmony_ci
4078e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
4079e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 2
4080e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
4081e1051a39Sopenharmony_ci
4082e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                                @ rev_ctr32
4083e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 4 blocks
4084e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 1
4085e1051a39Sopenharmony_ci
4086e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 0
4087e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ increment rev_ctr32
4088e1051a39Sopenharmony_ci
4089e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 1
4090e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 3
4091e1051a39Sopenharmony_ci
4092e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 1
4093e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 1
4094e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16                               @ load rk2
4095e1051a39Sopenharmony_ci
4096e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 1
4097e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 2
4098e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 2
4099e1051a39Sopenharmony_ci
4100e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 2
4101e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16                               @ load rk3
4102e1051a39Sopenharmony_ci
4103e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 2
4104e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 3
4105e1051a39Sopenharmony_ci
4106e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 1
4107e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 3
4108e1051a39Sopenharmony_ci
4109e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 3
4110e1051a39Sopenharmony_ci
4111e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 0
4112e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16                               @ load rk4
4113e1051a39Sopenharmony_ci
4114e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 2
4115e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16                               @ load rk5
4116e1051a39Sopenharmony_ci
4117e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 0
4118e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16                               @ load rk6
4119e1051a39Sopenharmony_ci
4120e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 1
4121e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                         @ load h3l | h3h
4122e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
4123e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
4124e1051a39Sopenharmony_ci#endif
4125e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 0
4126e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16                               @ load rk7
4127e1051a39Sopenharmony_ci
4128e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 1
4129e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16                               @ load rk8
4130e1051a39Sopenharmony_ci
4131e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 2
4132e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                         @ load h2l | h2h
4133e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
4134e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
4135e1051a39Sopenharmony_ci#endif
4136e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 1
4137e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16                               @ load rk9
4138e1051a39Sopenharmony_ci
4139e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 2
4140e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                        @ load h4l | h4h
4141e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
4142e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
4143e1051a39Sopenharmony_ci#endif
4144e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 3
4145e1051a39Sopenharmony_ci	ld1     {$rk10s}, [$cc], #16                              @ load rk10
4146e1051a39Sopenharmony_ci
4147e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 2
4148e1051a39Sopenharmony_ci	ld1     {$rk11s}, [$cc], #16                              @ load rk11
4149e1051a39Sopenharmony_ci
4150e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 3
4151e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 3
4152e1051a39Sopenharmony_ci
4153e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 3
4154e1051a39Sopenharmony_ci
4155e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 3
4156e1051a39Sopenharmony_ci	ld1     { $acc_lb}, [$current_tag]
4157e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
4158e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
4159e1051a39Sopenharmony_ci
4160e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 4
4161e1051a39Sopenharmony_ci
4162e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 4
4163e1051a39Sopenharmony_ci
4164e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 4
4165e1051a39Sopenharmony_ci
4166e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 4
4167e1051a39Sopenharmony_ci
4168e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 5
4169e1051a39Sopenharmony_ci
4170e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 5
4171e1051a39Sopenharmony_ci
4172e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 5
4173e1051a39Sopenharmony_ci
4174e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 5
4175e1051a39Sopenharmony_ci
4176e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 6
4177e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                      @ h4l | h3l
4178e1051a39Sopenharmony_ci
4179e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 6
4180e1051a39Sopenharmony_ci	ld1     {$rk12s}, [$cc], #16                              @ load rk12
4181e1051a39Sopenharmony_ci
4182e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 6
4183e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                         @ load h1l | h1h
4184e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
4185e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
4186e1051a39Sopenharmony_ci#endif
4187e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 6
4188e1051a39Sopenharmony_ci	ld1     {$rk13s}, [$cc], #16                              @ load rk13
4189e1051a39Sopenharmony_ci
4190e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 7
4191e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                      @ h4h | h3h
4192e1051a39Sopenharmony_ci
4193e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 7
4194e1051a39Sopenharmony_ci
4195e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 7
4196e1051a39Sopenharmony_ci
4197e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 7
4198e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                      @ h2l | h1l
4199e1051a39Sopenharmony_ci
4200e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 8
4201e1051a39Sopenharmony_ci
4202e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 8
4203e1051a39Sopenharmony_ci
4204e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 8
4205e1051a39Sopenharmony_ci
4206e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 9
4207e1051a39Sopenharmony_ci
4208e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 9
4209e1051a39Sopenharmony_ci
4210e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 8
4211e1051a39Sopenharmony_ci
4212e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 10
4213e1051a39Sopenharmony_ci
4214e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 9
4215e1051a39Sopenharmony_ci
4216e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 9
4217e1051a39Sopenharmony_ci
4218e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 10
4219e1051a39Sopenharmony_ci
4220e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 10
4221e1051a39Sopenharmony_ci
4222e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 11
4223e1051a39Sopenharmony_ci
4224e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 11
4225e1051a39Sopenharmony_ci
4226e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 10
4227e1051a39Sopenharmony_ci
4228e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 12
4229e1051a39Sopenharmony_ci
4230e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 12
4231e1051a39Sopenharmony_ci
4232e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 11
4233e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                  @ h4k | h3k
4234e1051a39Sopenharmony_ci
4235e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 11
4236e1051a39Sopenharmony_ci
4237e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 2 - round 13
4238e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                      @ h2h | h1h
4239e1051a39Sopenharmony_ci
4240e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 12
4241e1051a39Sopenharmony_ci
4242e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 12
4243e1051a39Sopenharmony_ci
4244e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 1 - round 13
4245e1051a39Sopenharmony_ci
4246e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 0 - round 13
4247e1051a39Sopenharmony_ci
4248e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 3 - round 13
4249e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                     @ h2k | h1k
4250e1051a39Sopenharmony_ci	b.ge    .L256_enc_tail                                    @ handle tail
4251e1051a39Sopenharmony_ci
4252e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]           @ AES block 1 - load plaintext
4253e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4254e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1
4255e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1
4256e1051a39Sopenharmony_ci#endif
4257e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4
4258e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]            @ AES block 0 - load plaintext
4259e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4260e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
4261e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
4262e1051a39Sopenharmony_ci#endif
4263e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]           @ AES block 3 - load plaintext
4264e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4265e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3
4266e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3
4267e1051a39Sopenharmony_ci#endif
4268e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]           @ AES block 2 - load plaintext
4269e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4270e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2
4271e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2
4272e1051a39Sopenharmony_ci#endif
4273e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                       @ AES input_ptr update
4274e1051a39Sopenharmony_ci
4275e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk14_l                     @ AES block 1 - round 14 low
4276e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk14_h                     @ AES block 1 - round 14 high
4277e1051a39Sopenharmony_ci
4278e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                               @ AES block 1 - mov low
4279e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                     @ AES block 0 - round 14 low
4280e1051a39Sopenharmony_ci
4281e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                     @ AES block 0 - round 14 high
4282e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk14_h                     @ AES block 3 - round 14 high
4283e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 0 - mov low
4284e1051a39Sopenharmony_ci
4285e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 8 blocks
4286e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 0 - mov high
4287e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk14_l                     @ AES block 3 - round 14 low
4288e1051a39Sopenharmony_ci
4289e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk14_l                     @ AES block 2 - round 14 low
4290e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                           @ AES block 1 - mov high
4291e1051a39Sopenharmony_ci
4292e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                               @ AES block 2 - mov low
4293e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4
4294e1051a39Sopenharmony_ci
4295e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4
4296e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                               @ AES block 3 - mov low
4297e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk14_h                     @ AES block 2 - round 14 high
4298e1051a39Sopenharmony_ci
4299e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                           @ AES block 2 - mov high
4300e1051a39Sopenharmony_ci
4301e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                          @ AES block 0 - result
4302e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4
4303e1051a39Sopenharmony_ci
4304e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4
4305e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 5
4306e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 5
4307e1051a39Sopenharmony_ci
4308e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                          @ AES block 1 - result
4309e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 5
4310e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 5
4311e1051a39Sopenharmony_ci
4312e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 5
4313e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 6
4314e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                     @ AES block 0 - store result
4315e1051a39Sopenharmony_ci
4316e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                           @ AES block 3 - mov high
4317e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 6
4318e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                          @ AES block 2 - result
4319e1051a39Sopenharmony_ci
4320e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES block 1 - store result
4321e1051a39Sopenharmony_ci
4322e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 6
4323e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 6
4324e1051a39Sopenharmony_ci
4325e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 6
4326e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                     @ AES block 2 - store result
4327e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 7
4328e1051a39Sopenharmony_ci
4329e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 7
4330e1051a39Sopenharmony_ci
4331e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                          @ AES block 3 - result
4332e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                     @ AES block 3 - store result
4333e1051a39Sopenharmony_ci	b.ge    .L256_enc_prepretail                              @ input_ptr >= main_end_input_ptr (cmp above) - skip main loop, do prepretail
4334e1051a39Sopenharmony_ci
4335e1051a39Sopenharmony_ci	.L256_enc_main_loop:                                      @ main loop start - each pass runs AES rounds 0-13 plus the round 14 key xor on 4 counter blocks and GHASHes the 4 previously stored result blocks
4336e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
4337e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k (only t0 is free)
4338e1051a39Sopenharmony_ci
4339e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
4340e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+3
4341e1051a39Sopenharmony_ci
4342e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
4343e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0 - swap the two 64-bit halves of the accumulated value in acc_l
4344e1051a39Sopenharmony_ci
4345e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
4346e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+3
4347e1051a39Sopenharmony_ci
4348e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
4349e1051a39Sopenharmony_ci	ldp     $input_l3, $input_h3, [$input_ptr, #48]           @ AES block 4k+7 - load plaintext
4350e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4351e1051a39Sopenharmony_ci	rev     $input_l3, $input_l3                              @ big-endian only - byte-reverse loaded plaintext low half
4352e1051a39Sopenharmony_ci	rev     $input_h3, $input_h3                              @ big-endian only - byte-reverse loaded plaintext high half
4353e1051a39Sopenharmony_ci#endif
4354e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
4355e1051a39Sopenharmony_ci	ldp     $input_l2, $input_h2, [$input_ptr, #32]           @ AES block 4k+6 - load plaintext
4356e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4357e1051a39Sopenharmony_ci	rev     $input_l2, $input_l2                              @ big-endian only - byte-reverse loaded plaintext low half
4358e1051a39Sopenharmony_ci	rev     $input_h2, $input_h2                              @ big-endian only - byte-reverse loaded plaintext high half
4359e1051a39Sopenharmony_ci#endif
4360e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
4361e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1 - xor the accumulated value into GHASH block 4k before multiplying
4362e1051a39Sopenharmony_ci
4363e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
4364e1051a39Sopenharmony_ci
4365e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
4366e1051a39Sopenharmony_ci	eor     $input_l3, $input_l3, $rk14_l                     @ AES block 4k+7 - round 14 low
4367e1051a39Sopenharmony_ci
4368e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
4369e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid (multiplier taken from the upper 64 bits of h34k)
4370e1051a39Sopenharmony_ci
4371e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
4372e1051a39Sopenharmony_ci	eor     $input_h2, $input_h2, $rk14_h                     @ AES block 4k+6 - round 14 high
4373e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
4374e1051a39Sopenharmony_ci
4375e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
4376e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+1 (t0 and t1 free)
4377e1051a39Sopenharmony_ci
4378e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
4379e1051a39Sopenharmony_ci
4380e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
4381e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
4382e1051a39Sopenharmony_ci
4383e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
4384e1051a39Sopenharmony_ci
4385e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
4386e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
4387e1051a39Sopenharmony_ci
4388e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
4389e1051a39Sopenharmony_ci
4390e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
4391e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2 (t0, t1, and t2 free)
4392e1051a39Sopenharmony_ci
4393e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
4394e1051a39Sopenharmony_ci
4395e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
4396e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
4397e1051a39Sopenharmony_ci
4398e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
4399e1051a39Sopenharmony_ci
4400e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
4401e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
4402e1051a39Sopenharmony_ci
4403e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
4404e1051a39Sopenharmony_ci
4405e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
4406e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
4407e1051a39Sopenharmony_ci
4408e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
4409e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
4410e1051a39Sopenharmony_ci
4411e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
4412e1051a39Sopenharmony_ci
4413e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
4414e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
4415e1051a39Sopenharmony_ci
4416e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
4417e1051a39Sopenharmony_ci
4418e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
4419e1051a39Sopenharmony_ci
4420e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
4421e1051a39Sopenharmony_ci
4422e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
4423e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid (copy to the upper half so pmull2 can consume it)
4424e1051a39Sopenharmony_ci
4425e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
4426e1051a39Sopenharmony_ci
4427e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
4428e1051a39Sopenharmony_ci
4429e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
4430e1051a39Sopenharmony_ci
4431e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
4432e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
4433e1051a39Sopenharmony_ci
4434e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
4435e1051a39Sopenharmony_ci
4436e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
4437e1051a39Sopenharmony_ci
4438e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
4439e1051a39Sopenharmony_ci
4440e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
4441e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
4442e1051a39Sopenharmony_ci
4443e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
4444e1051a39Sopenharmony_ci	ldp     $input_l1, $input_h1, [$input_ptr, #16]           @ AES block 4k+5 - load plaintext
4445e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4446e1051a39Sopenharmony_ci	rev     $input_l1, $input_l1                              @ big-endian only - byte-reverse loaded plaintext low half
4447e1051a39Sopenharmony_ci	rev     $input_h1, $input_h1                              @ big-endian only - byte-reverse loaded plaintext high half
4448e1051a39Sopenharmony_ci#endif
4449e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
4450e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
4451e1051a39Sopenharmony_ci
4452e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
4453e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
4454e1051a39Sopenharmony_ci
4455e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
4456e1051a39Sopenharmony_ci
4457e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
4458e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
4459e1051a39Sopenharmony_ci
4460e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
4461e1051a39Sopenharmony_ci	eor     $input_l1, $input_l1, $rk14_l                     @ AES block 4k+5 - round 14 low
4462e1051a39Sopenharmony_ci
4463e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
4464e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
4465e1051a39Sopenharmony_ci
4466e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
4467e1051a39Sopenharmony_ci	eor     $input_l2, $input_l2, $rk14_l                     @ AES block 4k+6 - round 14 low
4468e1051a39Sopenharmony_ci
4469e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
4470e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2                           @ mod_constant - fill each of the low 8 bytes with 0xc2
4471e1051a39Sopenharmony_ci
4472e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
4473e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
4474e1051a39Sopenharmony_ci	fmov    $ctr_t1d, $input_l1                               @ AES block 4k+5 - mov low
4475e1051a39Sopenharmony_ci
4476e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
4477e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr, #0]            @ AES block 4k+4 - load plaintext
4478e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4479e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0                              @ big-endian only - byte-reverse loaded plaintext low half
4480e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0                              @ big-endian only - byte-reverse loaded plaintext high half
4481e1051a39Sopenharmony_ci#endif
4482e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
4483e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant - shift left by 56 so only the top byte 0xc2 remains set
4484e1051a39Sopenharmony_ci
4485e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
4486e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
4487e1051a39Sopenharmony_ci
4488e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
4489e1051a39Sopenharmony_ci
4490e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
4491e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
4492e1051a39Sopenharmony_ci
4493e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
4494e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+3
4495e1051a39Sopenharmony_ci
4496e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 11
4497e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up (t9 = low xor high)
4498e1051a39Sopenharmony_ci
4499e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 11
4500e1051a39Sopenharmony_ci	add     $input_ptr, $input_ptr, #64                       @ AES input_ptr update (advance past the 4 blocks loaded this pass)
4501e1051a39Sopenharmony_ci
4502e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
4503e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+8
4504e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
4505e1051a39Sopenharmony_ci
4506e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
4507e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                     @ AES block 4k+4 - round 14 low
4508e1051a39Sopenharmony_ci
4509e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 12
4510e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up (mid xor low xor high)
4511e1051a39Sopenharmony_ci
4512e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
4513e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                     @ AES block 4k+4 - round 14 high
4514e1051a39Sopenharmony_ci
4515e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 4k+4 - mov low
4516e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+8
4517e1051a39Sopenharmony_ci	eor     $mod_t.16b, $acc_hb, $mod_t.16b                   @ MODULO - fold into mid
4518e1051a39Sopenharmony_ci
4519e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 12
4520e1051a39Sopenharmony_ci	eor     $input_h1, $input_h1, $rk14_h                     @ AES block 4k+5 - round 14 high
4521e1051a39Sopenharmony_ci
4522e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 11
4523e1051a39Sopenharmony_ci	eor     $input_h3, $input_h3, $rk14_h                     @ AES block 4k+7 - round 14 high
4524e1051a39Sopenharmony_ci
4525e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 11
4526e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+8
4527e1051a39Sopenharmony_ci
4528e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 4k+4 - round 13
4529e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 4k+4 - mov high
4530e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
4531e1051a39Sopenharmony_ci
4532e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 12
4533e1051a39Sopenharmony_ci	fmov    $ctr_t3d, $input_l3                               @ AES block 4k+7 - mov low
4534e1051a39Sopenharmony_ci
4535e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 4k+5 - round 13
4536e1051a39Sopenharmony_ci	fmov    $ctr_t1.d[1], $input_h1                           @ AES block 4k+5 - mov high
4537e1051a39Sopenharmony_ci
4538e1051a39Sopenharmony_ci	fmov    $ctr_t2d, $input_l2                               @ AES block 4k+6 - mov low
4539e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ LOOP CONTROL - sets the flags consumed by the b.lt that closes the loop
4540e1051a39Sopenharmony_ci
4541e1051a39Sopenharmony_ci	fmov    $ctr_t2.d[1], $input_h2                           @ AES block 4k+6 - mov high
4542e1051a39Sopenharmony_ci
4543e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d            @ MODULO - mid 64b align with low
4544e1051a39Sopenharmony_ci	eor     $res0b, $ctr_t0b, $ctr0b                          @ AES block 4k+4 - result
4545e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4k+8
4546e1051a39Sopenharmony_ci
4547e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4k+8
4548e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+9
4549e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+9
4550e1051a39Sopenharmony_ci
4551e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t1b, $ctr1b                          @ AES block 4k+5 - result
4552e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 4k+9
4553e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+9
4554e1051a39Sopenharmony_ci
4555e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 12
4556e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 4k+9
4557e1051a39Sopenharmony_ci
4558e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 4k+6 - round 13
4559e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+10
4560e1051a39Sopenharmony_ci	st1     { $res0b}, [$output_ptr], #16                     @ AES block 4k+4 - store result
4561e1051a39Sopenharmony_ci
4562e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+10
4563e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                         @ MODULO - fold into low
4564e1051a39Sopenharmony_ci	fmov    $ctr_t3.d[1], $input_h3                           @ AES block 4k+7 - mov high
4565e1051a39Sopenharmony_ci
4566e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
4567e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                     @ AES block 4k+5 - store result
4568e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+10
4569e1051a39Sopenharmony_ci
4570e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 4k+7 - round 13
4571e1051a39Sopenharmony_ci	eor     $res2b, $ctr_t2b, $ctr2b                          @ AES block 4k+6 - result
4572e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+10
4573e1051a39Sopenharmony_ci
4574e1051a39Sopenharmony_ci	st1     { $res2b}, [$output_ptr], #16                     @ AES block 4k+6 - store result
4575e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+10
4576e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+11
4577e1051a39Sopenharmony_ci
4578e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
4579e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+11
4580e1051a39Sopenharmony_ci
4581e1051a39Sopenharmony_ci	eor     $res3b, $ctr_t3b, $ctr3b                          @ AES block 4k+7 - result
4582e1051a39Sopenharmony_ci	st1     { $res3b}, [$output_ptr], #16                     @ AES block 4k+7 - store result
4583e1051a39Sopenharmony_ci	b.lt    L256_enc_main_loop                                @ repeat while input_ptr compared below main_end_input_ptr (signed lt); NOTE(review): target is spelled without the leading dot of the label definition - confirm the assembler translation step maps both spellings to one symbol
4584e1051a39Sopenharmony_ci
4585e1051a39Sopenharmony_ci	.L256_enc_prepretail:                                     @ PREPRETAIL
4586e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
4587e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2 (t0, t1, and t2 free)
4588e1051a39Sopenharmony_ci
4589e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
4590e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+3
4591e1051a39Sopenharmony_ci
4592e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
4593e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k (only t0 is free)
4594e1051a39Sopenharmony_ci
4595e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+3
4596e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
4597e1051a39Sopenharmony_ci
4598e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
4599e1051a39Sopenharmony_ci
4600e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
4601e1051a39Sopenharmony_ci
4602e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
4603e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+1 (t0 and t1 free)
4604e1051a39Sopenharmony_ci
4605e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
4606e1051a39Sopenharmony_ci
4607e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
4608e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
4609e1051a39Sopenharmony_ci
4610e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
4611e1051a39Sopenharmony_ci
4612e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
4613e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
4614e1051a39Sopenharmony_ci
4615e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
4616e1051a39Sopenharmony_ci
4617e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
4618e1051a39Sopenharmony_ci
4619e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
4620e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
4621e1051a39Sopenharmony_ci
4622e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
4623e1051a39Sopenharmony_ci
4624e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
4625e1051a39Sopenharmony_ci
4626e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
4627e1051a39Sopenharmony_ci
4628e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
4629e1051a39Sopenharmony_ci
4630e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
4631e1051a39Sopenharmony_ci
4632e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
4633e1051a39Sopenharmony_ci
4634e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
4635e1051a39Sopenharmony_ci
4636e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
4637e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
4638e1051a39Sopenharmony_ci
4639e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
4640e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
4641e1051a39Sopenharmony_ci
4642e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
4643e1051a39Sopenharmony_ci
4644e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
4645e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
4646e1051a39Sopenharmony_ci
4647e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
4648e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3 (t0, t1, t2 and t3 free)
4649e1051a39Sopenharmony_ci
4650e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
4651e1051a39Sopenharmony_ci
4652e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
4653e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
4654e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+3
4655e1051a39Sopenharmony_ci
4656e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
4657e1051a39Sopenharmony_ci
4658e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
4659e1051a39Sopenharmony_ci
4660e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
4661e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
4662e1051a39Sopenharmony_ci
4663e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
4664e1051a39Sopenharmony_ci
4665e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
4666e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
4667e1051a39Sopenharmony_ci
4668e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
4669e1051a39Sopenharmony_ci
4670e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
4671e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
4672e1051a39Sopenharmony_ci
4673e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
4674e1051a39Sopenharmony_ci
4675e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
4676e1051a39Sopenharmony_ci
4677e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
4678e1051a39Sopenharmony_ci
4679e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
4680e1051a39Sopenharmony_ci
4681e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
4682e1051a39Sopenharmony_ci
4683e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
4684e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
4685e1051a39Sopenharmony_ci
4686e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
4687e1051a39Sopenharmony_ci
4688e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
4689e1051a39Sopenharmony_ci
4690e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
4691e1051a39Sopenharmony_ci
4692e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
4693e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
4694e1051a39Sopenharmony_ci
4695e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
4696e1051a39Sopenharmony_ci
4697e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
4698e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
4699e1051a39Sopenharmony_ci
4700e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
4701e1051a39Sopenharmony_ci
4702e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
4703e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
4704e1051a39Sopenharmony_ci
4705e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
4706e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
4707e1051a39Sopenharmony_ci
4708e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
4709e1051a39Sopenharmony_ci
4710e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
4711e1051a39Sopenharmony_ci
4712e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
4713e1051a39Sopenharmony_ci
4714e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
4715e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
4716e1051a39Sopenharmony_ci
4717e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
4718e1051a39Sopenharmony_ci
4719e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ karatsuba tidy up
4720e1051a39Sopenharmony_ci
4721e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_h.1d, $mod_constant.1d
4722e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8
4723e1051a39Sopenharmony_ci
4724e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
4725e1051a39Sopenharmony_ci
4726e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
4727e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_lb
4728e1051a39Sopenharmony_ci
4729e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
4730e1051a39Sopenharmony_ci
4731e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
4732e1051a39Sopenharmony_ci
4733e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
4734e1051a39Sopenharmony_ci
4735e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 11
4736e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t1.16b
4737e1051a39Sopenharmony_ci
4738e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
4739e1051a39Sopenharmony_ci
4740e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
4741e1051a39Sopenharmony_ci
4742e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 12
4743e1051a39Sopenharmony_ci
4744e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 11
4745e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb
4746e1051a39Sopenharmony_ci
4747e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 11
4748e1051a39Sopenharmony_ci
4749e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
4750e1051a39Sopenharmony_ci
4751e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 12
4752e1051a39Sopenharmony_ci
4753e1051a39Sopenharmony_ci	pmull   $t1.1q, $acc_m.1d, $mod_constant.1d
4754e1051a39Sopenharmony_ci
4755e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 11
4756e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8
4757e1051a39Sopenharmony_ci
4758e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 12
4759e1051a39Sopenharmony_ci
4760e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 4k+5 - round 13
4761e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t1.16b
4762e1051a39Sopenharmony_ci
4763e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 12
4764e1051a39Sopenharmony_ci
4765e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 4k+7 - round 13
4766e1051a39Sopenharmony_ci
4767e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 4k+4 - round 13
4768e1051a39Sopenharmony_ci
4769e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 4k+6 - round 13
4770e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb
4771e1051a39Sopenharmony_ci	.L256_enc_tail:                                           @ TAIL
4772e1051a39Sopenharmony_ci
4773e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                     @ prepare final partial tag
4774e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr   @ main_end_input_ptr is number of bytes left to process
4775e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16           @ AES block 4k+4 - load plaintext
4776e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4777e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
4778e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
4779e1051a39Sopenharmony_ci#endif
4780e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                     @ AES block 4k+4 - round 14 low
4781e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                     @ AES block 4k+4 - round 14 high
4782e1051a39Sopenharmony_ci
4783e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
4784e1051a39Sopenharmony_ci	fmov    $ctr_t0d, $input_l0                               @ AES block 4k+4 - mov low
4785e1051a39Sopenharmony_ci
4786e1051a39Sopenharmony_ci	fmov    $ctr_t0.d[1], $input_h0                           @ AES block 4k+4 - mov high
4787e1051a39Sopenharmony_ci
4788e1051a39Sopenharmony_ci	eor     $res1b, $ctr_t0b, $ctr0b                          @ AES block 4k+4 - result
4789e1051a39Sopenharmony_ci	b.gt    .L256_enc_blocks_more_than_3
4790e1051a39Sopenharmony_ci
4791e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
4792e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
4793e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
4794e1051a39Sopenharmony_ci
4795e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
4796e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
4797e1051a39Sopenharmony_ci
4798e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
4799e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
4800e1051a39Sopenharmony_ci	b.gt    .L256_enc_blocks_more_than_2
4801e1051a39Sopenharmony_ci
4802e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
4803e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
4804e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
4805e1051a39Sopenharmony_ci
4806e1051a39Sopenharmony_ci	b.gt    .L256_enc_blocks_more_than_1
4807e1051a39Sopenharmony_ci
4808e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
4809e1051a39Sopenharmony_ci	b       .L256_enc_blocks_less_than_1
4810e1051a39Sopenharmony_ci	.L256_enc_blocks_more_than_3:                            @ blocks left >  3
4811e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-3 block  - store result
4812e1051a39Sopenharmony_ci
4813e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final-2 block - load input low & high
4814e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4815e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
4816e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
4817e1051a39Sopenharmony_ci#endif
4818e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-3 block
4819e1051a39Sopenharmony_ci
4820e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                    @ AES final-2 block - round 14 low
4821e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
4822e1051a39Sopenharmony_ci
4823e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                    @ AES final-2 block - round 14 high
4824e1051a39Sopenharmony_ci
4825e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-3 block - mid
4826e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final-2 block - mov low
4827e1051a39Sopenharmony_ci
4828e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final-2 block - mov high
4829e1051a39Sopenharmony_ci
4830e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-3 block - mid
4831e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
4832e1051a39Sopenharmony_ci
4833e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                              @ GHASH final-3 block - mid
4834e1051a39Sopenharmony_ci
4835e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                      @ GHASH final-3 block - low
4836e1051a39Sopenharmony_ci
4837e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                      @ GHASH final-3 block - high
4838e1051a39Sopenharmony_ci
4839e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                   @ GHASH final-3 block - mid
4840e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr1b                           @ AES final-2 block - result
4841e1051a39Sopenharmony_ci	.L256_enc_blocks_more_than_2:                            @ blocks left >  2
4842e1051a39Sopenharmony_ci
4843e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-2 block - store result
4844e1051a39Sopenharmony_ci
4845e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final-1 block - load input low & high
4846e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4847e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
4848e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
4849e1051a39Sopenharmony_ci#endif
4850e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-2 block
4851e1051a39Sopenharmony_ci
4852e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                    @ AES final-1 block - round 14 low
4853e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
4854e1051a39Sopenharmony_ci
4855e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final-1 block - mov low
4856e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                    @ AES final-1 block - round 14 high
4857e1051a39Sopenharmony_ci
4858e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final-1 block - mov high
4859e1051a39Sopenharmony_ci
4860e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
4861e1051a39Sopenharmony_ci
4862e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                         @ GHASH final-2 block - high
4863e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-2 block - mid
4864e1051a39Sopenharmony_ci
4865e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                         @ GHASH final-2 block - low
4866e1051a39Sopenharmony_ci
4867e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-2 block - mid
4868e1051a39Sopenharmony_ci
4869e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr2b                           @ AES final-1 block - result
4870e1051a39Sopenharmony_ci
4871e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-2 block - high
4872e1051a39Sopenharmony_ci
4873e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                     @ GHASH final-2 block - mid
4874e1051a39Sopenharmony_ci
4875e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-2 block - low
4876e1051a39Sopenharmony_ci
4877e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-2 block - mid
4878e1051a39Sopenharmony_ci	.L256_enc_blocks_more_than_1:                            @ blocks left >  1
4879e1051a39Sopenharmony_ci
4880e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr], #16                    @ AES final-1 block - store result
4881e1051a39Sopenharmony_ci
4882e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-1 block
4883e1051a39Sopenharmony_ci
4884e1051a39Sopenharmony_ci	ldp     $input_l0, $input_h0, [$input_ptr], #16          @ AES final block - load input low & high
4885e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
4886e1051a39Sopenharmony_ci	rev     $input_l0, $input_l0
4887e1051a39Sopenharmony_ci	rev     $input_h0, $input_h0
4888e1051a39Sopenharmony_ci#endif
4889e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
4890e1051a39Sopenharmony_ci
4891e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
4892e1051a39Sopenharmony_ci
4893e1051a39Sopenharmony_ci	eor     $input_l0, $input_l0, $rk14_l                    @ AES final block - round 14 low
4894e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-1 block - mid
4895e1051a39Sopenharmony_ci
4896e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                         @ GHASH final-1 block - high
4897e1051a39Sopenharmony_ci	eor     $input_h0, $input_h0, $rk14_h                    @ AES final block - round 14 high
4898e1051a39Sopenharmony_ci
4899e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-1 block - mid
4900e1051a39Sopenharmony_ci
4901e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-1 block - high
4902e1051a39Sopenharmony_ci
4903e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                           @ GHASH final-1 block - mid
4904e1051a39Sopenharmony_ci	fmov    $res1d, $input_l0                                @ AES final block - mov low
4905e1051a39Sopenharmony_ci
4906e1051a39Sopenharmony_ci	fmov    $res1.d[1], $input_h0                            @ AES final block - mov high
4907e1051a39Sopenharmony_ci
4908e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                     @ GHASH final-1 block - mid
4909e1051a39Sopenharmony_ci
4910e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                         @ GHASH final-1 block - low
4911e1051a39Sopenharmony_ci
4912e1051a39Sopenharmony_ci	eor     $res1b, $res1b, $ctr3b                           @ AES final block - result
4913e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-1 block - mid
4914e1051a39Sopenharmony_ci
4915e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-1 block - low
4916e1051a39Sopenharmony_ci	.L256_enc_blocks_less_than_1:                            @ blocks left <= 1
4917e1051a39Sopenharmony_ci
4918e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
4919e1051a39Sopenharmony_ci
4920e1051a39Sopenharmony_ci	mvn     $rk14_l, xzr                                     @ rk14_l = 0xffffffffffffffff
4921e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                   @ bit_length -= 128
4922e1051a39Sopenharmony_ci
4923e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                         @ bit_length = 128 - #bits in input (in range [1,128])
4924e1051a39Sopenharmony_ci	ld1     { $rk0}, [$output_ptr]                           @ load existing bytes where the possibly partial last block is to be stored
4925e1051a39Sopenharmony_ci
4926e1051a39Sopenharmony_ci	mvn     $rk14_h, xzr                                     @ rk14_h = 0xffffffffffffffff
4927e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
4928e1051a39Sopenharmony_ci
4929e1051a39Sopenharmony_ci	lsr     $rk14_h, $rk14_h, $bit_length                    @ rk14_h is mask for top 64b of last block
4930e1051a39Sopenharmony_ci	cmp     $bit_length, #64
4931e1051a39Sopenharmony_ci
4932e1051a39Sopenharmony_ci	csel    $input_l0, $rk14_l, $rk14_h, lt
4933e1051a39Sopenharmony_ci	csel    $input_h0, $rk14_h, xzr, lt
4934e1051a39Sopenharmony_ci
4935e1051a39Sopenharmony_ci	fmov    $ctr0d, $input_l0                                @ ctr0b is mask for last block
4936e1051a39Sopenharmony_ci
4937e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $input_h0
4938e1051a39Sopenharmony_ci
4939e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                           @ possibly partial last block has zeroes in highest bits
4940e1051a39Sopenharmony_ci
4941e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final block
4942e1051a39Sopenharmony_ci
4943e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
4944e1051a39Sopenharmony_ci
4945e1051a39Sopenharmony_ci	bif     $res1b, $rk0, $ctr0b                             @ insert existing bytes in top end of result before storing
4946e1051a39Sopenharmony_ci
4947e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                         @ GHASH final block - high
4948e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                 @ GHASH final block - mid
4949e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
4950e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
4951e1051a39Sopenharmony_ci#else
4952e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
4953e1051a39Sopenharmony_ci#endif
4954e1051a39Sopenharmony_ci
4955e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                         @ GHASH final block - low
4956e1051a39Sopenharmony_ci
4957e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final block - high
4958e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                         @ GHASH final block - mid
4959e1051a39Sopenharmony_ci
4960e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                         @ GHASH final block - mid
4961e1051a39Sopenharmony_ci
4962e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final block - low
4963e1051a39Sopenharmony_ci
4964e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                        @ GHASH final block - mid
4965e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
4966e1051a39Sopenharmony_ci
4967e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                        @ MODULO - karatsuba tidy up
4968e1051a39Sopenharmony_ci
4969e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56              @ mod_constant
4970e1051a39Sopenharmony_ci
4971e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                        @ MODULO - karatsuba tidy up
4972e1051a39Sopenharmony_ci
4973e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d           @ MODULO - top 64b align with mid
4974e1051a39Sopenharmony_ci
4975e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                    @ MODULO - other top alignment
4976e1051a39Sopenharmony_ci
4977e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                     @ MODULO - fold into mid
4978e1051a39Sopenharmony_ci
4979e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                        @ MODULO - fold into mid
4980e1051a39Sopenharmony_ci
4981e1051a39Sopenharmony_ci	pmull   $acc_h.1q, $acc_m.1d, $mod_constant.1d           @ MODULO - mid 64b align with low
4982e1051a39Sopenharmony_ci
4983e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                    @ MODULO - other mid alignment
4984e1051a39Sopenharmony_ci
4985e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                         @ store the updated counter
4986e1051a39Sopenharmony_ci
4987e1051a39Sopenharmony_ci	st1     { $res1b}, [$output_ptr]                         @ store all 16B
4988e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_hb                        @ MODULO - fold into low
4989e1051a39Sopenharmony_ci
4990e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                        @ MODULO - fold into low
4991e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
4992e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
4993e1051a39Sopenharmony_ci	mov     x0, $len
4994e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
4995e1051a39Sopenharmony_ci
4996e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
4997e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
4998e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
4999e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
5000e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
5001e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
5002e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
5003e1051a39Sopenharmony_ci	ret
5004e1051a39Sopenharmony_ci
5005e1051a39Sopenharmony_ci.L256_enc_ret:
5006e1051a39Sopenharmony_ci	mov w0, #0x0
5007e1051a39Sopenharmony_ci	ret
5008e1051a39Sopenharmony_ci.size aes_gcm_enc_256_kernel,.-aes_gcm_enc_256_kernel
5009e1051a39Sopenharmony_ci___
5010e1051a39Sopenharmony_ci
5011e1051a39Sopenharmony_ci{
5012e1051a39Sopenharmony_cimy $t8="v4";
5013e1051a39Sopenharmony_cimy $t8d="d4";
5014e1051a39Sopenharmony_cimy $t9="v6";
5015e1051a39Sopenharmony_cimy $t9d="d6";
5016e1051a39Sopenharmony_ci#########################################################################################
5017e1051a39Sopenharmony_ci# size_t aes_gcm_dec_256_kernel(const unsigned char *in,
5018e1051a39Sopenharmony_ci#                               size_t len,             /* length in bits */
5019e1051a39Sopenharmony_ci#                               unsigned char *out,
5020e1051a39Sopenharmony_ci#                               u64 *Xi,
5021e1051a39Sopenharmony_ci#                               unsigned char ivec[16],
5022e1051a39Sopenharmony_ci#                               const void *key);
5023e1051a39Sopenharmony_ci#
5024e1051a39Sopenharmony_ci$code.=<<___;
5025e1051a39Sopenharmony_ci.global aes_gcm_dec_256_kernel
5026e1051a39Sopenharmony_ci.type   aes_gcm_dec_256_kernel,%function
5027e1051a39Sopenharmony_ci.align  4
5028e1051a39Sopenharmony_ciaes_gcm_dec_256_kernel:
5029e1051a39Sopenharmony_ci	cbz     x1, .L256_dec_ret
5030e1051a39Sopenharmony_ci	stp     x19, x20, [sp, #-112]!
5031e1051a39Sopenharmony_ci	mov     x16, x4
5032e1051a39Sopenharmony_ci	mov     x8, x5
5033e1051a39Sopenharmony_ci	stp     x21, x22, [sp, #16]
5034e1051a39Sopenharmony_ci	stp     x23, x24, [sp, #32]
5035e1051a39Sopenharmony_ci	stp     d8, d9, [sp, #48]
5036e1051a39Sopenharmony_ci	stp     d10, d11, [sp, #64]
5037e1051a39Sopenharmony_ci	stp     d12, d13, [sp, #80]
5038e1051a39Sopenharmony_ci	stp     d14, d15, [sp, #96]
5039e1051a39Sopenharmony_ci
5040e1051a39Sopenharmony_ci	lsr     $main_end_input_ptr, $bit_length, #3              @ byte_len
5041e1051a39Sopenharmony_ci	mov     $len, $main_end_input_ptr
5042e1051a39Sopenharmony_ci	ldp     $ctr96_b64x, $ctr96_t32x, [$counter]              @ ctr96_b64, ctr96_t32
5043e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5044e1051a39Sopenharmony_ci	rev     $ctr96_b64x, $ctr96_b64x
5045e1051a39Sopenharmony_ci	rev     $ctr96_t32x, $ctr96_t32x
5046e1051a39Sopenharmony_ci#endif
5047e1051a39Sopenharmony_ci	ldp     $rk14_l, $rk14_h, [$cc, #224]                     @ load rk14
5048e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5049e1051a39Sopenharmony_ci	ror     $rk14_h, $rk14_h, #32
5050e1051a39Sopenharmony_ci	ror     $rk14_l, $rk14_l, #32
5051e1051a39Sopenharmony_ci#endif
5052e1051a39Sopenharmony_ci	ld1     {$rk0s}, [$cc], #16                               @ load rk0
5053e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $main_end_input_ptr, #1      @ byte_len - 1
5054e1051a39Sopenharmony_ci
5055e1051a39Sopenharmony_ci	ld1     {$rk1s}, [$cc], #16                               @ load rk1
5056e1051a39Sopenharmony_ci	and     $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
5057e1051a39Sopenharmony_ci
5058e1051a39Sopenharmony_ci	add     $end_input_ptr, $input_ptr, $bit_length, lsr #3   @ end_input_ptr
5059e1051a39Sopenharmony_ci	ld1     {$rk2s}, [$cc], #16                               @ load rk2
5060e1051a39Sopenharmony_ci
5061e1051a39Sopenharmony_ci	lsr     $rctr32x, $ctr96_t32x, #32
5062e1051a39Sopenharmony_ci	ld1     {$rk3s}, [$cc], #16                               @ load rk3
5063e1051a39Sopenharmony_ci	orr     $ctr96_t32w, $ctr96_t32w, $ctr96_t32w
5064e1051a39Sopenharmony_ci
5065e1051a39Sopenharmony_ci	ld1     {$rk4s}, [$cc], #16                               @ load rk4
5066e1051a39Sopenharmony_ci	add     $main_end_input_ptr, $main_end_input_ptr, $input_ptr
5067e1051a39Sopenharmony_ci	rev     $rctr32w, $rctr32w                                @ rev_ctr32
5068e1051a39Sopenharmony_ci
5069e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ increment rev_ctr32
5070e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 3
5071e1051a39Sopenharmony_ci
5072e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 1
5073e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 1
5074e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 1
5075e1051a39Sopenharmony_ci
5076e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 1
5077e1051a39Sopenharmony_ci	ld1     { $ctr0b}, [$counter]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
5078e1051a39Sopenharmony_ci
5079e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 1
5080e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 2
5081e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 2
5082e1051a39Sopenharmony_ci
5083e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 2
5084e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 2
5085e1051a39Sopenharmony_ci
5086e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 2
5087e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 3
5088e1051a39Sopenharmony_ci
5089e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 3
5090e1051a39Sopenharmony_ci	ld1     {$rk5s}, [$cc], #16                               @ load rk5
5091e1051a39Sopenharmony_ci
5092e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 3
5093e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 3
5094e1051a39Sopenharmony_ci
5095e1051a39Sopenharmony_ci	ld1     {$rk6s}, [$cc], #16                               @ load rk6
5096e1051a39Sopenharmony_ci
5097e1051a39Sopenharmony_ci	ld1     {$rk7s}, [$cc], #16                               @ load rk7
5098e1051a39Sopenharmony_ci
5099e1051a39Sopenharmony_ci	ld1     {$rk8s}, [$cc], #16                               @ load rk8
5100e1051a39Sopenharmony_ci
5101e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 0
5102e1051a39Sopenharmony_ci	ldr     $h3q, [$current_tag, #80]                         @ load h3l | h3h
5103e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
5104e1051a39Sopenharmony_ci	ext     $h3b, $h3b, $h3b, #8
5105e1051a39Sopenharmony_ci#endif
5106e1051a39Sopenharmony_ci
5107e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 0
5108e1051a39Sopenharmony_ci	ldr     $h4q, [$current_tag, #112]                        @ load h4l | h4h
5109e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
5110e1051a39Sopenharmony_ci	ext     $h4b, $h4b, $h4b, #8
5111e1051a39Sopenharmony_ci#endif
5112e1051a39Sopenharmony_ci
5113e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 0
5114e1051a39Sopenharmony_ci	ldr     $h2q, [$current_tag, #64]                         @ load h2l | h2h
5115e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
5116e1051a39Sopenharmony_ci	ext     $h2b, $h2b, $h2b, #8
5117e1051a39Sopenharmony_ci#endif
5118e1051a39Sopenharmony_ci
5119e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 0
5120e1051a39Sopenharmony_ci	ld1     {$rk9s}, [$cc], #16                                 @ load rk9
5121e1051a39Sopenharmony_ci
5122e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 1
5123e1051a39Sopenharmony_ci
5124e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 1
5125e1051a39Sopenharmony_ci	ld1     { $acc_lb}, [$current_tag]
5126e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
5127e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
5128e1051a39Sopenharmony_ci
5129e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 1
5130e1051a39Sopenharmony_ci	ld1     {$rk10s}, [$cc], #16                              @ load rk10
5131e1051a39Sopenharmony_ci
5132e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 1
5133e1051a39Sopenharmony_ci	ld1     {$rk11s}, [$cc], #16                              @ load rk11
5134e1051a39Sopenharmony_ci
5135e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 2
5136e1051a39Sopenharmony_ci	ldr     $h1q, [$current_tag, #32]                         @ load h1l | h1h
5137e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
5138e1051a39Sopenharmony_ci	ext     $h1b, $h1b, $h1b, #8
5139e1051a39Sopenharmony_ci#endif
5140e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 2
5141e1051a39Sopenharmony_ci	ld1     {$rk12s}, [$cc], #16                              @ load rk12
5142e1051a39Sopenharmony_ci
5143e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 2
5144e1051a39Sopenharmony_ci
5145e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 3
5146e1051a39Sopenharmony_ci
5147e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 2
5148e1051a39Sopenharmony_ci
5149e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 3
5150e1051a39Sopenharmony_ci
5151e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 4
5152e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 4 blocks
5153e1051a39Sopenharmony_ci
5154e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 3
5155e1051a39Sopenharmony_ci
5156e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 3
5157e1051a39Sopenharmony_ci
5158e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 4
5159e1051a39Sopenharmony_ci
5160e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 4
5161e1051a39Sopenharmony_ci
5162e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 4
5163e1051a39Sopenharmony_ci
5164e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 5
5165e1051a39Sopenharmony_ci
5166e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 5
5167e1051a39Sopenharmony_ci
5168e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 5
5169e1051a39Sopenharmony_ci
5170e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 5
5171e1051a39Sopenharmony_ci
5172e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 6
5173e1051a39Sopenharmony_ci
5174e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 6
5175e1051a39Sopenharmony_ci
5176e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 6
5177e1051a39Sopenharmony_ci
5178e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 6
5179e1051a39Sopenharmony_ci
5180e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 7
5181e1051a39Sopenharmony_ci
5182e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 7
5183e1051a39Sopenharmony_ci
5184e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 7
5185e1051a39Sopenharmony_ci
5186e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 8
5187e1051a39Sopenharmony_ci
5188e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 7
5189e1051a39Sopenharmony_ci
5190e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 8
5191e1051a39Sopenharmony_ci
5192e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 8
5193e1051a39Sopenharmony_ci
5194e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 9
5195e1051a39Sopenharmony_ci
5196e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 8
5197e1051a39Sopenharmony_ci	ld1     {$rk13s}, [$cc], #16                             @ load rk13
5198e1051a39Sopenharmony_ci
5199e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 9
5200e1051a39Sopenharmony_ci
5201e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 10
5202e1051a39Sopenharmony_ci
5203e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 9
5204e1051a39Sopenharmony_ci
5205e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 10
5206e1051a39Sopenharmony_ci
5207e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 9
5208e1051a39Sopenharmony_ci
5209e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 10
5210e1051a39Sopenharmony_ci
5211e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 11
5212e1051a39Sopenharmony_ci
5213e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 10
5214e1051a39Sopenharmony_ci
5215e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 11
5216e1051a39Sopenharmony_ci
5217e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 11
5218e1051a39Sopenharmony_ci
5219e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 11
5220e1051a39Sopenharmony_ci
5221e1051a39Sopenharmony_ci	trn1    $acc_h.2d, $h3.2d,    $h4.2d                      @ h4h | h3h
5222e1051a39Sopenharmony_ci
5223e1051a39Sopenharmony_ci	trn2    $h34k.2d,  $h3.2d,    $h4.2d                      @ h4l | h3l
5224e1051a39Sopenharmony_ci
5225e1051a39Sopenharmony_ci	trn1    $t0.2d,    $h1.2d,    $h2.2d                      @ h2h | h1h
5226e1051a39Sopenharmony_ci	trn2    $h12k.2d,  $h1.2d,    $h2.2d                      @ h2l | h1l
5227e1051a39Sopenharmony_ci
5228e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 1 - round 12
5229e1051a39Sopenharmony_ci
5230e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 0 - round 12
5231e1051a39Sopenharmony_ci
5232e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 2 - round 12
5233e1051a39Sopenharmony_ci
5234e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 3 - round 12
5235e1051a39Sopenharmony_ci	eor     $h34k.16b, $h34k.16b, $acc_h.16b                  @ h4k | h3k
5236e1051a39Sopenharmony_ci
5237e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 1 - round 13
5238e1051a39Sopenharmony_ci
5239e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 2 - round 13
5240e1051a39Sopenharmony_ci	eor     $h12k.16b, $h12k.16b, $t0.16b                     @ h2k | h1k
5241e1051a39Sopenharmony_ci
5242e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 3 - round 13
5243e1051a39Sopenharmony_ci
5244e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 0 - round 13
5245e1051a39Sopenharmony_ci	b.ge    .L256_dec_tail                                    @ handle tail
5246e1051a39Sopenharmony_ci
5247e1051a39Sopenharmony_ci	ld1     {$res0b, $res1b}, [$input_ptr], #32               @ AES block 0,1 - load ciphertext
5248e1051a39Sopenharmony_ci
5249e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4
5250e1051a39Sopenharmony_ci
5251e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 0 - result
5252e1051a39Sopenharmony_ci
5253e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 1 - result
5254e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 1
5255e1051a39Sopenharmony_ci	ld1     {$res2b}, [$input_ptr], #16                       @ AES block 2 - load ciphertext
5256e1051a39Sopenharmony_ci
5257e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 0 - mov high
5258e1051a39Sopenharmony_ci
5259e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 0 - mov low
5260e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 0
5261e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4
5262e1051a39Sopenharmony_ci
5263e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4
5264e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4
5265e1051a39Sopenharmony_ci
5266e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4
5267e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 5
5268e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 5
5269e1051a39Sopenharmony_ci
5270e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 1 - mov low
5271e1051a39Sopenharmony_ci
5272e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 5
5273e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 1 - mov high
5274e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                   @ AES block 0 - round 14 high
5275e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5276e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5277e1051a39Sopenharmony_ci#endif
5278e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                   @ AES block 0 - round 14 low
5279e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5280e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5281e1051a39Sopenharmony_ci#endif
5282e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 0 - store result
5283e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 5
5284e1051a39Sopenharmony_ci
5285e1051a39Sopenharmony_ci	ld1     {$res3b}, [$input_ptr], #16                       @ AES block 3 - load ciphertext
5286e1051a39Sopenharmony_ci
5287e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 5
5288e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 6
5289e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 6
5290e1051a39Sopenharmony_ci
5291e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk14_l                   @ AES block 1 - round 14 low
5292e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5293e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
5294e1051a39Sopenharmony_ci#endif
5295e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 6
5296e1051a39Sopenharmony_ci
5297e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk14_h                   @ AES block 1 - round 14 high
5298e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5299e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
5300e1051a39Sopenharmony_ci#endif
5301e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 1 - store result
5302e1051a39Sopenharmony_ci
5303e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 2 - result
5304e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ check if we have <= 8 blocks
5305e1051a39Sopenharmony_ci	b.ge    .L256_dec_prepretail                              @ do prepretail
5306e1051a39Sopenharmony_ci
5307e1051a39Sopenharmony_ci	.L256_dec_main_loop:                                      @ main loop start
5308e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
5309e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
5310e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
5311e1051a39Sopenharmony_ci
5312e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
5313e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
5314e1051a39Sopenharmony_ci
5315e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
5316e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
5317e1051a39Sopenharmony_ci
5318e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
5319e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
5320e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
5321e1051a39Sopenharmony_ci
5322e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
5323e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
5324e1051a39Sopenharmony_ci
5325e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
5326e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
5327e1051a39Sopenharmony_ci
5328e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
5329e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
5330e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
5331e1051a39Sopenharmony_ci
5332e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
5333e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
5334e1051a39Sopenharmony_ci
5335e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
5336e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
5337e1051a39Sopenharmony_ci
5338e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
5339e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
5340e1051a39Sopenharmony_ci
5341e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
5342e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk14_h                   @ AES block 4k+2 - round 14 high
5343e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5344e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
5345e1051a39Sopenharmony_ci#endif
5346e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
5347e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
5348e1051a39Sopenharmony_ci
5349e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
5350e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
5351e1051a39Sopenharmony_ci
5352e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
5353e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk14_l                   @ AES block 4k+2 - round 14 low
5354e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5355e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
5356e1051a39Sopenharmony_ci#endif
5357e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
5358e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
5359e1051a39Sopenharmony_ci
5360e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
5361e1051a39Sopenharmony_ci
5362e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
5363e1051a39Sopenharmony_ci
5364e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
5365e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
5366e1051a39Sopenharmony_ci
5367e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
5368e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk14_l                   @ AES block 4k+3 - round 14 low
5369e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5370e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
5371e1051a39Sopenharmony_ci#endif
5372e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
5373e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk14_h                   @ AES block 4k+3 - round 14 high
5374e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5375e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
5376e1051a39Sopenharmony_ci#endif
5377e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
5378e1051a39Sopenharmony_ci
5379e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
5380e1051a39Sopenharmony_ci
5381e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
5382e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
5383e1051a39Sopenharmony_ci
5384e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
5385e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
5386e1051a39Sopenharmony_ci
5387e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
5388e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
5389e1051a39Sopenharmony_ci
5390e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
5391e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
5392e1051a39Sopenharmony_ci
5393e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
5394e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
5395e1051a39Sopenharmony_ci
5396e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
5397e1051a39Sopenharmony_ci
5398e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
5399e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
5400e1051a39Sopenharmony_ci
5401e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
5402e1051a39Sopenharmony_ci
5403e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
5404e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
5405e1051a39Sopenharmony_ci
5406e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
5407e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+8
5408e1051a39Sopenharmony_ci
5409e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
5410e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
5411e1051a39Sopenharmony_ci
5412e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
5413e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+8
5414e1051a39Sopenharmony_ci
5415e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
5416e1051a39Sopenharmony_ci
5417e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
5418e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
5419e1051a39Sopenharmony_ci
5420e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
5421e1051a39Sopenharmony_ci
5422e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
5423e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
5424e1051a39Sopenharmony_ci
5425e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
5426e1051a39Sopenharmony_ci
5427e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
5428e1051a39Sopenharmony_ci
5429e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
5430e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
5431e1051a39Sopenharmony_ci
5432e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
5433e1051a39Sopenharmony_ci
5434e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
5435e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+8
5436e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
5437e1051a39Sopenharmony_ci
5438e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
5439e1051a39Sopenharmony_ci
5440e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
5441e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
5442e1051a39Sopenharmony_ci
5443e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
5444e1051a39Sopenharmony_ci
5445e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
5446e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
5447e1051a39Sopenharmony_ci
5448e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
5449e1051a39Sopenharmony_ci
5450e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
5451e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
5452e1051a39Sopenharmony_ci
5453e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
5454e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
5455e1051a39Sopenharmony_ci
5456e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 11
5457e1051a39Sopenharmony_ci
5458e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
5459e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
5460e1051a39Sopenharmony_ci
5461e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
5462e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
5463e1051a39Sopenharmony_ci
5464e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 12
5465e1051a39Sopenharmony_ci
5466e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
5467e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
5468e1051a39Sopenharmony_ci
5469e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
5470e1051a39Sopenharmony_ci	ld1     {$res0b}, [$input_ptr], #16                       @ AES block 4k+4 - load ciphertext
5471e1051a39Sopenharmony_ci
5472e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 4k+4 - round 13
5473e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
5474e1051a39Sopenharmony_ci
5475e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
5476e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
5477e1051a39Sopenharmony_ci
5478e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
5479e1051a39Sopenharmony_ci	ld1     {$res1b}, [$input_ptr], #16                       @ AES block 4k+5 - load ciphertext
5480e1051a39Sopenharmony_ci
5481e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
5482e1051a39Sopenharmony_ci	eor     $ctr0b, $res0b, $ctr0b                            @ AES block 4k+4 - result
5483e1051a39Sopenharmony_ci
5484e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 11
5485e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
5486e1051a39Sopenharmony_ci
5487e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
5488e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
5489e1051a39Sopenharmony_ci
5490e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
5491e1051a39Sopenharmony_ci	ld1     {$res2b}, [$input_ptr], #16                       @ AES block 4k+6 - load ciphertext
5492e1051a39Sopenharmony_ci
5493e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 12
5494e1051a39Sopenharmony_ci	ld1     {$res3b}, [$input_ptr], #16                       @ AES block 4k+7 - load ciphertext
5495e1051a39Sopenharmony_ci
5496e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 11
5497e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
5498e1051a39Sopenharmony_ci
5499e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
5500e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
5501e1051a39Sopenharmony_ci
5502e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 4k+5 - round 13
5503e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
5504e1051a39Sopenharmony_ci
5505e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 12
5506e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr96_b64x                               @ CTR block 4k+8
5507e1051a39Sopenharmony_ci
5508e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 11
5509e1051a39Sopenharmony_ci	fmov    $ctr0.d[1], $ctr32x                               @ CTR block 4k+8
5510e1051a39Sopenharmony_ci
5511e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
5512e1051a39Sopenharmony_ci	eor     $ctr1b, $res1b, $ctr1b                            @ AES block 4k+5 - result
5513e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+9
5514e1051a39Sopenharmony_ci
5515e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 4k+6 - round 13
5516e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+9
5517e1051a39Sopenharmony_ci	cmp     $input_ptr, $main_end_input_ptr                   @ LOOP CONTROL
5518e1051a39Sopenharmony_ci
5519e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+9
5520e1051a39Sopenharmony_ci
5521e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                   @ AES block 4k+4 - round 14 low
5522e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5523e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5524e1051a39Sopenharmony_ci#endif
5525e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                   @ AES block 4k+4 - round 14 high
5526e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5527e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5528e1051a39Sopenharmony_ci#endif
5529e1051a39Sopenharmony_ci	mov     $output_h1, $ctr1.d[1]                            @ AES block 4k+5 - mov high
5530e1051a39Sopenharmony_ci	eor     $ctr2b, $res2b, $ctr2b                            @ AES block 4k+6 - result
5531e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
5532e1051a39Sopenharmony_ci
5533e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 12
5534e1051a39Sopenharmony_ci	mov     $output_l1, $ctr1.d[0]                            @ AES block 4k+5 - mov low
5535e1051a39Sopenharmony_ci
5536e1051a39Sopenharmony_ci	fmov    $ctr1d, $ctr96_b64x                               @ CTR block 4k+9
5537e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
5538e1051a39Sopenharmony_ci
5539e1051a39Sopenharmony_ci	fmov    $ctr1.d[1], $ctr32x                               @ CTR block 4k+9
5540e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+10
5541e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+10
5542e1051a39Sopenharmony_ci
5543e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 4k+7 - round 13
5544e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+10
5545e1051a39Sopenharmony_ci
5546e1051a39Sopenharmony_ci	rev64   $res1b, $res1b                                    @ GHASH block 4k+5
5547e1051a39Sopenharmony_ci	eor     $output_h1, $output_h1, $rk14_h                   @ AES block 4k+5 - round 14 high
5548e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5549e1051a39Sopenharmony_ci	rev     $output_h1, $output_h1
5550e1051a39Sopenharmony_ci#endif
5551e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16        @ AES block 4k+4 - store result
5552e1051a39Sopenharmony_ci
5553e1051a39Sopenharmony_ci	eor     $output_l1, $output_l1, $rk14_l                   @ AES block 4k+5 - round 14 low
5554e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5555e1051a39Sopenharmony_ci	rev     $output_l1, $output_l1
5556e1051a39Sopenharmony_ci#endif
5557e1051a39Sopenharmony_ci	stp     $output_l1, $output_h1, [$output_ptr], #16        @ AES block 4k+5 - store result
5558e1051a39Sopenharmony_ci
5559e1051a39Sopenharmony_ci	rev64   $res0b, $res0b                                    @ GHASH block 4k+4
5560e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
5561e1051a39Sopenharmony_ci	b.lt    .L256_dec_main_loop
5562e1051a39Sopenharmony_ci
5563e1051a39Sopenharmony_ci
5564e1051a39Sopenharmony_ci	.L256_dec_prepretail:                                     @ PREPRETAIL
5565e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8                     @ PRE 0
5566e1051a39Sopenharmony_ci	mov     $output_l2, $ctr2.d[0]                            @ AES block 4k+2 - mov low
5567e1051a39Sopenharmony_ci	eor     $ctr3b, $res3b, $ctr3b                            @ AES block 4k+3 - result
5568e1051a39Sopenharmony_ci
5569e1051a39Sopenharmony_ci	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 0
5570e1051a39Sopenharmony_ci	mov     $output_h2, $ctr2.d[1]                            @ AES block 4k+2 - mov high
5571e1051a39Sopenharmony_ci
5572e1051a39Sopenharmony_ci	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 0
5573e1051a39Sopenharmony_ci	fmov    $ctr2d, $ctr96_b64x                               @ CTR block 4k+6
5574e1051a39Sopenharmony_ci
5575e1051a39Sopenharmony_ci	fmov    $ctr2.d[1], $ctr32x                               @ CTR block 4k+6
5576e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w                                 @ CTR block 4k+7
5577e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $acc_lb                           @ PRE 1
5578e1051a39Sopenharmony_ci
5579e1051a39Sopenharmony_ci	rev64   $res2b, $res2b                                    @ GHASH block 4k+2
5580e1051a39Sopenharmony_ci	orr     $ctr32x, $ctr96_t32x, $ctr32x, lsl #32            @ CTR block 4k+7
5581e1051a39Sopenharmony_ci	mov     $output_l3, $ctr3.d[0]                            @ AES block 4k+3 - mov low
5582e1051a39Sopenharmony_ci
5583e1051a39Sopenharmony_ci	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 1
5584e1051a39Sopenharmony_ci	mov     $output_h3, $ctr3.d[1]                            @ AES block 4k+3 - mov high
5585e1051a39Sopenharmony_ci
5586e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                       @ GHASH block 4k - low
5587e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH block 4k - mid
5588e1051a39Sopenharmony_ci	fmov    $ctr3d, $ctr96_b64x                               @ CTR block 4k+7
5589e1051a39Sopenharmony_ci
5590e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                       @ GHASH block 4k - high
5591e1051a39Sopenharmony_ci	fmov    $ctr3.d[1], $ctr32x                               @ CTR block 4k+7
5592e1051a39Sopenharmony_ci
5593e1051a39Sopenharmony_ci	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 0
5594e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                               @ GHASH block 4k - mid
5595e1051a39Sopenharmony_ci
5596e1051a39Sopenharmony_ci	aese    $ctr0b, $rk1  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 1
5597e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH block 4k - mid
5598e1051a39Sopenharmony_ci
5599e1051a39Sopenharmony_ci	pmull2  $t1.1q, $res1.2d, $h3.2d                          @ GHASH block 4k+1 - high
5600e1051a39Sopenharmony_ci
5601e1051a39Sopenharmony_ci	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 1
5602e1051a39Sopenharmony_ci	rev64   $res3b, $res3b                                    @ GHASH block 4k+3
5603e1051a39Sopenharmony_ci
5604e1051a39Sopenharmony_ci	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 0
5605e1051a39Sopenharmony_ci
5606e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $t0.1d, $acc_m.1d                      @ GHASH block 4k - mid
5607e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t1.16b                         @ GHASH block 4k+1 - high
5608e1051a39Sopenharmony_ci
5609e1051a39Sopenharmony_ci	pmull   $t2.1q, $res1.1d, $h3.1d                          @ GHASH block 4k+1 - low
5610e1051a39Sopenharmony_ci
5611e1051a39Sopenharmony_ci	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 1
5612e1051a39Sopenharmony_ci	mov     $t3d, $res1.d[1]                                  @ GHASH block 4k+1 - mid
5613e1051a39Sopenharmony_ci
5614e1051a39Sopenharmony_ci	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 2
5615e1051a39Sopenharmony_ci
5616e1051a39Sopenharmony_ci	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 2
5617e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t2.16b                         @ GHASH block 4k+1 - low
5618e1051a39Sopenharmony_ci
5619e1051a39Sopenharmony_ci	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 2
5620e1051a39Sopenharmony_ci
5621e1051a39Sopenharmony_ci	aese    $ctr0b, $rk3  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 3
5622e1051a39Sopenharmony_ci	mov     $t6d, $res2.d[1]                                  @ GHASH block 4k+2 - mid
5623e1051a39Sopenharmony_ci
5624e1051a39Sopenharmony_ci	aese    $ctr3b, $rk2  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 2
5625e1051a39Sopenharmony_ci	eor     $t3.8b, $t3.8b, $res1.8b                          @ GHASH block 4k+1 - mid
5626e1051a39Sopenharmony_ci
5627e1051a39Sopenharmony_ci	pmull   $t5.1q, $res2.1d, $h2.1d                          @ GHASH block 4k+2 - low
5628e1051a39Sopenharmony_ci
5629e1051a39Sopenharmony_ci	aese    $ctr0b, $rk4  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 4
5630e1051a39Sopenharmony_ci
5631e1051a39Sopenharmony_ci	aese    $ctr3b, $rk3  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 3
5632e1051a39Sopenharmony_ci	eor     $t6.8b, $t6.8b, $res2.8b                          @ GHASH block 4k+2 - mid
5633e1051a39Sopenharmony_ci
5634e1051a39Sopenharmony_ci	pmull   $t3.1q, $t3.1d, $h34k.1d                          @ GHASH block 4k+1 - mid
5635e1051a39Sopenharmony_ci
5636e1051a39Sopenharmony_ci	aese    $ctr0b, $rk5  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 5
5637e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t5.16b                         @ GHASH block 4k+2 - low
5638e1051a39Sopenharmony_ci
5639e1051a39Sopenharmony_ci	aese    $ctr3b, $rk4  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 4
5640e1051a39Sopenharmony_ci
5641e1051a39Sopenharmony_ci	pmull2  $t7.1q, $res3.2d, $h1.2d                          @ GHASH block 4k+3 - high
5642e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t3.16b                         @ GHASH block 4k+1 - mid
5643e1051a39Sopenharmony_ci
5644e1051a39Sopenharmony_ci	pmull2  $t4.1q, $res2.2d, $h2.2d                          @ GHASH block 4k+2 - high
5645e1051a39Sopenharmony_ci
5646e1051a39Sopenharmony_ci	aese    $ctr3b, $rk5  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 5
5647e1051a39Sopenharmony_ci	ins     $t6.d[1], $t6.d[0]                                @ GHASH block 4k+2 - mid
5648e1051a39Sopenharmony_ci
5649e1051a39Sopenharmony_ci	aese    $ctr2b, $rk3  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 3
5650e1051a39Sopenharmony_ci
5651e1051a39Sopenharmony_ci	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 3
5652e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t4.16b                         @ GHASH block 4k+2 - high
5653e1051a39Sopenharmony_ci
5654e1051a39Sopenharmony_ci	pmull   $t8.1q, $res3.1d, $h1.1d                          @ GHASH block 4k+3 - low
5655e1051a39Sopenharmony_ci
5656e1051a39Sopenharmony_ci	aese    $ctr2b, $rk4  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 4
5657e1051a39Sopenharmony_ci	mov     $t9d, $res3.d[1]                                  @ GHASH block 4k+3 - mid
5658e1051a39Sopenharmony_ci
5659e1051a39Sopenharmony_ci	aese    $ctr1b, $rk4  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 4
5660e1051a39Sopenharmony_ci
5661e1051a39Sopenharmony_ci	pmull2  $t6.1q, $t6.2d, $h12k.2d                          @ GHASH block 4k+2 - mid
5662e1051a39Sopenharmony_ci
5663e1051a39Sopenharmony_ci	aese    $ctr2b, $rk5  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 5
5664e1051a39Sopenharmony_ci	eor     $t9.8b, $t9.8b, $res3.8b                          @ GHASH block 4k+3 - mid
5665e1051a39Sopenharmony_ci
5666e1051a39Sopenharmony_ci	aese    $ctr1b, $rk5  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 5
5667e1051a39Sopenharmony_ci
5668e1051a39Sopenharmony_ci	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 6
5669e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t6.16b                         @ GHASH block 4k+2 - mid
5670e1051a39Sopenharmony_ci
5671e1051a39Sopenharmony_ci	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 6
5672e1051a39Sopenharmony_ci
5673e1051a39Sopenharmony_ci	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 6
5674e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
5675e1051a39Sopenharmony_ci
5676e1051a39Sopenharmony_ci	aese    $ctr1b, $rk6  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 6
5677e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $t8.16b                         @ GHASH block 4k+3 - low
5678e1051a39Sopenharmony_ci
5679e1051a39Sopenharmony_ci	pmull   $t9.1q, $t9.1d, $h12k.1d                          @ GHASH block 4k+3 - mid
5680e1051a39Sopenharmony_ci
5681e1051a39Sopenharmony_ci	aese    $ctr3b, $rk7  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 7
5682e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $t7.16b                         @ GHASH block 4k+3 - high
5683e1051a39Sopenharmony_ci
5684e1051a39Sopenharmony_ci	aese    $ctr1b, $rk7  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 7
5685e1051a39Sopenharmony_ci
5686e1051a39Sopenharmony_ci	aese    $ctr0b, $rk7  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 7
5687e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ GHASH block 4k+3 - mid
5688e1051a39Sopenharmony_ci
5689e1051a39Sopenharmony_ci	aese    $ctr3b, $rk8  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 8
5690e1051a39Sopenharmony_ci
5691e1051a39Sopenharmony_ci	aese    $ctr2b, $rk7  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 7
5692e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
5693e1051a39Sopenharmony_ci
5694e1051a39Sopenharmony_ci	aese    $ctr1b, $rk8  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 8
5695e1051a39Sopenharmony_ci
5696e1051a39Sopenharmony_ci	aese    $ctr0b, $rk8  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 8
5697e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
5698e1051a39Sopenharmony_ci
5699e1051a39Sopenharmony_ci	aese    $ctr2b, $rk8  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 8
5700e1051a39Sopenharmony_ci
5701e1051a39Sopenharmony_ci	aese    $ctr1b, $rk9  \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 9
5702e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
5703e1051a39Sopenharmony_ci
5704e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
5705e1051a39Sopenharmony_ci
5706e1051a39Sopenharmony_ci	aese    $ctr2b, $rk9  \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 9
5707e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
5708e1051a39Sopenharmony_ci
5709e1051a39Sopenharmony_ci	aese    $ctr3b, $rk9  \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 9
5710e1051a39Sopenharmony_ci
5711e1051a39Sopenharmony_ci	aese    $ctr0b, $rk9  \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 9
5712e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
5713e1051a39Sopenharmony_ci
5714e1051a39Sopenharmony_ci	aese    $ctr2b, $rk10 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 10
5715e1051a39Sopenharmony_ci
5716e1051a39Sopenharmony_ci	aese    $ctr3b, $rk10 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 10
5717e1051a39Sopenharmony_ci
5718e1051a39Sopenharmony_ci	aese    $ctr0b, $rk10 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 10
5719e1051a39Sopenharmony_ci	eor     $output_h2, $output_h2, $rk14_h                   @ AES block 4k+2 - round 14 high
5720e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5721e1051a39Sopenharmony_ci	rev     $output_h2, $output_h2
5722e1051a39Sopenharmony_ci#endif
5723e1051a39Sopenharmony_ci	aese    $ctr1b, $rk10 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 10
5724e1051a39Sopenharmony_ci	eor     $output_l3, $output_l3, $rk14_l                   @ AES block 4k+3 - round 14 low
5725e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5726e1051a39Sopenharmony_ci	rev     $output_l3, $output_l3
5727e1051a39Sopenharmony_ci#endif
5728e1051a39Sopenharmony_ci	aese    $ctr2b, $rk11 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 11
5729e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
5730e1051a39Sopenharmony_ci
5731e1051a39Sopenharmony_ci	aese    $ctr0b, $rk11 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 11
5732e1051a39Sopenharmony_ci	add     $rctr32w, $rctr32w, #1                            @ CTR block 4k+7
5733e1051a39Sopenharmony_ci
5734e1051a39Sopenharmony_ci	aese    $ctr1b, $rk11 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 11
5735e1051a39Sopenharmony_ci	eor     $output_l2, $output_l2, $rk14_l                   @ AES block 4k+2 - round 14 low
5736e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5737e1051a39Sopenharmony_ci	rev     $output_l2, $output_l2
5738e1051a39Sopenharmony_ci#endif
5739e1051a39Sopenharmony_ci
5740e1051a39Sopenharmony_ci	aese    $ctr2b, $rk12 \n  aesmc   $ctr2b, $ctr2b          @ AES block 4k+6 - round 12
5741e1051a39Sopenharmony_ci
5742e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
5743e1051a39Sopenharmony_ci	eor     $output_h3, $output_h3, $rk14_h                   @ AES block 4k+3 - round 14 high
5744e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5745e1051a39Sopenharmony_ci	rev     $output_h3, $output_h3
5746e1051a39Sopenharmony_ci#endif
5747e1051a39Sopenharmony_ci
5748e1051a39Sopenharmony_ci	aese    $ctr3b, $rk11 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 11
5749e1051a39Sopenharmony_ci	stp     $output_l2, $output_h2, [$output_ptr], #16        @ AES block 4k+2 - store result
5750e1051a39Sopenharmony_ci
5751e1051a39Sopenharmony_ci	aese    $ctr1b, $rk12 \n  aesmc   $ctr1b, $ctr1b          @ AES block 4k+5 - round 12
5752e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
5753e1051a39Sopenharmony_ci
5754e1051a39Sopenharmony_ci	aese    $ctr0b, $rk12 \n  aesmc   $ctr0b, $ctr0b          @ AES block 4k+4 - round 12
5755e1051a39Sopenharmony_ci	stp     $output_l3, $output_h3, [$output_ptr], #16        @ AES block 4k+3 - store result
5756e1051a39Sopenharmony_ci
5757e1051a39Sopenharmony_ci	aese    $ctr3b, $rk12 \n  aesmc   $ctr3b, $ctr3b          @ AES block 4k+7 - round 12
5758e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
5759e1051a39Sopenharmony_ci
5760e1051a39Sopenharmony_ci	aese    $ctr1b, $rk13                                     @ AES block 4k+5 - round 13
5761e1051a39Sopenharmony_ci
5762e1051a39Sopenharmony_ci	aese    $ctr0b, $rk13                                     @ AES block 4k+4 - round 13
5763e1051a39Sopenharmony_ci
5764e1051a39Sopenharmony_ci	aese    $ctr3b, $rk13                                     @ AES block 4k+7 - round 13
5765e1051a39Sopenharmony_ci
5766e1051a39Sopenharmony_ci	aese    $ctr2b, $rk13                                     @ AES block 4k+6 - round 13
5767e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
5768e1051a39Sopenharmony_ci	.L256_dec_tail:                                           @ TAIL
5769e1051a39Sopenharmony_ci
5770e1051a39Sopenharmony_ci	sub     $main_end_input_ptr, $end_input_ptr, $input_ptr   @ main_end_input_ptr is number of bytes left to process
5771e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                      @ AES block 4k+4 - load ciphertext
5772e1051a39Sopenharmony_ci
5773e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr0b                            @ AES block 4k+4 - result
5774e1051a39Sopenharmony_ci
5775e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                            @ AES block 4k+4 - mov low
5776e1051a39Sopenharmony_ci
5777e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                            @ AES block 4k+4 - mov high
5778e1051a39Sopenharmony_ci	ext     $t0.16b, $acc_lb, $acc_lb, #8                     @ prepare final partial tag
5779e1051a39Sopenharmony_ci
5780e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #48
5781e1051a39Sopenharmony_ci
5782e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                   @ AES block 4k+4 - round 14 low
5783e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5784e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5785e1051a39Sopenharmony_ci#endif
5786e1051a39Sopenharmony_ci
5787e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                   @ AES block 4k+4 - round 14 high
5788e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5789e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5790e1051a39Sopenharmony_ci#endif
5791e1051a39Sopenharmony_ci	b.gt    .L256_dec_blocks_more_than_3
5792e1051a39Sopenharmony_ci
5793e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
5794e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr2b
5795e1051a39Sopenharmony_ci	movi    $acc_m.8b, #0
5796e1051a39Sopenharmony_ci
5797e1051a39Sopenharmony_ci	movi    $acc_l.8b, #0
5798e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #32
5799e1051a39Sopenharmony_ci
5800e1051a39Sopenharmony_ci	movi    $acc_h.8b, #0
5801e1051a39Sopenharmony_ci	mov     $ctr2b, $ctr1b
5802e1051a39Sopenharmony_ci	b.gt    .L256_dec_blocks_more_than_2
5803e1051a39Sopenharmony_ci
5804e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
5805e1051a39Sopenharmony_ci
5806e1051a39Sopenharmony_ci	mov     $ctr3b, $ctr1b
5807e1051a39Sopenharmony_ci	cmp     $main_end_input_ptr, #16
5808e1051a39Sopenharmony_ci	b.gt    .L256_dec_blocks_more_than_1
5809e1051a39Sopenharmony_ci
5810e1051a39Sopenharmony_ci	sub     $rctr32w, $rctr32w, #1
5811e1051a39Sopenharmony_ci	b       .L256_dec_blocks_less_than_1
5812e1051a39Sopenharmony_ci	.L256_dec_blocks_more_than_3:                            @ blocks left >  3
5813e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-3 block
5814e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                     @ AES final-2 block - load ciphertext
5815e1051a39Sopenharmony_ci
5816e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16       @ AES final-3 block  - store result
5817e1051a39Sopenharmony_ci
5818e1051a39Sopenharmony_ci	mov     $acc_md, $h34k.d[1]                              @ GHASH final-3 block - mid
5819e1051a39Sopenharmony_ci
5820e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
5821e1051a39Sopenharmony_ci
5822e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr1b                           @ AES final-2 block - result
5823e1051a39Sopenharmony_ci
5824e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-3 block - mid
5825e1051a39Sopenharmony_ci
5826e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                           @ AES final-2 block - mov low
5827e1051a39Sopenharmony_ci
5828e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                           @ AES final-2 block - mov high
5829e1051a39Sopenharmony_ci
5830e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-3 block - mid
5831e1051a39Sopenharmony_ci
5832e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
5833e1051a39Sopenharmony_ci
5834e1051a39Sopenharmony_ci	pmull2  $acc_h.1q, $res0.2d, $h4.2d                      @ GHASH final-3 block - high
5835e1051a39Sopenharmony_ci
5836e1051a39Sopenharmony_ci	pmull   $acc_m.1q, $rk4v.1d, $acc_m.1d                   @ GHASH final-3 block - mid
5837e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                  @ AES final-2 block - round 14 low
5838e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5839e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5840e1051a39Sopenharmony_ci#endif
5841e1051a39Sopenharmony_ci
5842e1051a39Sopenharmony_ci	pmull   $acc_l.1q, $res0.1d, $h4.1d                      @ GHASH final-3 block - low
5843e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                  @ AES final-2 block - round 14 high
5844e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5845e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5846e1051a39Sopenharmony_ci#endif
5847e1051a39Sopenharmony_ci	.L256_dec_blocks_more_than_2:                            @ blocks left >  2
5848e1051a39Sopenharmony_ci
5849e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-2 block
5850e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                     @ AES final-1 block - load ciphertext
5851e1051a39Sopenharmony_ci
5852e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
5853e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16       @ AES final-2 block  - store result
5854e1051a39Sopenharmony_ci
5855e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr2b                           @ AES final-1 block - result
5856e1051a39Sopenharmony_ci
5857e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-2 block - mid
5858e1051a39Sopenharmony_ci
5859e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h3.1d                         @ GHASH final-2 block - low
5860e1051a39Sopenharmony_ci
5861e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h3.2d                         @ GHASH final-2 block - high
5862e1051a39Sopenharmony_ci
5863e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-2 block - mid
5864e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                           @ AES final-1 block - mov low
5865e1051a39Sopenharmony_ci
5866e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                           @ AES final-1 block - mov high
5867e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-2 block - low
5868e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
5869e1051a39Sopenharmony_ci
5870e1051a39Sopenharmony_ci	pmull   $rk4v.1q, $rk4v.1d, $h34k.1d                     @ GHASH final-2 block - mid
5871e1051a39Sopenharmony_ci
5872e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-2 block - high
5873e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                  @ AES final-1 block - round 14 low
5874e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5875e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5876e1051a39Sopenharmony_ci#endif
5877e1051a39Sopenharmony_ci
5878e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-2 block - mid
5879e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                  @ AES final-1 block - round 14 high
5880e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5881e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5882e1051a39Sopenharmony_ci#endif
5883e1051a39Sopenharmony_ci	.L256_dec_blocks_more_than_1:                            @ blocks left >  1
5884e1051a39Sopenharmony_ci
5885e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr], #16       @ AES final-1 block  - store result
5886e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                   @ GHASH final-1 block
5887e1051a39Sopenharmony_ci
5888e1051a39Sopenharmony_ci	ld1     { $res1b}, [$input_ptr], #16                     @ AES final block - load ciphertext
5889e1051a39Sopenharmony_ci
5890e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                          @ feed in partial tag
5891e1051a39Sopenharmony_ci	movi    $t0.8b, #0                                       @ suppress further partial tag feed in
5892e1051a39Sopenharmony_ci
5893e1051a39Sopenharmony_ci	mov     $rk4d, $res0.d[1]                                @ GHASH final-1 block - mid
5894e1051a39Sopenharmony_ci
5895e1051a39Sopenharmony_ci	eor     $ctr0b, $res1b, $ctr3b                           @ AES final block - result
5896e1051a39Sopenharmony_ci
5897e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h2.2d                         @ GHASH final-1 block - high
5898e1051a39Sopenharmony_ci
5899e1051a39Sopenharmony_ci	eor     $rk4v.8b, $rk4v.8b, $res0.8b                     @ GHASH final-1 block - mid
5900e1051a39Sopenharmony_ci
5901e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h2.1d                         @ GHASH final-1 block - low
5902e1051a39Sopenharmony_ci	mov     $output_l0, $ctr0.d[0]                           @ AES final block - mov low
5903e1051a39Sopenharmony_ci
5904e1051a39Sopenharmony_ci	ins     $rk4v.d[1], $rk4v.d[0]                           @ GHASH final-1 block - mid
5905e1051a39Sopenharmony_ci
5906e1051a39Sopenharmony_ci	mov     $output_h0, $ctr0.d[1]                           @ AES final block - mov high
5907e1051a39Sopenharmony_ci
5908e1051a39Sopenharmony_ci	pmull2  $rk4v.1q, $rk4v.2d, $h12k.2d                     @ GHASH final-1 block - mid
5909e1051a39Sopenharmony_ci	eor     $output_l0, $output_l0, $rk14_l                  @ AES final block - round 14 low
5910e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5911e1051a39Sopenharmony_ci	rev     $output_l0, $output_l0
5912e1051a39Sopenharmony_ci#endif
5913e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                           @ GHASH final-1 block - low
5914e1051a39Sopenharmony_ci
5915e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                           @ GHASH final-1 block - high
5916e1051a39Sopenharmony_ci
5917e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $rk4v.16b                      @ GHASH final-1 block - mid
5918e1051a39Sopenharmony_ci	eor     $output_h0, $output_h0, $rk14_h                  @ AES final block - round 14 high
5919e1051a39Sopenharmony_ci#ifdef __AARCH64EB__
5920e1051a39Sopenharmony_ci	rev     $output_h0, $output_h0
5921e1051a39Sopenharmony_ci#endif
5922e1051a39Sopenharmony_ci	.L256_dec_blocks_less_than_1:                            @ blocks left <= 1
5923e1051a39Sopenharmony_ci
5924e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
5925e1051a39Sopenharmony_ci	mvn     $rk14_h, xzr                                     @ rk14_h = 0xffffffffffffffff
5926e1051a39Sopenharmony_ci
5927e1051a39Sopenharmony_ci	sub     $bit_length, $bit_length, #128                   @ bit_length -= 128
5928e1051a39Sopenharmony_ci	mvn     $rk14_l, xzr                                     @ rk14_l = 0xffffffffffffffff
5929e1051a39Sopenharmony_ci
5930e1051a39Sopenharmony_ci	ldp     $end_input_ptr, $main_end_input_ptr, [$output_ptr] @ load existing bytes we need to not overwrite
5931e1051a39Sopenharmony_ci	neg     $bit_length, $bit_length                         @ bit_length = 128 - #bits in input (in range [1,128])
5932e1051a39Sopenharmony_ci
5933e1051a39Sopenharmony_ci	and     $bit_length, $bit_length, #127                   @ bit_length %= 128
5934e1051a39Sopenharmony_ci
5935e1051a39Sopenharmony_ci	lsr     $rk14_h, $rk14_h, $bit_length                    @ rk14_h is mask for top 64b of last block
5936e1051a39Sopenharmony_ci	cmp     $bit_length, #64
5937e1051a39Sopenharmony_ci
5938e1051a39Sopenharmony_ci	csel    $ctr32x, $rk14_l, $rk14_h, lt
5939e1051a39Sopenharmony_ci	csel    $ctr96_b64x, $rk14_h, xzr, lt
5940e1051a39Sopenharmony_ci
5941e1051a39Sopenharmony_ci	fmov    $ctr0d, $ctr32x                                  @ ctr0b is mask for last block
5942e1051a39Sopenharmony_ci	and     $output_l0, $output_l0, $ctr32x
5943e1051a39Sopenharmony_ci
5944e1051a39Sopenharmony_ci	mov     $ctr0.d[1], $ctr96_b64x
5945e1051a39Sopenharmony_ci	bic     $end_input_ptr, $end_input_ptr, $ctr32x          @ mask out low existing bytes
5946e1051a39Sopenharmony_ci
5947e1051a39Sopenharmony_ci#ifndef __AARCH64EB__
5948e1051a39Sopenharmony_ci	rev     $ctr32w, $rctr32w
5949e1051a39Sopenharmony_ci#else
5950e1051a39Sopenharmony_ci	mov     $ctr32w, $rctr32w
5951e1051a39Sopenharmony_ci#endif
5952e1051a39Sopenharmony_ci
5953e1051a39Sopenharmony_ci	bic     $main_end_input_ptr, $main_end_input_ptr, $ctr96_b64x      @ mask out high existing bytes
5954e1051a39Sopenharmony_ci
5955e1051a39Sopenharmony_ci	orr     $output_l0, $output_l0, $end_input_ptr
5956e1051a39Sopenharmony_ci
5957e1051a39Sopenharmony_ci	and     $output_h0, $output_h0, $ctr96_b64x
5958e1051a39Sopenharmony_ci
5959e1051a39Sopenharmony_ci	orr     $output_h0, $output_h0, $main_end_input_ptr
5960e1051a39Sopenharmony_ci
5961e1051a39Sopenharmony_ci	and     $res1b, $res1b, $ctr0b                            @ possibly partial last block has zeroes in highest bits
5962e1051a39Sopenharmony_ci
5963e1051a39Sopenharmony_ci	rev64   $res0b, $res1b                                    @ GHASH final block
5964e1051a39Sopenharmony_ci
5965e1051a39Sopenharmony_ci	eor     $res0b, $res0b, $t0.16b                           @ feed in partial tag
5966e1051a39Sopenharmony_ci
5967e1051a39Sopenharmony_ci	pmull   $rk3q1, $res0.1d, $h1.1d                          @ GHASH final block - low
5968e1051a39Sopenharmony_ci
5969e1051a39Sopenharmony_ci	mov     $t0d, $res0.d[1]                                  @ GHASH final block - mid
5970e1051a39Sopenharmony_ci
5971e1051a39Sopenharmony_ci	eor     $t0.8b, $t0.8b, $res0.8b                          @ GHASH final block - mid
5972e1051a39Sopenharmony_ci
5973e1051a39Sopenharmony_ci	pmull2  $rk2q1, $res0.2d, $h1.2d                          @ GHASH final block - high
5974e1051a39Sopenharmony_ci
5975e1051a39Sopenharmony_ci	pmull   $t0.1q, $t0.1d, $h12k.1d                          @ GHASH final block - mid
5976e1051a39Sopenharmony_ci
5977e1051a39Sopenharmony_ci	eor     $acc_hb, $acc_hb, $rk2                            @ GHASH final block - high
5978e1051a39Sopenharmony_ci
5979e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $rk3                            @ GHASH final block - low
5980e1051a39Sopenharmony_ci
5981e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t0.16b                         @ GHASH final block - mid
5982e1051a39Sopenharmony_ci	movi    $mod_constant.8b, #0xc2
5983e1051a39Sopenharmony_ci
5984e1051a39Sopenharmony_ci	eor     $t9.16b, $acc_lb, $acc_hb                         @ MODULO - karatsuba tidy up
5985e1051a39Sopenharmony_ci
5986e1051a39Sopenharmony_ci	shl     $mod_constantd, $mod_constantd, #56               @ mod_constant
5987e1051a39Sopenharmony_ci
5988e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $t9.16b                         @ MODULO - karatsuba tidy up
5989e1051a39Sopenharmony_ci
5990e1051a39Sopenharmony_ci	pmull   $mod_t.1q, $acc_h.1d, $mod_constant.1d            @ MODULO - top 64b align with mid
5991e1051a39Sopenharmony_ci
5992e1051a39Sopenharmony_ci	ext     $acc_hb, $acc_hb, $acc_hb, #8                     @ MODULO - other top alignment
5993e1051a39Sopenharmony_ci
5994e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $mod_t.16b                      @ MODULO - fold into mid
5995e1051a39Sopenharmony_ci
5996e1051a39Sopenharmony_ci	eor     $acc_mb, $acc_mb, $acc_hb                         @ MODULO - fold into mid
5997e1051a39Sopenharmony_ci
5998e1051a39Sopenharmony_ci	pmull   $mod_constant.1q, $acc_m.1d, $mod_constant.1d     @ MODULO - mid 64b align with low
5999e1051a39Sopenharmony_ci
6000e1051a39Sopenharmony_ci	ext     $acc_mb, $acc_mb, $acc_mb, #8                     @ MODULO - other mid alignment
6001e1051a39Sopenharmony_ci
6002e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $mod_constant.16b               @ MODULO - fold into low
6003e1051a39Sopenharmony_ci
6004e1051a39Sopenharmony_ci	stp     $output_l0, $output_h0, [$output_ptr]
6005e1051a39Sopenharmony_ci
6006e1051a39Sopenharmony_ci	str     $ctr32w, [$counter, #12]                          @ store the updated counter
6007e1051a39Sopenharmony_ci
6008e1051a39Sopenharmony_ci	eor     $acc_lb, $acc_lb, $acc_mb                         @ MODULO - fold into low
6009e1051a39Sopenharmony_ci	ext     $acc_lb, $acc_lb, $acc_lb, #8
6010e1051a39Sopenharmony_ci	rev64   $acc_lb, $acc_lb
6011e1051a39Sopenharmony_ci	mov     x0, $len
6012e1051a39Sopenharmony_ci	st1     { $acc_l.16b }, [$current_tag]
6013e1051a39Sopenharmony_ci
6014e1051a39Sopenharmony_ci	ldp     x21, x22, [sp, #16]
6015e1051a39Sopenharmony_ci	ldp     x23, x24, [sp, #32]
6016e1051a39Sopenharmony_ci	ldp     d8, d9, [sp, #48]
6017e1051a39Sopenharmony_ci	ldp     d10, d11, [sp, #64]
6018e1051a39Sopenharmony_ci	ldp     d12, d13, [sp, #80]
6019e1051a39Sopenharmony_ci	ldp     d14, d15, [sp, #96]
6020e1051a39Sopenharmony_ci	ldp     x19, x20, [sp], #112
6021e1051a39Sopenharmony_ci	ret
6022e1051a39Sopenharmony_ci
6023e1051a39Sopenharmony_ci.L256_dec_ret:
6024e1051a39Sopenharmony_ci	mov w0, #0x0
6025e1051a39Sopenharmony_ci	ret
6026e1051a39Sopenharmony_ci.size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel
6027e1051a39Sopenharmony_ci___
6028e1051a39Sopenharmony_ci}
6029e1051a39Sopenharmony_ci}
6030e1051a39Sopenharmony_ci
# Append the trailer of the generated assembly to $code: select the .rodata
# section, emit the NUL-terminated identification string (.asciz), align,
# and emit an "#endif".  The heredoc is an interpolating one, which is why
# the "@" in the e-mail address is written as "\@".
# NOTE(review): the conditional that this "#endif" closes is not visible in
# this part of the file -- presumably it is emitted earlier; confirm.
6031e1051a39Sopenharmony_ci$code.=<<___;
6032e1051a39Sopenharmony_ci.rodata
6033e1051a39Sopenharmony_ci.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
6034e1051a39Sopenharmony_ci.align  2
6035e1051a39Sopenharmony_ci#endif
6036e1051a39Sopenharmony_ci___
6037e1051a39Sopenharmony_ci
6038e1051a39Sopenharmony_ciif ($flavour =~ /64/) {         ######## 64-bit code
6039e1051a39Sopenharmony_ci    sub unvmov {
6040e1051a39Sopenharmony_ci        my $arg=shift;
6041e1051a39Sopenharmony_ci
6042e1051a39Sopenharmony_ci        $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
6043e1051a39Sopenharmony_ci        sprintf "ins    v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
6044e1051a39Sopenharmony_ci                             $3<8?$3:$3+8,($4 eq "lo")?0:1;
6045e1051a39Sopenharmony_ci    }
6046e1051a39Sopenharmony_ci    foreach(split("\n",$code)) {
6047e1051a39Sopenharmony_ci        s/@\s/\/\//o;               # old->new style commentary
6048e1051a39Sopenharmony_ci        print $_,"\n";
6049e1051a39Sopenharmony_ci    }
6050e1051a39Sopenharmony_ci} else {                ######## 32-bit code
6051e1051a39Sopenharmony_ci    sub unvdup32 {
6052e1051a39Sopenharmony_ci        my $arg=shift;
6053e1051a39Sopenharmony_ci
6054e1051a39Sopenharmony_ci        $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
6055e1051a39Sopenharmony_ci        sprintf "vdup.32    q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
6056e1051a39Sopenharmony_ci    }
6057e1051a39Sopenharmony_ci    sub unvpmullp64 {
6058e1051a39Sopenharmony_ci        my ($mnemonic,$arg)=@_;
6059e1051a39Sopenharmony_ci
6060e1051a39Sopenharmony_ci        if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {
6061e1051a39Sopenharmony_ci            my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19)
6062e1051a39Sopenharmony_ci                       |(($2&7)<<17)|(($2&8)<<4)
6063e1051a39Sopenharmony_ci                       |(($3&7)<<1) |(($3&8)<<2);
6064e1051a39Sopenharmony_ci            $word |= 0x00010001  if ($mnemonic =~ "2");
6065e1051a39Sopenharmony_ci            # since ARMv7 instructions are always encoded little-endian.
6066e1051a39Sopenharmony_ci            # correct solution is to use .inst directive, but older%%%%
6067e1051a39Sopenharmony_ci            # assemblers don't implement it:-(
6068e1051a39Sopenharmony_ci            sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
6069e1051a39Sopenharmony_ci                    $word&0xff,($word>>8)&0xff,
6070e1051a39Sopenharmony_ci                    ($word>>16)&0xff,($word>>24)&0xff,
6071e1051a39Sopenharmony_ci                    $mnemonic,$arg;
6072e1051a39Sopenharmony_ci        }
6073e1051a39Sopenharmony_ci    }
6074e1051a39Sopenharmony_ci
6075e1051a39Sopenharmony_ci    foreach(split("\n",$code)) {
6076e1051a39Sopenharmony_ci        s/\b[wx]([0-9]+)\b/r$1/go;      # new->old registers
6077e1051a39Sopenharmony_ci        s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;   # new->old registers
6078e1051a39Sopenharmony_ci        s/\/\/\s?/@ /o;             # new->old style commentary
6079e1051a39Sopenharmony_ci
6080e1051a39Sopenharmony_ci        # fix up remaining new-style suffixes
6081e1051a39Sopenharmony_ci        s/\],#[0-9]+/]!/o;
6082e1051a39Sopenharmony_ci
6083e1051a39Sopenharmony_ci        s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2 $1,#0/o         or
6084e1051a39Sopenharmony_ci        s/vdup\.32\s+(.*)/unvdup32($1)/geo              or
6085e1051a39Sopenharmony_ci        s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo        or
6086e1051a39Sopenharmony_ci        s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo   or
6087e1051a39Sopenharmony_ci        s/^(\s+)b\./$1b/o                       or
6088e1051a39Sopenharmony_ci        s/^(\s+)ret/$1bx\tlr/o;
6089e1051a39Sopenharmony_ci
6090e1051a39Sopenharmony_ci        if (s/^(\s+)mov\.([a-z]+)/$1mov$2/) {
6091e1051a39Sopenharmony_ci            print "     it      $2\n";
6092e1051a39Sopenharmony_ci        }
6093e1051a39Sopenharmony_ci        s/__AARCH64E([BL])__/__ARME$1__/go;
6094e1051a39Sopenharmony_ci        print $_,"\n";
6095e1051a39Sopenharmony_ci    }
6096e1051a39Sopenharmony_ci}
6097e1051a39Sopenharmony_ci
# Close STDOUT explicitly to enforce a flush; abort with the system error
# text if the close fails.
unless (close STDOUT) {
    die "error closing STDOUT: $!";
}
6099