#! /usr/bin/env perl
# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA1 for ARMv8.
#
# Performance in cycles per processed byte and improvement coefficient
# over code generated with "default" compiler:
#
#		hardware-assisted	software(*)
# Apple A7	2.31			4.13 (+14%)
# Cortex-A53	2.24			8.03 (+97%)
# Cortex-A57	2.35			7.88 (+74%)
# Denver	2.13			3.97 (+0%)(**)
# X-Gene				8.80 (+200%)
# Mongoose	2.05			6.50 (+160%)
# Kryo		1.88			8.00 (+90%)
# ThunderX2	2.64			6.36 (+150%)
#
# (*)	Software results are presented mostly for reference purposes.
# (**)	Keep in mind that Denver relies on binary translation, which
#	optimizes compiler output at run-time.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first next to this script, then in
# ../../perlasm relative to it.  $dir keeps the trailing path separator.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# On failure report the OS error ($!); $1 would only repeat the directory
# captured from $0 above.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Register allocation for the integer-only code path.
($ctx,$inp,$num)=("x0","x1","x2");	# registers holding the three arguments
@Xw=map("w$_",(3..17,19));		# 16 message words, 32-bit views (18 is skipped)
@Xx=map("x$_",(3..17,19));		# the same registers, 64-bit views
@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));	# working variables a..e
($t0,$t1,$t2,$K)=map("w$_",(25..28));	# temporaries and round constant


# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Append the assembly for round $i (0..19) to $code.  $a..$e are the
# registers currently playing the roles of the five working variables.
# Rounds 0..13 also fetch and byte-order the remaining input words; rounds
# 14..19 interleave the update of schedule word X[($i+2)&15] (the lines
# indented by an extra space).  Round 19 loads the constant for rounds 20..39.
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;
    my $Xnext=$Xw[($i+1)&15];		# word added for the following round

    if ($i<14) {
	if ($i&1) {
	    # odd round: put the 64-bit pair loaded one round ago in order
	    my $pair=$Xx[$i+1];
	    $code.=<<___;
#ifdef	__AARCH64EB__
	ror	$pair,$pair,#32
#else
	rev32	$pair,$pair
#endif
___
	} else {
	    # even round: split the current pair and load the next one
	    my ($lo,$hi,$nxt)=@Xx[$i,$i+1,$i+2];
	    $code.=<<___;
	lsr	$hi,$lo,#32
	ldur	$nxt,[$inp,#`($i+2)*4-64`]
___
	}
	$code.=<<___;
	bic	$t0,$d,$b
	and	$t1,$c,$b
	ror	$t2,$a,#27
	add	$d,$d,$K		// future e+=K
	orr	$t0,$t0,$t1
	add	$e,$e,$t2		// e+=rot(a,5)
	ror	$b,$b,#2
	add	$d,$d,$Xnext	// future e+=X[i]
	add	$e,$e,$t0		// e+=F(b,c,d)
___
    } else {
	my ($Xj,$Xj2,$Xj8,$Xj13)=@Xw[$j,($j+2)&15,($j+8)&15,($j+13)&15];

	if ($i==14) {
	    # last input pair still has to be split
	    my ($lo,$hi)=@Xx[$i,$i+1];
	    $code.=<<___;
	lsr	$hi,$lo,#32
___
	}
	if ($i==19) {
	    $code.=<<___;
	movz	$K,#0xeba1
	movk	$K,#0x6ed9,lsl#16
___
	}
	$code.=<<___;
	 eor	$Xj,$Xj,$Xj2
	bic	$t0,$d,$b
	and	$t1,$c,$b
	ror	$t2,$a,#27
	 eor	$Xj,$Xj,$Xj8
	add	$d,$d,$K		// future e+=K
	orr	$t0,$t0,$t1
	add	$e,$e,$t2		// e+=rot(a,5)
	 eor	$Xj,$Xj,$Xj13
	ror	$b,$b,#2
	add	$d,$d,$Xnext	// future e+=X[i]
	add	$e,$e,$t0		// e+=F(b,c,d)
	 ror	$Xj,$Xj,#31
___
    }
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Append the assembly for round $i (40..59) to $code.  The round function
# is built as ((b|c)&d)|(b&c); the update of schedule word X[($i+2)&15] is
# interleaved (lines indented by an extra space).  Round 59 first loads the
# constant used by rounds 60..79.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;
    my ($Xj,$Xj2,$Xj8,$Xj13)=@Xw[$j,($j+2)&15,($j+8)&15,($j+13)&15];
    my $Xnext=$Xw[($i+1)&15];		# word added for the following round

    if ($i==59) {
	$code.=<<___;
	movz	$K,#0xc1d6
	movk	$K,#0xca62,lsl#16
___
    }
    $code.=<<___;
	orr	$t0,$b,$c
	and	$t1,$b,$c
	 eor	$Xj,$Xj,$Xj2
	ror	$t2,$a,#27
	and	$t0,$t0,$d
	add	$d,$d,$K		// future e+=K
	 eor	$Xj,$Xj,$Xj8
	add	$e,$e,$t2		// e+=rot(a,5)
	orr	$t0,$t0,$t1
	ror	$b,$b,#2
	 eor	$Xj,$Xj,$Xj13
	add	$d,$d,$Xnext	// future e+=X[i]
	add	$e,$e,$t0		// e+=F(b,c,d)
	 ror	$Xj,$Xj,#31
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Append the assembly for a parity round (F = b^c^d) to $code.  The main
# script calls it for rounds 20..39 and again for 60..79.  Round 39 first
# loads the constant for rounds 40..59.  Rounds 78 and 79 no longer update
# the message schedule; instead they reload the five context words from
# [$ctx] into X[1]..X[5] for the final accumulation, and round 79 also
# skips the "future e" pre-additions.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=($i+2)&15;
    my $Xnext=$Xw[($i+1)&15];		# word added for the following round

    if ($i==39) {
	$code.=<<___;
	movz	$K,#0xbcdc
	movk	$K,#0x8f1b,lsl#16
___
    }

    if ($i<78) {
	my ($Xj,$Xj2,$Xj8,$Xj13)=@Xw[$j,($j+2)&15,($j+8)&15,($j+13)&15];
	$code.=<<___;
	 eor	$Xj,$Xj,$Xj2
	eor	$t0,$d,$b
	ror	$t2,$a,#27
	add	$d,$d,$K		// future e+=K
	 eor	$Xj,$Xj,$Xj8
	eor	$t0,$t0,$c
	add	$e,$e,$t2		// e+=rot(a,5)
	ror	$b,$b,#2
	 eor	$Xj,$Xj,$Xj13
	add	$d,$d,$Xnext	// future e+=X[i]
	add	$e,$e,$t0		// e+=F(b,c,d)
	 ror	$Xj,$Xj,#31
___
    } elsif ($i==78) {
	my ($h0,$h1)=@Xw[1,2];
	$code.=<<___;
	ldp	$h0,$h1,[$ctx]
	eor	$t0,$d,$b
	ror	$t2,$a,#27
	add	$d,$d,$K		// future e+=K
	eor	$t0,$t0,$c
	add	$e,$e,$t2		// e+=rot(a,5)
	ror	$b,$b,#2
	add	$d,$d,$Xnext	// future e+=X[i]
	add	$e,$e,$t0		// e+=F(b,c,d)
___
    } elsif ($i==79) {
	my ($h2,$h3,$h4)=@Xw[3,4,5];
	$code.=<<___;
	ldp	$h2,$h3,[$ctx,#8]
	eor	$t0,$d,$b
	ror	$t2,$a,#27
	eor	$t0,$t0,$c
	add	$e,$e,$t2		// e+=rot(a,5)
	ror	$b,$b,#2
	ldr	$h4,[$ctx,#16]
	add	$e,$e,$t0		// e+=F(b,c,d)
___
    }
}

# Integer-only implementation, sha1_block_data_order.  The entry point
# tests the ARMV8_SHA1 bit of OPENSSL_armcap_P and branches to .Lv8_entry
# when it is set; otherwise it saves x19..x28 and runs the 80 unrolled
# rounds generated below once per 64-byte block.
$code.=<<___;
#ifndef	__KERNEL__
# include "arm_arch.h"
.extern OPENSSL_armcap_P
.hidden OPENSSL_armcap_P
#endif

.text

.globl	sha1_block_data_order
.type	sha1_block_data_order,%function
.align	6
sha1_block_data_order:
	adrp	x16,OPENSSL_armcap_P
	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA1
	b.ne	.Lv8_entry

	stp	x29,x30,[sp,#-96]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]

	ldp	$A,$B,[$ctx]
	ldp	$C,$D,[$ctx,#8]
	ldr	$E,[$ctx,#16]

___

# Loop head: load the first two input words, set up the constant for rounds
# 0..19 and pre-add K and X[0] to e, as the round bodies expect.
$code.=<<___;
.Loop:
	ldr	@Xx[0],[$inp],#64
	movz	$K,#0x7999
	sub	$num,$num,#1
	movk	$K,#0x5a82,lsl#16
#ifdef	__AARCH64EB__
	ror	@Xx[0],@Xx[0],#32
#else
	rev32	@Xx[0],@Xx[0]
#endif
	add	$E,$E,$K		// warm it up
	add	$E,$E,@Xw[0]
___

# Unroll all 80 rounds.  Each entry is [first round NOT handled, generator];
# after every round the register roles in @V rotate by one position.  $i is
# deliberately the global counter and is left at 80 afterwards.
$i=0;
foreach my $stage ([20,\&BODY_00_19],[40,\&BODY_20_39],
		   [60,\&BODY_40_59],[80,\&BODY_20_39]) {
    my ($limit,$emit)=@$stage;
    while ($i<$limit) {
	$emit->($i,@V);
	unshift(@V,pop(@V));
	$i++;
    }
}

# Add the context words reloaded during rounds 78/79 (X[1]..X[5]), store the
# new state and loop while blocks remain.
$code.=<<___;
	add	$B,$B,@Xw[2]
	add	$C,$C,@Xw[3]
	add	$A,$A,@Xw[1]
	add	$D,$D,@Xw[4]
	add	$E,$E,@Xw[5]
	stp	$A,$B,[$ctx]
	stp	$C,$D,[$ctx,#8]
	str	$E,[$ctx,#16]
	cbnz	$num,.Loop

___

# Epilogue: restore the saved registers and return.
$code.=<<___;
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x23,x24,[sp,#48]
	ldp	x25,x26,[sp,#64]
	ldp	x27,x28,[sp,#80]
	ldr	x29,[sp],#96
	ret
.size	sha1_block_data_order,.-sha1_block_data_order
___
{{{
# Hardware-assisted implementation, sha1_block_armv8 (entered through
# .Lv8_entry).  The sha1* mnemonics and the ".32"/".i32" suffixes used here
# are rewritten by the post-processing pass at the end of this script.
my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));	# state and rotating e values
my @MSG=map("v$_.16b",(4..7));			# four vectors of message words
my @Kxx=map("v$_.4s",(16..19));			# the four round constants
my ($W0,$W1)=("v20.4s","v21.4s");		# message+constant sums
my $ABCD_SAVE="v22.16b";

# $j indexes @Kxx and is interpolated by the heredoc below, so it has to be
# initialised before that heredoc rather than in the loop header after it.
$j=0;

$code.=<<___;
.type	sha1_block_armv8,%function
.align	6
sha1_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	adrp	x4,.Lconst
	add	x4,x4,:lo12:.Lconst
	eor	$E,$E,$E
	ld1.32	{$ABCD},[$ctx],#16
	ld1.32	{$E}[0],[$ctx]
	sub	$ctx,$ctx,#16
	ld1.32	{@Kxx[0]-@Kxx[3]},[x4]

.Loop_hw:
	ld1	{@MSG[0]-@MSG[3]},[$inp],#64
	sub	$num,$num,#1
	rev32	@MSG[0],@MSG[0]
	rev32	@MSG[1],@MSG[1]

	add.i32	$W0,@Kxx[0],@MSG[0]
	rev32	@MSG[2],@MSG[2]
	orr	$ABCD_SAVE,$ABCD,$ABCD	// offload

	add.i32	$W1,@Kxx[0],@MSG[1]
	rev32	@MSG[3],@MSG[3]
	sha1h	$E1,$ABCD
	sha1c	$ABCD,$E,$W0		// 0
	add.i32	$W0,@Kxx[$j],@MSG[2]
	sha1su0	@MSG[0],@MSG[1],@MSG[2]
___
# Quad-rounds 1..16.  The instruction flavour changes every five iterations
# (c, p, m, p); the registers swap roles and @MSG rotates after each one;
# $j moves to the next constant after iterations 2, 7 and 12.
for ($i=1;$i<20-3;$i++) {
my $f=("c","p","m","p")[int($i/5)];
$code.=<<___;
	sha1h	$E0,$ABCD		// $i
	sha1$f	$ABCD,$E1,$W1
	add.i32	$W1,@Kxx[$j],@MSG[3]
	sha1su1	@MSG[0],@MSG[3]
___
# the last iteration has no further schedule words to start
$code.=<<___ if ($i<20-4);
	sha1su0	@MSG[1],@MSG[2],@MSG[3]
___
	($E0,$E1)=($E1,$E0);		($W0,$W1)=($W1,$W0);
	push(@MSG,shift(@MSG));		$j++ if ((($i+3)%5)==0);
}
# Quad-rounds 17..19, accumulation into the saved state, loop control,
# store-back, and the constant table placed in .rodata.
$code.=<<___;
	sha1h	$E0,$ABCD		// $i
	sha1p	$ABCD,$E1,$W1
	add.i32	$W1,@Kxx[$j],@MSG[3]

	sha1h	$E1,$ABCD		// 18
	sha1p	$ABCD,$E0,$W0

	sha1h	$E0,$ABCD		// 19
	sha1p	$ABCD,$E1,$W1

	add.i32	$E,$E,$E0
	add.i32	$ABCD,$ABCD,$ABCD_SAVE

	cbnz	$num,.Loop_hw

	st1.32	{$ABCD},[$ctx],#16
	st1.32	{$E}[0],[$ctx]

	ldr	x29,[sp],#16
	ret
.size	sha1_block_armv8,.-sha1_block_armv8

.rodata

.align	6
.Lconst:
.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39
.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59
.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79
.asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
}}}

{   # Base encodings of the SHA1 instructions; unsha1() ORs the register
    # numbers into them.
    my	%opcode = (
	"sha1c"		=> 0x5e000000,	"sha1p"		=> 0x5e001000,
	"sha1m"		=> 0x5e002000,	"sha1su0"	=> 0x5e003000,
	"sha1h"		=> 0x5e280800,	"sha1su1"	=> 0x5e281800	);

    # unsha1($mnemonic,$arg)
    #
    # Turn a SHA1 instruction into a raw ".inst" word.
    #   $mnemonic  instruction name, a key of %opcode
    #   $arg       operand text: two or three q<N>/v<N> registers separated
    #              by commas (anything after a register number up to the
    #              next comma is ignored)
    # Returns ".inst\t0x<8 hex digits>\t//<mnemonic> <arg>", with the first
    # register in bits 0-4, the second in bits 5-9 and the optional third in
    # bits 16-20.  If the mnemonic is unknown or the operands cannot be
    # parsed, the instruction text is returned unencoded as
    # "<mnemonic>\t<arg>", so the problem surfaces in the assembler instead
    # of the line being silently blanked by the caller's substitution.
    sub unsha1 {
	my ($mnemonic,$arg)=@_;
	my $base=$opcode{$mnemonic};

	my ($rd,$rn,$rm)=
	    $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/;

	return "$mnemonic\t$arg" unless (defined($base) && defined($rn));

	# two-operand forms (sha1h, sha1su1) have no third register
	$rm=0 unless defined($rm);

	return sprintf ".inst\t0x%08x\t//%s %s",
			$base|$rd|($rn<<5)|($rm<<16),
			$mnemonic,$arg;
    }
}

# Final pass: post-process the generated text line by line and print it
# (STDOUT is the pipe into arm-xlate.pl).
for my $line (split("\n",$code)) {

	# evaluate expressions that were deferred in `backticks`
	$line =~ s/\`([^\`]*)\`/eval($1)/ge;

	# encode SHA1 instructions as raw .inst words
	$line =~ s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/ge;

	# "op.32"/"op.i32" notation: drop the suffix and retype the .16b
	# operands of that line as .4s
	if ($line =~ s/\.\w?32\b//) {
		$line =~ s/\.16b/\.4s/g;
	}
	# ld1/st1 of lane [0]: the element arrangement becomes .s
	if ($line =~ m/(ld|st)1[^\[]+\[0\]/) {
		$line =~ s/\.4s/\.s/g;
	}

	print $line,"\n";
}

# closing the pipe is what reports a failure of the translator
close STDOUT or die "error closing STDOUT: $!";