1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci
# Command line: [flavour] [output-file]
#   $output  - removed from the end of the argument list when the last
#              argument ends in an extension, otherwise undef
#   $flavour - removed from the front of the argument list when the first
#              argument contains no dot, otherwise undef
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;

# Win64 conventions are selected by a nasm/masm/mingw64 flavour or by an
# output file named *.asm.
$win64 = ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/) ? 1 : 0;

# Look for the translator next to this script first, then under perlasm/.
($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/;
$xlate = "${dir}x86_64-xlate.pl";
$xlate = "${dir}perlasm/x86_64-xlate.pl" unless -f $xlate;
-f $xlate or die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
open(OUT, qq{| "$^X" "$xlate" $flavour "$output"})
     or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments.
if ($win64) {
    ($arg1,$arg2,$arg3,$arg4) = ("%rcx","%rdx","%r8", "%r9");	# Win64 order
} else {
    ($arg1,$arg2,$arg3,$arg4) = ("%rdi","%rsi","%rdx","%rcx");	# Unix order
}
# Start of the main assembler text (AT&T syntax); it runs up to the next
# line consisting solely of ___.  The preamble emitted first:
#   - places a call to OPENSSL_cpuid_setup in the .init section,
#   - declares OPENSSL_ia32cap_P as a hidden common symbol of 16 bytes
#     with 4-byte alignment,
#   - switches to .text for the routines that follow.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text
39e1051a39Sopenharmony_ci
# OPENSSL_atomic_add
#   In:  first argument  = address of a 32-bit counter
#        second argument = amount to add
#   Out: rax = the new counter value, sign-extended from 32 to 64 bits
#   The update is a lock-prefixed compare-and-exchange retried until it
#   succeeds.  Clobbers r8 and flags.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	endbranch
	movl	($arg1),%eax		# eax = value currently in memory
.Latomic_add_retry:
	leaq	($arg2,%rax),%r8	# r8 = observed value + amount
	.byte	0xf0			# lock prefix for the cmpxchg below
	cmpxchgl	%r8d,($arg1)	# store r8d if memory still equals eax
	jne	.Latomic_add_retry	# lost the race, eax was reloaded: retry
	movl	%r8d,%eax		# return the value that was stored
	.byte	0x48,0x98		# cltq/cdqe
	ret
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
56e1051a39Sopenharmony_ci
# OPENSSL_rdtsc
#   Out: rax = 64-bit time-stamp counter assembled from the edx:eax pair
#        returned by the rdtsc instruction.
#   Takes no arguments; overwrites rdx and flags.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	endbranch
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
69e1051a39Sopenharmony_ci
# OPENSSL_ia32_cpuid
#
# Queries CPUID and returns an adjusted copy of the leaf 1 feature words.
#   In:   %rdi = pointer to a buffer; the dwords at offsets 8 and 12 are
#         cleared on entry and, when the CPU reports leaf 7, receive the
#         (adjusted) leaf 7 %ebx and the leaf 7 %ecx
#   Out:  %rax = (adjusted leaf 1 %ecx) << 32 | (adjusted leaf 1 %edx)
#   Regs: %rbx is parked in %r8 and restored before returning;
#         %rcx, %rdx, %r9, %r10, %r11, %xmm0 and flags are overwritten
#
# Register roles while probing:
#   %r11d  highest standard CPUID leaf
#   %r9d   0 on GenuineIntel, non-zero otherwise (on AMD it also carries
#          the XOP bit); from .Lgeneric on it is the working copy of %ecx
#   %r10d  core-count information; from .Lgeneric on the copy of %edx
#   %xmm0  leaf 1 %eax (processor id), read back at .Lnotknights
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	endbranch
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep bit 0 (non-Intel marker) and
					# AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id; this path
					# skips .Lnocacheinfo, and .Lnotknights
					# reads %xmm0 back
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
	mov	%ebx,8(%rdi)		# save extended feature flags
	mov	%ecx,12(%rdi)
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+IFMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9		# adjusted %ecx copy becomes the high half
	mov	%r10d,%eax		# adjusted %edx copy becomes the low half
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
239e1051a39Sopenharmony_ci
# OPENSSL_cleanse
#   In:  first argument  = start of the buffer
#        second argument = number of bytes to overwrite with zero
#   Out: rax = 0
#   Lengths below 15 are cleared one byte at a time.  Longer buffers are
#   cleared bytewise up to an 8-byte boundary, then eight bytes per store,
#   and any remainder goes through the byte loop again.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
.cfi_startproc
	endbranch
	xor	%rax,%rax		# zero source for every store
	cmp	\$15,$arg2
	jae	.Lcleanse_head
	cmp	\$0,$arg2
	je	.Lcleanse_done
.Lcleanse_bytes:
	mov	%al,($arg1)
	sub	\$1,$arg2		# sets the flags tested by jnz below
	lea	1($arg1),$arg1		# lea leaves those flags untouched
	jnz	.Lcleanse_bytes
.Lcleanse_done:
	ret
.align	16
.Lcleanse_head:
	test	\$7,$arg1		# pointer on an 8-byte boundary yet?
	jz	.Lcleanse_words
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lcleanse_head
.Lcleanse_words:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# non-zero while 8 or more bytes remain
	lea	8($arg1),$arg1
	jnz	.Lcleanse_words
	cmp	\$0,$arg2
	jne	.Lcleanse_bytes		# finish the last 1..7 bytes
	ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
277e1051a39Sopenharmony_ci
# CRYPTO_memcmp
#   In:  first and second argument = the two buffers to compare
#        third argument            = number of bytes
#   Out: rax = 0 when all bytes match (or the length is 0), 1 otherwise
#   A length of exactly 16 is handled with two 8-byte loads per buffer;
#   every other length folds the XOR of each byte pair into al, with no
#   early exit on a mismatch.  Clobbers r10, r11 and flags.
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
.cfi_startproc
	endbranch
	xor	%rax,%rax		# difference accumulator / result
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lmemcmp_ret
	cmp	\$16,$arg3
	jne	.Lmemcmp_bytewise
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3		# length register reused as constant 1
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10		# non-zero if any of the 16 bytes differ
	cmovnz	$arg3,%rax
	ret

.align	16
.Lmemcmp_bytewise:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Lmemcmp_bytewise
	neg	%rax			# any set bit in al makes rax negative
	shr	\$63,%rax		# keep only the sign bit: 0 or 1
.Lmemcmp_ret:
	ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___
315e1051a39Sopenharmony_ci
# OPENSSL_wipe_cpu is emitted in exactly one of two variants, chosen by the
# target calling convention.  Both zero a set of scratch registers and
# return rsp+8 in rax.
if (!$win64) {
print<<___;
# Unix variant: clears xmm0-xmm15 and rcx, rdx, rsi, rdi, r8-r11.
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	endbranch
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax		# return value: rsp + 8
	ret
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
} else {
print<<___;
# Win64 variant: clears only xmm0-xmm5 and rcx, rdx, r8-r11.
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax		# return value: rsp + 8
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
{
# Register assignments shared by OPENSSL_instrument_bus and
# OPENSSL_instrument_bus2.
my $out="%r10";		# cursor into the caller's array of 32-bit slots
my $cnt="%rcx";		# slots still to be filled
my $max="%r11";		# bus: copy of the count; bus2: sample budget
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference

# Offset from %rsp of the scratch slot in which OPENSSL_instrument_bus2
# keeps the requested count.  Win64 has no red zone, so the first slot
# above the return address is used there; elsewhere the slot just below
# %rsp is used.  This must test \$win64: the bareword `win64' is an
# always-true string, which made Unix builds store into the caller's
# frame at 8(%rsp).
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus
#   In:  first argument  = array of 32-bit slots
#        second argument = number of slots
#   Out: rax = the second argument as passed in
#   For each slot: flush its cache line, then lock-add the number of ticks
#   elapsed since the previous rdtsc reading.
#   NOTE(review): a count of 0 is not checked for here.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2
#   In:  first argument  = array of 32-bit slots
#        second argument = number of slots
#        third argument  = maximum number of samples to take
#   Out: rax = requested slot count minus the slots left unfilled
#   Like OPENSSL_instrument_bus, but the cursor only advances when the
#   tick difference changes from the previous sample, and sampling stops
#   once the sample budget is used up.
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	endbranch
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)	# remember the requested count

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
469e1051a39Sopenharmony_ci
# gen_random(INSN)
#
# Prints the routine OPENSSL_ia32_<INSN>_bytes, where INSN is the mnemonic
# of the hardware random instruction to use ("rdrand" or "rdseed").  The
# generated routine takes a buffer (first argument) and a byte count
# (second argument), and returns in rax the number of bytes it stored.
# Each 64-bit draw is attempted up to 8 times; when all 8 attempts fail
# the routine stops and returns the bytes stored so far.
sub gen_random {
my ($insn) = @_;
my $func = "OPENSSL_ia32_${insn}_bytes";	# exported symbol
my $tag  = "${insn}_bytes";			# suffix of the local labels
print<<___;
.globl	${func}
.type	${func},\@abi-omnipotent
.align	16
${func}:
.cfi_startproc
	endbranch
	xor	%rax, %rax	# bytes stored so far (return value)
	cmp	\$0,$arg2
	je	.Ldone_${tag}

	mov	\$8,%r11	# attempts left for this draw
.Loop_${tag}:
	${insn}	%r10
	jc	.Lbreak_${tag}
	dec	%r11
	jnz	.Loop_${tag}
	jmp	.Ldone_${tag}

.align	16
.Lbreak_${tag}:
	cmp	\$8,$arg2
	jb	.Ltail_${tag}
	mov	%r10,($arg1)	# store all eight bytes of the draw
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${tag}
	mov	\$8,%r11	# fresh attempt budget for the next draw
	jmp	.Loop_${tag}

.align	16
.Ltail_${tag}:
	mov	%r10b,($arg1)	# fewer than 8 bytes wanted: store bytewise
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${tag}

.Ldone_${tag}:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.cfi_endproc
.size	${func},.-${func}
___
}
# Emit OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes.
gen_random("rdrand");
gen_random("rdseed");

# STDOUT was aliased to the pipe into the translator, so a failure to close
# it is reported as an error rather than ignored.
close STDOUT or die "error closing STDOUT: $!";	# flush
523