#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# xref: /third_party/openssl/crypto/x86cpuid.pl (revision e1051a39)
8e1051a39Sopenharmony_ci
# Find the directory this script was started from so that the perlasm
# helper library can be loaded from next to it.  When $0 carries no
# directory component the prefix is empty, i.e. "perlasm" relative to the
# current directory is the only candidate.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file and
# is removed from @ARGV.  The three-argument open keeps the name from being
# interpreted as a mode, and a failure is fatal instead of silently leaving
# the generated code on the inherited STDOUT.
$output = pop;
if ($output) {
	open STDOUT, ">", $output or die "can't open $output: $!";
}

# The first remaining argument is handed to the perlasm back end.
&asm_init($ARGV[0]);

# $sse2 switches on the SSE2-dependent code paths generated further down.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
18e1051a39Sopenharmony_ci
# Emitter for a vendor-string check.  The three words that CPUID leaf 0
# left in ebx, edx and ecx are compared against @sig (given in that
# register order); a "differs" flag per word is produced in al and
# accumulated in $acc.  $acc, and ZF after the last emitted instruction,
# is zero only when all three words match.  The upper 24 bits of eax must
# already be zero, because only its low byte is written here.
my $cmp_vendor = sub {
	my ($acc,@sig) = @_;
	my @regs = ("ebx","edx","ecx");
	foreach my $i (0..2) {
		&cmp($regs[$i],$sig[$i]);
		&setne(&LB("eax"));
		if ($i == 0)	{ &mov($acc,"eax"); }	# first word seeds the accumulator
		else		{ &or($acc,"eax"); }	# remaining words are merged in
	}
};

# OPENSSL_ia32_cpuid(ptr)
#
# Generated routine that collects processor capability bits.  The dword at
# offset 8 of the buffer passed as the only argument is zeroed first and
# later receives ebx of CPUID leaf 7 when that leaf exists.  The adjusted
# edx/ecx feature words of CPUID leaf 1 are returned in eax/edx.  When
# EFLAGS bit #21 cannot be toggled, CPUID is not executed and the routine
# returns with eax = edx = 0.
&function_begin("OPENSSL_ia32_cpuid");
	&xor("edx","edx");
	&pushf();
	&pop("eax");
	&mov("ecx","eax");		# keep the original EFLAGS image
	&xor("eax",1<<21);		# flip bit #21 ...
	&push("eax");
	&popf();
	&pushf();
	&pop("eax");			# ... and read EFLAGS back
	&xor("ecx","eax");		# ecx = bits that really changed
	&xor("eax","eax");
	&mov("esi",&wparam(0));
	&mov(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt("ecx",21);
	&jnc(&label("nocpuid"));	# bit #21 is stuck: leave with eax=edx=0
	&cpuid();			# leaf 0
	&mov("edi","eax");		# max value for standard query level

	&xor("eax","eax");
	$cmp_vendor->("ebp",0x756e6547,0x49656e69,0x6c65746e);	# "Genu" "ineI" "ntel"
	&jz(&label("intel"));		# ebp == 0 indicates Intel CPU

	$cmp_vendor->("esi",0x68747541,0x69746E65,0x444D4163);	# "Auth" "enti" "cAMD"
	&jnz(&label("intel"));		# esi != 0: not AMD, use the common path

	# AMD specific
	&mov("eax",0x80000000);
	&cpuid();
	&cmp("eax",0x80000001);
	&jb(&label("intel"));
	&mov("esi","eax");		# esi = max value for extended query level
	&mov("eax",0x80000001);
	&cpuid();
	&or("ebp","ecx");
	&and("ebp",1<<11|1);		# isolate XOP bit, keep the non-Intel marker in bit #0
	&cmp("esi",0x80000008);
	&jb(&label("intel"));

	&mov("eax",0x80000008);
	&cpuid();
	&movz("esi",&LB("ecx"));	# number of cores - 1
	&inc("esi");			# number of cores

	&mov("eax",1);
	&xor("ecx","ecx");
	&cpuid();
	&bt("edx",28);
	&jnc(&label("generic"));
	&shr("ebx",16);
	&and("ebx",0xff);		# bits 16..23 of ebx
	&cmp("ebx","esi");
	&ja(&label("generic"));		# more than the core count: keep bit #28
	&and("edx",0xefffffff);		# clear hyper-threading bit
	&jmp(&label("generic"));

&set_label("intel");
	&cmp("edi",4);
	&mov("esi",-1);			# value used when leaf 4 is not available
	&jb(&label("nocacheinfo"));

	&mov("eax",4);
	&mov("ecx",0);			# query L1D
	&cpuid();
	&mov("esi","eax");
	&shr("esi",14);
	&and("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov("eax",1);
	&xor("ecx","ecx");
	&cpuid();
	&and("edx",0xbfefffff);		# force reserved bits #20, #30 to 0
	&cmp("ebp",0);
	&jne(&label("notintel"));
	&or("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and(&HB("eax"),15);		# family ID
	&cmp(&HB("eax"),15);		# P4?
	&jne(&label("notintel"));
	&or("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt("edx",28);			# test hyper-threading bit
	&jnc(&label("generic"));
	&and("edx",0xefffffff);		# clear it provisionally
	&cmp("esi",0);
	&je(&label("generic"));		# esi == 0: it stays cleared

	&or("edx",0x10000000);		# otherwise set it again ...
	&shr("ebx",16);
	&cmp(&LB("ebx"),1);
	&ja(&label("generic"));		# ... and keep it if bits 16..23 of ebx exceed 1
	&and("edx",0xefffffff);		# clear hyper-threading bit if not

&set_label("generic");
	&and("ebp",1<<11);		# isolate AMD XOP flag
	&and("ecx",0xfffff7ff);		# force 11th bit to 0
	&mov("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or("ebp","ecx");		# merge AMD XOP flag

	&cmp("edi",7);
	&mov("edi",&wparam(0));		# mov does not disturb the flags of the cmp
	&jb(&label("no_extended_info"));
	&mov("eax",7);
	&xor("ecx","ecx");
	&cpuid();
	&mov(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt("ebp",27);			# check OSXSAVE bit
	&jnc(&label("clear_avx"));
	&xor("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and("eax",6);
	&cmp("eax",6);
	&je(&label("done"));		# bits #1 and #2 both set: nothing to mask
	&cmp("eax",2);
	&je(&label("clear_avx"));	# only bit #1 set: mask the AVX group
&set_label("clear_xmm");
	&and("ebp",0xfdfffffd);		# clear AESNI and PCLMULQDQ bits
	&and("esi",0xfeffffff);		# clear FXSR
&set_label("clear_avx");
	&and("ebp",0xefffe7ff);		# clear AVX, FMA and AMD XOP bits
	&and(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov("eax","esi");
	&mov("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");
163e1051a39Sopenharmony_ci
# The capability vector is defined outside of this module.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the result of RDTSC in edx:eax.  When bit #4 of the first
# OPENSSL_ia32cap_P word is clear the instruction is skipped and both
# registers are returned as zero.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor("eax","eax");		# default result: 0
	&xor("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt(&DWP(0,"ecx"),4);
	&jnc(&label("notsc"));		# bit #4 clear: keep the zero result
	&rdtsc();
&set_label("notsc");
	&ret();
&function_end_B("OPENSSL_rdtsc");
176e1051a39Sopenharmony_ci
# OPENSSL_instrument_halt()
#
# Only does real work in Ring 0 [read DJGPP+MS-DOS+privileged DPMI host],
# yet it is safe to call on any [supported] 32-bit platform: the caller
# simply checks whether the return value is zero or not.  The result in
# edx:eax is the time-stamp counter difference measured across a HLT;
# zero is returned when there is no TSC, when the two low bits of %cs are
# not zero, or when EFLAGS bit #9 shows that interrupts are disabled.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt(&DWP(0,"ecx"),4);
	&jnc(&label("nohalt"));		# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and("eax",3);
	&jnz(&label("nohalt"));		# not enough privileges

	&pushf();
	&pop("eax");
	&bt("eax",9);
	&jnc(&label("nohalt"));		# interrupts are disabled

	&rdtsc();
	&push("edx");			# park the starting tick on the stack
	&push("eax");
	&halt();
	&rdtsc();

	&sub("eax",&DWP(0,"esp"));	# 64-bit difference: low half ...
	&sbb("edx",&DWP(4,"esp"));	# ... then high half with borrow
	&add("esp",8);			# drop the parked tick
	&ret();

&set_label("nohalt");
	&xor("eax","eax");
	&xor("edx","edx");
	&ret();
&function_end_B("OPENSSL_instrument_halt");
210e1051a39Sopenharmony_ci
# OPENSSL_far_spin(selector, offset)
#
# There is essentially a single use for this routine, under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# which yields the number of spins until the closest timer interrupt.
# The dword at selector:offset is sampled once and then polled until it
# changes; eax counts the polls.  With interrupts disabled (EFLAGS bit #9
# clear) nothing is polled and eax = edx = 0 is returned.

&function_begin_B("OPENSSL_far_spin");
	&pushf();
	&pop("eax");
	&bt("eax",9);
	&jnc(&label("nospin"));		# interrupts are disabled

	&mov("eax",&DWP(4,"esp"));	# 1st argument, loaded into %ds below
	&mov("ecx",&DWP(8,"esp"));	# 2nd argument, offset of the watched dword
	&data_word(0x90d88e1e);		# push %ds, mov %eax,%ds
	&xor("eax","eax");		# spin counter
	&mov("edx",&DWP(0,"ecx"));	# reference value
	&jmp(&label("spin"));

	&align(16);
&set_label("spin");
	&inc("eax");
	&cmp("edx",&DWP(0,"ecx"));
	&je(&label("spin"));		# unchanged: poll again

	&data_word(0x1f909090);		# pop	%ds
	&ret();

&set_label("nospin");
	&xor("eax","eax");
	&xor("edx","edx");
	&ret();
&function_end_B("OPENSSL_far_spin");
246e1051a39Sopenharmony_ci
# OPENSSL_wipe_cpu()
#
# Scrubs register state: eax and edx are zeroed, xmm0-xmm7 are cleared
# when the generator was run with -DOPENSSL_IA32_SSE2 and the capability
# word has both the SSE2 and FXSR bits set, and the x87/MMX bank is
# overwritten with a run of fldz followed by finit.  The value returned in
# eax is the address 4 bytes above the stack pointer at the time of the
# final lea.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = first capability word itself
	# ecx now holds the capability bits, not their address, so the bit
	# must be tested in the register.  A memory operand here would use
	# the bit mask as a pointer.
	# NOTE(review): the bit index is kept as in the original; confirm that
	# bit #1 is the intended x87 indicator.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		for my $i (0..7) {		# zero xmm0 through xmm7
			&pxor	("xmm$i","xmm$i");
		}
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");
274e1051a39Sopenharmony_ci
# OPENSSL_atomic_add(ptr, amount)
#
# Adds the second argument to the dword addressed by the first one with a
# lock cmpxchg retry loop and returns the updated value in eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push("ebx");
	&nop();
	&mov("eax",&DWP(0,"edx"));	# value the exchange is expected to find
&set_label("spin");
	&lea("ebx",&DWP(0,"eax","ecx"));	# candidate = expected + increment
	&nop();
	&data_word(0x1ab10ff0);		# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne(&label("spin"));		# memory changed meanwhile: retry with the reloaded eax
	&mov("eax","ebx");		# OpenSSL expects the new value
	&pop("ebx");
	&ret();
&function_end_B("OPENSSL_atomic_add");
290e1051a39Sopenharmony_ci
# OPENSSL_cleanse(ptr, len)
#
# Overwrites len bytes starting at ptr with zeros.  Fewer than 7 bytes are
# cleared one byte at a time.  Longer regions are first advanced byte-wise
# to a 4-byte boundary, then cleared a dword at a time, and whatever
# remains (fewer than 4 bytes) is handled by the byte loop again.
&function_begin_B("OPENSSL_cleanse");
	&mov("edx",&wparam(0));		# destination
	&mov("ecx",&wparam(1));		# byte count
	&xor("eax","eax");		# the fill value
	&cmp("ecx",7);
	&jae(&label("lot"));
	&cmp("ecx",0);
	&je(&label("ret"));		# nothing to do
&set_label("little");
	&mov(&BP(0,"edx"),"al");
	&sub("ecx",1);
	&lea("edx",&DWP(1,"edx"));	# lea keeps the flags of the sub
	&jnz(&label("little"));
&set_label("ret");
	&ret();

&set_label("lot",16);
	&test("edx",3);
	&jz(&label("aligned"));
	&mov(&BP(0,"edx"),"al");	# one byte towards the 4-byte boundary
	&lea("ecx",&DWP(-1,"ecx"));
	&lea("edx",&DWP(1,"edx"));
	&jmp(&label("lot"));
&set_label("aligned");
	&mov(&DWP(0,"edx"),"eax");
	&lea("ecx",&DWP(-4,"ecx"));
	&test("ecx",-4);		# at least 4 bytes left?
	&lea("edx",&DWP(4,"edx"));
	&jnz(&label("aligned"));
	&cmp("ecx",0);
	&jne(&label("little"));		# clear the remaining tail bytes
	&ret();
&function_end_B("OPENSSL_cleanse");
324e1051a39Sopenharmony_ci
# CRYPTO_memcmp(a, b, len)
#
# Compares len bytes of a and b without leaving the loop early: the XOR of
# every byte pair is OR-ed into al.  Returns 0 in eax when all bytes are
# equal (or len is 0) and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
	&push("esi");
	&push("edi");
	&mov("esi",&wparam(0));
	&mov("edi",&wparam(1));
	&mov("ecx",&wparam(2));
	&xor("eax","eax");		# accumulated difference
	&xor("edx","edx");
	&cmp("ecx",0);
	&je(&label("no_data"));		# empty input compares equal
&set_label("loop");
	&mov("dl",&BP(0,"esi"));
	&lea("esi",&DWP(1,"esi"));
	&xor("dl",&BP(0,"edi"));	# dl = difference of this byte pair
	&lea("edi",&DWP(1,"edi"));
	&or("al","dl");
	&dec("ecx");
	&jnz(&label("loop"));
	&neg("eax");			# non-zero becomes negative ...
	&shr("eax",31);			# ... and the sign bit is the result
&set_label("no_data");
	&pop("edi");
	&pop("esi");
	&ret();
&function_end_B("CRYPTO_memcmp");
{
# Register roles shared by the two bus-instrumentation routines below.
my $prev_tick = "esi";	# low half of the previous RDTSC reading
my $prev_diff = "ebx";	# previous tick difference
my $dst = "edi";	# current output slot
my $left = "ecx";	# slots still to be filled
my $budget = "ebp";	# probe limit (OPENSSL_instrument_bus2 only)

# OPENSSL_instrument_bus(out, cnt)
#
# For each of cnt consecutive dwords at out: take the difference between
# the current and the previous RDTSC low word, flush the slot's cache line
# and add the difference to the slot with a locked add.  Returns cnt, or 0
# when the generator was run without -DOPENSSL_IA32_SSE2 or when the
# capability word lacks bit #4 (TSC) or bit #19 (CLFLUSH).
&function_begin("OPENSSL_instrument_bus");
	&mov("eax",0);
	if ($sse2) {
		&picmeup("edx","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"edx"),4);
		&jnc(&label("nogo"));		# no TSC
		&bt(&DWP(0,"edx"),19);
		&jnc(&label("nogo"));		# no CLFLUSH

		&mov($dst,&wparam(0));		# load arguments
		&mov($left,&wparam(1));

		# collect 1st tick
		&rdtsc();
		&mov($prev_tick,"eax");		# previous tick = tick
		&mov($prev_diff,0);		# previous difference = 0
		&clflush(&DWP(0,$dst));
		&data_byte(0xf0);		# lock
		&add(&DWP(0,$dst),$prev_diff);
		&jmp(&label("loop"));

	&set_label("loop",16);
		&rdtsc();
		&mov("edx","eax");		# put aside tick (yes, I neglect edx)
		&sub("eax",$prev_tick);		# diff
		&mov($prev_tick,"edx");		# previous tick = tick
		&mov($prev_diff,"eax");		# previous difference = diff
		&clflush(&DWP(0,$dst));
		&data_byte(0xf0);		# lock
		&add(&DWP(0,$dst),"eax");	# accumulate diff
		&lea($dst,&DWP(4,$dst));	# advance to the next slot
		&sub($left,1);			# one slot fewer to go
		&jnz(&label("loop"));

		&mov("eax",&wparam(1));		# result: the requested count
	&set_label("nogo");
	}
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Same capability checks as above.  Here the output pointer advances, and
# the remaining count drops, only when a new tick difference is not equal
# to the previous one, and at most max accumulation steps are performed.
# Returns cnt minus the slots left unfilled, or 0 when the checks fail.
&function_begin("OPENSSL_instrument_bus2");
	&mov("eax",0);
	if ($sse2) {
		&picmeup("edx","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"edx"),4);
		&jnc(&label("nogo"));		# no TSC
		&bt(&DWP(0,"edx"),19);
		&jnc(&label("nogo"));		# no CLFLUSH

		&mov($dst,&wparam(0));		# load arguments
		&mov($left,&wparam(1));
		&mov($budget,&wparam(2));

		&rdtsc();			# collect 1st tick
		&mov($prev_tick,"eax");		# previous tick = tick
		&mov($prev_diff,0);		# previous difference = 0

		&clflush(&DWP(0,$dst));
		&data_byte(0xf0);		# lock
		&add(&DWP(0,$dst),$prev_diff);

		&rdtsc();			# collect 1st diff
		&mov("edx","eax");		# put aside tick (yes, I neglect edx)
		&sub("eax",$prev_tick);		# diff
		&mov($prev_tick,"edx");		# previous tick = tick
		&mov($prev_diff,"eax");		# previous difference = diff
		&jmp(&label("loop2"));

	&set_label("loop2",16);
		&clflush(&DWP(0,$dst));
		&data_byte(0xf0);		# lock
		&add(&DWP(0,$dst),"eax");	# accumulate diff

		&sub($budget,1);
		&jz(&label("done2"));		# probe limit reached

		&rdtsc();
		&mov("edx","eax");		# put aside tick (yes, I neglect edx)
		&sub("eax",$prev_tick);		# diff
		&mov($prev_tick,"edx");		# previous tick = tick
		&cmp("eax",$prev_diff);
		&mov($prev_diff,"eax");		# previous difference = diff
		&mov("edx",0);			# mov keeps the flags of the cmp
		&setne("dl");			# edx = 1 if the difference changed
		&sub($left,"edx");		# conditional decrement of the count
		&lea($dst,&DWP(0,$dst,"edx",4));	# conditional step to the next slot
		&jnz(&label("loop2"));		# flags still come from the sub

	&set_label("done2");
		&mov("eax",&wparam(1));
		&sub("eax",$left);		# requested minus unfilled
	&set_label("nogo");
	}
&function_end("OPENSSL_instrument_bus2");
}
451e1051a39Sopenharmony_ci
# gen_random(insn)
#
# Emits the routine OPENSSL_ia32_<insn>_bytes(buf, len), where insn is the
# mnemonic of the random-number instruction to use.  The generated routine
# executes the instruction up to 8 times per 32-bit word until it reports
# success through the carry flag, stores whole words while at least 4
# bytes are wanted and spills the last word byte by byte.  It returns in
# eax the number of bytes actually written, which is short of len when the
# instruction keeps failing.
sub gen_random {
my ($insn) = @_;
my $fname = "OPENSSL_ia32_${insn}_bytes";
&function_begin_B($fname);
	&push("edi");
	&push("ebx");
	&xor("eax","eax");		# return value
	&mov("edi",&wparam(0));		# output pointer
	&mov("ebx",&wparam(1));		# bytes still wanted

	&cmp("ebx",0);
	&je(&label("done"));

	&mov("ecx",8);			# attempts per word
&set_label("loop");
	&$insn("edx");
	&jc(&label("break"));		# carry set: edx holds a fresh value
	&loop(&label("loop"));
	&jmp(&label("done"));		# all attempts failed: give up

&set_label("break",16);
	&cmp("ebx",4);
	&jb(&label("tail"));
	&mov(&DWP(0,"edi"),"edx");
	&lea("edi",&DWP(4,"edi"));
	&add("eax",4);
	&sub("ebx",4);
	&jz(&label("done"));
	&mov("ecx",8);			# fresh set of attempts for the next word
	&jmp(&label("loop"));

&set_label("tail",16);
	&mov(&BP(0,"edi"),"dl");
	&lea("edi",&DWP(1,"edi"));
	&inc("eax");
	&shr("edx",8);			# expose the next byte of the word
	&dec("ebx");
	&jnz(&label("tail"));

&set_label("done");
	&xor("edx","edx");		# Clear random value from registers
	&pop("ebx");
	&pop("edi");
	&ret();
&function_end_B($fname);
}
# Emit one byte-fetching routine per random-number instruction.
for my $insn ("rdrand","rdseed") {
	&gen_random($insn);
}

&initseg("OPENSSL_cpuid_setup");

for my $sym ("OPENSSL_cpuid_setup","OPENSSL_ia32cap_P") {
	&hidden($sym);
}

&asm_finish();

# Closing STDOUT explicitly makes a failed write of the output visible.
close STDOUT or die "error closing STDOUT: $!";
508