#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Locate the perlasm helpers relative to this script and load the 32-bit
# x86 assembler front-end.  $dir stays empty when $0 carries no directory
# component, so the plain "perlasm" entry is what gets searched.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file.
# Three-argument open keeps the name from being parsed for a mode, and a
# failed open stops the run instead of silently writing the generated
# assembly to whatever STDOUT happened to be.
$output = pop;
if ($output) {
	open STDOUT, ">", $output or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

# -DOPENSSL_IA32_SSE2 among the remaining arguments enables the optional
# SSE2/CLFLUSH code paths generated further down.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(out)
#
# Probes the processor with CPUID and returns adjusted copies of the
# leaf-1 feature words: %eax carries the copy of CPUID.1:%edx and %edx the
# copy of CPUID.1:%ecx.  The dword at offset 8 of the buffer passed as the
# first argument is cleared up front and later receives CPUID.7:%ebx when
# that leaf exists.  When the CPUID instruction is not available both
# return registers are left at 0.
#
# Register use inside the routine:
#   edi - maximum standard CPUID leaf, later the output pointer
#   ebp - 0 for "GenuineIntel", non-zero otherwise; later the %ecx copy
#   esi - core-count information, later the %edx copy
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip bit 21 of EFLAGS; if the bit cannot be changed the
	# CPUID instruction is not implemented.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# bit 21 set here means the flip stuck
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();			# leaf 0: vendor string and max leaf
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string with "GenuineIntel"; ebp ends up 0 only
	# if all three parts match.
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Same comparison against "AuthenticAMD"; anything else is handled
	# by the generic ("intel") path.
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();			# eax = max extended leaf
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");		# remember max extended leaf
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	# Bits 16-23 of ebx are compared with the core count; the
	# hyper-threading bit survives only when that field is larger.
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);		# marker used when leaf 4 is missing
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);		# L1D not shared: leave the bit clear
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	# The compare happens before edi is reloaded with the output
	# pointer; mov leaves the flags for the jb intact.
	&cmp	("edi",7);
	&mov	("edi",&wparam(0));
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);		# keep bits 1 and 2 of the result
	&cmp	("eax",6);
	&je	(&label("done"));	# both set: keep everything
	&cmp	("eax",2);
	&je	(&label("clear_avx"));	# only bit 1 set: drop AVX only
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

# The capability vector is defined outside this module.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax when bit 4 of the first
# capability word is set, and 0 in both registers otherwise.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");		# default result: 0
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);	# TSC available?
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# OPENSSL_instrument_halt()
#
# Returns in %edx:%eax the number of time-stamp ticks spent in a single
# HLT instruction.  Returns 0 without halting when the TSC capability bit
# is clear, when the two low bits of %cs are non-zero, or when bit 9 of
# EFLAGS (interrupt enable) is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);		# privilege level from the selector
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# keep the starting tick on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit difference: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved starting tick
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# OPENSSL_far_spin(selector, offset)
#
# Loads the first argument into %ds, samples the dword at the offset given
# by the second argument and counts loop iterations until that dword
# changes.  The count is returned in %eax.  Returns 0 in %eax and %edx
# without spinning when bit 9 of EFLAGS (interrupt enable) is clear.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));	# selector
	&mov	("ecx",&DWP(8,"esp"));	# offset within that segment
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");		# spin counter
	&mov	("edx",&DWP(0,"ecx"));	# reference value to watch
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax and %edx and, when the first capability word reports an x87
# unit, wipes the fp/mm register bank.  In builds with
# -DOPENSSL_IA32_SSE2, %xmm0-%xmm7 are zeroed as well provided both the
# SSE2 and FXSR bits are set.  The value returned in %eax is the address
# of %esp+4 as seen on entry.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = first capability word
	# ecx now holds the capability *value*, so the bit test has to be
	# made on the register; testing DWP(0,"ecx") would use that value
	# as an address.  Bit 0 of CPUID.1:EDX is the x87 FPU flag, which
	# is what the no_x87 label refers to.
	# NOTE(review): the test previously used bit index 1 - confirm
	# that bit 0 is the intended one.
	&bt	("ecx",0);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount)
#
# Atomically adds the second argument to the dword addressed by the first
# one, retrying with LOCK CMPXCHG until the update succeeds, and returns
# the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));	# current value, the expected operand
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# candidate = current + increment
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Overwrites len bytes at ptr with zeros.  Lengths below 7 are cleared
# byte by byte.  Longer buffers are first advanced to a 4-byte boundary,
# then cleared a dword at a time, with any remainder finished by the byte
# loop.  A length of 0 returns immediately.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");		# the fill value
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);		# pointer 4-byte aligned yet?
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes still to go?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));	# 1-3 trailing bytes
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# Compares len bytes of the two buffers without an early exit: the XOR of
# every byte pair is OR-ed into %al and only the final accumulator decides
# the result.  Returns 0 in %eax when the buffers match (or len is 0) and
# 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");		# difference accumulator
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator sets bit 31
	&shr	("eax",31);		# reduce to 0 or 1
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
# Register assignment shared by the two bus-instrumentation routines.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# For each of cnt consecutive dwords at out: flushes the dword's cache
# line and adds, with a LOCK prefix, the low 32 bits of the time-stamp
# delta since the previous sample.  Returns cnt in %eax.  Returns 0 when
# generated without -DOPENSSL_IA32_SSE2, or when capability bits 4 (TSC)
# or 19 (CLFLUSH) are clear.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));	# return the requested count
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Variant that advances to the next output dword only when the measured
# delta differs from the previous one, and that gives up after max probes.
# Returns in %eax the second argument minus whatever is left of cnt.
# Returns 0 under the same conditions as OPENSSL_instrument_bus.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));	# probe budget exhausted

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);		# mov keeps the flags from cmp
	&setne	("dl");			# edx = 1 if the diff changed
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));	# flags still those of the sub above

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_<rdop>_bytes(buf, len), where $rdop names the
# instruction helper to call ("rdrand" or "rdseed" below).  The generated
# routine fills buf with len bytes taken from that instruction, allowing
# up to 8 attempts for every 32-bit word, and returns in %eax the number
# of bytes actually written; the count falls short of len if the attempts
# for some word are exhausted.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget for the next word
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));	# carry set: edx holds a valid value
	&loop	(&label("loop"));
	&jmp	(&label("done"));	# budget exhausted, give up

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);
	&jmp	(&label("loop"));

&set_label("tail",16);
	# fewer than 4 bytes left: store the value one byte at a time
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

# Module trailer.  NOTE(review): presumably initseg registers
# OPENSSL_cpuid_setup to run at load time and hidden restricts symbol
# visibility - confirm against x86asm.pl.
&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

# Buffered write errors only surface here, so a failed close is fatal.
close STDOUT or die "error closing STDOUT: $!";