#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Locate the perlasm helper directory relative to this script and load the
# 32-bit x86 assembler front end that supplies every &mnemonic() used below.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument names the output file; the generated
# assembly is written to STDOUT, which is redirected there.
$output = pop and open STDOUT,">$output";

&asm_init($ARGV[0]);

# $sse2 enables the code paths that emit SSE2/CLFLUSH instructions.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor with CPUID.  The dword at offset 8 of the buffer
# passed as the first argument is cleared on entry and, when the maximum
# standard leaf is at least 7, receives CPUID.7:%ebx (with bit 5, AVX2,
# cleared whenever the AVX bits are cleared below).  The value returned in
# %edx:%eax is an adjusted copy of CPUID.1 %ecx:%edx; both halves are zero
# when the CPUID instruction is not available.
#
# Register roles after the vendor check: %edi = maximum standard leaf,
# %ebp = 0 only for "GenuineIntel" (later reused for the %ecx copy),
# %esi = core count hint (later reused for the %edx copy).
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to toggle EFLAGS bit 21; CPUID is usable only if it can be flipped.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# bit 21 set if the flag changed
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();			# leaf 0: vendor string and max leaf
	&mov	("edi","eax");		# max value for standard query level

	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));	# neither vendor: take the common path

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");		# max extended leaf
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);		# CPUID.1:%ebx bits 16..23
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# mov leaves the flags from cmp intact
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);		# bits 1 and 2 both set: keep everything
	&je	(&label("done"));
	&cmp	("eax",2);		# only bit 1 set: drop the AVX family
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax, or zero when bit 4 of the
# first dword of OPENSSL_ia32cap_P is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
# OPENSSL_instrument_halt()
#
# Returns in %edx:%eax the number of time-stamp-counter ticks that elapse
# across one HLT instruction.  Returns zero instead when the TSC bit (bit 4
# of the first dword of OPENSSL_ia32cap_P) is clear, when the low two bits
# of %cs are non-zero, or when EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# park the starting tick on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the parked tick
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# OPENSSL_far_spin(selector, offset)
#
# Loads the first argument into %ds, then counts in %eax how many times the
# dword at the address given by the second argument is re-read before its
# value changes.  Returns zero immediately when EFLAGS bit 9 is clear.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# reference value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax and %edx, clears %xmm0-%xmm7 (only when built with $sse2 and
# the capability word has both bit 26 and bit 24 set), resets the x87
# register bank, and returns %esp+4 in %eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# %ecx = capability word, no longer a pointer
	# Test the loaded word itself.  The previous form, bt 1,(%ecx), treated
	# the capability bits as an address and read from arbitrary memory.
	# NOTE(review): the bit index is kept at 1 as before; the x87 feature
	# flag is CPUID.1:%edx bit 0 - confirm which bit is intended.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount)
#
# Adds the second argument to the dword addressed by the first using a
# lock cmpxchg retry loop, and returns the resulting value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# candidate = current + increment
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero into len bytes starting at ptr.  Lengths below 7 are written
# byte by byte; longer runs are first advanced to a 4-byte boundary, then
# written a dword at a time, with any remainder finished byte by byte.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes still to go?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# Returns 0 in %eax when the two len-byte buffers are identical and 1
# otherwise (0 for len == 0).  Every byte pair is always XORed and folded
# into %al; the loop has no branch that depends on the data.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");		# accumulate differences
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator -> sign bit set
	&shr	("eax",31);		# ...-> exactly 1
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
# Register assignments shared by the two bus-instrumentation routines.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# For each of cnt consecutive dwords starting at out: reads the TSC, flushes
# the dword's cache line, and lock-adds the tick difference since the
# previous reading to it.  Returns cnt, or 0 when built without $sse2 or
# when capability bit 4 (TSC) or bit 19 (CLFLUSH) is clear.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Like OPENSSL_instrument_bus, but only advances to the next output dword
# when the new tick difference is not equal to the previous one, and stops
# after max probes at the latest.  Returns cnt minus the number of output
# slots still unfilled, or 0 under the same conditions as above.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);		# mov (not xor) so the cmp flags survive
	&setne	("dl");
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));	# flags still come from the sub above

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_<rdop>_bytes(buf, len), where $rdop is the mnemonic of
# the random-number instruction to use.  The generated routine fills buf
# with up to len bytes, retrying the instruction up to 8 times per word and
# giving up on the first word for which all 8 attempts leave the carry flag
# clear.  It returns the number of bytes actually written in %eax.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget for the next word
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));	# carry set: %edx holds a valid value
	&loop	(&label("loop"));
	&jmp	(&label("done"));	# budget exhausted

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);
	&jmp	(&label("loop"));

&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");	# fewer than 4 bytes left: store bytewise
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";