#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Locate the perlasm directory relative to this script and load the x86
# front end that provides the &mnemonic() helpers used below.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file.
# Fail loudly if it cannot be opened instead of silently emitting the
# generated assembly on the inherited STDOUT.
$output = pop;
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

# -DOPENSSL_IA32_SSE2 anywhere on the command line enables the SSE2-only
# code paths generated further down.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(buf)
#
# Probes the processor with CPUID.
#   - Returns 0 in %edx:%eax when EFLAGS bit 21 cannot be toggled, i.e.
#     when CPUID is not available.
#   - Otherwise returns the adjusted leaf-1 %edx word in %eax and the
#     adjusted leaf-1 %ecx word, with the AMD XOP flag merged into bit 11,
#     in %edx.
#   - The dword at offset 8 of buf is cleared up front and receives
#     leaf-7 %ebx when the maximum standard leaf is at least 7.
&function_begin("OPENSSL_ia32_cpuid");
	# Try to flip EFLAGS bit 21; %ecx ends up holding the bits that changed.
	&xor	("edx","edx");
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string; %ebp stays 0 only for "GenuineIntel".
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Not Intel: %esi stays 0 only for "AuthenticAMD".
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");		# remember max extended query level
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);		# used when leaf 4 is not available
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# mov leaves the flags of the cmp intact
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);		# bits 1 and 2 both set: keep everything
	&je	(&label("done"));
	&cmp	("eax",2);		# only bit 1 set: drop the AVX family only
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax when bit 4 of the first
# OPENSSL_ia32cap_P word is set, and 0 otherwise.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
# OPENSSL_instrument_halt()
#
# Returns in %edx:%eax the number of time-stamp ticks spent across a single
# hlt instruction.  Returns 0 without halting when bit 4 of the first
# OPENSSL_ia32cap_P word is clear, when the low two bits of %cs are
# non-zero, or when EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# keep the starting tick on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit difference: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.

# OPENSSL_far_spin(selector, offset)
#
# Loads selector into %ds, samples the dword at offset and counts loop
# iterations in %eax until that dword changes.  Returns 0 in %edx:%eax
# without spinning when EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax/%edx, optionally the xmm0-xmm7 registers (only when built
# with -DOPENSSL_IA32_SSE2 and the capability word has bits 24 and 26
# set), then resets the x87 stack.  Returns in %eax the address 4 bytes
# above the stack pointer at the time of return.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# %ecx now holds the capability word itself
	# Test the loaded value.  Using a memory operand through %ecx here
	# would dereference the capability bits as if they were an address.
	# NOTE(review): bit index 1 is kept as found; confirm it is the intended bit.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount)
#
# Adds amount to the dword at ptr with a lock cmpxchg retry loop and
# returns the new value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero into len bytes starting at ptr.  Lengths below 7 are written
# byte by byte; longer buffers are first advanced byte-wise to a 4-byte
# boundary, then written a dword at a time, with any tail handled by the
# byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes left?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# ORs together the byte-wise XOR of all len byte pairs, always walking the
# full length, and returns 0 in %eax when every pair matched (or len is 0)
# and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# any non-zero accumulator becomes negative
	&shr	("eax",31);		# ... and the sign bit is the 0/1 result
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# Without -DOPENSSL_IA32_SSE2 the generated function only returns 0.
# Otherwise it returns 0 when bit 4 or bit 19 of the first
# OPENSSL_ia32cap_P word is clear; else, for each of cnt dwords at out, it
# flushes the cache line, lock-adds the low 32 bits of the latest
# time-stamp difference into the dword, and finally returns cnt.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Same capability checks as OPENSSL_instrument_bus.  Accumulates time-stamp
# differences into out[] but only advances to the next dword (and counts
# down cnt) when the new difference differs from the previous one; stops
# after max probes or when cnt reaches zero.  Returns cnt minus the
# remaining count.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);		# mov keeps the flags of the cmp for setne
	&setne	("dl");
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the name of
# the instruction helper to call ("rdrand" or "rdseed" below).  The
# generated function retries the instruction up to 8 times per value,
# stores successful values into buf as whole dwords, or byte by byte for a
# final partial dword, and returns in %eax the number of bytes written.
# That count falls short of len if 8 consecutive attempts fail.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));	# carry set: a value was delivered
	&loop	(&label("loop"));
	&jmp	(&label("done"));

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);		# fresh retry budget for the next value
	&jmp	(&label("loop"));

&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";