#! /usr/bin/env perl
# Author: Marc Bevand <bevand_m (at) epita.fr>
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# MD5 optimized for AMD64.
#
# This script builds the assembly text for ossl_md5_block_asm_data_order in
# $code and prints it through a pipe to x86_64-xlate.pl (see below).
#
# All four roundN_step() generators share the same parameter list:
#   $pos     -1 for the first step of a round: the register setup needed by
#            that round is emitted before the step itself.  Any other value
#            emits only the step (callers pass 1 for the last step of a
#            round, but only -1 is examined).
#   $dst     register that is updated by the step
#   $x,$y,$z registers holding the other three state words
#   $k_next  index of the message word to preload into %r10d for the
#            following step
#   $T_i     additive constant for this step
#   $s       left-rotate amount for this step
# Each generator appends to $code and has no meaningful return value.

use strict;

my $code;

# round1_step() does:
#   dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s)
#   %r10d = X[k_next]
#   %r11d = z' (copy of z for the next step)
# Each round1_step() takes about 5.3 clocks (9 instructions, 1.7 IPC)
sub round1_step
{
    my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
    $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1);
    $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
    $code .= <<EOF;
    xor     $y, %r11d               /* y ^ ... */
    lea     $T_i($dst,%r10d),$dst   /* Const + dst + ... */
    and     $x, %r11d               /* x & ... */
    mov     $k_next*4(%rsi),%r10d   /* (NEXT STEP) X[$k_next] */
    xor     $z, %r11d               /* z ^ ... */
    add     %r11d, $dst             /* dst += ... */
    rol     \$$s, $dst              /* dst <<< s */
    mov     $y, %r11d               /* (NEXT STEP) z' = $y */
    add     $x, $dst                /* dst += x */
EOF
}

# round2_step() does:
#   dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s)
#   %r10d = X[k_next]
#   %r11d = z' (copy of z for the next step)
#   %r12d = z' (copy of z for the next step)
# Each round2_step() takes about 5.4 clocks (11 instructions, 2.0 IPC)
sub round2_step
{
    my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
    $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
    $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
    $code .= <<EOF;
    not     %r11d                   /* not z */
    and     $x, %r12d               /* x & z */
    lea     $T_i($dst,%r10d),$dst   /* Const + dst + ... */
    and     $y, %r11d               /* y & (not z) */
    mov     $k_next*4(%rsi),%r10d   /* (NEXT STEP) X[$k_next] */
    or      %r11d, %r12d            /* (y & (not z)) | (x & z) */
    mov     $y, %r11d               /* (NEXT STEP) z' = $y */
    add     %r12d, $dst             /* dst += ... */
    mov     $y, %r12d               /* (NEXT STEP) z' = $y */
    rol     \$$s, $dst              /* dst <<< s */
    add     $x, $dst                /* dst += x */
EOF
}

# round3_step() does:
#   dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s)
#   %r10d = X[k_next]
#   %r11d = y' (copy of y for the next step)
# Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC)
#
# $round3_alter is private state that flips on every call: successive steps
# emit the "rol" and the next-step "mov" in alternating order.  The two
# instructions touch different registers, so the computed result is the same
# either way (presumably an instruction-scheduling tweak).
{ my $round3_alter=0;
sub round3_step
{
    my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
    $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1);
    $code .= <<EOF;
    lea     $T_i($dst,%r10d),$dst   /* Const + dst + ... */
    xor     $z, %r11d               /* z ^ ... */
    mov     $k_next*4(%rsi),%r10d   /* (NEXT STEP) X[$k_next] */
    xor     $x, %r11d               /* x ^ ... */
    add     %r11d, $dst             /* dst += ... */
EOF
    $code .= <<EOF if ($round3_alter);
    rol     \$$s, $dst              /* dst <<< s */
    mov     $x, %r11d               /* (NEXT STEP) y' = $x */
EOF
    $code .= <<EOF if (!$round3_alter);
    mov     $x, %r11d               /* (NEXT STEP) y' = $x */
    rol     \$$s, $dst              /* dst <<< s */
EOF
    $code .= <<EOF;
    add     $x, $dst                /* dst += x */
EOF
    $round3_alter^=1;
}
}

# round4_step() does:
#   dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s)
#   %r10d = X[k_next]
#   %r11d = not z' (copy of not z for the next step)
# Each round4_step() takes about 5.2 clocks (9 instructions, 1.7 IPC)
sub round4_step
{
    my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
    $code .= " mov \$0xffffffff, %r11d\n" if ($pos == -1);
    $code .= " xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/\n"
        if ($pos == -1);
    $code .= <<EOF;
    lea     $T_i($dst,%r10d),$dst   /* Const + dst + ... */
    or      $x, %r11d               /* x | ... */
    mov     $k_next*4(%rsi),%r10d   /* (NEXT STEP) X[$k_next] */
    xor     $y, %r11d               /* y ^ ... */
    add     %r11d, $dst             /* dst += ... */
    mov     \$0xffffffff, %r11d
    rol     \$$s, $dst              /* dst <<< s */
    xor     $y, %r11d               /* (NEXT STEP) not z' = not $y */
    add     $x, $dst                /* dst += x */
EOF
}

no warnings qw(uninitialized);
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 output additionally gets the exception handler emitted further down.
my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for x86_64-xlate.pl next to this script, then in ../../perlasm/
# relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# From here on STDOUT is a pipe into x86_64-xlate.pl, which is run under the
# current perl with $flavour and $output as its arguments.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

$code .= <<EOF;
.text
.align 16

.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order,\@function,3
ossl_md5_block_asm_data_order:
.cfi_startproc
    push    %rbp
.cfi_push   %rbp
    push    %rbx
.cfi_push   %rbx
    push    %r12
.cfi_push   %r12
    push    %r14
.cfi_push   %r14
    push    %r15
.cfi_push   %r15
.Lprologue:

    # rdi = arg #1 (ctx, MD5_CTX pointer)
    # rsi = arg #2 (ptr, data pointer)
    # rdx = arg #3 (nbr, number of 16-word blocks to process)
    mov     %rdi, %rbp          # rbp = ctx
    shl     \$6, %rdx           # rdx = nbr in bytes
    lea     (%rsi,%rdx), %rdi   # rdi = end
    mov     0*4(%rbp), %eax     # eax = ctx->A
    mov     1*4(%rbp), %ebx     # ebx = ctx->B
    mov     2*4(%rbp), %ecx     # ecx = ctx->C
    mov     3*4(%rbp), %edx     # edx = ctx->D
    # end is 'rdi'
    # ptr is 'rsi'
    # A is 'eax'
    # B is 'ebx'
    # C is 'ecx'
    # D is 'edx'

    cmp     %rdi, %rsi          # cmp end with ptr
    je      .Lend               # jmp if ptr == end

    # BEGIN of loop over 16-word blocks
.Lloop: # save old values of A, B, C, D
    mov     %eax, %r8d
    mov     %ebx, %r9d
    mov     %ecx, %r14d
    mov     %edx, %r15d
EOF
# The $k_next argument of the last step of each round names the message word
# used by the first step of the following round.
round1_step(-1,'%eax','%ebx','%ecx','%edx', '1','0xd76aa478', '7');
round1_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xe8c7b756','12');
round1_step( 0,'%ecx','%edx','%eax','%ebx', '3','0x242070db','17');
round1_step( 0,'%ebx','%ecx','%edx','%eax', '4','0xc1bdceee','22');
round1_step( 0,'%eax','%ebx','%ecx','%edx', '5','0xf57c0faf', '7');
round1_step( 0,'%edx','%eax','%ebx','%ecx', '6','0x4787c62a','12');
round1_step( 0,'%ecx','%edx','%eax','%ebx', '7','0xa8304613','17');
round1_step( 0,'%ebx','%ecx','%edx','%eax', '8','0xfd469501','22');
round1_step( 0,'%eax','%ebx','%ecx','%edx', '9','0x698098d8', '7');
round1_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8b44f7af','12');
round1_step( 0,'%ecx','%edx','%eax','%ebx','11','0xffff5bb1','17');
round1_step( 0,'%ebx','%ecx','%edx','%eax','12','0x895cd7be','22');
round1_step( 0,'%eax','%ebx','%ecx','%edx','13','0x6b901122', '7');
round1_step( 0,'%edx','%eax','%ebx','%ecx','14','0xfd987193','12');
round1_step( 0,'%ecx','%edx','%eax','%ebx','15','0xa679438e','17');
round1_step( 1,'%ebx','%ecx','%edx','%eax', '1','0x49b40821','22');

round2_step(-1,'%eax','%ebx','%ecx','%edx', '6','0xf61e2562', '5');
round2_step( 0,'%edx','%eax','%ebx','%ecx','11','0xc040b340', '9');
round2_step( 0,'%ecx','%edx','%eax','%ebx', '0','0x265e5a51','14');
round2_step( 0,'%ebx','%ecx','%edx','%eax', '5','0xe9b6c7aa','20');
round2_step( 0,'%eax','%ebx','%ecx','%edx','10','0xd62f105d', '5');
round2_step( 0,'%edx','%eax','%ebx','%ecx','15', '0x2441453', '9');
round2_step( 0,'%ecx','%edx','%eax','%ebx', '4','0xd8a1e681','14');
round2_step( 0,'%ebx','%ecx','%edx','%eax', '9','0xe7d3fbc8','20');
round2_step( 0,'%eax','%ebx','%ecx','%edx','14','0x21e1cde6', '5');
round2_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xc33707d6', '9');
round2_step( 0,'%ecx','%edx','%eax','%ebx', '8','0xf4d50d87','14');
round2_step( 0,'%ebx','%ecx','%edx','%eax','13','0x455a14ed','20');
round2_step( 0,'%eax','%ebx','%ecx','%edx', '2','0xa9e3e905', '5');
round2_step( 0,'%edx','%eax','%ebx','%ecx', '7','0xfcefa3f8', '9');
round2_step( 0,'%ecx','%edx','%eax','%ebx','12','0x676f02d9','14');
round2_step( 1,'%ebx','%ecx','%edx','%eax', '5','0x8d2a4c8a','20');

round3_step(-1,'%eax','%ebx','%ecx','%edx', '8','0xfffa3942', '4');
round3_step( 0,'%edx','%eax','%ebx','%ecx','11','0x8771f681','11');
round3_step( 0,'%ecx','%edx','%eax','%ebx','14','0x6d9d6122','16');
round3_step( 0,'%ebx','%ecx','%edx','%eax', '1','0xfde5380c','23');
round3_step( 0,'%eax','%ebx','%ecx','%edx', '4','0xa4beea44', '4');
round3_step( 0,'%edx','%eax','%ebx','%ecx', '7','0x4bdecfa9','11');
round3_step( 0,'%ecx','%edx','%eax','%ebx','10','0xf6bb4b60','16');
round3_step( 0,'%ebx','%ecx','%edx','%eax','13','0xbebfbc70','23');
round3_step( 0,'%eax','%ebx','%ecx','%edx', '0','0x289b7ec6', '4');
round3_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xeaa127fa','11');
round3_step( 0,'%ecx','%edx','%eax','%ebx', '6','0xd4ef3085','16');
round3_step( 0,'%ebx','%ecx','%edx','%eax', '9', '0x4881d05','23');
round3_step( 0,'%eax','%ebx','%ecx','%edx','12','0xd9d4d039', '4');
round3_step( 0,'%edx','%eax','%ebx','%ecx','15','0xe6db99e5','11');
round3_step( 0,'%ecx','%edx','%eax','%ebx', '2','0x1fa27cf8','16');
round3_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xc4ac5665','23');

round4_step(-1,'%eax','%ebx','%ecx','%edx', '7','0xf4292244', '6');
round4_step( 0,'%edx','%eax','%ebx','%ecx','14','0x432aff97','10');
round4_step( 0,'%ecx','%edx','%eax','%ebx', '5','0xab9423a7','15');
round4_step( 0,'%ebx','%ecx','%edx','%eax','12','0xfc93a039','21');
round4_step( 0,'%eax','%ebx','%ecx','%edx', '3','0x655b59c3', '6');
round4_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8f0ccc92','10');
round4_step( 0,'%ecx','%edx','%eax','%ebx', '1','0xffeff47d','15');
round4_step( 0,'%ebx','%ecx','%edx','%eax', '8','0x85845dd1','21');
round4_step( 0,'%eax','%ebx','%ecx','%edx','15','0x6fa87e4f', '6');
round4_step( 0,'%edx','%eax','%ebx','%ecx', '6','0xfe2ce6e0','10');
round4_step( 0,'%ecx','%edx','%eax','%ebx','13','0xa3014314','15');
round4_step( 0,'%ebx','%ecx','%edx','%eax', '4','0x4e0811a1','21');
round4_step( 0,'%eax','%ebx','%ecx','%edx','11','0xf7537e82', '6');
round4_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xbd3af235','10');
round4_step( 0,'%ecx','%edx','%eax','%ebx', '9','0x2ad7d2bb','15');
round4_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xeb86d391','21');
$code .= <<EOF;
    # add old values of A, B, C, D
    add     %r8d, %eax
    add     %r9d, %ebx
    add     %r14d, %ecx
    add     %r15d, %edx

    # loop control
    add     \$64, %rsi          # ptr += 64
    cmp     %rdi, %rsi          # cmp end with ptr
    jb      .Lloop              # jmp if ptr < end
    # END of loop over 16-word blocks

.Lend:
    mov     %eax, 0*4(%rbp)     # ctx->A = A
    mov     %ebx, 1*4(%rbp)     # ctx->B = B
    mov     %ecx, 2*4(%rbp)     # ctx->C = C
    mov     %edx, 3*4(%rbp)     # ctx->D = D

    mov     (%rsp),%r15
.cfi_restore    %r15
    mov     8(%rsp),%r14
.cfi_restore    %r14
    mov     16(%rsp),%r12
.cfi_restore    %r12
    mov     24(%rsp),%rbx
.cfi_restore    %rbx
    mov     32(%rsp),%rbp
.cfi_restore    %rbp
    add     \$40,%rsp
.cfi_adjust_cfa_offset  -40
.Lepilogue:
    ret
.cfi_endproc
.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order
EOF

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#               CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Emitted for Win64 only, together with the .pdata/.xdata entries that refer
# to it.  The 40-byte adjustment and the -8..-40 offsets in the handler
# mirror the five registers pushed in the function prologue above.
if ($win64) {
my $rec="%rcx";
my $frame="%rdx";
my $context="%r8";
my $disp="%r9";

$code.=<<___;
.extern __imp_RtlVirtualUnwind
.type   se_handler,\@abi-omnipotent
.align  16
se_handler:
    push    %rsi
    push    %rdi
    push    %rbx
    push    %rbp
    push    %r12
    push    %r13
    push    %r14
    push    %r15
    pushfq
    sub     \$64,%rsp

    mov     120($context),%rax  # pull context->Rax
    mov     248($context),%rbx  # pull context->Rip

    lea     .Lprologue(%rip),%r10
    cmp     %r10,%rbx           # context->Rip<.Lprologue
    jb      .Lin_prologue

    mov     152($context),%rax  # pull context->Rsp

    lea     .Lepilogue(%rip),%r10
    cmp     %r10,%rbx           # context->Rip>=.Lepilogue
    jae     .Lin_prologue

    lea     40(%rax),%rax

    mov     -8(%rax),%rbp
    mov     -16(%rax),%rbx
    mov     -24(%rax),%r12
    mov     -32(%rax),%r14
    mov     -40(%rax),%r15
    mov     %rbx,144($context)  # restore context->Rbx
    mov     %rbp,160($context)  # restore context->Rbp
    mov     %r12,216($context)  # restore context->R12
    mov     %r14,232($context)  # restore context->R14
    mov     %r15,240($context)  # restore context->R15

.Lin_prologue:
    mov     8(%rax),%rdi
    mov     16(%rax),%rsi
    mov     %rax,152($context)  # restore context->Rsp
    mov     %rsi,168($context)  # restore context->Rsi
    mov     %rdi,176($context)  # restore context->Rdi

    mov     40($disp),%rdi      # disp->ContextRecord
    mov     $context,%rsi       # context
    mov     \$154,%ecx          # sizeof(CONTEXT)
    .long   0xa548f3fc          # cld; rep movsq

    mov     $disp,%rsi
    xor     %rcx,%rcx           # arg1, UNW_FLAG_NHANDLER
    mov     8(%rsi),%rdx        # arg2, disp->ImageBase
    mov     0(%rsi),%r8         # arg3, disp->ControlPc
    mov     16(%rsi),%r9        # arg4, disp->FunctionEntry
    mov     40(%rsi),%r10       # disp->ContextRecord
    lea     56(%rsi),%r11       # &disp->HandlerData
    lea     24(%rsi),%r12       # &disp->EstablisherFrame
    mov     %r10,32(%rsp)       # arg5
    mov     %r11,40(%rsp)       # arg6
    mov     %r12,48(%rsp)       # arg7
    mov     %rcx,56(%rsp)       # arg8, (NULL)
    call    *__imp_RtlVirtualUnwind(%rip)

    mov     \$1,%eax            # ExceptionContinueSearch
    add     \$64,%rsp
    popfq
    pop     %r15
    pop     %r14
    pop     %r13
    pop     %r12
    pop     %rbp
    pop     %rbx
    pop     %rdi
    pop     %rsi
    ret
.size   se_handler,.-se_handler

.section    .pdata
.align  4
    .rva    .LSEH_begin_ossl_md5_block_asm_data_order
    .rva    .LSEH_end_ossl_md5_block_asm_data_order
    .rva    .LSEH_info_ossl_md5_block_asm_data_order

.section    .xdata
.align  8
.LSEH_info_ossl_md5_block_asm_data_order:
    .byte   9,0,0,0
    .rva    se_handler
___
}

print $code;

# Closing the pipe is checked so that a failure of the child process or a
# buffered write error is not silently ignored.
close STDOUT or die "error closing STDOUT: $!";