#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Hardware SPARC T4 support by David S. Miller.
# ====================================================================

# MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than
# code generated by Sun C 5.2.

# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x
# faster than software. Multi-process benchmark saturates at 12x
# single-process result on 8-core processor, or ~11GBps per 2.85GHz
# socket.

# $output is the last argument if it looks like a file (it has an extension)
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;

# Redirect the generated assembly to $output.  Three-argument open keeps
# the file name from being parsed as a mode, and a failure is fatal
# instead of silently leaving the output on the inherited STDOUT.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Integer arithmetic for the rest of the file: the round generators rely
# on truncating division in ($i+1)/2 and $j/2.
use integer;

($ctx,$inp,$len)=("%i0","%i1","%i2");	# input arguments

# 64-bit values
# @X holds the input block as 64-bit words (two message words each), plus
# one extra word that is only loaded when the input is unaligned.
@X=("%o0","%o1","%o2","%o3","%o4","%o5","%o7","%g1","%g2");
$tx="%g3";
($AB,$CD)=("%g4","%g5");

# 32-bit values
@V=($A,$B,$C,$D)=map("%l$_",(0..3));
($t1,$t2,$t3,$saved_asi)=map("%l$_",(4..7));
($shr,$shl1,$shl2)=("%i3","%i4","%i5");

# Additive round constants.  Every round generator loads K[$i+1] on behalf
# of the following round, so a 65th dummy entry (0) is appended for the
# load emitted by round 63.
my @K=(	0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
	0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
	0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
	0x6b901122,0xfd987193,0xa679438e,0x49b40821,

	0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
	0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
	0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
	0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,

	0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
	0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
	0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
	0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,

	0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
	0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
	0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
	0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391, 0	);

# R0($i,$a,$b,$c,$d) - append the code for round $i (0..14) to $code.
#
# On entry $t1 holds c^d and $t2 holds X[$i]+K[$i]; the emitted code
# computes ((c^d)&b)^d, adds it and $t2 to $a, rotates $a left by $rot
# and adds $b.  It also leaves X[$i+1]+K[$i+1] in $t2 and b^c in $t1 for
# the next round.  Odd rounds additionally shift/merge the next 64-bit
# input word into place using $shr/$shl1/$shl2; even rounds just extract
# the upper half of the current one.
sub R0 {
    my ($i,$a,$b,$c,$d) = @_;
    my $rot = (7,12,17,22)[$i%4];
    my $j   = ($i+1)/2;		# 64-bit word holding X[$i+1]

    if ($i&1) {
	$code.=<<___;
	srlx	@X[$j],$shr,@X[$j]	! align X[`$i+1`]
	and	$b,$t1,$t1		! round $i
	sllx	@X[$j+1],$shl1,$tx
	add	$t2,$a,$a
	sllx	$tx,$shl2,$tx
	xor	$d,$t1,$t1
	or	$tx,@X[$j],@X[$j]
	sethi	%hi(@K[$i+1]),$t2
	add	$t1,$a,$a
	or	$t2,%lo(@K[$i+1]),$t2
	sll	$a,$rot,$t3
	add	@X[$j],$t2,$t2		! X[`$i+1`]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	xor	$b,$c,$t1
	add	$t3,$a,$a
___
    } else {
	$code.=<<___;
	srlx	@X[$j],32,$tx		! extract X[`2*$j+1`]
	and	$b,$t1,$t1		! round $i
	add	$t2,$a,$a
	xor	$d,$t1,$t1
	sethi	%hi(@K[$i+1]),$t2
	add	$t1,$a,$a
	or	$t2,%lo(@K[$i+1]),$t2
	sll	$a,$rot,$t3
	add	$tx,$t2,$t2		! X[`2*$j+1`]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	xor	$b,$c,$t1
	add	$t3,$a,$a
___
    }
}

# R0_1($i,$a,$b,$c,$d) - append the code for round 15, the last round of
# the first group.  Same computation as R0, but the look-ahead fetches
# X[1] and prepares $t1 with "andn $b,$c" as expected by R1.
sub R0_1 {
    my ($i,$a,$b,$c,$d) = @_;
    my $rot = (7,12,17,22)[$i%4];

    $code.=<<___;
	srlx	@X[0],32,$tx		! extract X[1]
	and	$b,$t1,$t1		! round $i
	add	$t2,$a,$a
	xor	$d,$t1,$t1
	sethi	%hi(@K[$i+1]),$t2
	add	$t1,$a,$a
	or	$t2,%lo(@K[$i+1]),$t2
	sll	$a,$rot,$t3
	add	$tx,$t2,$t2		! X[1]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	andn	$b,$c,$t1
	add	$t3,$a,$a
___
}

# R1($i,$a,$b,$c,$d) - append the code for round $i (16..31) to $code.
#
# Combines b&d with the value prepared in $t1 by the previous round, then
# adds, rotates and adds $b as in R0.  $j is the index of the message
# word needed by round $i+1; round 31 uses the index formula of R2 and
# prepares $t1 with xor instead of andn.
sub R1 {
    my ($i,$a,$b,$c,$d) = @_;
    my $rot = (5,9,14,20)[$i%4];
    my $j   = $i<31 ? (1+5*($i+1))%16 : (5+3*($i+1))%16;
    my $xi  = $X[$j/2];

    # Odd message words live in the upper half of their 64-bit register
    # and have to be extracted into the scratch register first.
    if ($j&1) {
	$xi = $tx;
	$code.=<<___;
	srlx	@X[$j/2],32,$xi		! extract X[$j]
___
    }
    $code.=<<___;
	and	$b,$d,$t3		! round $i
	add	$t2,$a,$a
	or	$t3,$t1,$t1
	sethi	%hi(@K[$i+1]),$t2
	add	$t1,$a,$a
	or	$t2,%lo(@K[$i+1]),$t2
	sll	$a,$rot,$t3
	add	$xi,$t2,$t2		! X[$j]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	`$i<31?"andn":"xor"`	$b,$c,$t1
	add	$t3,$a,$a
___
}

# R2($i,$a,$b,$c,$d) - append the code for round $i (32..47) to $code.
#
# XORs $b into the c^d value prepared in $t1 by the previous round, then
# adds, rotates and adds $b.  $j is the index of the message word needed
# by round $i+1; round 47 uses the index formula of R3.
sub R2 {
    my ($i,$a,$b,$c,$d) = @_;
    my $rot = (4,11,16,23)[$i%4];
    my $j   = $i<47 ? (5+3*($i+1))%16 : (0+7*($i+1))%16;
    my $xi  = $X[$j/2];

    if ($j&1) {
	$xi = $tx;
	$code.=<<___;
	srlx	@X[$j/2],32,$xi		! extract X[$j]
___
    }
    $code.=<<___;
	add	$t2,$a,$a		! round $i
	xor	$b,$t1,$t1
	sethi	%hi(@K[$i+1]),$t2
	add	$t1,$a,$a
	or	$t2,%lo(@K[$i+1]),$t2
	sll	$a,$rot,$t3
	add	$xi,$t2,$t2		! X[$j]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	xor	$b,$c,$t1
	add	$t3,$a,$a
___
}

# R3($i,$a,$b,$c,$d) - append the code for round $i (48..63) to $code.
#
# Computes c^(b|~d) from scratch with orn/xor, then adds, rotates and
# adds $b.  The look-ahead for round 63 reads the dummy K[64] entry.
sub R3 {
    my ($i,$a,$b,$c,$d) = @_;
    my $rot = (6,10,15,21)[$i%4];
    my $j   = (0+7*($i+1))%16;
    my $xi  = $X[$j/2];

    $code.=<<___;
	add	$t2,$a,$a		! round $i
___
    if ($j&1) {
	$xi = $tx;
	$code.=<<___;
	srlx	@X[$j/2],32,$xi		! extract X[$j]
___
    }
    $code.=<<___;
	orn	$b,$d,$t1
	sethi	%hi(@K[$i+1]),$t2
	xor	$c,$t1,$t1
	or	$t2,%lo(@K[$i+1]),$t2
	add	$t1,$a,$a
	sll	$a,$rot,$t3
	add	$xi,$t2,$t2		! X[$j]+K[`$i+1`]
	srl	$a,32-$rot,$a
	add	$b,$t3,$t3
	add	$t3,$a,$a
___
}

# Function entry, the hardware path taken when CFR_MD5 is set in
# OPENSSL_sparcv9cap_P[1] (aligned and unaligned input variants), and the
# head of the software loop: load one block, pack A,B and C,D for the
# final accumulation, align X[0] and precompute X[0]+K[0] in $t2 and C^D
# in $t1 for round 0.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.section	".text",#alloc,#execinstr

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	ossl_md5_block_asm_data_order
.align	32
ossl_md5_block_asm_data_order:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+4],%g1		! OPENSSL_sparcv9cap_P[1]

	andcc	%g1, CFR_MD5, %g0
	be	.Lsoftware
	nop

	mov	4, %g1
	andcc	%o1, 0x7, %g0
	lda	[%o0 + %g0]0x88, %f0		! load context
	lda	[%o0 + %g1]0x88, %f1
	add	%o0, 8, %o0
	lda	[%o0 + %g0]0x88, %f2
	lda	[%o0 + %g1]0x88, %f3
	bne,pn	%icc, .Lhwunaligned
	sub	%o0, 8, %o0

.Lhw_loop:
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	subcc	%o2, 1, %o2		! done yet?
	ldd	[%o1 + 0x38], %f22
	add	%o1, 0x40, %o1
	prefetch [%o1 + 63], 20

	.word	0x81b02800		! MD5

	bne,pt	SIZE_T_CC, .Lhw_loop
	nop

.Lhwfinish:
	sta	%f0, [%o0 + %g0]0x88	! store context
	sta	%f1, [%o0 + %g1]0x88
	add	%o0, 8, %o0
	sta	%f2, [%o0 + %g0]0x88
	sta	%f3, [%o0 + %g1]0x88
	retl
	nop

.align	8
.Lhwunaligned:
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
.Lhwunaligned_loop:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	subcc	%o2, 1, %o2		! done yet?
	ldd	[%o1 + 0x40], %f26
	add	%o1, 0x40, %o1
	prefetch [%o1 + 63], 20

	faligndata %f10, %f12, %f8
	faligndata %f12, %f14, %f10
	faligndata %f14, %f16, %f12
	faligndata %f16, %f18, %f14
	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22

	.word	0x81b02800		! MD5

	bne,pt	SIZE_T_CC, .Lhwunaligned_loop
	for	%f26, %f26, %f10	! %f10=%f26

	ba	.Lhwfinish
	nop

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME,%sp

	rd	%asi,$saved_asi
	wr	%g0,0x88,%asi		! ASI_PRIMARY_LITTLE
	and	$inp,7,$shr
	andn	$inp,7,$inp

	sll	$shr,3,$shr		! *=8
	mov	56,$shl2
	ld	[$ctx+0],$A
	sub	$shl2,$shr,$shl2
	ld	[$ctx+4],$B
	and	$shl2,32,$shl1
	add	$shl2,8,$shl2
	ld	[$ctx+8],$C
	sub	$shl2,$shl1,$shl2	! shr+shl1+shl2==64
	ld	[$ctx+12],$D
	nop

.Loop:
	cmp	$shr,0			! was inp aligned?
	ldxa	[$inp+0]%asi,@X[0]	! load little-endian input
	ldxa	[$inp+8]%asi,@X[1]
	ldxa	[$inp+16]%asi,@X[2]
	ldxa	[$inp+24]%asi,@X[3]
	ldxa	[$inp+32]%asi,@X[4]
	sllx	$A,32,$AB		! pack A,B
	ldxa	[$inp+40]%asi,@X[5]
	sllx	$C,32,$CD		! pack C,D
	ldxa	[$inp+48]%asi,@X[6]
	or	$B,$AB,$AB
	ldxa	[$inp+56]%asi,@X[7]
	or	$D,$CD,$CD
	bnz,a,pn	%icc,.+8
	ldxa	[$inp+64]%asi,@X[8]

	srlx	@X[0],$shr,@X[0]	! align X[0]
	sllx	@X[1],$shl1,$tx
	sethi	%hi(@K[0]),$t2
	sllx	$tx,$shl2,$tx
	or	$t2,%lo(@K[0]),$t2
	or	$tx,@X[0],@X[0]
	xor	$C,$D,$t1
	add	@X[0],$t2,$t2		! X[0]+K[0]
___

# Unroll all 64 rounds.  After each round the working registers are
# rotated (A,B,C,D) -> (D,A,B,C), so the generators always see the
# register being updated as their $a argument.
for my $round (0..63) {
    if    ($round<15)	{ R0($round,@V);   }
    elsif ($round<16)	{ R0_1($round,@V); }
    elsif ($round<32)	{ R1($round,@V);   }
    elsif ($round<48)	{ R2($round,@V);   }
    else		{ R3($round,@V);   }
    unshift(@V,pop(@V));
}

# Loop tail: add the block's starting state (packed in $AB/$CD) to the
# working registers, loop while blocks remain, then store the context,
# restore %asi and return.
$code.=<<___;
	srlx	$AB,32,$t1		! unpack A,B,C,D and accumulate
	add	$inp,64,$inp		! advance inp
	srlx	$CD,32,$t2
	add	$t1,$A,$A
	subcc	$len,1,$len		! done yet?
	add	$AB,$B,$B
	add	$t2,$C,$C
	add	$CD,$D,$D
	srl	$B,0,$B			! clruw	$B
	bne	SIZE_T_CC,.Loop
	srl	$D,0,$D			! clruw	$D

	st	$A,[$ctx+0]		! write out ctx
	st	$B,[$ctx+4]
	st	$C,[$ctx+8]
	st	$D,[$ctx+12]

	wr	%g0,$saved_asi,%asi
	ret
	restore
.type	ossl_md5_block_asm_data_order,#function
.size	ossl_md5_block_asm_data_order,(.-ossl_md5_block_asm_data_order)

.asciz	"MD5 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
# unvis($mnemonic,$rs1,$rs2,$rd) - encode a three-operand VIS instruction
# on floating-point registers as a raw ".word" directive.
#
# Returns ".word\t0x<encoding> !<original instruction>" for the mnemonics
# listed in %visopf.  For any other mnemonic, for an operand that is not
# an %fN register, or for an odd register number >= 32, the instruction is
# returned as plain text ("mnemonic\trs1,rs2,rd") for the assembler.
sub unvis {
    my ($mnemonic,$rs1,$rs2,$rd)=@_;
    my %visopf = (	"faligndata"	=> 0x048,
			"for"		=> 0x07c	);
    my $ref = "$mnemonic\t$rs1,$rs2,$rd";

    # Both locals are proper lexicals; the opcode field used to leak into
    # a package global because of "my $ref,$opf;".
    my $opf = $visopf{$mnemonic} or return $ref;

    # The loop variable aliases $rs1/$rs2/$rd, so each register name is
    # replaced in place by its numeric encoding.
    foreach my $reg ($rs1,$rs2,$rd) {
	return $ref unless $reg =~ /%f([0-9]{1,2})/;
	my $num = $1;
	if ($num>=32) {
	    return $ref if ($num&1);
	    # re-encode for upper double register addressing
	    $num = ($num|$num>>5)&31;
	}
	$reg = $num;
    }

    return sprintf ".word\t0x%08x !%s",
		   0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
		   $ref;
}

# unalignaddr($mnemonic,$rs1,$rs2,$rd) - encode "alignaddr" on integer
# registers as a raw ".word" directive.
#
# Register names %g/%o/%l/%i 0-7 map to numbers 0-31 through %bias.  If
# any operand is not such a register the instruction is returned as plain
# text instead.
sub unalignaddr {
    my ($mnemonic,$rs1,$rs2,$rd)=@_;
    my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
    my $ref  = "$mnemonic\t$rs1,$rs2,$rd";

    foreach my $reg ($rs1,$rs2,$rd) {
	return $ref unless $reg =~ /%([goli])([0-7])/;
	$reg = $bias{$1}+$2;
    }

    return sprintf ".word\t0x%08x !%s",
		   0x81b00300|$rd<<25|$rs1<<14|$rs2,
		   $ref;
}

# Post-process the accumulated $code line by line and print it:
#   1. evaluate the `...` expressions left in the templates,
#   2. hand f* instructions with three %f operands to unvis(),
#   3. hand alignaddr with three integer registers to unalignaddr().
foreach my $line (split("\n",$code)) {
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    $line =~ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		unvis($1,$2,$3,$4)
	     /ge;
    $line =~ s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
		unalignaddr($1,$2,$3,$4)
	     /ge;

    print $line,"\n";
}

# Buffered write errors only surface at close, so treat them as fatal.
close STDOUT or die "error closing STDOUT: $!";