11cb0ef41Sopenharmony_ci#! /usr/bin/env perl
21cb0ef41Sopenharmony_ci# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
31cb0ef41Sopenharmony_ci#
41cb0ef41Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
51cb0ef41Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
61cb0ef41Sopenharmony_ci# in the file LICENSE in the source distribution or at
71cb0ef41Sopenharmony_ci# https://www.openssl.org/source/license.html
81cb0ef41Sopenharmony_ci
91cb0ef41Sopenharmony_ci#
101cb0ef41Sopenharmony_ci# ====================================================================
111cb0ef41Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
121cb0ef41Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
131cb0ef41Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
141cb0ef41Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
151cb0ef41Sopenharmony_ci# ====================================================================
161cb0ef41Sopenharmony_ci#
171cb0ef41Sopenharmony_ci# Eternal question is what's wrong with compiler generated code? The
181cb0ef41Sopenharmony_ci# trick is that it's possible to reduce the number of shifts required
191cb0ef41Sopenharmony_ci# to perform rotations by maintaining copy of 32-bit value in upper
201cb0ef41Sopenharmony_ci# bits of 64-bit register. Just follow mux2 and shrp instructions...
211cb0ef41Sopenharmony_ci# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
221cb0ef41Sopenharmony_ci# is >50% better than HP C and >2x better than gcc.
231cb0ef41Sopenharmony_ci
# The trailing command-line argument is taken as the output file name, but
# only when it ends in something that looks like a file extension.
if (@ARGV && $ARGV[-1] =~ m|\.\w+$|) {
    $output = pop;
}
else {
    $output = undef;
}
261cb0ef41Sopenharmony_ci
# Start the generated assembly text in $code: two .ident strings naming the
# module and its author, followed by the .explicit directive and a blank line.
$code=<<___;
.ident  \"sha1-ia64.s, version 1.3\"
.ident  \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
.explicit

___
331cb0ef41Sopenharmony_ci
341cb0ef41Sopenharmony_ci
# Select the mnemonic substituted for $ADDP in the generated prologue.
# On HP-UX it defaults to "addp4" and is switched to plain "add" as soon as
# any remaining command-line argument contains "+DD64" or "-mlp64"; on every
# other OS it is always "add".
#
# The flag test is a grouped alternation.  The earlier bracketed form,
# [\+DD|\-mlp]64, was a character class and so matched any single one of the
# characters + D | - m l p followed by "64" (for example "tmp64").
if ($^O eq "hpux") {
    $ADDP="addp4";
    for (@ARGV) { $ADDP="add" if (/(?:\+DD|-mlp)64/); }
} else { $ADDP="add"; }
391cb0ef41Sopenharmony_ci
#$human=1;
# Names substituted into the assembly templates for the working variables
# (A..E), the chaining values (h0..h4), the four round constants and the
# 16-entry message schedule @X.  With $human set the names stay symbolic,
# which is useful for visual code auditing; otherwise register names are used.
if (!$human) {
	($A,$B,$C,$D,$E)      = map { "loc$_" } (0..4);
	($h0,$h1,$h2,$h3,$h4) = map { "loc$_" } (5..9);
	($K_00_19, $K_20_39, $K_40_59, $K_60_79) = qw(r14 r15 loc10 loc11);
	@X = map { "r$_" } (16..31);
}
else {
	($A,$B,$C,$D,$E)      = qw(A B C D E);
	($h0,$h1,$h2,$h3,$h4) = qw(h0 h1 h2 h3 h4);
	($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
	    qw(K_00_19 K_20_39 K_40_59 K_60_79);
	@X = map { "X$_" } (0..15);
}
571cb0ef41Sopenharmony_ci
# Append the assembly for one of rounds 0..15 to the caller's output scalar.
#
#   BODY_00_15(\$code, $i, $a, $b, $c, $d, $e)
#
# The first argument is a reference to the scalar receiving the text, $i is
# the round number and $a..$e are the names substituted for the five working
# variables in this round.  The globals @X and $K_00_19 are read as well.
#
# For $i==0 the start of the first message word load is emitted up front.
# Rounds 0..14 interleave the byte loads of the next word ("forward Xload");
# round 15 instead starts the schedule update for the following round
# ("forward Xupdate").
sub BODY_00_15 {
local	*code=shift;		# alias $code to the scalar passed by reference
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# name of the next schedule word (element, not a slice)

$code.=<<___ if ($i==0);
{ .mmi;	ld1	$X[$i]=[inp],2		    // MSB
	ld1	tmp2=[tmp3],2		};;
{ .mmi;	ld1	tmp0=[inp],2
	ld1	tmp4=[tmp3],2		    // LSB
	dep	$X[$i]=$X[$i],tmp2,8,8	};;
___
if ($i<15) {
	$code.=<<___;
{ .mmi;	ld1	$Xn=[inp],2		    // forward Xload
	nop.m	0x0
	dep	tmp1=tmp0,tmp4,8,8	};;
{ .mmi;	ld1	tmp2=[tmp3],2		    // forward Xload
	and	tmp4=$c,$b
	dep	$X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi;	add	$e=$e,$K_00_19		    // e+=K_00_19
	andcm	tmp1=$d,$b
	dep.z	tmp5=$a,5,27		};; // a<<5
{ .mmi;	add	$e=$e,$X[$i]		    // e+=Xload
	or	tmp4=tmp4,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		};; // a>>27
{ .mmi;	ld1	tmp0=[inp],2		    // forward Xload
	add	$e=$e,tmp4		    // e+=F_00_19(b,c,d)
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	ld1	tmp4=[tmp3],2		    // forward Xload
	or	tmp5=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp5		    // e+=ROTATE(a,5)
	dep	$Xn=$Xn,tmp2,8,8	    // forward Xload
	mux2	$X[$i]=$X[$i],0x44	} //;;

___
	}
else	{
	$code.=<<___;
{ .mii;	and	tmp3=$c,$b
	dep	tmp1=tmp0,tmp4,8,8;;
	dep	$X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi;	add	$e=$e,$K_00_19		    // e+=K_00_19
	andcm	tmp1=$d,$b
	dep.z	tmp5=$a,5,27		};; // a<<5
{ .mmi;	add	$e=$e,$X[$i]		    // e+=Xupdate
	or	tmp4=tmp3,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
	nop.i	0			};;
{ .mmi;	add	$e=$e,tmp4		    // e+=F_00_19(b,c,d)
	xor	$Xn=$Xn,tmp3		    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi; or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	mux2	$X[$i]=$X[$i],0x44	};;

___
	}
}
1231cb0ef41Sopenharmony_ci
# Append the assembly for one of rounds 16..19 to the caller's output scalar.
#
#   BODY_16_19(\$code, $i, $a, $b, $c, $d, $e)
#
# Arguments are as for the other BODY_* generators: a reference to the
# scalar receiving the text, the round number, and the names substituted for
# the five working variables.  The template adds $K_00_19 and the schedule
# word $X[$i%16], and interleaves the update of the next schedule word
# ("forward Xupdate").  The globals @X and $K_00_19 are read.
sub BODY_16_19 {
local	*code=shift;		# alias $code to the scalar passed by reference
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# name of the next schedule word (element, not a slice)

$code.=<<___;
{ .mib;	add	$e=$e,$K_00_19		    // e+=K_00_19
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	andcm	tmp1=$d,$b
	and	tmp0=$c,$b		};;
{ .mmi;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	or	tmp0=tmp0,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16]	// forward Xupdate
	nop.i	0			};;
{ .mmi;	add	$e=$e,tmp0		    // f+=F_00_19(b,c,d)
	xor	$Xn=$Xn,tmp3		    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0			};;

___
}
1521cb0ef41Sopenharmony_ci
# Append the assembly for one b^c^d round to the caller's output scalar.
#
#   BODY_20_39(\$code, $i, $a, $b, $c, $d, $e [, $Konst])
#
# The first argument is a reference to the scalar receiving the text, $i is
# the round number and $a..$e are the names substituted for the five working
# variables.  $Konst is the name of the round constant; it defaults to
# $K_20_39 when omitted (BODY_60_79 passes $K_60_79).
#
# For $i<79 the round is interleaved with the update of the next schedule
# word.  Otherwise the final-round template is used: it performs no schedule
# update and instead folds working variables into $h1, $h3 and $h4
# ("wrap up") and recomputes tmp3 as inp+1.
# The globals @X, $K_20_39, $h1, $h3 and $h4 are read.
sub BODY_20_39 {
local	*code=shift;		# alias $code to the scalar passed by reference
my	($i,$a,$b,$c,$d,$e,$Konst)=@_;
	$Konst = $K_20_39 if (!defined($Konst));
my	$j=$i+1;
my	$Xn=$X[$j%16];		# name of the next schedule word (element, not a slice)

if ($i<79) {
$code.=<<___;
{ .mib;	add	$e=$e,$Konst		    // e+=K_XX_XX
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	xor	tmp0=$c,$b
	xor	$Xn=$Xn,$X[($j+2)%16]	};; // forward Xupdate
{ .mib;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mib;	xor	tmp0=tmp0,$d		    // F_20_39(b,c,d)=b^c^d
	xor	$Xn=$Xn,$X[($j+8)%16]	};; // forward Xupdate
{ .mmi;	add	$e=$e,tmp0		    // e+=F_20_39(b,c,d)
	xor	$Xn=$Xn,$X[($j+13)%16]	    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0			};;

___
}
else {
$code.=<<___;
{ .mib;	add	$e=$e,$Konst		    // e+=K_60_79
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	xor	tmp0=$c,$b
	add	$h1=$h1,$a		};; // wrap up
{ .mib;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mib;	xor	tmp0=tmp0,$d		    // F_20_39(b,c,d)=b^c^d
	add	$h3=$h3,$c		};; // wrap up
{ .mmi;	add	$e=$e,tmp0		    // e+=F_20_39(b,c,d)
	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	shrp	$b=tmp6,tmp6,2		};; // b=ROTATE(b,30) ;;?
{ .mmi;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	add	tmp3=1,inp		    // used in unaligned codepath
	add	$h4=$h4,$d		};; // wrap up

___
}
}
2011cb0ef41Sopenharmony_ci
# Append the assembly for one of rounds 40..59 to the caller's output scalar.
#
#   BODY_40_59(\$code, $i, $a, $b, $c, $d, $e)
#
# The first argument is a reference to the scalar receiving the text, $i is
# the round number and $a..$e are the names substituted for the five working
# variables.  The template adds $K_40_59, the schedule word $X[$i%16],
# (c&d) and b&(c^d), and interleaves the update of the next schedule word.
# The globals @X and $K_40_59 are read.
sub BODY_40_59 {
local	*code=shift;		# alias $code to the scalar passed by reference
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# name of the next schedule word (element, not a slice)

$code.=<<___;
{ .mib;	add	$e=$e,$K_40_59		    // e+=K_40_59
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	and	tmp1=$c,$d
	xor	tmp0=$c,$d		};;
{ .mmi;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	add	tmp5=tmp5,tmp1		    // a<<5+(c&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	and	tmp0=tmp0,$b
	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16] };;	// forward Xupdate
{ .mmi;	add	$e=$e,tmp0		    // e+=b&(c^d)
	add	tmp5=tmp5,tmp1		    // ROTATE(a,5)+(c&d)
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	xor	$Xn=$Xn,tmp3
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp5		    // e+=ROTATE(a,5)+(c&d)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0x0			};;

___
}
# Rounds 60..79 reuse the BODY_20_39 template; the caller's arguments are
# forwarded unchanged with $K_60_79 appended as the round constant.
sub BODY_60_79 {
	return BODY_20_39(@_, $K_60_79);
}
2311cb0ef41Sopenharmony_ci
# Append the start of sha1_block_data_order to $code: the tmp*/ctx/inp
# register aliases, the prologue (alloc, $ADDP on ctx and inp, saving ar.lc
# in r3 and pr in r2), the loads of $h0..$h4 from ctx, the four round
# constants, the loop-counter setup from in2, and the .Ldtop label where
# $h0..$h4 are copied into the working variables $A..$E.
$code.=<<___;
.text

tmp0=r8;
tmp1=r9;
tmp2=r10;
tmp3=r11;
ctx=r32;	// in0
inp=r33;	// in1

// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num);
.global	sha1_block_data_order#
.proc	sha1_block_data_order#
.align	32
sha1_block_data_order:
	.prologue
{ .mmi;	alloc	tmp1=ar.pfs,3,14,0,0
	$ADDP	tmp0=4,ctx
	.save	ar.lc,r3
	mov	r3=ar.lc		}
{ .mmi;	$ADDP	ctx=0,ctx
	$ADDP	inp=0,inp
	mov	r2=pr			};;
tmp4=in2;
tmp5=loc12;
tmp6=loc13;
	.body
{ .mlx;	ld4	$h0=[ctx],8
	movl	$K_00_19=0x5a827999	}
{ .mlx;	ld4	$h1=[tmp0],8
	movl	$K_20_39=0x6ed9eba1	};;
{ .mlx;	ld4	$h2=[ctx],8
	movl	$K_40_59=0x8f1bbcdc	}
{ .mlx;	ld4	$h3=[tmp0]
	movl	$K_60_79=0xca62c1d6	};;
{ .mmi;	ld4	$h4=[ctx],-16
	add	in2=-1,in2		    // adjust num for ar.lc
	mov	ar.ec=1			};;
{ .mmi;	nop.m	0
	add	tmp3=1,inp
	mov	ar.lc=in2		};; // brp.loop.imp: too far

.Ldtop:
{ .mmi;	mov	$A=$h0
	mov	$B=$h1
	mux2	tmp6=$h1,0x44		}
{ .mmi;	mov	$C=$h2
	mov	$D=$h3
	mov	$E=$h4			};;

___
2831cb0ef41Sopenharmony_ci
# Generate all 80 rounds.  @V holds the names used for a,b,c,d,e in the
# current round; after every round the last name is moved to the front, so
# the roles rotate by one position.  Each table entry gives the first round
# number that no longer belongs to the stage and the generator for the stage.
{
	my @V = ($A,$B,$C,$D,$E);
	my @stages = (
		[ 16, \&BODY_00_15 ],
		[ 20, \&BODY_16_19 ],
		[ 40, \&BODY_20_39 ],
		[ 60, \&BODY_40_59 ],
		[ 80, \&BODY_60_79 ],
	);

	my $round = 0;
	foreach my $stage (@stages) {
		my ($limit, $emit) = @{$stage};
		while ($round < $limit) {
			$emit->(\$code, $round, @V);
			unshift(@V, pop(@V));
			$round++;
		}
	}

	# double-check: the rotation must have returned to the initial order
	die unless (($V[0] eq $A) and ($V[4] eq $E));
}
2951cb0ef41Sopenharmony_ci
# Append the end of the function to $code: add $A and $C into $h0 and $h2,
# branch back to .Ldtop, then at .Ldend restore ar.lc from r3, store
# $h0..$h4 back through ctx, restore pr from r2 and return.  A trailing
# stringz identification string is emitted after .endp.
$code.=<<___;
{ .mmb;	add	$h0=$h0,$A
	add	$h2=$h2,$C
	br.ctop.dptk.many	.Ldtop	};;
.Ldend:
{ .mmi;	add	tmp0=4,ctx
	mov	ar.lc=r3		};;
{ .mmi;	st4	[ctx]=$h0,8
	st4	[tmp0]=$h1,8		};;
{ .mmi;	st4	[ctx]=$h2,8
	st4	[tmp0]=$h3		};;
{ .mib;	st4	[ctx]=$h4,-16
	mov	pr=r2,0x1ffff
	br.ret.sptk.many	b0	};;
.endp	sha1_block_data_order#
stringz	"SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
___
3131cb0ef41Sopenharmony_ci
# Write the generated assembly to $output when one was given, otherwise to
# the inherited STDOUT.  The three-argument open treats $output strictly as
# a file name, and failures to open or to flush the destination are fatal so
# that a missing or truncated output file cannot go unnoticed.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
print $code;
close(STDOUT) or die "error closing STDOUT: $!";
316