#! /usr/bin/env perl
# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# June 2011
#
# This is RC4+MD5 "stitch" implementation. The idea, as spelled in
# http://download.intel.com/design/intarch/papers/323686.pdf, is that
# since both algorithms exhibit instruction-level parallelism, ILP,
# below theoretical maximum, interleaving them would allow to utilize
# processor resources better and achieve better performance. RC4
# instruction sequence is virtually identical to rc4-x86_64.pl, which
# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin
# and Jim Guilford of Intel. MD5 is fresh implementation aiming to
# minimize register usage, which was used as "main thread" with RC4
# weaved into it, one RC4 round per one MD5 round. In addition to the
# stitched subroutine the script can generate standalone replacement
# ossl_md5_block_asm_data_order and RC4. Below are performance numbers in
# cycles per processed byte, less is better, for the standalone
# subroutines, sum of them, and stitched one:
#
#		RC4	MD5	RC4+MD5	stitch	gain
# Opteron	6.5(*)	5.4	11.9	7.0	+70%(*)
# Core2		6.5	5.8	12.3	7.7	+60%
# Westmere	4.3	5.2	9.5	7.0	+36%
# Sandy Bridge	4.2	5.5	9.7	6.8	+43%
# Ivy Bridge	4.1	5.2	9.3	6.0	+54%
# Haswell	4.0	5.0	9.0	5.7	+60%
# Skylake	6.3(**)	5.0	11.3	5.3	+110%
# Atom		9.3	6.5	15.8	11.1	+42%
# VIA Nano	6.3	5.4	11.7	8.6	+37%
# Bulldozer	4.5	5.4	9.9	7.7	+29%
#
# (*)	rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
#	is +53%...
# (**)	unidentified anomaly;

# Generation switches: which of the two algorithms to emit. With both set
# the stitched routine rc4_md5_enc is generated; with only one set the
# standalone RC4 or MD5 block routine is generated instead.
my ($rc4,$md5)=(1,1);	# what to generate?

# Initialise $D unconditionally: "my $x = ... if COND" is documented in
# perlsyn as having undefined behaviour. $D is "#" only when MD5 is not
# requested and is an empty (false) string otherwise, which is exactly how
# the rest of the script tests and interpolates it.
my $D = $md5 ? "" : "#";	# if set to "#", MD5 is stitched into RC4(),
			# but its result is discarded. Idea here is
			# to be able to use 'openssl speed rc4' for
			# benchmarking the stitched subroutine...

# Command-line handling.
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 extras (the SEH handler further down) are emitted when the flavour
# names nasm/masm/mingw64 or when the output file ends in ".asm".
my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl, first next to this script and then under
# ../../perlasm relative to it. $dir is the directory part of $0 (including
# the trailing separator) or undefined when $0 has no directory component.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator,
# which is run with the same perl interpreter ($^X) as this script.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Names of the registers that carry the incoming arguments, plus the name
# and argument count of the function being generated. Which variables are
# set depends on whether RC4, MD5 or the stitched routine is requested.
my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);

if ($rc4 && !$md5) {
  # standalone RC4(key, len, in, out)
  ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx");
  $func="RC4";				$nargs=4;
} elsif ($md5 && !$rc4) {
  # standalone MD5 block function (ctx, inp, len)
  ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx");
  $func="ossl_md5_block_asm_data_order";	$nargs=3;
} else {
  # stitched RC4+MD5
  ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
  $func="rc4_md5_enc";			$nargs=6;
  # void rc4_md5_enc(
  #		RC4_KEY *key,		#
  #		const void *in0,	# RC4 input
  #		void *out,		# RC4 output
  #		MD5_CTX *ctx,		#
  #		const void *inp,	# MD5 input
  #		size_t len);		# number of 64-byte blocks
}

# MD5 additive constants, one per step; indexed by the step number 0..63
# in R0..R3 below (four groups of sixteen, one group per round).
my @K=(	0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
	0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
	0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
	0x6b901122,0xfd987193,0xa679438e,0x49b40821,

	0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
	0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
	0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
	0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,

	0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
	0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
	0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
	0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,

	0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
	0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
	0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
	0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391	);

# Register allocation used by the generated code.
my @V=("%r8d","%r9d","%r10d","%r11d");	# MD5 registers
my $tmp="%r12d";			# MD5 scratch register

my @XX=("%rbp","%rsi");			# RC4 registers
my @TX=("%rax","%rbx");
my $YY="%rcx";
my $TY="%rdx";

# Size of the RC4 index window handled between index updates; see the
# $k==$MOD-1 cases in R0..R3.
my $MOD=32;				# 16, 32 or 64

# Function prologue: return immediately through .Labort when $len is zero,
# otherwise save %rbx, %rbp and %r12-%r15 and reserve 40 bytes of stack.
# .Lbody marks the end of the prologue for the Win64 handler below.
$code.=<<___;
.text
.align 16

.globl	$func
.type	$func,\@function,$nargs
$func:
.cfi_startproc
	cmp	\$0,$len
	je	.Labort
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$40,%rsp
.cfi_adjust_cfa_offset	40
.Lbody:
___
# RC4 set-up, emitted only when RC4 is generated: move the arguments into
# callee-saved registers, load the x/y indices stored in the 8 bytes in
# front of the S-box, and (RC4-only builds) emit a byte-at-a-time warm-up
# loop that runs until the x index is a multiple of $MOD.
if ($rc4) {
$code.=<<___;
$D#md5#	mov	$ctx,%r11		# reassign arguments
	mov	$len,%r12
	mov	$in0,%r13
	mov	$out,%r14
$D#md5#	mov	$inp,%r15
___
    # From here on the Perl-side names refer to the new registers.
    $ctx="%r11"	if ($md5);		# reassign arguments
    $len="%r12";
    $in0="%r13";
    $out="%r14";
    $inp="%r15"	if ($md5);
    $inp=$in0	if (!$md5);
$code.=<<___;
	xor	$XX[0],$XX[0]
	xor	$YY,$YY

	lea	8($dat),$dat
	mov	-8($dat),$XX[0]#b
	mov	-4($dat),$YY#b

	inc	$XX[0]#b
	sub	$in0,$out
	movl	($dat,$XX[0],4),$TX[0]#d
___
$code.=<<___ if (!$md5);
	xor	$TX[1],$TX[1]
	test	\$-128,$len
	jz	.Loop1
	sub	$XX[0],$TX[1]
	and	\$`$MOD-1`,$TX[1]
	jz	.Loop${MOD}_is_hot
	sub	$TX[1],$len
.Loop${MOD}_warmup:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$TX[1]
	jnz	.Loop${MOD}_warmup

	mov	$YY,$TX[1]
	xor	$YY,$YY
	mov	$TX[1]#b,$YY#b

.Loop${MOD}_is_hot:
	mov	$len,32(%rsp)		# save original $len
	shr	\$6,$len		# number of 64-byte blocks
___
  # Benchmark mode ($D set): switch MD5 generation on, pointing the MD5
  # context at the stack and the MD5 input at the RC4 input.
  if ($D && !$md5) {			# stitch in dummy MD5
    $md5=1;
    $ctx="%r11";
    $inp="%r15";
    $code.=<<___;
	mov	%rsp,$ctx
	mov	$in0,$inp
___
  }
}
# Main loop entry: convert the block count in $len into an end-of-input
# pointer stored at 16(%rsp), load the MD5 state into @V, and at the top of
# every iteration save a copy of that state on the stack.
$code.=<<___;
#rc4#	add	$TX[0]#b,$YY#b
#rc4#	lea	($dat,$XX[0],4),$XX[1]
	shl	\$6,$len
	add	$inp,$len		# pointer to the end of input
	mov	$len,16(%rsp)

#md5#	mov	$ctx,24(%rsp)		# save pointer to MD5_CTX
#md5#	mov	0*4($ctx),$V[0]		# load current hash value from MD5_CTX
#md5#	mov	1*4($ctx),$V[1]
#md5#	mov	2*4($ctx),$V[2]
#md5#	mov	3*4($ctx),$V[3]
	jmp	.Loop

.align	16
.Loop:
#md5#	mov	$V[0],0*4(%rsp)		# put aside current hash value
#md5#	mov	$V[1],1*4(%rsp)
#md5#	mov	$V[2],2*4(%rsp)
#md5#	mov	$V[3],$tmp		# forward reference
#md5#	mov	$V[3],3*4(%rsp)
___

# R0($i,$a,$b,$c,$d) - append one step of MD5 round 1 (steps 0..15),
# interleaved with one RC4 step, to $code.
#
#   $i           step number; selects the constant $K[$i]
#   $a,$b,$c,$d  MD5 state registers in their current roles
#
# $j is the position inside the 16-step round and $k the position inside
# the $MOD-step RC4 window. Lines tagged #md5#/#rc4# are enabled or left
# as comments by the substitutions at the end of the script. $tmp is
# expected to have been preloaded by the previous step (the "forward
# reference" mov). Key-stream bytes are gathered with pinsrw into %xmm0
# (even steps) and %xmm1 (odd steps); on the last step of the round 16
# input bytes are loaded into %xmm2 and xor-ed with %xmm0 and %xmm1<<8.
# The order of the emitted instructions is deliberate and must be kept.
sub R0 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot0=(7,12,17,22);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	($in0),%xmm2\n"		if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$b,$tmp
#md5#	add	4*`$j`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$d,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot0[$j%4],$a
#md5#	mov	`$j==15?"$b":"$c"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD window: clear the upper bits of $YY and re-base $XX[1].
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # End of the round: combine the gathered key stream with the input.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm1,%xmm2
___
}
# R1($i,$a,$b,$c,$d) - append one step of MD5 round 2 (steps 16..31),
# interleaved with one RC4 step, to $code.
#
#   $i           step number; selects the constant $K[$i]
#   $a,$b,$c,$d  MD5 state registers in their current roles
#
# Same layout as R0, with the round-2 boolean function, rotation amounts
# (5,9,14,20) and message word index (1+5*$j)%16. On the last step of the
# round the second 16 input bytes are loaded into %xmm3 and xor-ed with the
# key stream gathered in %xmm0/%xmm1. The order of the emitted instructions
# is deliberate and must be kept.
sub R1 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot1=(5,9,14,20);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	16($in0),%xmm3\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$b,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$d,$tmp
#md5#	add	4*`((1+5*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$c,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot1[$j%4],$a
#md5#	mov	`$j==15?"$c":"$b"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD window: clear the upper bits of $YY and re-base $XX[1].
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # End of the round: combine the gathered key stream with the input.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
___
}
# R2($i,$a,$b,$c,$d) - append one step of MD5 round 3 (steps 32..47),
# interleaved with one RC4 step, to $code.
#
#   $i           step number; selects the constant $K[$i]
#   $a,$b,$c,$d  MD5 state registers in their current roles
#
# Same layout as R0, with the round-3 boolean function (two xors),
# rotation amounts (4,11,16,23) and message word index (5+3*$j)%16. On the
# last step $tmp is preloaded with -1 for the following round, and the
# third 16 input bytes are loaded into %xmm4 and xor-ed with the key stream
# gathered in %xmm0/%xmm1. The order of the emitted instructions is
# deliberate and must be kept.
sub R2 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot2=(4,11,16,23);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	32($in0),%xmm4\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	xor	$b,$tmp
#md5#	add	4*`((5+3*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	add	$tmp,$a
#rc4#	movl	$TY#d,4*$k($XX[1])
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot2[$j%4],$a
#md5#	mov	`$j==15?"\\\$-1":"$c"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD window: clear the upper bits of $YY and re-base $XX[1].
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # End of the round: combine the gathered key stream with the input.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm4
___
}
# R3($i,$a,$b,$c,$d) - append one step of MD5 round 4 (steps 48..63),
# interleaved with one RC4 step, to $code.
#
#   $i           step number; selects the constant $K[$i]
#   $a,$b,$c,$d  MD5 state registers in their current roles
#
# Same layout as R0, with the round-4 boolean function (xor/or/xor on a
# $tmp preloaded with -1), rotation amounts (6,10,15,21) and message word
# index (7*$j)%16. On the last step both RC4 indices are reduced to their
# low byte, $XX[1] is re-based, and the fourth 16 input bytes are loaded
# into %xmm5 and xor-ed with the key stream gathered in %xmm0/%xmm1. The
# order of the emitted instructions is deliberate and must be kept.
sub R3 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot3=(6,10,15,21);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	48($in0),%xmm5\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$d,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	or	$b,$tmp
#md5#	add	4*`((7*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot3[$j%4],$a
#md5#	mov	\$-1,$tmp			# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of the 64-byte block: normalise both indices and finish the
    # last 16 bytes of RC4 output.
    $code.=<<___ if ($rc4 && $j==15);
	mov	$XX[0],$XX[1]
	xor	$XX[0],$XX[0]			# keyword to partial register
	mov	$XX[1]#b,$XX[0]#b
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm1,%xmm5
___
}

# Emit the 64 steps, sixteen per round. After every step the MD5 register
# list is rotated by one (last element moves to the front) and the two RC4
# @TX registers swap places, so each generator always sees the registers in
# their current roles.
my $i=0;
for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }

# Bottom of the main loop: add the saved hash value back in, store the four
# 16-byte RC4 output chunks, advance the input pointers and loop until the
# end pointer saved at 16(%rsp) is reached; then write the MD5 state back.
$code.=<<___;
#md5#	add	0*4(%rsp),$V[0]		# accumulate hash value
#md5#	add	1*4(%rsp),$V[1]
#md5#	add	2*4(%rsp),$V[2]
#md5#	add	3*4(%rsp),$V[3]

#rc4#	movdqu	%xmm2,($out,$in0)	# write RC4 output
#rc4#	movdqu	%xmm3,16($out,$in0)
#rc4#	movdqu	%xmm4,32($out,$in0)
#rc4#	movdqu	%xmm5,48($out,$in0)
#md5#	lea	64($inp),$inp
#rc4#	lea	64($in0),$in0
	cmp	16(%rsp),$inp		# are we done?
	jb	.Loop

#md5#	mov	24(%rsp),$len		# restore pointer to MD5_CTX
#rc4#	sub	$TX[0]#b,$YY#b		# correct $YY
#md5#	mov	$V[0],0*4($len)		# write MD5_CTX
#md5#	mov	$V[1],1*4($len)
#md5#	mov	$V[2],2*4($len)
#md5#	mov	$V[3],3*4($len)
___
# Byte-at-a-time RC4 loop for the bytes left over after the 64-byte blocks.
# Emitted only for RC4 without MD5, or in benchmark mode ($D set).
$code.=<<___ if ($rc4 && (!$md5 || $D));
	mov	32(%rsp),$len		# restore original $len
	and	\$63,$len		# remaining bytes
	jnz	.Loop1
	jmp	.Ldone

.align	16
.Loop1:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$len
	jnz	.Loop1

.Ldone:
___
# Function epilogue: store the RC4 x/y indices back in front of the S-box,
# restore the callee-saved registers from their slots above the 40-byte
# scratch area, release the frame and return. .Lepilogue is used by the
# Win64 handler; .Labort is the early-exit target from the prologue.
$code.=<<___;
#rc4#	sub	\$1,$XX[0]#b
#rc4#	movl	$XX[0]#d,-8($dat)
#rc4#	movl	$YY#d,-4($dat)

	mov	40(%rsp),%r15
.cfi_restore	%r15
	mov	48(%rsp),%r14
.cfi_restore	%r14
	mov	56(%rsp),%r13
.cfi_restore	%r13
	mov	64(%rsp),%r12
.cfi_restore	%r12
	mov	72(%rsp),%rbp
.cfi_restore	%rbp
	mov	80(%rsp),%rbx
.cfi_restore	%rbx
	lea	88(%rsp),%rsp
.cfi_adjust_cfa_offset	-88
.Lepilogue:
.Labort:
	ret
.cfi_endproc
.size $func,.-$func
___

# Optional RC4_set_key and RC4_options, emitted only in benchmark mode.
# The lexicals declared here shadow the file-level $dat/$len/$inp for the
# duration of this block.
if ($rc4 && $D) {	# sole purpose of this section is to provide
			# option to use the generated module as drop-in
			# replacement for rc4-x86_64.pl for debugging
			# and testing purposes...
my ($idx,$ido)=("%r8","%r9");
my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function,3
.align	16
RC4_set_key:
.cfi_startproc
	lea	8($dat),$dat
	lea	($inp,$len),$inp
	neg	$len
	mov	$len,%rcx
	xor	%eax,%eax
	xor	$ido,$ido
	xor	%r10,%r10
	xor	%r11,%r11
	jmp	.Lw1stloop

.align	16
.Lw1stloop:
	mov	%eax,($dat,%rax,4)
	add	\$1,%al
	jnc	.Lw1stloop

	xor	$ido,$ido
	xor	$idx,$idx
.align	16
.Lw2ndloop:
	mov	($dat,$ido,4),%r10d
	add	($inp,$len,1),$idx#b
	add	%r10b,$idx#b
	add	\$1,$len
	mov	($dat,$idx,4),%r11d
	cmovz	%rcx,$len
	mov	%r10d,($dat,$idx,4)
	mov	%r11d,($dat,$ido,4)
	add	\$1,$ido#b
	jnc	.Lw2ndloop

	xor	%eax,%eax
	mov	%eax,-8($dat)
	mov	%eax,-4($dat)
	ret
.cfi_endproc
.size	RC4_set_key,.-RC4_set_key

.globl	RC4_options
.type	RC4_options,\@abi-omnipotent
.align	16
RC4_options:
	lea	.Lopts(%rip),%rax
	ret
.align	64
.Lopts:
.asciz	"rc4(64x,int)"
.align	64
.size	RC4_options,.-RC4_options
___
}
# Win64 structured-exception support, emitted only when $win64 is set.
# The handler restores the callee-saved registers from the frame laid out
# by the prologue when the faulting address lies between .Lbody and
# .Lepilogue, then hands over to RtlVirtualUnwind. The .pdata/.xdata
# records attach it to the generated function.
#
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) {
my $rec="%rcx";
my $frame="%rdx";
my $context="%r8";
my $disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	se_handler,\@abi-omnipotent
.align	16
se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lbody(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lbody
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lepilogue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
	jae	.Lin_prologue

	mov	40(%rax),%r15
	mov	48(%rax),%r14
	mov	56(%rax),%r13
	mov	64(%rax),%r12
	mov	72(%rax),%rbp
	mov	80(%rax),%rbx
	lea	88(%rax),%rax

	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	se_handler,.-se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_$func
	.rva	.LSEH_end_$func
	.rva	.LSEH_info_$func

.section	.xdata
.align	8
.LSEH_info_$func:
	.byte	9,0,0,0
	.rva	se_handler
___
}

# reg_part($reg,$conv) - return the name of the byte ("b"), word ("w") or
# dword ("d") part of a 64-bit register.
#
#   $reg   register name including the leading "%", e.g. "%rax" or "%r12"
#   $conv  one of "b", "w", "d"
#
# Numbered registers (%r8..%r15) simply get the suffix appended
# ("%r12" + "d" -> "%r12d"). Legacy registers are renamed instead:
# "%rax" -> "%al" / "%ax" / "%eax", "%rsi" -> "%sil" / "%si" / "%esi".
# Any other $conv leaves a legacy register name unchanged.
sub reg_part {
my ($reg,$conv)=@_;
    if ($reg =~ /%r[0-9]+/)     { $reg .= $conv; }
    elsif ($conv eq "b")        { $reg =~ s/%[er]([^x]+)x?/%$1l/;       }
    elsif ($conv eq "w")        { $reg =~ s/%[er](.+)/%$1/;             }
    elsif ($conv eq "d")        { $reg =~ s/%[er](.+)/%e$1/;            }
    return $reg;
}

# Post-process the accumulated text before handing it to the translator:
#  1. resolve "%reg#b" / "#w" / "#d" markers through reg_part();
#  2. evaluate every `...` expression as Perl and substitute its value;
#  3. replace "pinsrw $0," by "movd".
$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/pinsrw\s+\$0,/movd	/gm;

# Enable the tagged lines of whichever algorithms are being generated;
# tags that are not removed stay in the output as comment markers.
$code =~ s/#md5#//gm	if ($md5);
$code =~ s/#rc4#//gm	if ($rc4);

print $code;

# STDOUT is the pipe to the translator; a failure there surfaces at close.
close STDOUT or die "error closing STDOUT: $!";
666