#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# This flag makes the inner loop one cycle longer, but generates
# code that runs 30% faster on the Pentium Pro/II and 44% faster
# on the PIII, while being only 7% slower on the Pentium.
# By default, this flag is on.
$ppro=1;

# Add the directory containing this script, and the perlasm directory two
# levels above it, to @INC so the helper libraries below can be loaded.
# When $0 carries no directory component the prefix is the empty string.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
require "cbc.pl";

# The last command-line argument, when present, names the output file and is
# removed from @ARGV.  Use the three-argument form of open so the name is
# never parsed for mode characters, and fail right away with the real reason
# if the file cannot be created.
$output=pop;
if ($output) {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}

# asm_init receives the first remaining argument and a flag telling whether
# the last remaining argument is the literal string "386".
&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");

$CAST_ROUNDS=16;

# Register assignments shared by the generator subs below.
$L="edi";
$R="esi";
$K="ebp";
$tmp1="ecx";
$tmp2="ebx";
$tmp3="eax";
$tmp4="edx";

# Symbol names of the four S-box tables indexed by E_CAST.
$S1="CAST_S_table0";
$S2="CAST_S_table1";
$S3="CAST_S_table2";
$S4="CAST_S_table3";

# Operation triples handed to E_CAST as (OP1,OP2,OP3); CAST_encrypt cycles
# through them round by round.
@F1=("add","xor","sub");
@F2=("xor","sub","add");
@F3=("sub","add","xor");

# Emit the block encrypt and decrypt routines, then the CBC wrapper built
# on top of them (the numeric arguments are interpreted by cbc.pl).
&CAST_encrypt("CAST_encrypt",1);
&CAST_encrypt("CAST_decrypt",0);
&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1);

&asm_finish();

# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";

# CAST_encrypt(name, enc)
#
# Emits one complete block-cipher routine called `name`.  A true `enc`
# produces the encrypting variant (rounds 0..15 in ascending order); a false
# `enc` produces the decrypting variant (rounds 15..0 in descending order).
#
# The generated routine takes a pointer to two 32-bit words as its first
# argument and a pointer to the key schedule as its second.  The dword at
# offset 128 of the key schedule is the "short key" flag: when it is non-zero
# rounds 12..15 are skipped.
sub CAST_encrypt {
    my($name,$enc)=@_;

    my $win_ex=<<"EOF";
EXTERN	_CAST_S_table0:DWORD
EXTERN	_CAST_S_table1:DWORD
EXTERN	_CAST_S_table2:DWORD
EXTERN	_CAST_S_table3:DWORD
EOF
    &main::external_label(map { "CAST_S_table$_" } 0..3);

    &function_begin_B($name,$win_ex);

    &comment("");

    # Prologue: save the callee-saved registers, fetching the two arguments
    # in between the pushes.
    &push("ebp");
    &push("ebx");
    &mov($tmp2,&wparam(0));
    &mov($K,&wparam(1));
    &push("esi");
    &push("edi");

    &comment("Load the 2 words");
    &mov($L,&DWP(0,$tmp2,"",0));
    &mov($R,&DWP(4,$tmp2,"",0));

    &comment('Get short key flag');
    &mov($tmp3,&DWP(128,$K,"",0));
    if ($enc) {
	# Encryption needs the flag only after round 11; park it on the stack.
	&push($tmp3);
    }
    else {
	# Decryption starts with the optional rounds, so decide immediately.
	&or($tmp3,$tmp3);
	&jnz(&label('cast_dec_skip'));
    }

    &xor($tmp3,$tmp3);

    # Round i always uses operation set number (i mod 3).  Which of L/R is
    # the round's destination alternates with the parity of i; $first_dst
    # and $first_src are the pair used by the first round that is emitted,
    # and the roles flip on every following round.
    my @op_sets=(\@F1,\@F2,\@F3);
    my $emit_rounds=sub {
	my($flip,@rounds)=@_;
	foreach my $i (@rounds) {
	    my($dst,$src)=(($i & 1) xor $flip) ? ($R,$L) : ($L,$R);
	    &E_CAST($i,$S,$dst,$src,$K,@{$op_sets[$i % 3]},
		    $tmp1,$tmp2,$tmp3,$tmp4);
	}
    };

    if ($enc) {
	# Even rounds update L, odd rounds update R.
	$emit_rounds->(0, 0..11);
	&comment('test short key flag');
	&pop($tmp4);
	&or($tmp4,$tmp4);
	&jnz(&label('cast_enc_done'));
	$emit_rounds->(0, 12..15);
    }
    else {
	# Odd rounds update L, even rounds update R.
	$emit_rounds->(1, reverse(12..15));
	&set_label('cast_dec_skip');
	$emit_rounds->(1, reverse(0..11));
    }

    if ($enc) {
	&set_label('cast_enc_done');
    }
# Why the nop? - Ben 17/1/99
    &nop();
    # Write the result back through the first argument, halves exchanged.
    &mov($tmp3,&wparam(0));
    &mov(&DWP(4,$tmp3,"",0),$L);
    &mov(&DWP(0,$tmp3,"",0),$R);
    &function_end($name);
}

# E_CAST(round, S, L, R, K, OP1, OP2, OP3, tmp1, tmp2, tmp3, tmp4)
#
# Emits the instructions for a single round:
#
#   t   = rotl(OP1(key[round*8], R), low byte of key[round*8+4])
#   L  ^= OP1(OP3(OP2(S1[..], S2[..]), S3[..]), S4[..])
#
# where the four table indices are the four bytes of t.  OP1..OP3 are
# instruction names ("add", "xor" or "sub") invoked by name.  The second
# argument is accepted for call compatibility but is not used; the table
# symbols come from the globals $S1..$S4.
#
# tmp3 is expected to have its upper 24 bits clear on entry when $ppro is
# off, since only its low byte is written on that path.
sub E_CAST {
    my($round,$S_unused,$dst,$src,$key,$f1,$f2,$f3,$t1,$t2,$t3,$t4)=@_;

    # Each round owns two consecutive dwords of the key schedule.
    my $op_key_off =$round*8;
    my $rot_key_off=$op_key_off+4;

    &comment("round $round");
    &mov($t4,&DWP($op_key_off,$key,"",1));

    &mov($t1,&DWP($rot_key_off,$key,"",1));
    &$f1($t4,$src);

    &rotl($t4,&LB($t1));

    # Split the rotated value into four table indices.  The &HB operand is
    # presumably the second-lowest byte of the register (helper lives in
    # x86asm.pl).
    if ($ppro) {
	# Clear the full registers before the byte moves; this costs an
	# instruction but is the faster variant on the Pentium Pro family
	# (see the $ppro note at the top of the file).
	&xor($t1,$t1);
	&mov($t2,0xff);

	&movb(&LB($t1),&HB($t4));	# index A
	&and($t2,$t4);			# index B = bits 0..7

	&shr($t4,16);
	&xor($t3,$t3);
    }
    else {
	&mov($t2,$t4);			# index B, masked below
	&movb(&LB($t1),&HB($t4));	# index A (marked "BAD" by the author)

	&shr($t4,16);
	&and($t2,0xff);
    }

    &movb(&LB($t3),&HB($t4));		# index C (marked "BAD" by the author)
    &and($t4,0xff);			# index D = bits 16..23 of t

    # Table lookups (4-byte entries) interleaved with the combining ops.
    &mov($t1,&DWP($S1,"",$t1,4));
    &mov($t2,&DWP($S2,"",$t2,4));

    &$f2($t1,$t2);
    &mov($t2,&DWP($S3,"",$t3,4));

    &$f3($t1,$t2);
    &mov($t2,&DWP($S4,"",$t4,4));

    &$f1($t1,$t2);

    # Fold the round function's output into the destination half.
    &xor($dst,$t1);
}

192