#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
13e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
14e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
15e1051a39Sopenharmony_ci# ====================================================================
16e1051a39Sopenharmony_ci
17e1051a39Sopenharmony_ci# AES for s390x.
18e1051a39Sopenharmony_ci
19e1051a39Sopenharmony_ci# April 2007.
20e1051a39Sopenharmony_ci#
21e1051a39Sopenharmony_ci# Software performance improvement over gcc-generated code is ~70% and
22e1051a39Sopenharmony_ci# in absolute terms is ~73 cycles per byte processed with 128-bit key.
23e1051a39Sopenharmony_ci# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
24e1051a39Sopenharmony_ci# *strictly* in-order execution and issued instruction [in this case
25e1051a39Sopenharmony_ci# load value from memory is critical] has to complete before execution
26e1051a39Sopenharmony_ci# flow proceeds. S-boxes are compressed to 2KB[+256B].
27e1051a39Sopenharmony_ci#
28e1051a39Sopenharmony_ci# As for hardware acceleration support. It's basically a "teaser," as
29e1051a39Sopenharmony_ci# it can and should be improved in several ways. Most notably support
30e1051a39Sopenharmony_ci# for CBC is not utilized, nor multiple blocks are ever processed.
31e1051a39Sopenharmony_ci# Then software key schedule can be postponed till hardware support
32e1051a39Sopenharmony_ci# detection... Performance improvement over assembler is reportedly
33e1051a39Sopenharmony_ci# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
34e1051a39Sopenharmony_ci# support is implemented.
35e1051a39Sopenharmony_ci
36e1051a39Sopenharmony_ci# May 2007.
37e1051a39Sopenharmony_ci#
38e1051a39Sopenharmony_ci# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
39e1051a39Sopenharmony_ci# for 128-bit keys, if hardware support is detected.
40e1051a39Sopenharmony_ci
41e1051a39Sopenharmony_ci# January 2009.
42e1051a39Sopenharmony_ci#
43e1051a39Sopenharmony_ci# Add support for hardware AES192/256 and reschedule instructions to
44e1051a39Sopenharmony_ci# minimize/avoid Address Generation Interlock hazard and to favour
45e1051a39Sopenharmony_ci# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
46e1051a39Sopenharmony_ci# almost 50% on z9. The gain is smaller on z10, because being dual-
47e1051a39Sopenharmony_ci# issue z10 makes it impossible to eliminate the interlock condition:
48e1051a39Sopenharmony_ci# critical path is not long enough. Yet it spends ~24 cycles per byte
49e1051a39Sopenharmony_ci# processed with 128-bit key.
50e1051a39Sopenharmony_ci#
# Unlike the previous version, hardware support detection takes place
# only at the moment of key schedule setup, and the result is recorded
# in key->rounds. This is done because deferred key setup can't be made
# MT-safe, at least not for keys longer than 128 bits.
55e1051a39Sopenharmony_ci#
56e1051a39Sopenharmony_ci# Add AES_cbc_encrypt, which gives incredible performance improvement,
57e1051a39Sopenharmony_ci# it was measured to be ~6.6x. It's less than previously mentioned 8x,
58e1051a39Sopenharmony_ci# because software implementation was optimized.
59e1051a39Sopenharmony_ci
60e1051a39Sopenharmony_ci# May 2010.
61e1051a39Sopenharmony_ci#
62e1051a39Sopenharmony_ci# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
63e1051a39Sopenharmony_ci# performance improvement over "generic" counter mode routine relying
64e1051a39Sopenharmony_ci# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
65e1051a39Sopenharmony_ci# to the fact that exact throughput value depends on current stack
66e1051a39Sopenharmony_ci# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# the worst case is unlikely; it's like hitting a ravine on a plateau.
69e1051a39Sopenharmony_ci
70e1051a39Sopenharmony_ci# November 2010.
71e1051a39Sopenharmony_ci#
72e1051a39Sopenharmony_ci# Adapt for -m31 build. If kernel supports what's called "highgprs"
73e1051a39Sopenharmony_ci# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
74e1051a39Sopenharmony_ci# instructions and achieve "64-bit" performance even in 31-bit legacy
75e1051a39Sopenharmony_ci# application context. The feature is not specific to any particular
76e1051a39Sopenharmony_ci# processor, as long as it's "z-CPU". Latter implies that the code
77e1051a39Sopenharmony_ci# remains z/Architecture specific. On z990 it was measured to perform
78e1051a39Sopenharmony_ci# 2x better than code generated by gcc 4.3.
79e1051a39Sopenharmony_ci
80e1051a39Sopenharmony_ci# December 2010.
81e1051a39Sopenharmony_ci#
82e1051a39Sopenharmony_ci# Add support for z196 "cipher message with counter" instruction.
83e1051a39Sopenharmony_ci# Note however that it's disengaged, because it was measured to
84e1051a39Sopenharmony_ci# perform ~12% worse than vanilla km-based code...
85e1051a39Sopenharmony_ci
86e1051a39Sopenharmony_ci# February 2011.
87e1051a39Sopenharmony_ci#
# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, and 37% at the more likely 512-byte block size.
91e1051a39Sopenharmony_ci
92e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension)
93e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file
# Command line: [flavour] [output-file].
# The trailing argument is taken as the output file when it ends in an
# extension; the leading argument is taken as the flavour when it
# contains no dot. Either may be absent, leaving the variable undef.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Flavours matching /3[12]/ select 4-byte size_t and un-suffixed
# load/store mnemonics; everything else (including no flavour at all)
# selects 8-byte size_t and the "g" mnemonic suffix.
if (defined($flavour) && $flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Redirect the generated assembly to the requested file. Fail loudly if
# it cannot be opened instead of silently writing to the inherited STDOUT.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
106e1051a39Sopenharmony_ci
$softonly=0;	# allow hardware support

# Symbolic names for the general registers used by the generated code.
# Several names deliberately share one register, so writing one alias
# destroys the others:
#   %r0: $t0, $mask      %r2: $t2, $inp      %r3: $t3, $out, $bits
$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";
$rounds="%r13";
$ra="%r14";
$sp="%r15";

# 160 bytes when $SIZE_T is 8, 96 bytes when it is 4.
# NOTE(review): presumably the ABI's standard stack frame (16 pointer-sized
# slots plus four 8-byte slots) -- confirm against the s390/s390x ELF ABI.
$stdframe=16*$SIZE_T+4*8;
127e1051a39Sopenharmony_ci
# Append one ".long w,w" line to $code for every argument, i.e. each
# 32-bit value is emitted twice so that it occupies an 8-byte slot.
# Emission stops at the first undefined argument.
#
# The sub is variadic, so it carries no prototype; both `&_data_word(...)`
# and `_data_word(...)` call forms work.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code .= sprintf(".long\t0x%08x,0x%08x\n", $word, $word);
    }
    return;
}
132e1051a39Sopenharmony_ci
# AES_Te: encryption lookup data, aligned to 256 bytes.
#
# _data_word() emits every 32-bit value twice (".long w,w"), so the 256
# values below occupy 256 eight-byte slots (2KB). The round code indexes
# the table with a state byte scaled by 8 plus a displacement of 0..3;
# its comments label displacements 0/3/2/1 as Te0/Te1/Te2/Te3. Reading
# four bytes across the duplicated word yields a byte-rotated copy, so
# one table stands in for four.
#
# The words are followed by a 256-byte table labelled Te4 and by rcon[]
# (ten constants padded with zeros to 16 words). The object is padded to
# the next 256-byte boundary before .size is taken.
#
# The second heredoc also opens the public AES_encrypt symbol; its body
# is appended by the statements that follow.
$code=<<___;
#include "s390x_arch.h"

.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
# Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[]
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
# 		 const AES_KEY *key) {
.globl	AES_encrypt
.type	AES_encrypt,\@function
AES_encrypt:
___
# Hardware fast path for AES_encrypt, emitted unless $softonly is set.
# The generated code:
#   - loads the 32-bit word at 240($key) into %r0 and branches to .Lesoft
#     (the software path) when it is below 16 (unsigned compare);
#   - otherwise sets %r1 = key address, %r4 = $out and %r3 = 16 (one
#     block; %r2 already holds $inp), then issues the hand-encoded
#     instruction 0xb92e0042, annotated as "km %r4,%r2";
#   - "brc 1,.-4" re-issues that instruction while condition code 3 is
#     set, then returns through %r14.
# NOTE(review): %r0/%r1 look like KM's function-code and parameter-block
# registers -- confirm against the z/Architecture Principles of Operation.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
# Software body of AES_encrypt plus the internal _s390x_AES_encrypt core.
#
# AES_encrypt (software path):
#   - saves %r3..$ra at 3*$SIZE_T($sp); %r3 ($out) is included because
#     the core reuses it as $t3, and it is reloaded from that slot later;
#   - loads the 16 input bytes as four zero-extended words into $s0..$s3,
#     points $tbl at AES_Te and calls the core with bras;
#   - stores $s0..$s3 to $out, restores %r6..$ra and returns.
#
# _s390x_AES_encrypt:
#   in:  $s0..$s3 = state words, $key = key schedule (round count is read
#        from offset 240), $tbl = table base
#   out: $s0..$s3 = result words
#   - $ra is parked at 15*$SIZE_T($sp) because the round code uses it as
#     a scratch index register; it is reloaded before the final br;
#   - $mask is 0xff<<3, so every extracted state byte becomes the byte
#     offset of an 8-byte table slot;
#   - .Lenc_loop runs (rounds-1) times under brct, advancing $key by 16
#     on every pass;
#   - the tail after the loop is the last round: it picks single bytes
#     with llgc at displacement 2 of the slot, shifts them into place,
#     and XORs in the round key at 16..28($key).
#   Overwrites $t0/$mask, $t1, $t2/$inp, $t3/$out, $i1..$i3 and $rounds,
#   and leaves $key advanced.
#
# The backquoted expressions are stored verbatim in $code here.
# NOTE(review): presumably a later pass outside this chunk evaluates
# them -- confirm.
$code.=<<___;
	stm${g}	%r3,$ra,3*$SIZE_T($sp)

	llgf	$s0,0($inp)
	llgf	$s1,4($inp)
	llgf	$s2,8($inp)
	llgf	$s3,12($inp)

	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt

	l${g}	$out,3*$SIZE_T($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_encrypt,.-AES_encrypt

.type   _s390x_AES_encrypt,\@function
.align	16
_s390x_AES_encrypt:
	st${g}	$ra,15*$SIZE_T($sp)
	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$s3,12($key)
	l	$rounds,240($key)
	llill	$mask,`0xff<<3`
	aghi	$rounds,-1
	j	.Lenc_loop
.align	16
.Lenc_loop:
	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	ngr	$t1,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	ngr	$i2,$mask
	nr	$i3,$mask

	l	$s0,0($s0,$tbl)	# Te0[s0>>24]
	l	$t1,1($t1,$tbl)	# Te3[s0>>0]
	l	$t2,2($t2,$tbl) # Te2[s0>>8]
	l	$t3,3($t3,$tbl)	# Te1[s0>>16]

	x	$s0,3($i1,$tbl)	# Te1[s1>>16]
	l	$s1,0($s1,$tbl)	# Te0[s1>>24]
	x	$t2,1($i2,$tbl)	# Te3[s1>>0]
	x	$t3,2($i3,$tbl)	# Te2[s1>>8]

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	nr	$s2,$mask
	ngr	$i3,$mask

	xr	$s1,$t1
	srlg	$ra,$s3,`8-3`	# i1
	sllg	$t1,$s3,`0+3`	# i0
	nr	$ra,$mask
	la	$key,16($key)
	ngr	$t1,$mask

	x	$s0,2($i1,$tbl)	# Te2[s2>>8]
	x	$s1,3($i2,$tbl)	# Te1[s2>>16]
	l	$s2,0($s2,$tbl)	# Te0[s2>>24]
	x	$t3,1($i3,$tbl)	# Te3[s2>>0]

	srlg	$i3,$s3,`16-3`	# i2
	xr	$s2,$t2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	x	$s0,0($key)
	x	$s1,4($key)
	x	$s2,8($key)
	x	$t3,12($key)

	x	$s0,1($t1,$tbl)	# Te3[s3>>0]
	x	$s1,2($ra,$tbl)	# Te2[s3>>8]
	x	$s2,3($i3,$tbl)	# Te1[s3>>16]
	l	$s3,0($s3,$tbl)	# Te0[s3>>24]
	xr	$s3,$t3

	brct	$rounds,.Lenc_loop
	.align	16

	sllg	$t1,$s0,`0+3`
	srlg	$t2,$s0,`8-3`
	ngr	$t1,$mask
	srlg	$t3,$s0,`16-3`
	srl	$s0,`24-3`
	nr	$s0,$mask
	nr	$t2,$mask
	nr	$t3,$mask

	srlg	$i1,$s1,`16-3`	# i0
	sllg	$i2,$s1,`0+3`
	ngr	$i2,$mask
	srlg	$i3,$s1,`8-3`
	srl	$s1,`24-3`
	nr	$i1,$mask
	nr	$s1,$mask
	nr	$i3,$mask

	llgc	$s0,2($s0,$tbl)	# Te4[s0>>24]
	llgc	$t1,2($t1,$tbl)	# Te4[s0>>0]
	sll	$s0,24
	llgc	$t2,2($t2,$tbl)	# Te4[s0>>8]
	llgc	$t3,2($t3,$tbl)	# Te4[s0>>16]
	sll	$t2,8
	sll	$t3,16

	llgc	$i1,2($i1,$tbl)	# Te4[s1>>16]
	llgc	$s1,2($s1,$tbl)	# Te4[s1>>24]
	llgc	$i2,2($i2,$tbl)	# Te4[s1>>0]
	llgc	$i3,2($i3,$tbl)	# Te4[s1>>8]
	sll	$i1,16
	sll	$s1,24
	sll	$i3,8
	or	$s0,$i1
	or	$s1,$t1
	or	$t2,$i2
	or	$t3,$i3

	srlg	$i1,$s2,`8-3`	# i0
	srlg	$i2,$s2,`16-3`	# i1
	nr	$i1,$mask
	nr	$i2,$mask
	sllg	$i3,$s2,`0+3`
	srl	$s2,`24-3`
	ngr	$i3,$mask
	nr	$s2,$mask

	sllg	$t1,$s3,`0+3`	# i0
	srlg	$ra,$s3,`8-3`	# i1
	ngr	$t1,$mask

	llgc	$i1,2($i1,$tbl)	# Te4[s2>>8]
	llgc	$i2,2($i2,$tbl)	# Te4[s2>>16]
	sll	$i1,8
	llgc	$s2,2($s2,$tbl)	# Te4[s2>>24]
	llgc	$i3,2($i3,$tbl)	# Te4[s2>>0]
	sll	$i2,16
	nr	$ra,$mask
	sll	$s2,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$t2
	or	$t3,$i3

	srlg	$i3,$s3,`16-3`	# i2
	srl	$s3,`24-3`
	nr	$i3,$mask
	nr	$s3,$mask

	l	$t0,16($key)
	l	$t2,20($key)

	llgc	$i1,2($t1,$tbl)	# Te4[s3>>0]
	llgc	$i2,2($ra,$tbl)	# Te4[s3>>8]
	llgc	$i3,2($i3,$tbl)	# Te4[s3>>16]
	llgc	$s3,2($s3,$tbl)	# Te4[s3>>24]
	sll	$i2,8
	sll	$i3,16
	sll	$s3,24
	or	$s0,$i1
	or	$s1,$i2
	or	$s2,$i3
	or	$s3,$t3

	l${g}	$ra,15*$SIZE_T($sp)
	xr	$s0,$t0
	xr	$s1,$t2
	x	$s2,24($key)
	x	$s3,28($key)

	br	$ra
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
___
467e1051a39Sopenharmony_ci
468e1051a39Sopenharmony_ci$code.=<<___;
469e1051a39Sopenharmony_ci.type	AES_Td,\@object
470e1051a39Sopenharmony_ci.align	256
471e1051a39Sopenharmony_ciAES_Td:
472e1051a39Sopenharmony_ci___
473e1051a39Sopenharmony_ci&_data_word(
474e1051a39Sopenharmony_ci	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
475e1051a39Sopenharmony_ci	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
476e1051a39Sopenharmony_ci	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
477e1051a39Sopenharmony_ci	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
478e1051a39Sopenharmony_ci	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
479e1051a39Sopenharmony_ci	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
480e1051a39Sopenharmony_ci	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
481e1051a39Sopenharmony_ci	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
482e1051a39Sopenharmony_ci	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
483e1051a39Sopenharmony_ci	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
484e1051a39Sopenharmony_ci	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
485e1051a39Sopenharmony_ci	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
486e1051a39Sopenharmony_ci	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
487e1051a39Sopenharmony_ci	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
488e1051a39Sopenharmony_ci	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
489e1051a39Sopenharmony_ci	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
490e1051a39Sopenharmony_ci	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
491e1051a39Sopenharmony_ci	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
492e1051a39Sopenharmony_ci	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
493e1051a39Sopenharmony_ci	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
494e1051a39Sopenharmony_ci	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
495e1051a39Sopenharmony_ci	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
496e1051a39Sopenharmony_ci	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
497e1051a39Sopenharmony_ci	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
498e1051a39Sopenharmony_ci	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
499e1051a39Sopenharmony_ci	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
500e1051a39Sopenharmony_ci	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
501e1051a39Sopenharmony_ci	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
502e1051a39Sopenharmony_ci	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
503e1051a39Sopenharmony_ci	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
504e1051a39Sopenharmony_ci	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
505e1051a39Sopenharmony_ci	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
506e1051a39Sopenharmony_ci	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
507e1051a39Sopenharmony_ci	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
508e1051a39Sopenharmony_ci	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
509e1051a39Sopenharmony_ci	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
510e1051a39Sopenharmony_ci	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
511e1051a39Sopenharmony_ci	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
512e1051a39Sopenharmony_ci	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
513e1051a39Sopenharmony_ci	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
514e1051a39Sopenharmony_ci	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
515e1051a39Sopenharmony_ci	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
516e1051a39Sopenharmony_ci	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
517e1051a39Sopenharmony_ci	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
518e1051a39Sopenharmony_ci	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
519e1051a39Sopenharmony_ci	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
520e1051a39Sopenharmony_ci	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
521e1051a39Sopenharmony_ci	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
522e1051a39Sopenharmony_ci	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
523e1051a39Sopenharmony_ci	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
524e1051a39Sopenharmony_ci	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
525e1051a39Sopenharmony_ci	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
526e1051a39Sopenharmony_ci	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
527e1051a39Sopenharmony_ci	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
528e1051a39Sopenharmony_ci	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
529e1051a39Sopenharmony_ci	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
530e1051a39Sopenharmony_ci	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
531e1051a39Sopenharmony_ci	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
532e1051a39Sopenharmony_ci	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
533e1051a39Sopenharmony_ci	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
534e1051a39Sopenharmony_ci	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
535e1051a39Sopenharmony_ci	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
536e1051a39Sopenharmony_ci	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
537e1051a39Sopenharmony_ci	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
538e1051a39Sopenharmony_ci$code.=<<___;
539e1051a39Sopenharmony_ci# Td4[256]
540e1051a39Sopenharmony_ci.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
541e1051a39Sopenharmony_ci.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
542e1051a39Sopenharmony_ci.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
543e1051a39Sopenharmony_ci.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
544e1051a39Sopenharmony_ci.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
545e1051a39Sopenharmony_ci.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
546e1051a39Sopenharmony_ci.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
547e1051a39Sopenharmony_ci.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
548e1051a39Sopenharmony_ci.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
549e1051a39Sopenharmony_ci.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
550e1051a39Sopenharmony_ci.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
551e1051a39Sopenharmony_ci.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
552e1051a39Sopenharmony_ci.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
553e1051a39Sopenharmony_ci.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
554e1051a39Sopenharmony_ci.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
555e1051a39Sopenharmony_ci.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
556e1051a39Sopenharmony_ci.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
557e1051a39Sopenharmony_ci.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
558e1051a39Sopenharmony_ci.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
559e1051a39Sopenharmony_ci.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
560e1051a39Sopenharmony_ci.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
561e1051a39Sopenharmony_ci.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
562e1051a39Sopenharmony_ci.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
563e1051a39Sopenharmony_ci.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
564e1051a39Sopenharmony_ci.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
565e1051a39Sopenharmony_ci.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
566e1051a39Sopenharmony_ci.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
567e1051a39Sopenharmony_ci.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
568e1051a39Sopenharmony_ci.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
569e1051a39Sopenharmony_ci.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
570e1051a39Sopenharmony_ci.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
571e1051a39Sopenharmony_ci.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
572e1051a39Sopenharmony_ci.size	AES_Td,.-AES_Td
573e1051a39Sopenharmony_ci
574e1051a39Sopenharmony_ci# void AES_decrypt(const unsigned char *inp, unsigned char *out,
575e1051a39Sopenharmony_ci# 		 const AES_KEY *key) {
# Decrypt one 16-byte block from inp into out.  The word at offset 240 of
# the key structure picks the path: a value below 16 is taken to be a
# software round count, otherwise it is handed to km as the function code.
576e1051a39Sopenharmony_ci.globl	AES_decrypt
577e1051a39Sopenharmony_ci.type	AES_decrypt,\@function
578e1051a39Sopenharmony_ciAES_decrypt:
579e1051a39Sopenharmony_ci___
# Hardware path, emitted only when $softonly is false.  It sets up the km
# operands (%r1 = parameter block, %r2 = source, %r3 = length, %r4 =
# destination), issues km as a raw opcode word and returns directly.
580e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
581e1051a39Sopenharmony_ci	l	%r0,240($key)
582e1051a39Sopenharmony_ci	lhi	%r1,16
583e1051a39Sopenharmony_ci	clr	%r0,%r1
	# Below 16 means the schedule was expanded in software.
584e1051a39Sopenharmony_ci	jl	.Ldsoft
585e1051a39Sopenharmony_ci
	# The key structure itself serves as the km parameter block.
586e1051a39Sopenharmony_ci	la	%r1,0($key)
587e1051a39Sopenharmony_ci	#la	%r2,0($inp)
588e1051a39Sopenharmony_ci	la	%r4,0($out)
589e1051a39Sopenharmony_ci	lghi	%r3,16		# single block length
590e1051a39Sopenharmony_ci	.long	0xb92e0042	# km %r4,%r2
	# Branch mask 1 selects condition code 3, so the km is re-issued
	# until it no longer reports that condition.
591e1051a39Sopenharmony_ci	brc	1,.-4		# can this happen?
592e1051a39Sopenharmony_ci	br	%r14
593e1051a39Sopenharmony_ci.align	64
594e1051a39Sopenharmony_ci.Ldsoft:
595e1051a39Sopenharmony_ci___
# Software path: load the block into the four state registers, run the
# table-driven core and store the result.
596e1051a39Sopenharmony_ci$code.=<<___;
	# Save %r3 through the return address; the slot at 3*SIZE_T is read
	# back below to recover the out pointer.
597e1051a39Sopenharmony_ci	stm${g}	%r3,$ra,3*$SIZE_T($sp)
598e1051a39Sopenharmony_ci
	# Load the input block as four zero-extended 32-bit words.
599e1051a39Sopenharmony_ci	llgf	$s0,0($inp)
600e1051a39Sopenharmony_ci	llgf	$s1,4($inp)
601e1051a39Sopenharmony_ci	llgf	$s2,8($inp)
602e1051a39Sopenharmony_ci	llgf	$s3,12($inp)
603e1051a39Sopenharmony_ci
604e1051a39Sopenharmony_ci	larl	$tbl,AES_Td
605e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_decrypt
606e1051a39Sopenharmony_ci
	# Reload out from the first slot written by the stm above.
607e1051a39Sopenharmony_ci	l${g}	$out,3*$SIZE_T($sp)
608e1051a39Sopenharmony_ci	st	$s0,0($out)
609e1051a39Sopenharmony_ci	st	$s1,4($out)
610e1051a39Sopenharmony_ci	st	$s2,8($out)
611e1051a39Sopenharmony_ci	st	$s3,12($out)
612e1051a39Sopenharmony_ci
	# Only %r6 and above are restored before returning.
613e1051a39Sopenharmony_ci	lm${g}	%r6,$ra,6*$SIZE_T($sp)
614e1051a39Sopenharmony_ci	br	$ra
615e1051a39Sopenharmony_ci.size	AES_decrypt,.-AES_decrypt
616e1051a39Sopenharmony_ci
# Internal decryption core, called from AES_decrypt above.
# On entry the four state words are in s0-s3, the key register addresses
# the round keys and tbl addresses AES_Td.  The decrypted words are left
# in s0-s3.  The return address is parked at 15*SIZE_T(sp) because ra is
# used as a scratch register inside the rounds.  The key register is
# advanced by 16 per loop pass and the t and i scratch registers are
# overwritten.
617e1051a39Sopenharmony_ci.type   _s390x_AES_decrypt,\@function
618e1051a39Sopenharmony_ci.align	16
619e1051a39Sopenharmony_ci_s390x_AES_decrypt:
620e1051a39Sopenharmony_ci	st${g}	$ra,15*$SIZE_T($sp)
	# Xor the first round key into the state.
621e1051a39Sopenharmony_ci	x	$s0,0($key)
622e1051a39Sopenharmony_ci	x	$s1,4($key)
623e1051a39Sopenharmony_ci	x	$s2,8($key)
624e1051a39Sopenharmony_ci	x	$s3,12($key)
625e1051a39Sopenharmony_ci	l	$rounds,240($key)
	# Byte mask pre-shifted by 3: every table index below is a state
	# byte multiplied by 8.
626e1051a39Sopenharmony_ci	llill	$mask,`0xff<<3`
	# The loop runs rounds-1 times; the last round follows the loop.
627e1051a39Sopenharmony_ci	aghi	$rounds,-1
628e1051a39Sopenharmony_ci	j	.Ldec_loop
629e1051a39Sopenharmony_ci.align	16
630e1051a39Sopenharmony_ci.Ldec_loop:
	# Shift each byte of s0 so that it lands on the mask, giving four
	# table offsets.
631e1051a39Sopenharmony_ci	srlg	$t1,$s0,`16-3`
632e1051a39Sopenharmony_ci	srlg	$t2,$s0,`8-3`
633e1051a39Sopenharmony_ci	sllg	$t3,$s0,`0+3`
634e1051a39Sopenharmony_ci	srl	$s0,`24-3`
635e1051a39Sopenharmony_ci	nr	$s0,$mask
636e1051a39Sopenharmony_ci	nr	$t1,$mask
637e1051a39Sopenharmony_ci	nr	$t2,$mask
638e1051a39Sopenharmony_ci	ngr	$t3,$mask
639e1051a39Sopenharmony_ci
640e1051a39Sopenharmony_ci	sllg	$i1,$s1,`0+3`	# i0
641e1051a39Sopenharmony_ci	srlg	$i2,$s1,`16-3`
642e1051a39Sopenharmony_ci	srlg	$i3,$s1,`8-3`
643e1051a39Sopenharmony_ci	srl	$s1,`24-3`
644e1051a39Sopenharmony_ci	ngr	$i1,$mask
645e1051a39Sopenharmony_ci	nr	$s1,$mask
646e1051a39Sopenharmony_ci	nr	$i2,$mask
647e1051a39Sopenharmony_ci	nr	$i3,$mask
648e1051a39Sopenharmony_ci
	# The Td0..Td3 labels correspond to byte displacements 0, 3, 2 and 1
	# into the same 8-byte table entry.  NOTE(review): the table layout
	# is defined outside this section; presumably each entry holds the
	# word twice so that the displaced loads read rotated copies.
649e1051a39Sopenharmony_ci	l	$s0,0($s0,$tbl)	# Td0[s0>>24]
650e1051a39Sopenharmony_ci	l	$t1,3($t1,$tbl)	# Td1[s0>>16]
651e1051a39Sopenharmony_ci	l	$t2,2($t2,$tbl)	# Td2[s0>>8]
652e1051a39Sopenharmony_ci	l	$t3,1($t3,$tbl)	# Td3[s0>>0]
653e1051a39Sopenharmony_ci
654e1051a39Sopenharmony_ci	x	$s0,1($i1,$tbl)	# Td3[s1>>0]
655e1051a39Sopenharmony_ci	l	$s1,0($s1,$tbl)	# Td0[s1>>24]
656e1051a39Sopenharmony_ci	x	$t2,3($i2,$tbl)	# Td1[s1>>16]
657e1051a39Sopenharmony_ci	x	$t3,2($i3,$tbl)	# Td2[s1>>8]
658e1051a39Sopenharmony_ci
659e1051a39Sopenharmony_ci	srlg	$i1,$s2,`8-3`	# i0
660e1051a39Sopenharmony_ci	sllg	$i2,$s2,`0+3`	# i1
661e1051a39Sopenharmony_ci	srlg	$i3,$s2,`16-3`
662e1051a39Sopenharmony_ci	srl	$s2,`24-3`
663e1051a39Sopenharmony_ci	nr	$i1,$mask
664e1051a39Sopenharmony_ci	ngr	$i2,$mask
665e1051a39Sopenharmony_ci	nr	$s2,$mask
666e1051a39Sopenharmony_ci	nr	$i3,$mask
667e1051a39Sopenharmony_ci
668e1051a39Sopenharmony_ci	xr	$s1,$t1
	# ra serves as an extra index register from here on.
669e1051a39Sopenharmony_ci	srlg	$ra,$s3,`8-3`	# i1
670e1051a39Sopenharmony_ci	srlg	$t1,$s3,`16-3`	# i0
671e1051a39Sopenharmony_ci	nr	$ra,$mask
	# Step to the next round key.
672e1051a39Sopenharmony_ci	la	$key,16($key)
673e1051a39Sopenharmony_ci	nr	$t1,$mask
674e1051a39Sopenharmony_ci
675e1051a39Sopenharmony_ci	x	$s0,2($i1,$tbl)	# Td2[s2>>8]
676e1051a39Sopenharmony_ci	x	$s1,1($i2,$tbl)	# Td3[s2>>0]
677e1051a39Sopenharmony_ci	l	$s2,0($s2,$tbl)	# Td0[s2>>24]
678e1051a39Sopenharmony_ci	x	$t3,3($i3,$tbl)	# Td1[s2>>16]
679e1051a39Sopenharmony_ci
680e1051a39Sopenharmony_ci	sllg	$i3,$s3,`0+3`	# i2
681e1051a39Sopenharmony_ci	srl	$s3,`24-3`
682e1051a39Sopenharmony_ci	ngr	$i3,$mask
683e1051a39Sopenharmony_ci	nr	$s3,$mask
684e1051a39Sopenharmony_ci
685e1051a39Sopenharmony_ci	xr	$s2,$t2
	# Xor in the round key; t3 holds the partial result for word 3 and
	# is folded into s3 at the end of the pass.
686e1051a39Sopenharmony_ci	x	$s0,0($key)
687e1051a39Sopenharmony_ci	x	$s1,4($key)
688e1051a39Sopenharmony_ci	x	$s2,8($key)
689e1051a39Sopenharmony_ci	x	$t3,12($key)
690e1051a39Sopenharmony_ci
691e1051a39Sopenharmony_ci	x	$s0,3($t1,$tbl)	# Td1[s3>>16]
692e1051a39Sopenharmony_ci	x	$s1,2($ra,$tbl)	# Td2[s3>>8]
693e1051a39Sopenharmony_ci	x	$s2,1($i3,$tbl)	# Td3[s3>>0]
694e1051a39Sopenharmony_ci	l	$s3,0($s3,$tbl)	# Td0[s3>>24]
695e1051a39Sopenharmony_ci	xr	$s3,$t3
696e1051a39Sopenharmony_ci
697e1051a39Sopenharmony_ci	brct	$rounds,.Ldec_loop
698e1051a39Sopenharmony_ci	.align	16
699e1051a39Sopenharmony_ci
	# Last round: single-byte lookups in Td4, which sits 2048 bytes
	# past the start of the table.  The four loads below only touch the
	# table; their results are overwritten before being used.
700e1051a39Sopenharmony_ci	l	$t1,`2048+0`($tbl)	# prefetch Td4
701e1051a39Sopenharmony_ci	l	$t2,`2048+64`($tbl)
702e1051a39Sopenharmony_ci	l	$t3,`2048+128`($tbl)
703e1051a39Sopenharmony_ci	l	$i1,`2048+192`($tbl)
	# Td4 entries are single bytes, so the mask is no longer scaled.
704e1051a39Sopenharmony_ci	llill	$mask,0xff
705e1051a39Sopenharmony_ci
706e1051a39Sopenharmony_ci	srlg	$i3,$s0,24	# i0
707e1051a39Sopenharmony_ci	srlg	$t1,$s0,16
708e1051a39Sopenharmony_ci	srlg	$t2,$s0,8
709e1051a39Sopenharmony_ci	nr	$s0,$mask	# i3
710e1051a39Sopenharmony_ci	nr	$t1,$mask
711e1051a39Sopenharmony_ci
712e1051a39Sopenharmony_ci	srlg	$i1,$s1,24
713e1051a39Sopenharmony_ci	nr	$t2,$mask
714e1051a39Sopenharmony_ci	srlg	$i2,$s1,16
715e1051a39Sopenharmony_ci	srlg	$ra,$s1,8
716e1051a39Sopenharmony_ci	nr	$s1,$mask	# i0
717e1051a39Sopenharmony_ci	nr	$i2,$mask
718e1051a39Sopenharmony_ci	nr	$ra,$mask
719e1051a39Sopenharmony_ci
	# Look up each byte and shift it back to its output position.
720e1051a39Sopenharmony_ci	llgc	$i3,2048($i3,$tbl)	# Td4[s0>>24]
721e1051a39Sopenharmony_ci	llgc	$t1,2048($t1,$tbl)	# Td4[s0>>16]
722e1051a39Sopenharmony_ci	llgc	$t2,2048($t2,$tbl)	# Td4[s0>>8]
723e1051a39Sopenharmony_ci	sll	$t1,16
724e1051a39Sopenharmony_ci	llgc	$t3,2048($s0,$tbl)	# Td4[s0>>0]
725e1051a39Sopenharmony_ci	sllg	$s0,$i3,24
726e1051a39Sopenharmony_ci	sll	$t2,8
727e1051a39Sopenharmony_ci
728e1051a39Sopenharmony_ci	llgc	$s1,2048($s1,$tbl)	# Td4[s1>>0]
729e1051a39Sopenharmony_ci	llgc	$i1,2048($i1,$tbl)	# Td4[s1>>24]
730e1051a39Sopenharmony_ci	llgc	$i2,2048($i2,$tbl)	# Td4[s1>>16]
731e1051a39Sopenharmony_ci	sll	$i1,24
732e1051a39Sopenharmony_ci	llgc	$i3,2048($ra,$tbl)	# Td4[s1>>8]
733e1051a39Sopenharmony_ci	sll	$i2,16
734e1051a39Sopenharmony_ci	sll	$i3,8
735e1051a39Sopenharmony_ci	or	$s0,$s1
736e1051a39Sopenharmony_ci	or	$t1,$i1
737e1051a39Sopenharmony_ci	or	$t2,$i2
738e1051a39Sopenharmony_ci	or	$t3,$i3
739e1051a39Sopenharmony_ci
740e1051a39Sopenharmony_ci	srlg	$i1,$s2,8	# i0
741e1051a39Sopenharmony_ci	srlg	$i2,$s2,24
742e1051a39Sopenharmony_ci	srlg	$i3,$s2,16
743e1051a39Sopenharmony_ci	nr	$s2,$mask	# i1
744e1051a39Sopenharmony_ci	nr	$i1,$mask
745e1051a39Sopenharmony_ci	nr	$i3,$mask
746e1051a39Sopenharmony_ci	llgc	$i1,2048($i1,$tbl)	# Td4[s2>>8]
747e1051a39Sopenharmony_ci	llgc	$s1,2048($s2,$tbl)	# Td4[s2>>0]
748e1051a39Sopenharmony_ci	llgc	$i2,2048($i2,$tbl)	# Td4[s2>>24]
749e1051a39Sopenharmony_ci	llgc	$i3,2048($i3,$tbl)	# Td4[s2>>16]
750e1051a39Sopenharmony_ci	sll	$i1,8
751e1051a39Sopenharmony_ci	sll	$i2,24
752e1051a39Sopenharmony_ci	or	$s0,$i1
753e1051a39Sopenharmony_ci	sll	$i3,16
754e1051a39Sopenharmony_ci	or	$t2,$i2
755e1051a39Sopenharmony_ci	or	$t3,$i3
756e1051a39Sopenharmony_ci
757e1051a39Sopenharmony_ci	srlg	$i1,$s3,16	# i0
758e1051a39Sopenharmony_ci	srlg	$i2,$s3,8	# i1
759e1051a39Sopenharmony_ci	srlg	$i3,$s3,24
760e1051a39Sopenharmony_ci	nr	$s3,$mask	# i2
761e1051a39Sopenharmony_ci	nr	$i1,$mask
762e1051a39Sopenharmony_ci	nr	$i2,$mask
763e1051a39Sopenharmony_ci
	# ra is no longer needed as scratch; bring back the return address.
764e1051a39Sopenharmony_ci	l${g}	$ra,15*$SIZE_T($sp)
765e1051a39Sopenharmony_ci	or	$s1,$t1
	# Words 0 and 1 of the last round key; the key register still
	# points one round key behind it.
766e1051a39Sopenharmony_ci	l	$t0,16($key)
767e1051a39Sopenharmony_ci	l	$t1,20($key)
768e1051a39Sopenharmony_ci
769e1051a39Sopenharmony_ci	llgc	$i1,2048($i1,$tbl)	# Td4[s3>>16]
770e1051a39Sopenharmony_ci	llgc	$i2,2048($i2,$tbl)	# Td4[s3>>8]
771e1051a39Sopenharmony_ci	sll	$i1,16
772e1051a39Sopenharmony_ci	llgc	$s2,2048($s3,$tbl)	# Td4[s3>>0]
773e1051a39Sopenharmony_ci	llgc	$s3,2048($i3,$tbl)	# Td4[s3>>24]
774e1051a39Sopenharmony_ci	sll	$i2,8
775e1051a39Sopenharmony_ci	sll	$s3,24
776e1051a39Sopenharmony_ci	or	$s0,$i1
777e1051a39Sopenharmony_ci	or	$s1,$i2
778e1051a39Sopenharmony_ci	or	$s2,$t2
779e1051a39Sopenharmony_ci	or	$s3,$t3
780e1051a39Sopenharmony_ci
	# Xor in the last round key.
781e1051a39Sopenharmony_ci	xr	$s0,$t0
782e1051a39Sopenharmony_ci	xr	$s1,$t1
783e1051a39Sopenharmony_ci	x	$s2,24($key)
784e1051a39Sopenharmony_ci	x	$s3,28($key)
785e1051a39Sopenharmony_ci
786e1051a39Sopenharmony_ci	br	$ra
787e1051a39Sopenharmony_ci.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
788e1051a39Sopenharmony_ci___
789e1051a39Sopenharmony_ci
# AES_set_encrypt_key: argument validation plus the hardware key setup.
# The routine returns its status in %r2: 0 on success, -1 when inp or key
# is NULL, -2 when bits is not 128, 192 or 256.  The second label,
# _s390x_AES_set_encrypt_key, is the entry used by AES_set_decrypt_key.
# On success $t0 is left holding the value stored at offset 240 of the key,
# which AES_set_decrypt_key reads instead of reloading it from memory.
790e1051a39Sopenharmony_ci$code.=<<___;
791e1051a39Sopenharmony_ci# int AES_set_encrypt_key(const unsigned char *in, int bits,
792e1051a39Sopenharmony_ci# 		 AES_KEY *key) {
793e1051a39Sopenharmony_ci.globl	AES_set_encrypt_key
794e1051a39Sopenharmony_ci.type	AES_set_encrypt_key,\@function
795e1051a39Sopenharmony_ci.align	16
796e1051a39Sopenharmony_ciAES_set_encrypt_key:
797e1051a39Sopenharmony_ci_s390x_AES_set_encrypt_key:
	# Reject NULL pointers.
798e1051a39Sopenharmony_ci	lghi	$t0,0
799e1051a39Sopenharmony_ci	cl${g}r	$inp,$t0
800e1051a39Sopenharmony_ci	je	.Lminus1
801e1051a39Sopenharmony_ci	cl${g}r	$key,$t0
802e1051a39Sopenharmony_ci	je	.Lminus1
803e1051a39Sopenharmony_ci
	# Accept only the three AES key lengths.
804e1051a39Sopenharmony_ci	lghi	$t0,128
805e1051a39Sopenharmony_ci	clr	$bits,$t0
806e1051a39Sopenharmony_ci	je	.Lproceed
807e1051a39Sopenharmony_ci	lghi	$t0,192
808e1051a39Sopenharmony_ci	clr	$bits,$t0
809e1051a39Sopenharmony_ci	je	.Lproceed
810e1051a39Sopenharmony_ci	lghi	$t0,256
811e1051a39Sopenharmony_ci	clr	$bits,$t0
812e1051a39Sopenharmony_ci	je	.Lproceed
813e1051a39Sopenharmony_ci	lghi	%r2,-2
814e1051a39Sopenharmony_ci	br	%r14
815e1051a39Sopenharmony_ci
816e1051a39Sopenharmony_ci.align	16
817e1051a39Sopenharmony_ci.Lproceed:
818e1051a39Sopenharmony_ci___
# Hardware path, emitted only when $softonly is false.  If the capability
# words report both km and kmc support for this key length, the raw key is
# copied into the key structure unexpanded and the function code is stored
# at offset 240; otherwise control goes to the software expansion.
819e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
820e1051a39Sopenharmony_ci	# convert bits to km(c) code, [128,192,256]->[18,19,20]
821e1051a39Sopenharmony_ci	lhi	%r5,-128
822e1051a39Sopenharmony_ci	lhi	%r0,18
823e1051a39Sopenharmony_ci	ar	%r5,$bits
824e1051a39Sopenharmony_ci	srl	%r5,6
825e1051a39Sopenharmony_ci	ar	%r5,%r0
826e1051a39Sopenharmony_ci
827e1051a39Sopenharmony_ci	larl	%r1,OPENSSL_s390xcap_P
	# Build a single-bit mask: start with the top bit of a 64-bit
	# register and shift it right by the function code.
828e1051a39Sopenharmony_ci	llihh	%r0,0x8000
829e1051a39Sopenharmony_ci	srlg	%r0,%r0,0(%r5)
830e1051a39Sopenharmony_ci	ng	%r0,S390X_KM(%r1)  # check availability of both km...
831e1051a39Sopenharmony_ci	ng	%r0,S390X_KMC(%r1) # ...and kmc support for given key length
832e1051a39Sopenharmony_ci	jz	.Lekey_internal
833e1051a39Sopenharmony_ci
834e1051a39Sopenharmony_ci	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
835e1051a39Sopenharmony_ci	stmg	%r0,%r1,0($key)
836e1051a39Sopenharmony_ci	lhi	%r0,192
837e1051a39Sopenharmony_ci	cr	$bits,%r0
	# Fewer than 192 bits: nothing more to copy.
838e1051a39Sopenharmony_ci	jl	1f
839e1051a39Sopenharmony_ci	lg	%r1,16($inp)
840e1051a39Sopenharmony_ci	stg	%r1,16($key)
	# Still testing the cr above (lg and stg leave the condition code
	# alone): exactly 192 bits stops here, 256 bits copies 8 more bytes.
841e1051a39Sopenharmony_ci	je	1f
842e1051a39Sopenharmony_ci	lg	%r1,24($inp)
843e1051a39Sopenharmony_ci	stg	%r1,24($key)
844e1051a39Sopenharmony_ci1:	st	$bits,236($key)	# save bits [for debugging purposes]
	# Hand the function code to AES_set_decrypt_key in t0 as well.
845e1051a39Sopenharmony_ci	lgr	$t0,%r5
846e1051a39Sopenharmony_ci	st	%r5,240($key)	# save km(c) code
847e1051a39Sopenharmony_ci	lghi	%r2,0
848e1051a39Sopenharmony_ci	br	%r14
849e1051a39Sopenharmony_ci___
# Software key expansion, entered at .Lekey_internal.  It saves %r4-%r13,
# points $tbl at AES_Te+2048 (used below as the Te4 byte table, with the
# rcon words read at displacement 256 from it) and copies key words 0-3
# into the schedule.  A 128-bit key is then expanded in ten passes of
# .L128_loop, each producing four words; other lengths branch to .Lnot128.
# The 128-bit path stores the round count 10 at offset 240 and returns 0.
850e1051a39Sopenharmony_ci$code.=<<___;
851e1051a39Sopenharmony_ci.align	16
852e1051a39Sopenharmony_ci.Lekey_internal:
853e1051a39Sopenharmony_ci	stm${g}	%r4,%r13,4*$SIZE_T($sp)	# all non-volatile regs and $key
854e1051a39Sopenharmony_ci
855e1051a39Sopenharmony_ci	larl	$tbl,AES_Te+2048
856e1051a39Sopenharmony_ci
857e1051a39Sopenharmony_ci	llgf	$s0,0($inp)
858e1051a39Sopenharmony_ci	llgf	$s1,4($inp)
859e1051a39Sopenharmony_ci	llgf	$s2,8($inp)
860e1051a39Sopenharmony_ci	llgf	$s3,12($inp)
861e1051a39Sopenharmony_ci	st	$s0,0($key)
862e1051a39Sopenharmony_ci	st	$s1,4($key)
863e1051a39Sopenharmony_ci	st	$s2,8($key)
864e1051a39Sopenharmony_ci	st	$s3,12($key)
865e1051a39Sopenharmony_ci	lghi	$t0,128
866e1051a39Sopenharmony_ci	cr	$bits,$t0
867e1051a39Sopenharmony_ci	jne	.Lnot128
868e1051a39Sopenharmony_ci
869e1051a39Sopenharmony_ci	llill	$mask,0xff
870e1051a39Sopenharmony_ci	lghi	$t3,0			# i=0
	# Ten is both the stored round count and the loop counter.
871e1051a39Sopenharmony_ci	lghi	$rounds,10
872e1051a39Sopenharmony_ci	st	$rounds,240($key)
873e1051a39Sopenharmony_ci
	# Split rk[3] into four byte indices for the first pass.  The top
	# byte needs no mask because srlg by 24 of a 32-bit value leaves
	# only that byte.
874e1051a39Sopenharmony_ci	llgfr	$t2,$s3			# temp=rk[3]
875e1051a39Sopenharmony_ci	srlg	$i1,$s3,8
876e1051a39Sopenharmony_ci	srlg	$i2,$s3,16
877e1051a39Sopenharmony_ci	srlg	$i3,$s3,24
878e1051a39Sopenharmony_ci	nr	$t2,$mask
879e1051a39Sopenharmony_ci	nr	$i1,$mask
880e1051a39Sopenharmony_ci	nr	$i2,$mask
881e1051a39Sopenharmony_ci
882e1051a39Sopenharmony_ci.align	16
883e1051a39Sopenharmony_ci.L128_loop:
	# Turn the byte indices into addresses, then insert the substituted
	# bytes one position higher than they came from, which performs the
	# substitution and the byte rotation in one go.
884e1051a39Sopenharmony_ci	la	$t2,0($t2,$tbl)
885e1051a39Sopenharmony_ci	la	$i1,0($i1,$tbl)
886e1051a39Sopenharmony_ci	la	$i2,0($i2,$tbl)
887e1051a39Sopenharmony_ci	la	$i3,0($i3,$tbl)
888e1051a39Sopenharmony_ci	icm	$t2,2,0($t2)		# Te4[rk[3]>>0]<<8
889e1051a39Sopenharmony_ci	icm	$t2,4,0($i1)		# Te4[rk[3]>>8]<<16
890e1051a39Sopenharmony_ci	icm	$t2,8,0($i2)		# Te4[rk[3]>>16]<<24
891e1051a39Sopenharmony_ci	icm	$t2,1,0($i3)		# Te4[rk[3]>>24]
892e1051a39Sopenharmony_ci	x	$t2,256($t3,$tbl)	# rcon[i]
893e1051a39Sopenharmony_ci	xr	$s0,$t2			# rk[4]=rk[0]^...
894e1051a39Sopenharmony_ci	xr	$s1,$s0			# rk[5]=rk[1]^rk[4]
895e1051a39Sopenharmony_ci	xr	$s2,$s1			# rk[6]=rk[2]^rk[5]
896e1051a39Sopenharmony_ci	xr	$s3,$s2			# rk[7]=rk[3]^rk[6]
897e1051a39Sopenharmony_ci
	# Prepare the byte indices for the next pass from the new last word.
898e1051a39Sopenharmony_ci	llgfr	$t2,$s3			# temp=rk[7], i.e. rk[3] of the next pass
899e1051a39Sopenharmony_ci	srlg	$i1,$s3,8
900e1051a39Sopenharmony_ci	srlg	$i2,$s3,16
901e1051a39Sopenharmony_ci	nr	$t2,$mask
902e1051a39Sopenharmony_ci	nr	$i1,$mask
903e1051a39Sopenharmony_ci	srlg	$i3,$s3,24
904e1051a39Sopenharmony_ci	nr	$i2,$mask
905e1051a39Sopenharmony_ci
906e1051a39Sopenharmony_ci	st	$s0,16($key)
907e1051a39Sopenharmony_ci	st	$s1,20($key)
908e1051a39Sopenharmony_ci	st	$s2,24($key)
909e1051a39Sopenharmony_ci	st	$s3,28($key)
910e1051a39Sopenharmony_ci	la	$key,16($key)		# key+=4
911e1051a39Sopenharmony_ci	la	$t3,4($t3)		# i++
912e1051a39Sopenharmony_ci	brct	$rounds,.L128_loop
	# Leave the round count in t0 for AES_set_decrypt_key, which reads
	# it from there rather than from offset 240.
913e1051a39Sopenharmony_ci	lghi	$t0,10
914e1051a39Sopenharmony_ci	lghi	%r2,0
	# Reloads everything saved on entry, which per the note there
	# includes the key pointer advanced by the loop.
915e1051a39Sopenharmony_ci	lm${g}	%r4,%r13,4*$SIZE_T($sp)
916e1051a39Sopenharmony_ci	br	$ra
917e1051a39Sopenharmony_ci
# Keys longer than 128 bits: copy key words 4 and 5 into the schedule, then
# expand a 192-bit key here or branch to .Lnot192 for 256 bits.
# The 192-bit loop runs eight times.  Every pass emits four words in
# .L192_loop and all passes but the last emit two more in .L192_continue,
# which together with the six key words gives 52 schedule words.
# Stores the round count 12 at offset 240 and returns 0.
918e1051a39Sopenharmony_ci.align	16
919e1051a39Sopenharmony_ci.Lnot128:
920e1051a39Sopenharmony_ci	llgf	$t0,16($inp)
921e1051a39Sopenharmony_ci	llgf	$t1,20($inp)
922e1051a39Sopenharmony_ci	st	$t0,16($key)
923e1051a39Sopenharmony_ci	st	$t1,20($key)
924e1051a39Sopenharmony_ci	lghi	$t0,192
925e1051a39Sopenharmony_ci	cr	$bits,$t0
926e1051a39Sopenharmony_ci	jne	.Lnot192
927e1051a39Sopenharmony_ci
928e1051a39Sopenharmony_ci	llill	$mask,0xff
929e1051a39Sopenharmony_ci	lghi	$t3,0			# i=0
930e1051a39Sopenharmony_ci	lghi	$rounds,12
931e1051a39Sopenharmony_ci	st	$rounds,240($key)
	# The rounds register now becomes the loop counter.
932e1051a39Sopenharmony_ci	lghi	$rounds,8
933e1051a39Sopenharmony_ci
	# Split rk[5], still in t1, into four byte indices.
934e1051a39Sopenharmony_ci	srlg	$i1,$t1,8
935e1051a39Sopenharmony_ci	srlg	$i2,$t1,16
936e1051a39Sopenharmony_ci	srlg	$i3,$t1,24
937e1051a39Sopenharmony_ci	nr	$t1,$mask
938e1051a39Sopenharmony_ci	nr	$i1,$mask
939e1051a39Sopenharmony_ci	nr	$i2,$mask
940e1051a39Sopenharmony_ci
941e1051a39Sopenharmony_ci.align	16
942e1051a39Sopenharmony_ci.L192_loop:
	# Substitute and rotate the previous word, same scheme as in the
	# 128-bit loop.
943e1051a39Sopenharmony_ci	la	$t1,0($t1,$tbl)
944e1051a39Sopenharmony_ci	la	$i1,0($i1,$tbl)
945e1051a39Sopenharmony_ci	la	$i2,0($i2,$tbl)
946e1051a39Sopenharmony_ci	la	$i3,0($i3,$tbl)
947e1051a39Sopenharmony_ci	icm	$t1,2,0($t1)		# Te4[rk[5]>>0]<<8
948e1051a39Sopenharmony_ci	icm	$t1,4,0($i1)		# Te4[rk[5]>>8]<<16
949e1051a39Sopenharmony_ci	icm	$t1,8,0($i2)		# Te4[rk[5]>>16]<<24
950e1051a39Sopenharmony_ci	icm	$t1,1,0($i3)		# Te4[rk[5]>>24]
951e1051a39Sopenharmony_ci	x	$t1,256($t3,$tbl)	# rcon[i]
952e1051a39Sopenharmony_ci	xr	$s0,$t1			# rk[6]=rk[0]^...
953e1051a39Sopenharmony_ci	xr	$s1,$s0			# rk[7]=rk[1]^rk[6]
954e1051a39Sopenharmony_ci	xr	$s2,$s1			# rk[8]=rk[2]^rk[7]
955e1051a39Sopenharmony_ci	xr	$s3,$s2			# rk[9]=rk[3]^rk[8]
956e1051a39Sopenharmony_ci
957e1051a39Sopenharmony_ci	st	$s0,24($key)
958e1051a39Sopenharmony_ci	st	$s1,28($key)
959e1051a39Sopenharmony_ci	st	$s2,32($key)
960e1051a39Sopenharmony_ci	st	$s3,36($key)
961e1051a39Sopenharmony_ci	brct	$rounds,.L192_continue
	# Schedule complete.  Leave the round count in t0 for
	# AES_set_decrypt_key and restore the registers saved on entry.
962e1051a39Sopenharmony_ci	lghi	$t0,12
963e1051a39Sopenharmony_ci	lghi	%r2,0
964e1051a39Sopenharmony_ci	lm${g}	%r4,%r13,4*$SIZE_T($sp)
965e1051a39Sopenharmony_ci	br	$ra
966e1051a39Sopenharmony_ci
967e1051a39Sopenharmony_ci.align	16
968e1051a39Sopenharmony_ci.L192_continue:
	# Two more words per pass, chained from words 4 and 5 of the
	# current six-word group.
969e1051a39Sopenharmony_ci	lgr	$t1,$s3
970e1051a39Sopenharmony_ci	x	$t1,16($key)		# rk[10]=rk[4]^rk[9]
971e1051a39Sopenharmony_ci	st	$t1,40($key)
972e1051a39Sopenharmony_ci	x	$t1,20($key)		# rk[11]=rk[5]^rk[10]
973e1051a39Sopenharmony_ci	st	$t1,44($key)
974e1051a39Sopenharmony_ci
	# Byte indices of the newest word for the next pass.
975e1051a39Sopenharmony_ci	srlg	$i1,$t1,8
976e1051a39Sopenharmony_ci	srlg	$i2,$t1,16
977e1051a39Sopenharmony_ci	srlg	$i3,$t1,24
978e1051a39Sopenharmony_ci	nr	$t1,$mask
979e1051a39Sopenharmony_ci	nr	$i1,$mask
980e1051a39Sopenharmony_ci	nr	$i2,$mask
981e1051a39Sopenharmony_ci
982e1051a39Sopenharmony_ci	la	$key,24($key)		# key+=6
983e1051a39Sopenharmony_ci	la	$t3,4($t3)		# i++
984e1051a39Sopenharmony_ci	j	.L192_loop
985e1051a39Sopenharmony_ci
# Expansion of a 256-bit key: copy key words 6 and 7, then run the loop
# seven times.  Every pass emits four words in .L256_loop and all passes
# but the last emit four more in .L256_continue, which together with the
# eight key words gives 60 schedule words.
# Stores the round count 14 at offset 240 and returns 0.
986e1051a39Sopenharmony_ci.align	16
987e1051a39Sopenharmony_ci.Lnot192:
988e1051a39Sopenharmony_ci	llgf	$t0,24($inp)
989e1051a39Sopenharmony_ci	llgf	$t1,28($inp)
990e1051a39Sopenharmony_ci	st	$t0,24($key)
991e1051a39Sopenharmony_ci	st	$t1,28($key)
992e1051a39Sopenharmony_ci	llill	$mask,0xff
993e1051a39Sopenharmony_ci	lghi	$t3,0			# i=0
994e1051a39Sopenharmony_ci	lghi	$rounds,14
995e1051a39Sopenharmony_ci	st	$rounds,240($key)
	# The rounds register now becomes the loop counter.
996e1051a39Sopenharmony_ci	lghi	$rounds,7
997e1051a39Sopenharmony_ci
	# Split rk[7], still in t1, into four byte indices.
998e1051a39Sopenharmony_ci	srlg	$i1,$t1,8
999e1051a39Sopenharmony_ci	srlg	$i2,$t1,16
1000e1051a39Sopenharmony_ci	srlg	$i3,$t1,24
1001e1051a39Sopenharmony_ci	nr	$t1,$mask
1002e1051a39Sopenharmony_ci	nr	$i1,$mask
1003e1051a39Sopenharmony_ci	nr	$i2,$mask
1004e1051a39Sopenharmony_ci
1005e1051a39Sopenharmony_ci.align	16
1006e1051a39Sopenharmony_ci.L256_loop:
	# Substitute and rotate the previous word, then xor in rcon.
1007e1051a39Sopenharmony_ci	la	$t1,0($t1,$tbl)
1008e1051a39Sopenharmony_ci	la	$i1,0($i1,$tbl)
1009e1051a39Sopenharmony_ci	la	$i2,0($i2,$tbl)
1010e1051a39Sopenharmony_ci	la	$i3,0($i3,$tbl)
1011e1051a39Sopenharmony_ci	icm	$t1,2,0($t1)		# Te4[rk[7]>>0]<<8
1012e1051a39Sopenharmony_ci	icm	$t1,4,0($i1)		# Te4[rk[7]>>8]<<16
1013e1051a39Sopenharmony_ci	icm	$t1,8,0($i2)		# Te4[rk[7]>>16]<<24
1014e1051a39Sopenharmony_ci	icm	$t1,1,0($i3)		# Te4[rk[7]>>24]
1015e1051a39Sopenharmony_ci	x	$t1,256($t3,$tbl)	# rcon[i]
1016e1051a39Sopenharmony_ci	xr	$s0,$t1			# rk[8]=rk[0]^...
1017e1051a39Sopenharmony_ci	xr	$s1,$s0			# rk[9]=rk[1]^rk[8]
1018e1051a39Sopenharmony_ci	xr	$s2,$s1			# rk[10]=rk[2]^rk[9]
1019e1051a39Sopenharmony_ci	xr	$s3,$s2			# rk[11]=rk[3]^rk[10]
1020e1051a39Sopenharmony_ci	st	$s0,32($key)
1021e1051a39Sopenharmony_ci	st	$s1,36($key)
1022e1051a39Sopenharmony_ci	st	$s2,40($key)
1023e1051a39Sopenharmony_ci	st	$s3,44($key)
1024e1051a39Sopenharmony_ci	brct	$rounds,.L256_continue
	# Schedule complete.  Leave the round count in t0 for
	# AES_set_decrypt_key and restore the registers saved on entry.
1025e1051a39Sopenharmony_ci	lghi	$t0,14
1026e1051a39Sopenharmony_ci	lghi	%r2,0
1027e1051a39Sopenharmony_ci	lm${g}	%r4,%r13,4*$SIZE_T($sp)
1028e1051a39Sopenharmony_ci	br	$ra
1029e1051a39Sopenharmony_ci
1030e1051a39Sopenharmony_ci.align	16
1031e1051a39Sopenharmony_ci.L256_continue:
	# Second half of the pass: the bytes of rk[11] go through Te4 and
	# return to their own positions, so there is no rotation and no
	# rcon here.
1032e1051a39Sopenharmony_ci	lgr	$t1,$s3			# temp=rk[11]
1033e1051a39Sopenharmony_ci	srlg	$i1,$s3,8
1034e1051a39Sopenharmony_ci	srlg	$i2,$s3,16
1035e1051a39Sopenharmony_ci	srlg	$i3,$s3,24
1036e1051a39Sopenharmony_ci	nr	$t1,$mask
1037e1051a39Sopenharmony_ci	nr	$i1,$mask
1038e1051a39Sopenharmony_ci	nr	$i2,$mask
1039e1051a39Sopenharmony_ci	la	$t1,0($t1,$tbl)
1040e1051a39Sopenharmony_ci	la	$i1,0($i1,$tbl)
1041e1051a39Sopenharmony_ci	la	$i2,0($i2,$tbl)
1042e1051a39Sopenharmony_ci	la	$i3,0($i3,$tbl)
1043e1051a39Sopenharmony_ci	llgc	$t1,0($t1)		# Te4[rk[11]>>0]
1044e1051a39Sopenharmony_ci	icm	$t1,2,0($i1)		# Te4[rk[11]>>8]<<8
1045e1051a39Sopenharmony_ci	icm	$t1,4,0($i2)		# Te4[rk[11]>>16]<<16
1046e1051a39Sopenharmony_ci	icm	$t1,8,0($i3)		# Te4[rk[11]>>24]<<24
1047e1051a39Sopenharmony_ci	x	$t1,16($key)		# rk[12]=rk[4]^...
1048e1051a39Sopenharmony_ci	st	$t1,48($key)
1049e1051a39Sopenharmony_ci	x	$t1,20($key)		# rk[13]=rk[5]^rk[12]
1050e1051a39Sopenharmony_ci	st	$t1,52($key)
1051e1051a39Sopenharmony_ci	x	$t1,24($key)		# rk[14]=rk[6]^rk[13]
1052e1051a39Sopenharmony_ci	st	$t1,56($key)
1053e1051a39Sopenharmony_ci	x	$t1,28($key)		# rk[15]=rk[7]^rk[14]
1054e1051a39Sopenharmony_ci	st	$t1,60($key)
1055e1051a39Sopenharmony_ci
	# Byte indices of rk[15] for the next pass.
1056e1051a39Sopenharmony_ci	srlg	$i1,$t1,8
1057e1051a39Sopenharmony_ci	srlg	$i2,$t1,16
1058e1051a39Sopenharmony_ci	srlg	$i3,$t1,24
1059e1051a39Sopenharmony_ci	nr	$t1,$mask
1060e1051a39Sopenharmony_ci	nr	$i1,$mask
1061e1051a39Sopenharmony_ci	nr	$i2,$mask
1062e1051a39Sopenharmony_ci
1063e1051a39Sopenharmony_ci	la	$key,32($key)		# key+=8
1064e1051a39Sopenharmony_ci	la	$t3,4($t3)		# i++
1065e1051a39Sopenharmony_ci	j	.L256_loop
1066e1051a39Sopenharmony_ci
	# Reached from the NULL checks at the top of AES_set_encrypt_key.
1067e1051a39Sopenharmony_ci.Lminus1:
1068e1051a39Sopenharmony_ci	lghi	%r2,-1
1069e1051a39Sopenharmony_ci	br	$ra
1070e1051a39Sopenharmony_ci.size	AES_set_encrypt_key,.-AES_set_encrypt_key
1071e1051a39Sopenharmony_ci
1072e1051a39Sopenharmony_ci# int AES_set_decrypt_key(const unsigned char *in, int bits,
1073e1051a39Sopenharmony_ci# 		 AES_KEY *key) {
# Build the encryption schedule through _s390x_AES_set_encrypt_key, then
# convert it for decryption.  A non-zero status from the encryption setup
# is returned unchanged; otherwise the result is 0.  The conversion relies
# on two things the callee leaves behind: t0 holding the value stored at
# offset 240, and the callee-saved registers still sitting in the save area.
1074e1051a39Sopenharmony_ci.globl	AES_set_decrypt_key
1075e1051a39Sopenharmony_ci.type	AES_set_decrypt_key,\@function
1076e1051a39Sopenharmony_ci.align	16
1077e1051a39Sopenharmony_ciAES_set_decrypt_key:
1078e1051a39Sopenharmony_ci	#st${g}	$key,4*$SIZE_T($sp)	# I rely on AES_set_encrypt_key to
1079e1051a39Sopenharmony_ci	st${g}	$ra,14*$SIZE_T($sp)	# save non-volatile registers and $key!
1080e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_set_encrypt_key
1081e1051a39Sopenharmony_ci	#l${g}	$key,4*$SIZE_T($sp)
1082e1051a39Sopenharmony_ci	l${g}	$ra,14*$SIZE_T($sp)
	# Propagate any error status from the encryption key setup.
1083e1051a39Sopenharmony_ci	ltgr	%r2,%r2
1084e1051a39Sopenharmony_ci	bnzr	$ra
1085e1051a39Sopenharmony_ci___
# Hardware path, emitted only when $softonly is false.  A value of 16 or
# more in $t0 is a km(c) function code: the key material is left as is and
# only the code at offset 240 gets the decrypt bit.  Smaller values are
# software round counts and continue at .Lgo.
1086e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
1087e1051a39Sopenharmony_ci	#l	$t0,240($key)
1088e1051a39Sopenharmony_ci	lhi	$t1,16
1089e1051a39Sopenharmony_ci	cr	$t0,$t1
1090e1051a39Sopenharmony_ci	jl	.Lgo
1091e1051a39Sopenharmony_ci	oill	$t0,S390X_DECRYPT	# set "decrypt" bit
1092e1051a39Sopenharmony_ci	st	$t0,240($key)
1093e1051a39Sopenharmony_ci	br	$ra
1094e1051a39Sopenharmony_ci___
# Software conversion, step one: reverse the order of the round keys by
# swapping 16-byte entries from both ends of the schedule.
1095e1051a39Sopenharmony_ci$code.=<<___;
1096e1051a39Sopenharmony_ci.align	16
1097e1051a39Sopenharmony_ci.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	# i1 walks up from the first round key, i2 walks down from the last
	# one at key + 16*rounds.
1098e1051a39Sopenharmony_ci	la	$i1,0($key)
1099e1051a39Sopenharmony_ci	sllg	$i2,$rounds,4
1100e1051a39Sopenharmony_ci	la	$i2,0($i2,$key)
	# Half the round count is the number of swaps.
1101e1051a39Sopenharmony_ci	srl	$rounds,1
1102e1051a39Sopenharmony_ci	lghi	$t1,-16
1103e1051a39Sopenharmony_ci
1104e1051a39Sopenharmony_ci.align	16
1105e1051a39Sopenharmony_ci.Linv:	lmg	$s0,$s1,0($i1)
1106e1051a39Sopenharmony_ci	lmg	$s2,$s3,0($i2)
1107e1051a39Sopenharmony_ci	stmg	$s0,$s1,0($i2)
1108e1051a39Sopenharmony_ci	stmg	$s2,$s3,0($i1)
1109e1051a39Sopenharmony_ci	la	$i1,16($i1)
1110e1051a39Sopenharmony_ci	la	$i2,0($t1,$i2)
1111e1051a39Sopenharmony_ci	brct	$rounds,.Linv
1112e1051a39Sopenharmony_ci___
# The index registers are free after the swap loop, so they are reused under
# new names for the three byte-wise constants of the transform below.
1113e1051a39Sopenharmony_ci$mask80=$i1;
1114e1051a39Sopenharmony_ci$mask1b=$i2;
1115e1051a39Sopenharmony_ci$maskfe=$i3;
# Step two: apply the inverse MixColumns transform to every word of round
# keys 1 through rounds-1, leaving the first and last round keys untouched.
# Each pass handles one 32-bit word as four packed bytes.
1116e1051a39Sopenharmony_ci$code.=<<___;
1117e1051a39Sopenharmony_ci	llgf	$rounds,240($key)
1118e1051a39Sopenharmony_ci	aghi	$rounds,-1
1119e1051a39Sopenharmony_ci	sll	$rounds,2	# (rounds-1)*4
	# Replicate 0x80, 0x1b and 0xfe into all four bytes of the masks.
1120e1051a39Sopenharmony_ci	llilh	$mask80,0x8080
1121e1051a39Sopenharmony_ci	llilh	$mask1b,0x1b1b
1122e1051a39Sopenharmony_ci	llilh	$maskfe,0xfefe
1123e1051a39Sopenharmony_ci	oill	$mask80,0x8080
1124e1051a39Sopenharmony_ci	oill	$mask1b,0x1b1b
1125e1051a39Sopenharmony_ci	oill	$maskfe,0xfefe
1126e1051a39Sopenharmony_ci
1127e1051a39Sopenharmony_ci.align	16
1128e1051a39Sopenharmony_ci.Lmix:	l	$s0,16($key)	# tp1
	# Packed doubling in the AES field.  Bytes with the top bit set
	# become 0x7f via (0x80 - 0x01) and are masked down to the 0x1b
	# reduction constant; that is xored with the byte-wise left shift,
	# where the 0xfe mask drops bits carried in from the byte below.
1129e1051a39Sopenharmony_ci	lr	$s1,$s0
1130e1051a39Sopenharmony_ci	ngr	$s1,$mask80
1131e1051a39Sopenharmony_ci	srlg	$t1,$s1,7
1132e1051a39Sopenharmony_ci	slr	$s1,$t1
1133e1051a39Sopenharmony_ci	nr	$s1,$mask1b
1134e1051a39Sopenharmony_ci	sllg	$t1,$s0,1
1135e1051a39Sopenharmony_ci	nr	$t1,$maskfe
1136e1051a39Sopenharmony_ci	xr	$s1,$t1		# tp2
1137e1051a39Sopenharmony_ci
	# Same doubling applied to tp2.
1138e1051a39Sopenharmony_ci	lr	$s2,$s1
1139e1051a39Sopenharmony_ci	ngr	$s2,$mask80
1140e1051a39Sopenharmony_ci	srlg	$t1,$s2,7
1141e1051a39Sopenharmony_ci	slr	$s2,$t1
1142e1051a39Sopenharmony_ci	nr	$s2,$mask1b
1143e1051a39Sopenharmony_ci	sllg	$t1,$s1,1
1144e1051a39Sopenharmony_ci	nr	$t1,$maskfe
1145e1051a39Sopenharmony_ci	xr	$s2,$t1		# tp4
1146e1051a39Sopenharmony_ci
	# And once more applied to tp4.
1147e1051a39Sopenharmony_ci	lr	$s3,$s2
1148e1051a39Sopenharmony_ci	ngr	$s3,$mask80
1149e1051a39Sopenharmony_ci	srlg	$t1,$s3,7
1150e1051a39Sopenharmony_ci	slr	$s3,$t1
1151e1051a39Sopenharmony_ci	nr	$s3,$mask1b
1152e1051a39Sopenharmony_ci	sllg	$t1,$s2,1
1153e1051a39Sopenharmony_ci	nr	$t1,$maskfe
1154e1051a39Sopenharmony_ci	xr	$s3,$t1		# tp8
1155e1051a39Sopenharmony_ci
	# Combine the multiples and their rotations into the output word.
1156e1051a39Sopenharmony_ci	xr	$s1,$s0		# tp2^tp1
1157e1051a39Sopenharmony_ci	xr	$s2,$s0		# tp4^tp1
1158e1051a39Sopenharmony_ci	rll	$s0,$s0,24	# = ROTATE(tp1,8)
1159e1051a39Sopenharmony_ci	xr	$s2,$s3		# ^=tp8
1160e1051a39Sopenharmony_ci	xr	$s0,$s1		# ^=tp2^tp1
1161e1051a39Sopenharmony_ci	xr	$s1,$s3		# tp2^tp1^tp8
1162e1051a39Sopenharmony_ci	xr	$s0,$s2		# ^=tp4^tp1^tp8
1163e1051a39Sopenharmony_ci	rll	$s1,$s1,8
1164e1051a39Sopenharmony_ci	rll	$s2,$s2,16
1165e1051a39Sopenharmony_ci	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
1166e1051a39Sopenharmony_ci	rll	$s3,$s3,24
1167e1051a39Sopenharmony_ci	xr	$s0,$s2    	# ^= ROTATE(tp8^tp4^tp1,16)
1168e1051a39Sopenharmony_ci	xr	$s0,$s3		# ^= ROTATE(tp8,8)
1169e1051a39Sopenharmony_ci
	# Write the word back and move on to the next one.
1170e1051a39Sopenharmony_ci	st	$s0,16($key)
1171e1051a39Sopenharmony_ci	la	$key,4($key)
1172e1051a39Sopenharmony_ci	brct	$rounds,.Lmix
1173e1051a39Sopenharmony_ci
1174e1051a39Sopenharmony_ci	lm${g}	%r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1175e1051a39Sopenharmony_ci	lghi	%r2,0
1176e1051a39Sopenharmony_ci	br	$ra
1177e1051a39Sopenharmony_ci.size	AES_set_decrypt_key,.-AES_set_decrypt_key
1178e1051a39Sopenharmony_ci___
1179e1051a39Sopenharmony_ci
1180e1051a39Sopenharmony_ci########################################################################
1181e1051a39Sopenharmony_ci# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1182e1051a39Sopenharmony_ci#                     size_t length, const AES_KEY *key,
1183e1051a39Sopenharmony_ci#                     unsigned char *ivec, const int enc)
1184e1051a39Sopenharmony_ci{
1185e1051a39Sopenharmony_cimy $inp="%r2";
1186e1051a39Sopenharmony_cimy $out="%r4";	# length and out are swapped
1187e1051a39Sopenharmony_cimy $len="%r3";
1188e1051a39Sopenharmony_cimy $key="%r5";
1189e1051a39Sopenharmony_cimy $ivp="%r6";
1190e1051a39Sopenharmony_ci
1191e1051a39Sopenharmony_ci$code.=<<___;
1192e1051a39Sopenharmony_ci.globl	AES_cbc_encrypt
1193e1051a39Sopenharmony_ci.type	AES_cbc_encrypt,\@function
1194e1051a39Sopenharmony_ci.align	16
1195e1051a39Sopenharmony_ciAES_cbc_encrypt:
1196e1051a39Sopenharmony_ci	xgr	%r3,%r4		# flip %r3 and %r4, out and len
1197e1051a39Sopenharmony_ci	xgr	%r4,%r3
1198e1051a39Sopenharmony_ci	xgr	%r3,%r4
1199e1051a39Sopenharmony_ci___
1200e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
1201e1051a39Sopenharmony_ci	lhi	%r0,16
1202e1051a39Sopenharmony_ci	cl	%r0,240($key)
1203e1051a39Sopenharmony_ci	jh	.Lcbc_software
1204e1051a39Sopenharmony_ci
1205e1051a39Sopenharmony_ci	lg	%r0,0($ivp)	# copy ivec
1206e1051a39Sopenharmony_ci	lg	%r1,8($ivp)
1207e1051a39Sopenharmony_ci	stmg	%r0,%r1,16($sp)
1208e1051a39Sopenharmony_ci	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
1209e1051a39Sopenharmony_ci	stmg	%r0,%r1,32($sp)
1210e1051a39Sopenharmony_ci	lmg	%r0,%r1,16($key)
1211e1051a39Sopenharmony_ci	stmg	%r0,%r1,48($sp)
1212e1051a39Sopenharmony_ci	l	%r0,240($key)	# load kmc code
1213e1051a39Sopenharmony_ci	lghi	$key,15		# res=len%16, len-=res;
1214e1051a39Sopenharmony_ci	ngr	$key,$len
1215e1051a39Sopenharmony_ci	sl${g}r	$len,$key
1216e1051a39Sopenharmony_ci	la	%r1,16($sp)	# parameter block - ivec || key
1217e1051a39Sopenharmony_ci	jz	.Lkmc_truncated
1218e1051a39Sopenharmony_ci	.long	0xb92f0042	# kmc %r4,%r2
1219e1051a39Sopenharmony_ci	brc	1,.-4		# pay attention to "partial completion"
1220e1051a39Sopenharmony_ci	ltr	$key,$key
1221e1051a39Sopenharmony_ci	jnz	.Lkmc_truncated
1222e1051a39Sopenharmony_ci.Lkmc_done:
1223e1051a39Sopenharmony_ci	lmg	%r0,%r1,16($sp)	# copy ivec to caller
1224e1051a39Sopenharmony_ci	stg	%r0,0($ivp)
1225e1051a39Sopenharmony_ci	stg	%r1,8($ivp)
1226e1051a39Sopenharmony_ci	br	$ra
1227e1051a39Sopenharmony_ci.align	16
1228e1051a39Sopenharmony_ci.Lkmc_truncated:
1229e1051a39Sopenharmony_ci	ahi	$key,-1		# it's the way it's encoded in mvc
1230e1051a39Sopenharmony_ci	tmll	%r0,S390X_DECRYPT
1231e1051a39Sopenharmony_ci	jnz	.Lkmc_truncated_dec
1232e1051a39Sopenharmony_ci	lghi	%r1,0
1233e1051a39Sopenharmony_ci	stg	%r1,16*$SIZE_T($sp)
1234e1051a39Sopenharmony_ci	stg	%r1,16*$SIZE_T+8($sp)
1235e1051a39Sopenharmony_ci	bras	%r1,1f
1236e1051a39Sopenharmony_ci	mvc	16*$SIZE_T(1,$sp),0($inp)
1237e1051a39Sopenharmony_ci1:	ex	$key,0(%r1)
1238e1051a39Sopenharmony_ci	la	%r1,16($sp)	# restore parameter block
1239e1051a39Sopenharmony_ci	la	$inp,16*$SIZE_T($sp)
1240e1051a39Sopenharmony_ci	lghi	$len,16
1241e1051a39Sopenharmony_ci	.long	0xb92f0042	# kmc %r4,%r2
1242e1051a39Sopenharmony_ci	j	.Lkmc_done
1243e1051a39Sopenharmony_ci.align	16
1244e1051a39Sopenharmony_ci.Lkmc_truncated_dec:
1245e1051a39Sopenharmony_ci	st${g}	$out,4*$SIZE_T($sp)
1246e1051a39Sopenharmony_ci	la	$out,16*$SIZE_T($sp)
1247e1051a39Sopenharmony_ci	lghi	$len,16
1248e1051a39Sopenharmony_ci	.long	0xb92f0042	# kmc %r4,%r2
1249e1051a39Sopenharmony_ci	l${g}	$out,4*$SIZE_T($sp)
1250e1051a39Sopenharmony_ci	bras	%r1,2f
1251e1051a39Sopenharmony_ci	mvc	0(1,$out),16*$SIZE_T($sp)
1252e1051a39Sopenharmony_ci2:	ex	$key,0(%r1)
1253e1051a39Sopenharmony_ci	j	.Lkmc_done
1254e1051a39Sopenharmony_ci.align	16
1255e1051a39Sopenharmony_ci.Lcbc_software:
1256e1051a39Sopenharmony_ci___
1257e1051a39Sopenharmony_ci$code.=<<___;
1258e1051a39Sopenharmony_ci	stm${g}	$key,$ra,5*$SIZE_T($sp)
1259e1051a39Sopenharmony_ci	lhi	%r0,0
1260e1051a39Sopenharmony_ci	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
1261e1051a39Sopenharmony_ci	je	.Lcbc_decrypt
1262e1051a39Sopenharmony_ci
1263e1051a39Sopenharmony_ci	larl	$tbl,AES_Te
1264e1051a39Sopenharmony_ci
1265e1051a39Sopenharmony_ci	llgf	$s0,0($ivp)
1266e1051a39Sopenharmony_ci	llgf	$s1,4($ivp)
1267e1051a39Sopenharmony_ci	llgf	$s2,8($ivp)
1268e1051a39Sopenharmony_ci	llgf	$s3,12($ivp)
1269e1051a39Sopenharmony_ci
1270e1051a39Sopenharmony_ci	lghi	$t0,16
1271e1051a39Sopenharmony_ci	sl${g}r	$len,$t0
1272e1051a39Sopenharmony_ci	brc	4,.Lcbc_enc_tail	# if borrow
1273e1051a39Sopenharmony_ci.Lcbc_enc_loop:
1274e1051a39Sopenharmony_ci	stm${g}	$inp,$out,2*$SIZE_T($sp)
1275e1051a39Sopenharmony_ci	x	$s0,0($inp)
1276e1051a39Sopenharmony_ci	x	$s1,4($inp)
1277e1051a39Sopenharmony_ci	x	$s2,8($inp)
1278e1051a39Sopenharmony_ci	x	$s3,12($inp)
1279e1051a39Sopenharmony_ci	lgr	%r4,$key
1280e1051a39Sopenharmony_ci
1281e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt
1282e1051a39Sopenharmony_ci
1283e1051a39Sopenharmony_ci	lm${g}	$inp,$key,2*$SIZE_T($sp)
1284e1051a39Sopenharmony_ci	st	$s0,0($out)
1285e1051a39Sopenharmony_ci	st	$s1,4($out)
1286e1051a39Sopenharmony_ci	st	$s2,8($out)
1287e1051a39Sopenharmony_ci	st	$s3,12($out)
1288e1051a39Sopenharmony_ci
1289e1051a39Sopenharmony_ci	la	$inp,16($inp)
1290e1051a39Sopenharmony_ci	la	$out,16($out)
1291e1051a39Sopenharmony_ci	lghi	$t0,16
1292e1051a39Sopenharmony_ci	lt${g}r	$len,$len
1293e1051a39Sopenharmony_ci	jz	.Lcbc_enc_done
1294e1051a39Sopenharmony_ci	sl${g}r	$len,$t0
1295e1051a39Sopenharmony_ci	brc	4,.Lcbc_enc_tail	# if borrow
1296e1051a39Sopenharmony_ci	j	.Lcbc_enc_loop
1297e1051a39Sopenharmony_ci.align	16
1298e1051a39Sopenharmony_ci.Lcbc_enc_done:
1299e1051a39Sopenharmony_ci	l${g}	$ivp,6*$SIZE_T($sp)
1300e1051a39Sopenharmony_ci	st	$s0,0($ivp)
1301e1051a39Sopenharmony_ci	st	$s1,4($ivp)
1302e1051a39Sopenharmony_ci	st	$s2,8($ivp)
1303e1051a39Sopenharmony_ci	st	$s3,12($ivp)
1304e1051a39Sopenharmony_ci
1305e1051a39Sopenharmony_ci	lm${g}	%r7,$ra,7*$SIZE_T($sp)
1306e1051a39Sopenharmony_ci	br	$ra
1307e1051a39Sopenharmony_ci
1308e1051a39Sopenharmony_ci.align	16
1309e1051a39Sopenharmony_ci.Lcbc_enc_tail:
1310e1051a39Sopenharmony_ci	aghi	$len,15
1311e1051a39Sopenharmony_ci	lghi	$t0,0
1312e1051a39Sopenharmony_ci	stg	$t0,16*$SIZE_T($sp)
1313e1051a39Sopenharmony_ci	stg	$t0,16*$SIZE_T+8($sp)
1314e1051a39Sopenharmony_ci	bras	$t1,3f
1315e1051a39Sopenharmony_ci	mvc	16*$SIZE_T(1,$sp),0($inp)
1316e1051a39Sopenharmony_ci3:	ex	$len,0($t1)
1317e1051a39Sopenharmony_ci	lghi	$len,0
1318e1051a39Sopenharmony_ci	la	$inp,16*$SIZE_T($sp)
1319e1051a39Sopenharmony_ci	j	.Lcbc_enc_loop
1320e1051a39Sopenharmony_ci
1321e1051a39Sopenharmony_ci.align	16
1322e1051a39Sopenharmony_ci.Lcbc_decrypt:
1323e1051a39Sopenharmony_ci	larl	$tbl,AES_Td
1324e1051a39Sopenharmony_ci
1325e1051a39Sopenharmony_ci	lg	$t0,0($ivp)
1326e1051a39Sopenharmony_ci	lg	$t1,8($ivp)
1327e1051a39Sopenharmony_ci	stmg	$t0,$t1,16*$SIZE_T($sp)
1328e1051a39Sopenharmony_ci
1329e1051a39Sopenharmony_ci.Lcbc_dec_loop:
1330e1051a39Sopenharmony_ci	stm${g}	$inp,$out,2*$SIZE_T($sp)
1331e1051a39Sopenharmony_ci	llgf	$s0,0($inp)
1332e1051a39Sopenharmony_ci	llgf	$s1,4($inp)
1333e1051a39Sopenharmony_ci	llgf	$s2,8($inp)
1334e1051a39Sopenharmony_ci	llgf	$s3,12($inp)
1335e1051a39Sopenharmony_ci	lgr	%r4,$key
1336e1051a39Sopenharmony_ci
1337e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_decrypt
1338e1051a39Sopenharmony_ci
1339e1051a39Sopenharmony_ci	lm${g}	$inp,$key,2*$SIZE_T($sp)
1340e1051a39Sopenharmony_ci	sllg	$s0,$s0,32
1341e1051a39Sopenharmony_ci	sllg	$s2,$s2,32
1342e1051a39Sopenharmony_ci	lr	$s0,$s1
1343e1051a39Sopenharmony_ci	lr	$s2,$s3
1344e1051a39Sopenharmony_ci
1345e1051a39Sopenharmony_ci	lg	$t0,0($inp)
1346e1051a39Sopenharmony_ci	lg	$t1,8($inp)
1347e1051a39Sopenharmony_ci	xg	$s0,16*$SIZE_T($sp)
1348e1051a39Sopenharmony_ci	xg	$s2,16*$SIZE_T+8($sp)
1349e1051a39Sopenharmony_ci	lghi	$s1,16
1350e1051a39Sopenharmony_ci	sl${g}r	$len,$s1
1351e1051a39Sopenharmony_ci	brc	4,.Lcbc_dec_tail	# if borrow
1352e1051a39Sopenharmony_ci	brc	2,.Lcbc_dec_done	# if zero
1353e1051a39Sopenharmony_ci	stg	$s0,0($out)
1354e1051a39Sopenharmony_ci	stg	$s2,8($out)
1355e1051a39Sopenharmony_ci	stmg	$t0,$t1,16*$SIZE_T($sp)
1356e1051a39Sopenharmony_ci
1357e1051a39Sopenharmony_ci	la	$inp,16($inp)
1358e1051a39Sopenharmony_ci	la	$out,16($out)
1359e1051a39Sopenharmony_ci	j	.Lcbc_dec_loop
1360e1051a39Sopenharmony_ci
1361e1051a39Sopenharmony_ci.Lcbc_dec_done:
1362e1051a39Sopenharmony_ci	stg	$s0,0($out)
1363e1051a39Sopenharmony_ci	stg	$s2,8($out)
1364e1051a39Sopenharmony_ci.Lcbc_dec_exit:
1365e1051a39Sopenharmony_ci	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1366e1051a39Sopenharmony_ci	stmg	$t0,$t1,0($ivp)
1367e1051a39Sopenharmony_ci
1368e1051a39Sopenharmony_ci	br	$ra
1369e1051a39Sopenharmony_ci
1370e1051a39Sopenharmony_ci.align	16
1371e1051a39Sopenharmony_ci.Lcbc_dec_tail:
1372e1051a39Sopenharmony_ci	aghi	$len,15
1373e1051a39Sopenharmony_ci	stg	$s0,16*$SIZE_T($sp)
1374e1051a39Sopenharmony_ci	stg	$s2,16*$SIZE_T+8($sp)
1375e1051a39Sopenharmony_ci	bras	$s1,4f
1376e1051a39Sopenharmony_ci	mvc	0(1,$out),16*$SIZE_T($sp)
1377e1051a39Sopenharmony_ci4:	ex	$len,0($s1)
1378e1051a39Sopenharmony_ci	j	.Lcbc_dec_exit
1379e1051a39Sopenharmony_ci.size	AES_cbc_encrypt,.-AES_cbc_encrypt
1380e1051a39Sopenharmony_ci___
1381e1051a39Sopenharmony_ci}
1382e1051a39Sopenharmony_ci########################################################################
1383e1051a39Sopenharmony_ci# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1384e1051a39Sopenharmony_ci#                     size_t blocks, const AES_KEY *key,
1385e1051a39Sopenharmony_ci#                     const unsigned char *ivec)
#
# Emits AES_ctr32_encrypt.  The routine XORs "blocks" 16-byte blocks read
# from "in" with the AES encryption of successive counter blocks and writes
# the result to "out".  The counter blocks start at ivec; between blocks
# only the last 32-bit word is incremented (the "32-bit increment" steps
# below).  None of the visible paths stores anything back through the ivec
# pointer.
#
# Register roles after the entry swap:
#   $inp (%r2)  input pointer
#   $len (%r3)  block count, truncated to its low 32 bits by llgfr
#   $out (%r4)  output pointer
#   $key (%r5)  key schedule; %r5 is reused as $iv0 (first 8 ivec bytes)
#               once the km path has copied the key pointer to %r1
#   $ivp (%r6)  ivec pointer; the km path reloads it with the last 8 ivec
#               bytes
#   $fp  (%r7)  number of blocks handled per pass by the km path
#
# Up to three code paths are generated:
#   kma       word at 240($key) >= 16, more than 16 blocks requested, and
#             the matching bit set in the S390X_KMA capability word
#   km        word at 240($key) >= 16 in every other case
#   software  word at 240($key) < 16; the only path when $softonly is set
#
# NOTE(review): no path special-cases a block count of zero; the
# brct-driven loops would wrap their counter instead of exiting.
# Presumably callers guarantee blocks > 0 -- confirm.
1386e1051a39Sopenharmony_ci{
1387e1051a39Sopenharmony_cimy $inp="%r2";
1388e1051a39Sopenharmony_cimy $out="%r4";	# blocks and out are swapped
1389e1051a39Sopenharmony_cimy $len="%r3";
1390e1051a39Sopenharmony_cimy $key="%r5";	my $iv0="%r5";
1391e1051a39Sopenharmony_cimy $ivp="%r6";
1392e1051a39Sopenharmony_cimy $fp ="%r7";
1393e1051a39Sopenharmony_ci
# Entry point.  "out" arrives in %r3 and "blocks" in %r4; the triple XOR
# exchanges the two registers so that they match $len/$out as declared
# above.  llgfr then zero-extends the low 32 bits of the block count.
1394e1051a39Sopenharmony_ci$code.=<<___;
1395e1051a39Sopenharmony_ci.globl	AES_ctr32_encrypt
1396e1051a39Sopenharmony_ci.type	AES_ctr32_encrypt,\@function
1397e1051a39Sopenharmony_ci.align	16
1398e1051a39Sopenharmony_ciAES_ctr32_encrypt:
1399e1051a39Sopenharmony_ci	xgr	%r3,%r4		# flip %r3 and %r4, $out and $len
1400e1051a39Sopenharmony_ci	xgr	%r4,%r3
1401e1051a39Sopenharmony_ci	xgr	%r3,%r4
1402e1051a39Sopenharmony_ci	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
1403e1051a39Sopenharmony_ci___
# Hardware-assisted paths (left out when $softonly is set).
#
# Dispatch: %r0 receives the word at 240($key); an unsigned value below 16
# branches to .Lctr32_software.  $s2/$s3 are saved at 10*$SIZE_T($sp) and
# 11*$SIZE_T($sp) of the incoming frame before they are used.  With 16
# blocks or fewer, or when the capability bit selected by %r0 is clear in
# the S390X_KMA word of OPENSSL_s390xcap_P, control goes to .Lctr32_nokma.
#
# kma path: a frame of $stdframe+112 bytes is pushed and %r1 points at the
# parameter block at $stdframe($sp).  Inside that block
#   bytes 64..79   receive ivec with its last 32-bit word decremented by 1,
#   bytes 12..15   receive that same decremented word,
#   bytes 80..111  receive 32 bytes copied from the start of the key.
# $len is converted from blocks to bytes, 0x0600 is ORed into %r0, $s2/$s3
# are zeroed (no AAD) and kma is re-issued for as long as "brc 1" sees
# condition code 3.  Afterwards the key copy is overwritten with %r0, the
# frame is popped, $s2/$s3 are restored and the routine returns.
#
# .Lctr32_nokma: %r6..$s1 are saved too, $out becomes an offset relative to
# $inp, the key pointer is parked in %r1 and the two ivec halves are loaded
# into $iv0 and $ivp.  A scratch frame is then set up 1024 bytes above a 4K
# boundary below the current $sp, with the old $sp stored as back-chain.
# $fp is the buffer capacity in 16-byte blocks, reduced to $len when the
# input is shorter; it is recorded at $SIZE_T($sp) and read back by the
# wipe loop.  $len leaves this segment holding the number of blocks that
# remain after the first pass.
1404e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
1405e1051a39Sopenharmony_ci	l	%r0,240($key)
1406e1051a39Sopenharmony_ci	lhi	%r1,16
1407e1051a39Sopenharmony_ci	clr	%r0,%r1
1408e1051a39Sopenharmony_ci	jl	.Lctr32_software
1409e1051a39Sopenharmony_ci
1410e1051a39Sopenharmony_ci	st${g}	$s2,10*$SIZE_T($sp)
1411e1051a39Sopenharmony_ci	st${g}	$s3,11*$SIZE_T($sp)
1412e1051a39Sopenharmony_ci
1413e1051a39Sopenharmony_ci	clr	$len,%r1		# does work even in 64-bit mode
1414e1051a39Sopenharmony_ci	jle	.Lctr32_nokma		# kma is slower for <= 16 blocks
1415e1051a39Sopenharmony_ci
1416e1051a39Sopenharmony_ci	larl	%r1,OPENSSL_s390xcap_P
1417e1051a39Sopenharmony_ci	lr	$s2,%r0
1418e1051a39Sopenharmony_ci	llihh	$s3,0x8000
1419e1051a39Sopenharmony_ci	srlg	$s3,$s3,0($s2)
1420e1051a39Sopenharmony_ci	ng	$s3,S390X_KMA(%r1)		# check kma capability vector
1421e1051a39Sopenharmony_ci	jz	.Lctr32_nokma
1422e1051a39Sopenharmony_ci
1423e1051a39Sopenharmony_ci	l${g}hi	%r1,-$stdframe-112
1424e1051a39Sopenharmony_ci	l${g}r	$s3,$sp
1425e1051a39Sopenharmony_ci	la	$sp,0(%r1,$sp)			# prepare parameter block
1426e1051a39Sopenharmony_ci
1427e1051a39Sopenharmony_ci	lhi	%r1,0x0600
1428e1051a39Sopenharmony_ci	sllg	$len,$len,4
1429e1051a39Sopenharmony_ci	or	%r0,%r1				# set HS and LAAD flags
1430e1051a39Sopenharmony_ci
1431e1051a39Sopenharmony_ci	st${g}	$s3,0($sp)			# backchain
1432e1051a39Sopenharmony_ci	la	%r1,$stdframe($sp)
1433e1051a39Sopenharmony_ci
1434e1051a39Sopenharmony_ci	lmg	$s2,$s3,0($key)			# copy key
1435e1051a39Sopenharmony_ci	stg	$s2,$stdframe+80($sp)
1436e1051a39Sopenharmony_ci	stg	$s3,$stdframe+88($sp)
1437e1051a39Sopenharmony_ci	lmg	$s2,$s3,16($key)
1438e1051a39Sopenharmony_ci	stg	$s2,$stdframe+96($sp)
1439e1051a39Sopenharmony_ci	stg	$s3,$stdframe+104($sp)
1440e1051a39Sopenharmony_ci
1441e1051a39Sopenharmony_ci	lmg	$s2,$s3,0($ivp)			# copy iv
1442e1051a39Sopenharmony_ci	stg	$s2,$stdframe+64($sp)
1443e1051a39Sopenharmony_ci	ahi	$s3,-1				# kma requires counter-1
1444e1051a39Sopenharmony_ci	stg	$s3,$stdframe+72($sp)
1445e1051a39Sopenharmony_ci	st	$s3,$stdframe+12($sp)		# copy counter
1446e1051a39Sopenharmony_ci
1447e1051a39Sopenharmony_ci	lghi	$s2,0				# no AAD
1448e1051a39Sopenharmony_ci	lghi	$s3,0
1449e1051a39Sopenharmony_ci
1450e1051a39Sopenharmony_ci	.long	0xb929a042	# kma $out,$s2,$inp
1451e1051a39Sopenharmony_ci	brc	1,.-4		# pay attention to "partial completion"
1452e1051a39Sopenharmony_ci
1453e1051a39Sopenharmony_ci	stg	%r0,$stdframe+80($sp)		# wipe key
1454e1051a39Sopenharmony_ci	stg	%r0,$stdframe+88($sp)
1455e1051a39Sopenharmony_ci	stg	%r0,$stdframe+96($sp)
1456e1051a39Sopenharmony_ci	stg	%r0,$stdframe+104($sp)
1457e1051a39Sopenharmony_ci	la	$sp,$stdframe+112($sp)
1458e1051a39Sopenharmony_ci
1459e1051a39Sopenharmony_ci	lm${g}	$s2,$s3,10*$SIZE_T($sp)
1460e1051a39Sopenharmony_ci	br	$ra
1461e1051a39Sopenharmony_ci
1462e1051a39Sopenharmony_ci.align	16
1463e1051a39Sopenharmony_ci.Lctr32_nokma:
1464e1051a39Sopenharmony_ci	stm${g}	%r6,$s1,6*$SIZE_T($sp)
1465e1051a39Sopenharmony_ci
1466e1051a39Sopenharmony_ci	slgr	$out,$inp
1467e1051a39Sopenharmony_ci	la	%r1,0($key)	# %r1 is permanent copy of $key
1468e1051a39Sopenharmony_ci	lg	$iv0,0($ivp)	# load ivec
1469e1051a39Sopenharmony_ci	lg	$ivp,8($ivp)
1470e1051a39Sopenharmony_ci
1471e1051a39Sopenharmony_ci	# prepare and allocate stack frame at the top of 4K page
1472e1051a39Sopenharmony_ci	# with 1K reserved for eventual signal handling
1473e1051a39Sopenharmony_ci	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
1474e1051a39Sopenharmony_ci	lghi	$s1,-4096
1475e1051a39Sopenharmony_ci	algr	$s0,$sp
1476e1051a39Sopenharmony_ci	lgr	$fp,$sp
1477e1051a39Sopenharmony_ci	ngr	$s0,$s1		# align at page boundary
1478e1051a39Sopenharmony_ci	slgr	$fp,$s0		# total buffer size
1479e1051a39Sopenharmony_ci	lgr	$s2,$sp
1480e1051a39Sopenharmony_ci	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1481e1051a39Sopenharmony_ci	slgr	$fp,$s1		# deduct reservation to get usable buffer size
1482e1051a39Sopenharmony_ci	# buffer size is at lest 256 and at most 3072+256-16
1483e1051a39Sopenharmony_ci
1484e1051a39Sopenharmony_ci	la	$sp,1024($s0)	# alloca
1485e1051a39Sopenharmony_ci	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
1486e1051a39Sopenharmony_ci	st${g}	$s2,0($sp)	# back-chain
1487e1051a39Sopenharmony_ci	st${g}	$fp,$SIZE_T($sp)
1488e1051a39Sopenharmony_ci
1489e1051a39Sopenharmony_ci	slgr	$len,$fp
1490e1051a39Sopenharmony_ci	brc	1,.Lctr32_hw_switch	# not zero, no borrow
1491e1051a39Sopenharmony_ci	algr	$fp,$len	# input is shorter than allocated buffer
1492e1051a39Sopenharmony_ci	lghi	$len,0
1493e1051a39Sopenharmony_ci	st${g}	$fp,$SIZE_T($sp)
1494e1051a39Sopenharmony_ci
1495e1051a39Sopenharmony_ci.Lctr32_hw_switch:
1496e1051a39Sopenharmony_ci___
# kmctr variant.  The "&& 0" in the condition keeps this segment out of the
# generated code (the note on the statement gives the reason); it is kept
# for reference only.  As written it would test the capability bit selected
# by %r0 in the S390X_KMCTR word and branch to .Lctr32_km_loop when that
# bit is clear.  Otherwise it restores $out to a real pointer, fills the
# buffer at 16($sp) with $fp counter blocks per pass and lets kmctr process
# $fp blocks straight from $inp to $out, with $s1 taking over the role of
# the remaining-block counter.
1497e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
1498e1051a39Sopenharmony_ci	llgfr	$s0,%r0
1499e1051a39Sopenharmony_ci	lgr	$s1,%r1
1500e1051a39Sopenharmony_ci	larl	%r1,OPENSSL_s390xcap_P
1501e1051a39Sopenharmony_ci	llihh	%r0,0x8000	# check if kmctr supports the function code
1502e1051a39Sopenharmony_ci	srlg	%r0,%r0,0($s0)
1503e1051a39Sopenharmony_ci	ng	%r0,S390X_KMCTR(%r1)	# check kmctr capability vector
1504e1051a39Sopenharmony_ci	lgr	%r0,$s0
1505e1051a39Sopenharmony_ci	lgr	%r1,$s1
1506e1051a39Sopenharmony_ci	jz	.Lctr32_km_loop
1507e1051a39Sopenharmony_ci
1508e1051a39Sopenharmony_ci####### kmctr code
1509e1051a39Sopenharmony_ci	algr	$out,$inp	# restore $out
1510e1051a39Sopenharmony_ci	lgr	$s1,$len	# $s1 undertakes $len
1511e1051a39Sopenharmony_ci	j	.Lctr32_kmctr_loop
1512e1051a39Sopenharmony_ci.align	16
1513e1051a39Sopenharmony_ci.Lctr32_kmctr_loop:
1514e1051a39Sopenharmony_ci	la	$s2,16($sp)
1515e1051a39Sopenharmony_ci	lgr	$s3,$fp
1516e1051a39Sopenharmony_ci.Lctr32_kmctr_prepare:
1517e1051a39Sopenharmony_ci	stg	$iv0,0($s2)
1518e1051a39Sopenharmony_ci	stg	$ivp,8($s2)
1519e1051a39Sopenharmony_ci	la	$s2,16($s2)
1520e1051a39Sopenharmony_ci	ahi	$ivp,1		# 32-bit increment, preserves upper half
1521e1051a39Sopenharmony_ci	brct	$s3,.Lctr32_kmctr_prepare
1522e1051a39Sopenharmony_ci
1523e1051a39Sopenharmony_ci	#la	$inp,0($inp)	# inp
1524e1051a39Sopenharmony_ci	sllg	$len,$fp,4	# len
1525e1051a39Sopenharmony_ci	#la	$out,0($out)	# out
1526e1051a39Sopenharmony_ci	la	$s2,16($sp)	# iv
1527e1051a39Sopenharmony_ci	.long	0xb92da042	# kmctr $out,$s2,$inp
1528e1051a39Sopenharmony_ci	brc	1,.-4		# pay attention to "partial completion"
1529e1051a39Sopenharmony_ci
1530e1051a39Sopenharmony_ci	slgr	$s1,$fp
1531e1051a39Sopenharmony_ci	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
1532e1051a39Sopenharmony_ci	algr	$fp,$s1
1533e1051a39Sopenharmony_ci	lghi	$s1,0
1534e1051a39Sopenharmony_ci	brc	4+1,.Lctr32_kmctr_loop	# not zero
1535e1051a39Sopenharmony_ci
1536e1051a39Sopenharmony_ci	l${g}	$sp,0($sp)
1537e1051a39Sopenharmony_ci	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1538e1051a39Sopenharmony_ci	br	$ra
1539e1051a39Sopenharmony_ci.align	16
1540e1051a39Sopenharmony_ci___
# km path.  Each pass of .Lctr32_km_loop does three things:
#   1. .Lctr32_km_prepare writes $fp counter blocks ($iv0 followed by
#      $ivp) to the buffer at 16($sp), adding 1 to the low 32 bits of $ivp
#      after each block;
#   2. km is run with source and destination both at 16($sp), so the
#      counter blocks are replaced in place by their encryption;
#   3. .Lctr32_km_xor XORs 16 input bytes at a time with the buffer
#      contents and stores the result at $inp plus the $out offset.
# $len is then reduced by $fp.  When fewer than $fp blocks remain, $fp is
# shrunk to the remainder and $len set to 0 for one final pass; the loop
# ends once that remainder is zero.  .Lctr32_km_zap overwrites the number
# of buffer blocks recorded at $SIZE_T($sp) with the saved back-chain
# value, after which $sp and %r6..$s3 are restored and the routine returns.
# The trailing .Lctr32_software label belongs to this segment so that it is
# only defined when the hardware dispatch that references it is emitted.
1541e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
1542e1051a39Sopenharmony_ci.Lctr32_km_loop:
1543e1051a39Sopenharmony_ci	la	$s2,16($sp)
1544e1051a39Sopenharmony_ci	lgr	$s3,$fp
1545e1051a39Sopenharmony_ci.Lctr32_km_prepare:
1546e1051a39Sopenharmony_ci	stg	$iv0,0($s2)
1547e1051a39Sopenharmony_ci	stg	$ivp,8($s2)
1548e1051a39Sopenharmony_ci	la	$s2,16($s2)
1549e1051a39Sopenharmony_ci	ahi	$ivp,1		# 32-bit increment, preserves upper half
1550e1051a39Sopenharmony_ci	brct	$s3,.Lctr32_km_prepare
1551e1051a39Sopenharmony_ci
1552e1051a39Sopenharmony_ci	la	$s0,16($sp)	# inp
1553e1051a39Sopenharmony_ci	sllg	$s1,$fp,4	# len
1554e1051a39Sopenharmony_ci	la	$s2,16($sp)	# out
1555e1051a39Sopenharmony_ci	.long	0xb92e00a8	# km %r10,%r8
1556e1051a39Sopenharmony_ci	brc	1,.-4		# pay attention to "partial completion"
1557e1051a39Sopenharmony_ci
1558e1051a39Sopenharmony_ci	la	$s2,16($sp)
1559e1051a39Sopenharmony_ci	lgr	$s3,$fp
1560e1051a39Sopenharmony_ci	slgr	$s2,$inp
1561e1051a39Sopenharmony_ci.Lctr32_km_xor:
1562e1051a39Sopenharmony_ci	lg	$s0,0($inp)
1563e1051a39Sopenharmony_ci	lg	$s1,8($inp)
1564e1051a39Sopenharmony_ci	xg	$s0,0($s2,$inp)
1565e1051a39Sopenharmony_ci	xg	$s1,8($s2,$inp)
1566e1051a39Sopenharmony_ci	stg	$s0,0($out,$inp)
1567e1051a39Sopenharmony_ci	stg	$s1,8($out,$inp)
1568e1051a39Sopenharmony_ci	la	$inp,16($inp)
1569e1051a39Sopenharmony_ci	brct	$s3,.Lctr32_km_xor
1570e1051a39Sopenharmony_ci
1571e1051a39Sopenharmony_ci	slgr	$len,$fp
1572e1051a39Sopenharmony_ci	brc	1,.Lctr32_km_loop	# not zero, no borrow
1573e1051a39Sopenharmony_ci	algr	$fp,$len
1574e1051a39Sopenharmony_ci	lghi	$len,0
1575e1051a39Sopenharmony_ci	brc	4+1,.Lctr32_km_loop	# not zero
1576e1051a39Sopenharmony_ci
1577e1051a39Sopenharmony_ci	l${g}	$s0,0($sp)
1578e1051a39Sopenharmony_ci	l${g}	$s1,$SIZE_T($sp)
1579e1051a39Sopenharmony_ci	la	$s2,16($sp)
1580e1051a39Sopenharmony_ci.Lctr32_km_zap:
1581e1051a39Sopenharmony_ci	stg	$s0,0($s2)
1582e1051a39Sopenharmony_ci	stg	$s0,8($s2)
1583e1051a39Sopenharmony_ci	la	$s2,16($s2)
1584e1051a39Sopenharmony_ci	brct	$s1,.Lctr32_km_zap
1585e1051a39Sopenharmony_ci
1586e1051a39Sopenharmony_ci	la	$sp,0($s0)
1587e1051a39Sopenharmony_ci	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1588e1051a39Sopenharmony_ci	br	$ra
1589e1051a39Sopenharmony_ci.align	16
1590e1051a39Sopenharmony_ci.Lctr32_software:
1591e1051a39Sopenharmony_ci___
# Software path, always emitted; with $softonly it follows the entry code
# directly.  $key..$ra are saved at 5*$SIZE_T($sp), $inp is turned into an
# offset relative to $out, $tbl points at AES_Te and $t1 holds the last
# 32-bit word of ivec as the running counter.
#
# Per block: the first three ivec words and $t1 are loaded into $s0..$s3,
# the key pointer is copied into %r4 (which is also $out, hence the spill
# of %r2..%r4 just before) and _s390x_AES_encrypt is called.  Afterwards
# %r2..%r6 and $t1 are reloaded from the frame, the result is XORed with
# the 16 input bytes at $inp+$out and stored at $out.  $out advances by 16,
# $t1 is bumped with a 32-bit add and brct repeats until $len reaches zero.
1592e1051a39Sopenharmony_ci$code.=<<___;
1593e1051a39Sopenharmony_ci	stm${g}	$key,$ra,5*$SIZE_T($sp)
1594e1051a39Sopenharmony_ci	sl${g}r	$inp,$out
1595e1051a39Sopenharmony_ci	larl	$tbl,AES_Te
1596e1051a39Sopenharmony_ci	llgf	$t1,12($ivp)
1597e1051a39Sopenharmony_ci
1598e1051a39Sopenharmony_ci.Lctr32_loop:
1599e1051a39Sopenharmony_ci	stm${g}	$inp,$out,2*$SIZE_T($sp)
1600e1051a39Sopenharmony_ci	llgf	$s0,0($ivp)
1601e1051a39Sopenharmony_ci	llgf	$s1,4($ivp)
1602e1051a39Sopenharmony_ci	llgf	$s2,8($ivp)
1603e1051a39Sopenharmony_ci	lgr	$s3,$t1
1604e1051a39Sopenharmony_ci	st	$t1,16*$SIZE_T($sp)
1605e1051a39Sopenharmony_ci	lgr	%r4,$key
1606e1051a39Sopenharmony_ci
1607e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt
1608e1051a39Sopenharmony_ci
1609e1051a39Sopenharmony_ci	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
1610e1051a39Sopenharmony_ci	llgf	$t1,16*$SIZE_T($sp)
1611e1051a39Sopenharmony_ci	x	$s0,0($inp,$out)
1612e1051a39Sopenharmony_ci	x	$s1,4($inp,$out)
1613e1051a39Sopenharmony_ci	x	$s2,8($inp,$out)
1614e1051a39Sopenharmony_ci	x	$s3,12($inp,$out)
1615e1051a39Sopenharmony_ci	stm	$s0,$s3,0($out)
1616e1051a39Sopenharmony_ci
1617e1051a39Sopenharmony_ci	la	$out,16($out)
1618e1051a39Sopenharmony_ci	ahi	$t1,1		# 32-bit increment
1619e1051a39Sopenharmony_ci	brct	$len,.Lctr32_loop
1620e1051a39Sopenharmony_ci
1621e1051a39Sopenharmony_ci	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1622e1051a39Sopenharmony_ci	br	$ra
1623e1051a39Sopenharmony_ci.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
1624e1051a39Sopenharmony_ci___
1625e1051a39Sopenharmony_ci}
1626e1051a39Sopenharmony_ci
1627e1051a39Sopenharmony_ci########################################################################
1628e1051a39Sopenharmony_ci# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
1629e1051a39Sopenharmony_ci#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
1630e1051a39Sopenharmony_ci#	const unsigned char iv[16]);
1631e1051a39Sopenharmony_ci#
1632e1051a39Sopenharmony_ci{
1633e1051a39Sopenharmony_cimy $inp="%r2";
1634e1051a39Sopenharmony_cimy $out="%r4";	# len and out are swapped
1635e1051a39Sopenharmony_cimy $len="%r3";
1636e1051a39Sopenharmony_cimy $key1="%r5";	# $i1
1637e1051a39Sopenharmony_cimy $key2="%r6";	# $i2
1638e1051a39Sopenharmony_cimy $fp="%r7";	# $i3
1639e1051a39Sopenharmony_cimy $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...
1640e1051a39Sopenharmony_ci
1641e1051a39Sopenharmony_ci$code.=<<___;
1642e1051a39Sopenharmony_ci.type	_s390x_xts_km,\@function
1643e1051a39Sopenharmony_ci.align	16
1644e1051a39Sopenharmony_ci_s390x_xts_km:
1645e1051a39Sopenharmony_ci___
1646e1051a39Sopenharmony_ci$code.=<<___ if(1);
1647e1051a39Sopenharmony_ci	llgfr	$s0,%r0			# put aside the function code
1648e1051a39Sopenharmony_ci	lghi	$s1,0x7f
1649e1051a39Sopenharmony_ci	nr	$s1,%r0
1650e1051a39Sopenharmony_ci	larl	%r1,OPENSSL_s390xcap_P
1651e1051a39Sopenharmony_ci	llihh	%r0,0x8000
1652e1051a39Sopenharmony_ci	srlg	%r0,%r0,32($s1)		# check for 32+function code
1653e1051a39Sopenharmony_ci	ng	%r0,S390X_KM(%r1)	# check km capability vector
1654e1051a39Sopenharmony_ci	lgr	%r0,$s0			# restore the function code
1655e1051a39Sopenharmony_ci	la	%r1,0($key1)		# restore $key1
1656e1051a39Sopenharmony_ci	jz	.Lxts_km_vanilla
1657e1051a39Sopenharmony_ci
1658e1051a39Sopenharmony_ci	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
1659e1051a39Sopenharmony_ci	algr	$out,$inp
1660e1051a39Sopenharmony_ci
1661e1051a39Sopenharmony_ci	oill	%r0,32			# switch to xts function code
1662e1051a39Sopenharmony_ci	aghi	$s1,-18			#
1663e1051a39Sopenharmony_ci	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
1664e1051a39Sopenharmony_ci	la	%r1,$tweak-16($sp)
1665e1051a39Sopenharmony_ci	slgr	%r1,$s1			# parameter block position
1666e1051a39Sopenharmony_ci	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
1667e1051a39Sopenharmony_ci	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
1668e1051a39Sopenharmony_ci					# yes, it contains junk and overlaps
1669e1051a39Sopenharmony_ci					# with the tweak in 128-bit case.
1670e1051a39Sopenharmony_ci					# it's done to avoid conditional
1671e1051a39Sopenharmony_ci					# branch.
1672e1051a39Sopenharmony_ci	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value
1673e1051a39Sopenharmony_ci
1674e1051a39Sopenharmony_ci	.long	0xb92e0042		# km %r4,%r2
1675e1051a39Sopenharmony_ci	brc	1,.-4			# pay attention to "partial completion"
1676e1051a39Sopenharmony_ci
1677e1051a39Sopenharmony_ci	lrvg	$s0,$tweak+0($sp)	# load the last tweak
1678e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+8($sp)
1679e1051a39Sopenharmony_ci	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key
1680e1051a39Sopenharmony_ci
1681e1051a39Sopenharmony_ci	nill	%r0,0xffdf		# switch back to original function code
1682e1051a39Sopenharmony_ci	la	%r1,0($key1)		# restore pointer to $key1
1683e1051a39Sopenharmony_ci	slgr	$out,$inp
1684e1051a39Sopenharmony_ci
1685e1051a39Sopenharmony_ci	llgc	$len,2*$SIZE_T-1($sp)
1686e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%=16
1687e1051a39Sopenharmony_ci	br	$ra
1688e1051a39Sopenharmony_ci
1689e1051a39Sopenharmony_ci.align	16
1690e1051a39Sopenharmony_ci.Lxts_km_vanilla:
1691e1051a39Sopenharmony_ci___
1692e1051a39Sopenharmony_ci$code.=<<___;
1693e1051a39Sopenharmony_ci	# prepare and allocate stack frame at the top of 4K page
1694e1051a39Sopenharmony_ci	# with 1K reserved for eventual signal handling
1695e1051a39Sopenharmony_ci	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
1696e1051a39Sopenharmony_ci	lghi	$s1,-4096
1697e1051a39Sopenharmony_ci	algr	$s0,$sp
1698e1051a39Sopenharmony_ci	lgr	$fp,$sp
1699e1051a39Sopenharmony_ci	ngr	$s0,$s1		# align at page boundary
1700e1051a39Sopenharmony_ci	slgr	$fp,$s0		# total buffer size
1701e1051a39Sopenharmony_ci	lgr	$s2,$sp
1702e1051a39Sopenharmony_ci	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1703e1051a39Sopenharmony_ci	slgr	$fp,$s1		# deduct reservation to get usable buffer size
1704e1051a39Sopenharmony_ci	# buffer size is at least 256 and at most 3072+256-16
1705e1051a39Sopenharmony_ci
1706e1051a39Sopenharmony_ci	la	$sp,1024($s0)	# alloca
1707e1051a39Sopenharmony_ci	nill	$fp,0xfff0	# round to 16*n
1708e1051a39Sopenharmony_ci	st${g}	$s2,0($sp)	# back-chain
1709e1051a39Sopenharmony_ci	nill	$len,0xfff0	# redundant
1710e1051a39Sopenharmony_ci	st${g}	$fp,$SIZE_T($sp)
1711e1051a39Sopenharmony_ci
1712e1051a39Sopenharmony_ci	slgr	$len,$fp
1713e1051a39Sopenharmony_ci	brc	1,.Lxts_km_go	# not zero, no borrow
1714e1051a39Sopenharmony_ci	algr	$fp,$len	# input is shorter than allocated buffer
1715e1051a39Sopenharmony_ci	lghi	$len,0
1716e1051a39Sopenharmony_ci	st${g}	$fp,$SIZE_T($sp)
1717e1051a39Sopenharmony_ci
1718e1051a39Sopenharmony_ci.Lxts_km_go:
1719e1051a39Sopenharmony_ci	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
1720e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+8($s2)
1721e1051a39Sopenharmony_ci
1722e1051a39Sopenharmony_ci	la	$s2,16($sp)		# vector of ascending tweak values
1723e1051a39Sopenharmony_ci	slgr	$s2,$inp
1724e1051a39Sopenharmony_ci	srlg	$s3,$fp,4
1725e1051a39Sopenharmony_ci	j	.Lxts_km_start
1726e1051a39Sopenharmony_ci
1727e1051a39Sopenharmony_ci.Lxts_km_loop:
1728e1051a39Sopenharmony_ci	la	$s2,16($sp)
1729e1051a39Sopenharmony_ci	slgr	$s2,$inp
1730e1051a39Sopenharmony_ci	srlg	$s3,$fp,4
1731e1051a39Sopenharmony_ci.Lxts_km_prepare:
1732e1051a39Sopenharmony_ci	lghi	$i1,0x87
1733e1051a39Sopenharmony_ci	srag	$i2,$s1,63		# broadcast upper bit
1734e1051a39Sopenharmony_ci	ngr	$i1,$i2			# rem
1735e1051a39Sopenharmony_ci	algr	$s0,$s0
1736e1051a39Sopenharmony_ci	alcgr	$s1,$s1
1737e1051a39Sopenharmony_ci	xgr	$s0,$i1
1738e1051a39Sopenharmony_ci.Lxts_km_start:
1739e1051a39Sopenharmony_ci	lrvgr	$i1,$s0			# flip byte order
1740e1051a39Sopenharmony_ci	lrvgr	$i2,$s1
1741e1051a39Sopenharmony_ci	stg	$i1,0($s2,$inp)
1742e1051a39Sopenharmony_ci	stg	$i2,8($s2,$inp)
1743e1051a39Sopenharmony_ci	xg	$i1,0($inp)
1744e1051a39Sopenharmony_ci	xg	$i2,8($inp)
1745e1051a39Sopenharmony_ci	stg	$i1,0($out,$inp)
1746e1051a39Sopenharmony_ci	stg	$i2,8($out,$inp)
1747e1051a39Sopenharmony_ci	la	$inp,16($inp)
1748e1051a39Sopenharmony_ci	brct	$s3,.Lxts_km_prepare
1749e1051a39Sopenharmony_ci
1750e1051a39Sopenharmony_ci	slgr	$inp,$fp		# rewind $inp
1751e1051a39Sopenharmony_ci	la	$s2,0($out,$inp)
1752e1051a39Sopenharmony_ci	lgr	$s3,$fp
1753e1051a39Sopenharmony_ci	.long	0xb92e00aa		# km $s2,$s2
1754e1051a39Sopenharmony_ci	brc	1,.-4			# pay attention to "partial completion"
1755e1051a39Sopenharmony_ci
1756e1051a39Sopenharmony_ci	la	$s2,16($sp)
1757e1051a39Sopenharmony_ci	slgr	$s2,$inp
1758e1051a39Sopenharmony_ci	srlg	$s3,$fp,4
1759e1051a39Sopenharmony_ci.Lxts_km_xor:
1760e1051a39Sopenharmony_ci	lg	$i1,0($out,$inp)
1761e1051a39Sopenharmony_ci	lg	$i2,8($out,$inp)
1762e1051a39Sopenharmony_ci	xg	$i1,0($s2,$inp)
1763e1051a39Sopenharmony_ci	xg	$i2,8($s2,$inp)
1764e1051a39Sopenharmony_ci	stg	$i1,0($out,$inp)
1765e1051a39Sopenharmony_ci	stg	$i2,8($out,$inp)
1766e1051a39Sopenharmony_ci	la	$inp,16($inp)
1767e1051a39Sopenharmony_ci	brct	$s3,.Lxts_km_xor
1768e1051a39Sopenharmony_ci
1769e1051a39Sopenharmony_ci	slgr	$len,$fp
1770e1051a39Sopenharmony_ci	brc	1,.Lxts_km_loop		# not zero, no borrow
1771e1051a39Sopenharmony_ci	algr	$fp,$len
1772e1051a39Sopenharmony_ci	lghi	$len,0
1773e1051a39Sopenharmony_ci	brc	4+1,.Lxts_km_loop	# not zero
1774e1051a39Sopenharmony_ci
1775e1051a39Sopenharmony_ci	l${g}	$i1,0($sp)		# back-chain
1776e1051a39Sopenharmony_ci	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
1777e1051a39Sopenharmony_ci	la	$i2,16($sp)
1778e1051a39Sopenharmony_ci	srlg	$fp,$fp,4
1779e1051a39Sopenharmony_ci.Lxts_km_zap:
1780e1051a39Sopenharmony_ci	stg	$i1,0($i2)
1781e1051a39Sopenharmony_ci	stg	$i1,8($i2)
1782e1051a39Sopenharmony_ci	la	$i2,16($i2)
1783e1051a39Sopenharmony_ci	brct	$fp,.Lxts_km_zap
1784e1051a39Sopenharmony_ci
1785e1051a39Sopenharmony_ci	la	$sp,0($i1)
1786e1051a39Sopenharmony_ci	llgc	$len,2*$SIZE_T-1($i1)
1787e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%=16
1788e1051a39Sopenharmony_ci	bzr	$ra
1789e1051a39Sopenharmony_ci
1790e1051a39Sopenharmony_ci	# generate one more tweak...
1791e1051a39Sopenharmony_ci	lghi	$i1,0x87
1792e1051a39Sopenharmony_ci	srag	$i2,$s1,63		# broadcast upper bit
1793e1051a39Sopenharmony_ci	ngr	$i1,$i2			# rem
1794e1051a39Sopenharmony_ci	algr	$s0,$s0
1795e1051a39Sopenharmony_ci	alcgr	$s1,$s1
1796e1051a39Sopenharmony_ci	xgr	$s0,$i1
1797e1051a39Sopenharmony_ci
1798e1051a39Sopenharmony_ci	ltr	$len,$len		# clear zero flag
1799e1051a39Sopenharmony_ci	br	$ra
1800e1051a39Sopenharmony_ci.size	_s390x_xts_km,.-_s390x_xts_km
1801e1051a39Sopenharmony_ci
1802e1051a39Sopenharmony_ci.globl	AES_xts_encrypt
1803e1051a39Sopenharmony_ci.type	AES_xts_encrypt,\@function
1804e1051a39Sopenharmony_ci.align	16
1805e1051a39Sopenharmony_ciAES_xts_encrypt:
1806e1051a39Sopenharmony_ci	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
1807e1051a39Sopenharmony_ci	xgr	%r4,%r3
1808e1051a39Sopenharmony_ci	xgr	%r3,%r4
1809e1051a39Sopenharmony_ci___
1810e1051a39Sopenharmony_ci$code.=<<___ if ($SIZE_T==4);
1811e1051a39Sopenharmony_ci	llgfr	$len,$len
1812e1051a39Sopenharmony_ci___
1813e1051a39Sopenharmony_ci$code.=<<___;
1814e1051a39Sopenharmony_ci	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
1815e1051a39Sopenharmony_ci	srag	$len,$len,4		# formally wrong, because it expands
1816e1051a39Sopenharmony_ci					# sign byte, but who can afford asking
1817e1051a39Sopenharmony_ci					# to process more than 2^63-1 bytes?
1818e1051a39Sopenharmony_ci					# I use it, because it sets condition
1819e1051a39Sopenharmony_ci					# code...
1820e1051a39Sopenharmony_ci	bcr	8,$ra			# abort if zero (i.e. less than 16)
1821e1051a39Sopenharmony_ci___
1822e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
1823e1051a39Sopenharmony_ci	llgf	%r0,240($key2)
1824e1051a39Sopenharmony_ci	lhi	%r1,16
1825e1051a39Sopenharmony_ci	clr	%r0,%r1
1826e1051a39Sopenharmony_ci	jl	.Lxts_enc_software
1827e1051a39Sopenharmony_ci
1828e1051a39Sopenharmony_ci	st${g}	$ra,5*$SIZE_T($sp)
1829e1051a39Sopenharmony_ci	stm${g}	%r6,$s3,6*$SIZE_T($sp)
1830e1051a39Sopenharmony_ci
1831e1051a39Sopenharmony_ci	sllg	$len,$len,4		# $len&=~15
1832e1051a39Sopenharmony_ci	slgr	$out,$inp
1833e1051a39Sopenharmony_ci
1834e1051a39Sopenharmony_ci	# generate the tweak value
1835e1051a39Sopenharmony_ci	l${g}	$s3,$stdframe($sp)	# pointer to iv
1836e1051a39Sopenharmony_ci	la	$s2,$tweak($sp)
1837e1051a39Sopenharmony_ci	lmg	$s0,$s1,0($s3)
1838e1051a39Sopenharmony_ci	lghi	$s3,16
1839e1051a39Sopenharmony_ci	stmg	$s0,$s1,0($s2)
1840e1051a39Sopenharmony_ci	la	%r1,0($key2)		# $key2 is not needed anymore
1841e1051a39Sopenharmony_ci	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
1842e1051a39Sopenharmony_ci	brc	1,.-4			# can this happen?
1843e1051a39Sopenharmony_ci
1844e1051a39Sopenharmony_ci	l	%r0,240($key1)
1845e1051a39Sopenharmony_ci	la	%r1,0($key1)		# $key1 is not needed anymore
1846e1051a39Sopenharmony_ci	bras	$ra,_s390x_xts_km
1847e1051a39Sopenharmony_ci	jz	.Lxts_enc_km_done
1848e1051a39Sopenharmony_ci
1849e1051a39Sopenharmony_ci	aghi	$inp,-16		# take one step back
1850e1051a39Sopenharmony_ci	la	$i3,0($out,$inp)	# put aside real $out
1851e1051a39Sopenharmony_ci.Lxts_enc_km_steal:
1852e1051a39Sopenharmony_ci	llgc	$i1,16($inp)
1853e1051a39Sopenharmony_ci	llgc	$i2,0($out,$inp)
1854e1051a39Sopenharmony_ci	stc	$i1,0($out,$inp)
1855e1051a39Sopenharmony_ci	stc	$i2,16($out,$inp)
1856e1051a39Sopenharmony_ci	la	$inp,1($inp)
1857e1051a39Sopenharmony_ci	brct	$len,.Lxts_enc_km_steal
1858e1051a39Sopenharmony_ci
1859e1051a39Sopenharmony_ci	la	$s2,0($i3)
1860e1051a39Sopenharmony_ci	lghi	$s3,16
1861e1051a39Sopenharmony_ci	lrvgr	$i1,$s0			# flip byte order
1862e1051a39Sopenharmony_ci	lrvgr	$i2,$s1
1863e1051a39Sopenharmony_ci	xg	$i1,0($s2)
1864e1051a39Sopenharmony_ci	xg	$i2,8($s2)
1865e1051a39Sopenharmony_ci	stg	$i1,0($s2)
1866e1051a39Sopenharmony_ci	stg	$i2,8($s2)
1867e1051a39Sopenharmony_ci	.long	0xb92e00aa		# km $s2,$s2
1868e1051a39Sopenharmony_ci	brc	1,.-4			# can this happen?
1869e1051a39Sopenharmony_ci	lrvgr	$i1,$s0			# flip byte order
1870e1051a39Sopenharmony_ci	lrvgr	$i2,$s1
1871e1051a39Sopenharmony_ci	xg	$i1,0($i3)
1872e1051a39Sopenharmony_ci	xg	$i2,8($i3)
1873e1051a39Sopenharmony_ci	stg	$i1,0($i3)
1874e1051a39Sopenharmony_ci	stg	$i2,8($i3)
1875e1051a39Sopenharmony_ci
1876e1051a39Sopenharmony_ci.Lxts_enc_km_done:
1877e1051a39Sopenharmony_ci	stg	$sp,$tweak+0($sp)	# wipe tweak
1878e1051a39Sopenharmony_ci	stg	$sp,$tweak+8($sp)
1879e1051a39Sopenharmony_ci	l${g}	$ra,5*$SIZE_T($sp)
1880e1051a39Sopenharmony_ci	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1881e1051a39Sopenharmony_ci	br	$ra
1882e1051a39Sopenharmony_ci.align	16
1883e1051a39Sopenharmony_ci.Lxts_enc_software:
1884e1051a39Sopenharmony_ci___
1885e1051a39Sopenharmony_ci$code.=<<___;
1886e1051a39Sopenharmony_ci	stm${g}	%r6,$ra,6*$SIZE_T($sp)
1887e1051a39Sopenharmony_ci
1888e1051a39Sopenharmony_ci	slgr	$out,$inp
1889e1051a39Sopenharmony_ci
1890e1051a39Sopenharmony_ci	l${g}	$s3,$stdframe($sp)	# ivp
1891e1051a39Sopenharmony_ci	llgf	$s0,0($s3)		# load iv
1892e1051a39Sopenharmony_ci	llgf	$s1,4($s3)
1893e1051a39Sopenharmony_ci	llgf	$s2,8($s3)
1894e1051a39Sopenharmony_ci	llgf	$s3,12($s3)
1895e1051a39Sopenharmony_ci	stm${g}	%r2,%r5,2*$SIZE_T($sp)
1896e1051a39Sopenharmony_ci	la	$key,0($key2)
1897e1051a39Sopenharmony_ci	larl	$tbl,AES_Te
1898e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt	# generate the tweak
1899e1051a39Sopenharmony_ci	lm${g}	%r2,%r5,2*$SIZE_T($sp)
1900e1051a39Sopenharmony_ci	stm	$s0,$s3,$tweak($sp)	# save the tweak
1901e1051a39Sopenharmony_ci	j	.Lxts_enc_enter
1902e1051a39Sopenharmony_ci
1903e1051a39Sopenharmony_ci.align	16
1904e1051a39Sopenharmony_ci.Lxts_enc_loop:
1905e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
1906e1051a39Sopenharmony_ci	lrvg	$s3,$tweak+8($sp)
1907e1051a39Sopenharmony_ci	lghi	%r1,0x87
1908e1051a39Sopenharmony_ci	srag	%r0,$s3,63		# broadcast upper bit
1909e1051a39Sopenharmony_ci	ngr	%r1,%r0			# rem
1910e1051a39Sopenharmony_ci	algr	$s1,$s1
1911e1051a39Sopenharmony_ci	alcgr	$s3,$s3
1912e1051a39Sopenharmony_ci	xgr	$s1,%r1
1913e1051a39Sopenharmony_ci	lrvgr	$s1,$s1			# flip byte order
1914e1051a39Sopenharmony_ci	lrvgr	$s3,$s3
1915e1051a39Sopenharmony_ci	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
1916e1051a39Sopenharmony_ci	stg	$s1,$tweak+0($sp)	# save the tweak
1917e1051a39Sopenharmony_ci	llgfr	$s1,$s1
1918e1051a39Sopenharmony_ci	srlg	$s2,$s3,32
1919e1051a39Sopenharmony_ci	stg	$s3,$tweak+8($sp)
1920e1051a39Sopenharmony_ci	llgfr	$s3,$s3
1921e1051a39Sopenharmony_ci	la	$inp,16($inp)		# $inp+=16
1922e1051a39Sopenharmony_ci.Lxts_enc_enter:
1923e1051a39Sopenharmony_ci	x	$s0,0($inp)		# ^=*($inp)
1924e1051a39Sopenharmony_ci	x	$s1,4($inp)
1925e1051a39Sopenharmony_ci	x	$s2,8($inp)
1926e1051a39Sopenharmony_ci	x	$s3,12($inp)
1927e1051a39Sopenharmony_ci	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
1928e1051a39Sopenharmony_ci	la	$key,0($key1)
1929e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt
1930e1051a39Sopenharmony_ci	lm${g}	%r2,%r5,2*$SIZE_T($sp)
1931e1051a39Sopenharmony_ci	x	$s0,$tweak+0($sp)	# ^=tweak
1932e1051a39Sopenharmony_ci	x	$s1,$tweak+4($sp)
1933e1051a39Sopenharmony_ci	x	$s2,$tweak+8($sp)
1934e1051a39Sopenharmony_ci	x	$s3,$tweak+12($sp)
1935e1051a39Sopenharmony_ci	st	$s0,0($out,$inp)
1936e1051a39Sopenharmony_ci	st	$s1,4($out,$inp)
1937e1051a39Sopenharmony_ci	st	$s2,8($out,$inp)
1938e1051a39Sopenharmony_ci	st	$s3,12($out,$inp)
1939e1051a39Sopenharmony_ci	brct${g}	$len,.Lxts_enc_loop
1940e1051a39Sopenharmony_ci
1941e1051a39Sopenharmony_ci	llgc	$len,`2*$SIZE_T-1`($sp)
1942e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%16
1943e1051a39Sopenharmony_ci	jz	.Lxts_enc_done
1944e1051a39Sopenharmony_ci
1945e1051a39Sopenharmony_ci	la	$i3,0($inp,$out)	# put aside real $out
1946e1051a39Sopenharmony_ci.Lxts_enc_steal:
1947e1051a39Sopenharmony_ci	llgc	%r0,16($inp)
1948e1051a39Sopenharmony_ci	llgc	%r1,0($out,$inp)
1949e1051a39Sopenharmony_ci	stc	%r0,0($out,$inp)
1950e1051a39Sopenharmony_ci	stc	%r1,16($out,$inp)
1951e1051a39Sopenharmony_ci	la	$inp,1($inp)
1952e1051a39Sopenharmony_ci	brct	$len,.Lxts_enc_steal
1953e1051a39Sopenharmony_ci	la	$out,0($i3)		# restore real $out
1954e1051a39Sopenharmony_ci
1955e1051a39Sopenharmony_ci	# generate last tweak...
1956e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
1957e1051a39Sopenharmony_ci	lrvg	$s3,$tweak+8($sp)
1958e1051a39Sopenharmony_ci	lghi	%r1,0x87
1959e1051a39Sopenharmony_ci	srag	%r0,$s3,63		# broadcast upper bit
1960e1051a39Sopenharmony_ci	ngr	%r1,%r0			# rem
1961e1051a39Sopenharmony_ci	algr	$s1,$s1
1962e1051a39Sopenharmony_ci	alcgr	$s3,$s3
1963e1051a39Sopenharmony_ci	xgr	$s1,%r1
1964e1051a39Sopenharmony_ci	lrvgr	$s1,$s1			# flip byte order
1965e1051a39Sopenharmony_ci	lrvgr	$s3,$s3
1966e1051a39Sopenharmony_ci	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
1967e1051a39Sopenharmony_ci	stg	$s1,$tweak+0($sp)	# save the tweak
1968e1051a39Sopenharmony_ci	llgfr	$s1,$s1
1969e1051a39Sopenharmony_ci	srlg	$s2,$s3,32
1970e1051a39Sopenharmony_ci	stg	$s3,$tweak+8($sp)
1971e1051a39Sopenharmony_ci	llgfr	$s3,$s3
1972e1051a39Sopenharmony_ci
1973e1051a39Sopenharmony_ci	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
1974e1051a39Sopenharmony_ci	x	$s1,4($out)
1975e1051a39Sopenharmony_ci	x	$s2,8($out)
1976e1051a39Sopenharmony_ci	x	$s3,12($out)
1977e1051a39Sopenharmony_ci	st${g}	$out,4*$SIZE_T($sp)
1978e1051a39Sopenharmony_ci	la	$key,0($key1)
1979e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt
1980e1051a39Sopenharmony_ci	l${g}	$out,4*$SIZE_T($sp)
1981e1051a39Sopenharmony_ci	x	$s0,`$tweak+0`($sp)	# ^=tweak
1982e1051a39Sopenharmony_ci	x	$s1,`$tweak+4`($sp)
1983e1051a39Sopenharmony_ci	x	$s2,`$tweak+8`($sp)
1984e1051a39Sopenharmony_ci	x	$s3,`$tweak+12`($sp)
1985e1051a39Sopenharmony_ci	st	$s0,0($out)
1986e1051a39Sopenharmony_ci	st	$s1,4($out)
1987e1051a39Sopenharmony_ci	st	$s2,8($out)
1988e1051a39Sopenharmony_ci	st	$s3,12($out)
1989e1051a39Sopenharmony_ci
1990e1051a39Sopenharmony_ci.Lxts_enc_done:
1991e1051a39Sopenharmony_ci	stg	$sp,$tweak+0($sp)	# wipe tweak
1992e1051a39Sopenharmony_ci	stg	$sp,$tweak+8($sp)
1993e1051a39Sopenharmony_ci	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1994e1051a39Sopenharmony_ci	br	$ra
1995e1051a39Sopenharmony_ci.size	AES_xts_encrypt,.-AES_xts_encrypt
1996e1051a39Sopenharmony_ci___
1997e1051a39Sopenharmony_ci# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
1998e1051a39Sopenharmony_ci#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
1999e1051a39Sopenharmony_ci#	const unsigned char iv[16]);
2000e1051a39Sopenharmony_ci#
# Entry sequence and length pre-processing, common to both code paths
# that follow:
#  - %r3 and %r4 are exchanged with three XORs (per the inline comment
#    these are $out and $len);
#  - when $SIZE_T==4 the length is zero-extended to 64 bits (llgfr);
#  - the caller's length is stored at 1*$SIZE_T($sp); later code re-reads
#    the byte at 2*$SIZE_T-1($sp) and masks it with 0x0f to get len%16;
#  - 16 is subtracted and the routine returns immediately if the result
#    is negative (bcr 4,$ra), so nothing is processed for len<16;
#  - if the low four bits of the reduced length are zero (len was a
#    multiple of 16) the 16 is added back; otherwise the reduced value is
#    kept, which leaves the last complete block to the code at the
#    .Lxts_dec_km_2ndtweak/.Lxts_dec_2ndtweak labels.
2001e1051a39Sopenharmony_ci$code.=<<___;
2002e1051a39Sopenharmony_ci.globl	AES_xts_decrypt
2003e1051a39Sopenharmony_ci.type	AES_xts_decrypt,\@function
2004e1051a39Sopenharmony_ci.align	16
2005e1051a39Sopenharmony_ciAES_xts_decrypt:
2006e1051a39Sopenharmony_ci	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
2007e1051a39Sopenharmony_ci	xgr	%r4,%r3
2008e1051a39Sopenharmony_ci	xgr	%r3,%r4
2009e1051a39Sopenharmony_ci___
2010e1051a39Sopenharmony_ci$code.=<<___ if ($SIZE_T==4);
2011e1051a39Sopenharmony_ci	llgfr	$len,$len
2012e1051a39Sopenharmony_ci___
2013e1051a39Sopenharmony_ci$code.=<<___;
2014e1051a39Sopenharmony_ci	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
2015e1051a39Sopenharmony_ci	aghi	$len,-16
2016e1051a39Sopenharmony_ci	bcr	4,$ra			# abort if less than zero. formally
2017e1051a39Sopenharmony_ci					# wrong, because $len is unsigned,
2018e1051a39Sopenharmony_ci					# but who can afford asking to
2019e1051a39Sopenharmony_ci					# process more than 2^63-1 bytes?
2020e1051a39Sopenharmony_ci	tmll	$len,0x0f
2021e1051a39Sopenharmony_ci	jnz	.Lxts_dec_proceed
2022e1051a39Sopenharmony_ci	aghi	$len,16
2023e1051a39Sopenharmony_ci.Lxts_dec_proceed:
2024e1051a39Sopenharmony_ci___
# AES_xts_decrypt, hardware-assisted path; emitted only when $softonly is
# false.
#
# The 32-bit word at 240($key2) is compared (unsigned) with 16; smaller
# values branch to .Lxts_dec_software.
# NOTE(review): presumably that word is the KM function code planted by
# the key-setup code, which is outside this section -- confirm there.
#
# Otherwise:
#  1. $ra and %r6..$s3 are saved; $len is rounded down to a multiple of
#     16 and $out is turned into the offset $out-$inp, so 0($out,$inp)
#     addresses the output that corresponds to 0($inp).
#  2. The 16-byte iv (pointer read from $stdframe($sp)) is copied to
#     $tweak($sp) and processed in place by KM with %r0/%r1 taken from
#     $key2.  Every hand-encoded KM (.long) is followed by "brc 1,.-4",
#     which re-issues it for as long as condition code 3 is reported.
#  3. %r0/%r1 are reloaded from $key1.  A zero $len goes to
#     .Lxts_dec_km_short; otherwise _s390x_xts_km (defined outside this
#     section) is called and a zero condition on return finishes at
#     .Lxts_dec_km_done.  NOTE(review): the code after the call relies on
#     $s0/$s1 holding the current tweak and $len the tail byte count as
#     left by that helper -- confirm against its definition.
#  4. .Lxts_dec_km_2ndtweak keeps a byte-reversed copy of the current
#     tweak in $s2/$s3, multiplies $s0/$s1 by x in GF(2^128) (128-bit
#     shift left by one, xor of 0x87 when the top bit was set) and
#     handles the block at $inp with the new tweak: xor, KM in place in
#     the output buffer, xor.
#  5. .Lxts_dec_km_steal runs $len times (len%16 on the short path).
#     Each pass stores the input byte at 16($inp) to 0($out,$inp), moves
#     the byte previously at 0($out,$inp) to 16($out,$inp) and advances
#     $inp.
#  6. The block at $i3 (the output address saved before the loop) is then
#     xored with $s2/$s3, run through KM in place and xored again.
#  7. .Lxts_dec_km_done overwrites both halves of the tweak slot with the
#     value of $sp, restores $ra and %r6..$s3 and returns.
#
# The statement ends by emitting the aligned .Lxts_dec_software label
# that the next section continues from.
2025e1051a39Sopenharmony_ci$code.=<<___ if (!$softonly);
2026e1051a39Sopenharmony_ci	llgf	%r0,240($key2)
2027e1051a39Sopenharmony_ci	lhi	%r1,16
2028e1051a39Sopenharmony_ci	clr	%r0,%r1
2029e1051a39Sopenharmony_ci	jl	.Lxts_dec_software
2030e1051a39Sopenharmony_ci
2031e1051a39Sopenharmony_ci	st${g}	$ra,5*$SIZE_T($sp)
2032e1051a39Sopenharmony_ci	stm${g}	%r6,$s3,6*$SIZE_T($sp)
2033e1051a39Sopenharmony_ci
2034e1051a39Sopenharmony_ci	nill	$len,0xfff0		# $len&=~15
2035e1051a39Sopenharmony_ci	slgr	$out,$inp
2036e1051a39Sopenharmony_ci
2037e1051a39Sopenharmony_ci	# generate the tweak value
2038e1051a39Sopenharmony_ci	l${g}	$s3,$stdframe($sp)	# pointer to iv
2039e1051a39Sopenharmony_ci	la	$s2,$tweak($sp)
2040e1051a39Sopenharmony_ci	lmg	$s0,$s1,0($s3)
2041e1051a39Sopenharmony_ci	lghi	$s3,16
2042e1051a39Sopenharmony_ci	stmg	$s0,$s1,0($s2)
2043e1051a39Sopenharmony_ci	la	%r1,0($key2)		# $key2 is not needed past this point
2044e1051a39Sopenharmony_ci	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
2045e1051a39Sopenharmony_ci	brc	1,.-4			# can this happen?
2046e1051a39Sopenharmony_ci
2047e1051a39Sopenharmony_ci	l	%r0,240($key1)
2048e1051a39Sopenharmony_ci	la	%r1,0($key1)		# $key1 is not needed anymore
2049e1051a39Sopenharmony_ci
2050e1051a39Sopenharmony_ci	ltgr	$len,$len
2051e1051a39Sopenharmony_ci	jz	.Lxts_dec_km_short
2052e1051a39Sopenharmony_ci	bras	$ra,_s390x_xts_km
2053e1051a39Sopenharmony_ci	jz	.Lxts_dec_km_done
2054e1051a39Sopenharmony_ci
2055e1051a39Sopenharmony_ci	lrvgr	$s2,$s0			# make copy in reverse byte order
2056e1051a39Sopenharmony_ci	lrvgr	$s3,$s1
2057e1051a39Sopenharmony_ci	j	.Lxts_dec_km_2ndtweak
2058e1051a39Sopenharmony_ci
2059e1051a39Sopenharmony_ci.Lxts_dec_km_short:
2060e1051a39Sopenharmony_ci	llgc	$len,`2*$SIZE_T-1`($sp)
2061e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%=16
2062e1051a39Sopenharmony_ci	lrvg	$s0,$tweak+0($sp)	# load the tweak
2063e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+8($sp)
2064e1051a39Sopenharmony_ci	lrvgr	$s2,$s0			# make copy in reverse byte order
2065e1051a39Sopenharmony_ci	lrvgr	$s3,$s1
2066e1051a39Sopenharmony_ci
2067e1051a39Sopenharmony_ci.Lxts_dec_km_2ndtweak:
2068e1051a39Sopenharmony_ci	lghi	$i1,0x87
2069e1051a39Sopenharmony_ci	srag	$i2,$s1,63		# broadcast upper bit
2070e1051a39Sopenharmony_ci	ngr	$i1,$i2			# rem
2071e1051a39Sopenharmony_ci	algr	$s0,$s0
2072e1051a39Sopenharmony_ci	alcgr	$s1,$s1
2073e1051a39Sopenharmony_ci	xgr	$s0,$i1
2074e1051a39Sopenharmony_ci	lrvgr	$i1,$s0			# flip byte order
2075e1051a39Sopenharmony_ci	lrvgr	$i2,$s1
2076e1051a39Sopenharmony_ci
2077e1051a39Sopenharmony_ci	xg	$i1,0($inp)
2078e1051a39Sopenharmony_ci	xg	$i2,8($inp)
2079e1051a39Sopenharmony_ci	stg	$i1,0($out,$inp)
2080e1051a39Sopenharmony_ci	stg	$i2,8($out,$inp)
2081e1051a39Sopenharmony_ci	la	$i2,0($out,$inp)
2082e1051a39Sopenharmony_ci	lghi	$i3,16
2083e1051a39Sopenharmony_ci	.long	0xb92e0066		# km $i2,$i2
2084e1051a39Sopenharmony_ci	brc	1,.-4			# can this happen?
2085e1051a39Sopenharmony_ci	lrvgr	$i1,$s0
2086e1051a39Sopenharmony_ci	lrvgr	$i2,$s1
2087e1051a39Sopenharmony_ci	xg	$i1,0($out,$inp)
2088e1051a39Sopenharmony_ci	xg	$i2,8($out,$inp)
2089e1051a39Sopenharmony_ci	stg	$i1,0($out,$inp)
2090e1051a39Sopenharmony_ci	stg	$i2,8($out,$inp)
2091e1051a39Sopenharmony_ci
2092e1051a39Sopenharmony_ci	la	$i3,0($out,$inp)	# put aside real $out
2093e1051a39Sopenharmony_ci.Lxts_dec_km_steal:
2094e1051a39Sopenharmony_ci	llgc	$i1,16($inp)
2095e1051a39Sopenharmony_ci	llgc	$i2,0($out,$inp)
2096e1051a39Sopenharmony_ci	stc	$i1,0($out,$inp)
2097e1051a39Sopenharmony_ci	stc	$i2,16($out,$inp)
2098e1051a39Sopenharmony_ci	la	$inp,1($inp)
2099e1051a39Sopenharmony_ci	brct	$len,.Lxts_dec_km_steal
2100e1051a39Sopenharmony_ci
2101e1051a39Sopenharmony_ci	lgr	$s0,$s2
2102e1051a39Sopenharmony_ci	lgr	$s1,$s3
2103e1051a39Sopenharmony_ci	xg	$s0,0($i3)
2104e1051a39Sopenharmony_ci	xg	$s1,8($i3)
2105e1051a39Sopenharmony_ci	stg	$s0,0($i3)
2106e1051a39Sopenharmony_ci	stg	$s1,8($i3)
2107e1051a39Sopenharmony_ci	la	$s0,0($i3)
2108e1051a39Sopenharmony_ci	lghi	$s1,16
2109e1051a39Sopenharmony_ci	.long	0xb92e0088		# km $s0,$s0
2110e1051a39Sopenharmony_ci	brc	1,.-4			# can this happen?
2111e1051a39Sopenharmony_ci	xg	$s2,0($i3)
2112e1051a39Sopenharmony_ci	xg	$s3,8($i3)
2113e1051a39Sopenharmony_ci	stg	$s2,0($i3)
2114e1051a39Sopenharmony_ci	stg	$s3,8($i3)
2115e1051a39Sopenharmony_ci.Lxts_dec_km_done:
2116e1051a39Sopenharmony_ci	stg	$sp,$tweak+0($sp)	# wipe tweak
2117e1051a39Sopenharmony_ci	stg	$sp,$tweak+8($sp)
2118e1051a39Sopenharmony_ci	l${g}	$ra,5*$SIZE_T($sp)
2119e1051a39Sopenharmony_ci	lm${g}	%r6,$s3,6*$SIZE_T($sp)
2120e1051a39Sopenharmony_ci	br	$ra
2121e1051a39Sopenharmony_ci.align	16
2122e1051a39Sopenharmony_ci.Lxts_dec_software:
2123e1051a39Sopenharmony_ci___
# AES_xts_decrypt, table-driven software path.  Always emitted: reached
# by falling through from .Lxts_dec_proceed when $softonly is set, or via
# the .Lxts_dec_software label otherwise.
#
#  1. %r6..$ra are saved, $len becomes a block count (srlg by 4) and $out
#     becomes the offset $out-$inp.
#  2. The iv (pointer read from $stdframe($sp)) is loaded as four 32-bit
#     words and encrypted with $key2 by _s390x_AES_encrypt using AES_Te;
#     the result is stored at $tweak($sp).  $tbl is then switched to
#     AES_Td for the decryption calls.  Both AES helpers are defined
#     outside this section.
#  3. A zero block count goes to .Lxts_dec_short.  Otherwise the loop
#     entered at .Lxts_dec_enter handles one block per pass: xor with the
#     tweak, _s390x_AES_decrypt with $key1, xor with the tweak, store,
#     $inp+=16.  .Lxts_dec_loop first multiplies the stored tweak by x in
#     GF(2^128) (byte-reversed load, 128-bit shift left by one, xor of
#     0x87 when the top bit was set), writes it back and splits it into
#     the four 32-bit words $s0..$s3.
#  4. If len%16 (from the length byte saved at entry) is zero, the
#     routine finishes at .Lxts_dec_done.  Otherwise two tweaks are used:
#     the next one is stored at $tweak($sp) ("1st") and the one after it
#     at $tweak-16($sp) ("2nd").  On the .Lxts_dec_short path the tweak
#     already at $tweak($sp) serves as the 1st.
#  5. The block at $inp is decrypted with the 2nd tweak and stored.
#     .Lxts_dec_steal then runs len%16 times; each pass stores the input
#     byte at 16($inp) to 0($out,$inp), moves the byte previously at
#     0($out,$inp) to 16($out,$inp) and advances $inp.
#  6. $out is reset to the absolute address of that block (saved in $i3)
#     and the block is decrypted in place with the 1st tweak; $out is
#     kept at 4*$SIZE_T($sp) across the call.
#  7. The 2nd-tweak slot is overwritten on the stealing path and the
#     $tweak slot on every exit (both with the value of $sp) before
#     %r6..$ra are restored and the routine returns.
2124e1051a39Sopenharmony_ci$code.=<<___;
2125e1051a39Sopenharmony_ci	stm${g}	%r6,$ra,6*$SIZE_T($sp)
2126e1051a39Sopenharmony_ci
2127e1051a39Sopenharmony_ci	srlg	$len,$len,4
2128e1051a39Sopenharmony_ci	slgr	$out,$inp
2129e1051a39Sopenharmony_ci
2130e1051a39Sopenharmony_ci	l${g}	$s3,$stdframe($sp)	# ivp
2131e1051a39Sopenharmony_ci	llgf	$s0,0($s3)		# load iv
2132e1051a39Sopenharmony_ci	llgf	$s1,4($s3)
2133e1051a39Sopenharmony_ci	llgf	$s2,8($s3)
2134e1051a39Sopenharmony_ci	llgf	$s3,12($s3)
2135e1051a39Sopenharmony_ci	stm${g}	%r2,%r5,2*$SIZE_T($sp)
2136e1051a39Sopenharmony_ci	la	$key,0($key2)
2137e1051a39Sopenharmony_ci	larl	$tbl,AES_Te
2138e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_encrypt	# generate the tweak
2139e1051a39Sopenharmony_ci	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2140e1051a39Sopenharmony_ci	larl	$tbl,AES_Td
2141e1051a39Sopenharmony_ci	lt${g}r	$len,$len
2142e1051a39Sopenharmony_ci	stm	$s0,$s3,$tweak($sp)	# save the tweak
2143e1051a39Sopenharmony_ci	jz	.Lxts_dec_short
2144e1051a39Sopenharmony_ci	j	.Lxts_dec_enter
2145e1051a39Sopenharmony_ci
2146e1051a39Sopenharmony_ci.align	16
2147e1051a39Sopenharmony_ci.Lxts_dec_loop:
2148e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2149e1051a39Sopenharmony_ci	lrvg	$s3,$tweak+8($sp)
2150e1051a39Sopenharmony_ci	lghi	%r1,0x87
2151e1051a39Sopenharmony_ci	srag	%r0,$s3,63		# broadcast upper bit
2152e1051a39Sopenharmony_ci	ngr	%r1,%r0			# rem
2153e1051a39Sopenharmony_ci	algr	$s1,$s1
2154e1051a39Sopenharmony_ci	alcgr	$s3,$s3
2155e1051a39Sopenharmony_ci	xgr	$s1,%r1
2156e1051a39Sopenharmony_ci	lrvgr	$s1,$s1			# flip byte order
2157e1051a39Sopenharmony_ci	lrvgr	$s3,$s3
2158e1051a39Sopenharmony_ci	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
2159e1051a39Sopenharmony_ci	stg	$s1,$tweak+0($sp)	# save the tweak
2160e1051a39Sopenharmony_ci	llgfr	$s1,$s1
2161e1051a39Sopenharmony_ci	srlg	$s2,$s3,32
2162e1051a39Sopenharmony_ci	stg	$s3,$tweak+8($sp)
2163e1051a39Sopenharmony_ci	llgfr	$s3,$s3
2164e1051a39Sopenharmony_ci.Lxts_dec_enter:
2165e1051a39Sopenharmony_ci	x	$s0,0($inp)		# tweak^=*(inp)
2166e1051a39Sopenharmony_ci	x	$s1,4($inp)
2167e1051a39Sopenharmony_ci	x	$s2,8($inp)
2168e1051a39Sopenharmony_ci	x	$s3,12($inp)
2169e1051a39Sopenharmony_ci	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
2170e1051a39Sopenharmony_ci	la	$key,0($key1)
2171e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_decrypt
2172e1051a39Sopenharmony_ci	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2173e1051a39Sopenharmony_ci	x	$s0,$tweak+0($sp)	# ^=tweak
2174e1051a39Sopenharmony_ci	x	$s1,$tweak+4($sp)
2175e1051a39Sopenharmony_ci	x	$s2,$tweak+8($sp)
2176e1051a39Sopenharmony_ci	x	$s3,$tweak+12($sp)
2177e1051a39Sopenharmony_ci	st	$s0,0($out,$inp)
2178e1051a39Sopenharmony_ci	st	$s1,4($out,$inp)
2179e1051a39Sopenharmony_ci	st	$s2,8($out,$inp)
2180e1051a39Sopenharmony_ci	st	$s3,12($out,$inp)
2181e1051a39Sopenharmony_ci	la	$inp,16($inp)
2182e1051a39Sopenharmony_ci	brct${g}	$len,.Lxts_dec_loop
2183e1051a39Sopenharmony_ci
2184e1051a39Sopenharmony_ci	llgc	$len,`2*$SIZE_T-1`($sp)
2185e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%16
2186e1051a39Sopenharmony_ci	jz	.Lxts_dec_done
2187e1051a39Sopenharmony_ci
2188e1051a39Sopenharmony_ci	# generate pair of tweaks...
2189e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2190e1051a39Sopenharmony_ci	lrvg	$s3,$tweak+8($sp)
2191e1051a39Sopenharmony_ci	lghi	%r1,0x87
2192e1051a39Sopenharmony_ci	srag	%r0,$s3,63		# broadcast upper bit
2193e1051a39Sopenharmony_ci	ngr	%r1,%r0			# rem
2194e1051a39Sopenharmony_ci	algr	$s1,$s1
2195e1051a39Sopenharmony_ci	alcgr	$s3,$s3
2196e1051a39Sopenharmony_ci	xgr	$s1,%r1
2197e1051a39Sopenharmony_ci	lrvgr	$i2,$s1			# flip byte order
2198e1051a39Sopenharmony_ci	lrvgr	$i3,$s3
2199e1051a39Sopenharmony_ci	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
2200e1051a39Sopenharmony_ci	j	.Lxts_dec_2ndtweak
2201e1051a39Sopenharmony_ci
2202e1051a39Sopenharmony_ci.align	16
2203e1051a39Sopenharmony_ci.Lxts_dec_short:
2204e1051a39Sopenharmony_ci	llgc	$len,`2*$SIZE_T-1`($sp)
2205e1051a39Sopenharmony_ci	nill	$len,0x0f		# $len%16
2206e1051a39Sopenharmony_ci	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2207e1051a39Sopenharmony_ci	lrvg	$s3,$tweak+8($sp)
2208e1051a39Sopenharmony_ci.Lxts_dec_2ndtweak:
2209e1051a39Sopenharmony_ci	lghi	%r1,0x87
2210e1051a39Sopenharmony_ci	srag	%r0,$s3,63		# broadcast upper bit
2211e1051a39Sopenharmony_ci	ngr	%r1,%r0			# rem
2212e1051a39Sopenharmony_ci	algr	$s1,$s1
2213e1051a39Sopenharmony_ci	alcgr	$s3,$s3
2214e1051a39Sopenharmony_ci	xgr	$s1,%r1
2215e1051a39Sopenharmony_ci	lrvgr	$s1,$s1			# flip byte order
2216e1051a39Sopenharmony_ci	lrvgr	$s3,$s3
2217e1051a39Sopenharmony_ci	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
2218e1051a39Sopenharmony_ci	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
2219e1051a39Sopenharmony_ci	llgfr	$s1,$s1
2220e1051a39Sopenharmony_ci	srlg	$s2,$s3,32
2221e1051a39Sopenharmony_ci	stg	$s3,$tweak-16+8($sp)
2222e1051a39Sopenharmony_ci	llgfr	$s3,$s3
2223e1051a39Sopenharmony_ci
2224e1051a39Sopenharmony_ci	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
2225e1051a39Sopenharmony_ci	x	$s1,4($inp)
2226e1051a39Sopenharmony_ci	x	$s2,8($inp)
2227e1051a39Sopenharmony_ci	x	$s3,12($inp)
2228e1051a39Sopenharmony_ci	stm${g}	%r2,%r3,2*$SIZE_T($sp)
2229e1051a39Sopenharmony_ci	la	$key,0($key1)
2230e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_decrypt
2231e1051a39Sopenharmony_ci	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2232e1051a39Sopenharmony_ci	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
2233e1051a39Sopenharmony_ci	x	$s1,$tweak-16+4($sp)
2234e1051a39Sopenharmony_ci	x	$s2,$tweak-16+8($sp)
2235e1051a39Sopenharmony_ci	x	$s3,$tweak-16+12($sp)
2236e1051a39Sopenharmony_ci	st	$s0,0($out,$inp)
2237e1051a39Sopenharmony_ci	st	$s1,4($out,$inp)
2238e1051a39Sopenharmony_ci	st	$s2,8($out,$inp)
2239e1051a39Sopenharmony_ci	st	$s3,12($out,$inp)
2240e1051a39Sopenharmony_ci
2241e1051a39Sopenharmony_ci	la	$i3,0($out,$inp)	# put aside real $out
2242e1051a39Sopenharmony_ci.Lxts_dec_steal:
2243e1051a39Sopenharmony_ci	llgc	%r0,16($inp)
2244e1051a39Sopenharmony_ci	llgc	%r1,0($out,$inp)
2245e1051a39Sopenharmony_ci	stc	%r0,0($out,$inp)
2246e1051a39Sopenharmony_ci	stc	%r1,16($out,$inp)
2247e1051a39Sopenharmony_ci	la	$inp,1($inp)
2248e1051a39Sopenharmony_ci	brct	$len,.Lxts_dec_steal
2249e1051a39Sopenharmony_ci	la	$out,0($i3)		# restore real $out
2250e1051a39Sopenharmony_ci
2251e1051a39Sopenharmony_ci	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
2252e1051a39Sopenharmony_ci	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
2253e1051a39Sopenharmony_ci	x	$s1,4($out)
2254e1051a39Sopenharmony_ci	x	$s2,8($out)
2255e1051a39Sopenharmony_ci	x	$s3,12($out)
2256e1051a39Sopenharmony_ci	st${g}	$out,4*$SIZE_T($sp)
2257e1051a39Sopenharmony_ci	la	$key,0($key1)
2258e1051a39Sopenharmony_ci	bras	$ra,_s390x_AES_decrypt
2259e1051a39Sopenharmony_ci	l${g}	$out,4*$SIZE_T($sp)
2260e1051a39Sopenharmony_ci	x	$s0,$tweak+0($sp)	# ^=tweak
2261e1051a39Sopenharmony_ci	x	$s1,$tweak+4($sp)
2262e1051a39Sopenharmony_ci	x	$s2,$tweak+8($sp)
2263e1051a39Sopenharmony_ci	x	$s3,$tweak+12($sp)
2264e1051a39Sopenharmony_ci	st	$s0,0($out)
2265e1051a39Sopenharmony_ci	st	$s1,4($out)
2266e1051a39Sopenharmony_ci	st	$s2,8($out)
2267e1051a39Sopenharmony_ci	st	$s3,12($out)
2268e1051a39Sopenharmony_ci	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
2269e1051a39Sopenharmony_ci	stg	$sp,$tweak-16+8($sp)
2270e1051a39Sopenharmony_ci.Lxts_dec_done:
2271e1051a39Sopenharmony_ci	stg	$sp,$tweak+0($sp)	# wipe tweak
2272e1051a39Sopenharmony_ci	stg	$sp,$tweak+8($sp)
2273e1051a39Sopenharmony_ci	lm${g}	%r6,$ra,6*$SIZE_T($sp)
2274e1051a39Sopenharmony_ci	br	$ra
2275e1051a39Sopenharmony_ci.size	AES_xts_decrypt,.-AES_xts_decrypt
2276e1051a39Sopenharmony_ci___
2277e1051a39Sopenharmony_ci}
# Append an identification string to the generated assembly.
2278e1051a39Sopenharmony_ci$code.=<<___;
2279e1051a39Sopenharmony_ci.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
2280e1051a39Sopenharmony_ci___
2281e1051a39Sopenharmony_ci
# Post-process and emit: every backtick-quoted span in $code is replaced
# by the result of evaluating its contents as Perl (this resolves offsets
# written as `2*$SIZE_T-1` in the sections above), then the whole text is
# printed.  STDOUT is closed explicitly so that a failure while flushing
# the output aborts the script with the OS error instead of passing
# unnoticed.
2282e1051a39Sopenharmony_ci$code =~ s/\`([^\`]*)\`/eval $1/gem;
2283e1051a39Sopenharmony_ciprint $code;
2284e1051a39Sopenharmony_ciclose STDOUT or die "error closing STDOUT: $!";	# force flush
2285