1 #include "s390x_arch.h"
2 
3 .text
4 
5 .type	AES_Te,@object
6 .align	256
7 AES_Te:
8 .long	0xc66363a5,0xc66363a5
9 .long	0xf87c7c84,0xf87c7c84
10 .long	0xee777799,0xee777799
11 .long	0xf67b7b8d,0xf67b7b8d
12 .long	0xfff2f20d,0xfff2f20d
13 .long	0xd66b6bbd,0xd66b6bbd
14 .long	0xde6f6fb1,0xde6f6fb1
15 .long	0x91c5c554,0x91c5c554
16 .long	0x60303050,0x60303050
17 .long	0x02010103,0x02010103
18 .long	0xce6767a9,0xce6767a9
19 .long	0x562b2b7d,0x562b2b7d
20 .long	0xe7fefe19,0xe7fefe19
21 .long	0xb5d7d762,0xb5d7d762
22 .long	0x4dababe6,0x4dababe6
23 .long	0xec76769a,0xec76769a
24 .long	0x8fcaca45,0x8fcaca45
25 .long	0x1f82829d,0x1f82829d
26 .long	0x89c9c940,0x89c9c940
27 .long	0xfa7d7d87,0xfa7d7d87
28 .long	0xeffafa15,0xeffafa15
29 .long	0xb25959eb,0xb25959eb
30 .long	0x8e4747c9,0x8e4747c9
31 .long	0xfbf0f00b,0xfbf0f00b
32 .long	0x41adadec,0x41adadec
33 .long	0xb3d4d467,0xb3d4d467
34 .long	0x5fa2a2fd,0x5fa2a2fd
35 .long	0x45afafea,0x45afafea
36 .long	0x239c9cbf,0x239c9cbf
37 .long	0x53a4a4f7,0x53a4a4f7
38 .long	0xe4727296,0xe4727296
39 .long	0x9bc0c05b,0x9bc0c05b
40 .long	0x75b7b7c2,0x75b7b7c2
41 .long	0xe1fdfd1c,0xe1fdfd1c
42 .long	0x3d9393ae,0x3d9393ae
43 .long	0x4c26266a,0x4c26266a
44 .long	0x6c36365a,0x6c36365a
45 .long	0x7e3f3f41,0x7e3f3f41
46 .long	0xf5f7f702,0xf5f7f702
47 .long	0x83cccc4f,0x83cccc4f
48 .long	0x6834345c,0x6834345c
49 .long	0x51a5a5f4,0x51a5a5f4
50 .long	0xd1e5e534,0xd1e5e534
51 .long	0xf9f1f108,0xf9f1f108
52 .long	0xe2717193,0xe2717193
53 .long	0xabd8d873,0xabd8d873
54 .long	0x62313153,0x62313153
55 .long	0x2a15153f,0x2a15153f
56 .long	0x0804040c,0x0804040c
57 .long	0x95c7c752,0x95c7c752
58 .long	0x46232365,0x46232365
59 .long	0x9dc3c35e,0x9dc3c35e
60 .long	0x30181828,0x30181828
61 .long	0x379696a1,0x379696a1
62 .long	0x0a05050f,0x0a05050f
63 .long	0x2f9a9ab5,0x2f9a9ab5
64 .long	0x0e070709,0x0e070709
65 .long	0x24121236,0x24121236
66 .long	0x1b80809b,0x1b80809b
67 .long	0xdfe2e23d,0xdfe2e23d
68 .long	0xcdebeb26,0xcdebeb26
69 .long	0x4e272769,0x4e272769
70 .long	0x7fb2b2cd,0x7fb2b2cd
71 .long	0xea75759f,0xea75759f
72 .long	0x1209091b,0x1209091b
73 .long	0x1d83839e,0x1d83839e
74 .long	0x582c2c74,0x582c2c74
75 .long	0x341a1a2e,0x341a1a2e
76 .long	0x361b1b2d,0x361b1b2d
77 .long	0xdc6e6eb2,0xdc6e6eb2
78 .long	0xb45a5aee,0xb45a5aee
79 .long	0x5ba0a0fb,0x5ba0a0fb
80 .long	0xa45252f6,0xa45252f6
81 .long	0x763b3b4d,0x763b3b4d
82 .long	0xb7d6d661,0xb7d6d661
83 .long	0x7db3b3ce,0x7db3b3ce
84 .long	0x5229297b,0x5229297b
85 .long	0xdde3e33e,0xdde3e33e
86 .long	0x5e2f2f71,0x5e2f2f71
87 .long	0x13848497,0x13848497
88 .long	0xa65353f5,0xa65353f5
89 .long	0xb9d1d168,0xb9d1d168
90 .long	0x00000000,0x00000000
91 .long	0xc1eded2c,0xc1eded2c
92 .long	0x40202060,0x40202060
93 .long	0xe3fcfc1f,0xe3fcfc1f
94 .long	0x79b1b1c8,0x79b1b1c8
95 .long	0xb65b5bed,0xb65b5bed
96 .long	0xd46a6abe,0xd46a6abe
97 .long	0x8dcbcb46,0x8dcbcb46
98 .long	0x67bebed9,0x67bebed9
99 .long	0x7239394b,0x7239394b
100 .long	0x944a4ade,0x944a4ade
101 .long	0x984c4cd4,0x984c4cd4
102 .long	0xb05858e8,0xb05858e8
103 .long	0x85cfcf4a,0x85cfcf4a
104 .long	0xbbd0d06b,0xbbd0d06b
105 .long	0xc5efef2a,0xc5efef2a
106 .long	0x4faaaae5,0x4faaaae5
107 .long	0xedfbfb16,0xedfbfb16
108 .long	0x864343c5,0x864343c5
109 .long	0x9a4d4dd7,0x9a4d4dd7
110 .long	0x66333355,0x66333355
111 .long	0x11858594,0x11858594
112 .long	0x8a4545cf,0x8a4545cf
113 .long	0xe9f9f910,0xe9f9f910
114 .long	0x04020206,0x04020206
115 .long	0xfe7f7f81,0xfe7f7f81
116 .long	0xa05050f0,0xa05050f0
117 .long	0x783c3c44,0x783c3c44
118 .long	0x259f9fba,0x259f9fba
119 .long	0x4ba8a8e3,0x4ba8a8e3
120 .long	0xa25151f3,0xa25151f3
121 .long	0x5da3a3fe,0x5da3a3fe
122 .long	0x804040c0,0x804040c0
123 .long	0x058f8f8a,0x058f8f8a
124 .long	0x3f9292ad,0x3f9292ad
125 .long	0x219d9dbc,0x219d9dbc
126 .long	0x70383848,0x70383848
127 .long	0xf1f5f504,0xf1f5f504
128 .long	0x63bcbcdf,0x63bcbcdf
129 .long	0x77b6b6c1,0x77b6b6c1
130 .long	0xafdada75,0xafdada75
131 .long	0x42212163,0x42212163
132 .long	0x20101030,0x20101030
133 .long	0xe5ffff1a,0xe5ffff1a
134 .long	0xfdf3f30e,0xfdf3f30e
135 .long	0xbfd2d26d,0xbfd2d26d
136 .long	0x81cdcd4c,0x81cdcd4c
137 .long	0x180c0c14,0x180c0c14
138 .long	0x26131335,0x26131335
139 .long	0xc3ecec2f,0xc3ecec2f
140 .long	0xbe5f5fe1,0xbe5f5fe1
141 .long	0x359797a2,0x359797a2
142 .long	0x884444cc,0x884444cc
143 .long	0x2e171739,0x2e171739
144 .long	0x93c4c457,0x93c4c457
145 .long	0x55a7a7f2,0x55a7a7f2
146 .long	0xfc7e7e82,0xfc7e7e82
147 .long	0x7a3d3d47,0x7a3d3d47
148 .long	0xc86464ac,0xc86464ac
149 .long	0xba5d5de7,0xba5d5de7
150 .long	0x3219192b,0x3219192b
151 .long	0xe6737395,0xe6737395
152 .long	0xc06060a0,0xc06060a0
153 .long	0x19818198,0x19818198
154 .long	0x9e4f4fd1,0x9e4f4fd1
155 .long	0xa3dcdc7f,0xa3dcdc7f
156 .long	0x44222266,0x44222266
157 .long	0x542a2a7e,0x542a2a7e
158 .long	0x3b9090ab,0x3b9090ab
159 .long	0x0b888883,0x0b888883
160 .long	0x8c4646ca,0x8c4646ca
161 .long	0xc7eeee29,0xc7eeee29
162 .long	0x6bb8b8d3,0x6bb8b8d3
163 .long	0x2814143c,0x2814143c
164 .long	0xa7dede79,0xa7dede79
165 .long	0xbc5e5ee2,0xbc5e5ee2
166 .long	0x160b0b1d,0x160b0b1d
167 .long	0xaddbdb76,0xaddbdb76
168 .long	0xdbe0e03b,0xdbe0e03b
169 .long	0x64323256,0x64323256
170 .long	0x743a3a4e,0x743a3a4e
171 .long	0x140a0a1e,0x140a0a1e
172 .long	0x924949db,0x924949db
173 .long	0x0c06060a,0x0c06060a
174 .long	0x4824246c,0x4824246c
175 .long	0xb85c5ce4,0xb85c5ce4
176 .long	0x9fc2c25d,0x9fc2c25d
177 .long	0xbdd3d36e,0xbdd3d36e
178 .long	0x43acacef,0x43acacef
179 .long	0xc46262a6,0xc46262a6
180 .long	0x399191a8,0x399191a8
181 .long	0x319595a4,0x319595a4
182 .long	0xd3e4e437,0xd3e4e437
183 .long	0xf279798b,0xf279798b
184 .long	0xd5e7e732,0xd5e7e732
185 .long	0x8bc8c843,0x8bc8c843
186 .long	0x6e373759,0x6e373759
187 .long	0xda6d6db7,0xda6d6db7
188 .long	0x018d8d8c,0x018d8d8c
189 .long	0xb1d5d564,0xb1d5d564
190 .long	0x9c4e4ed2,0x9c4e4ed2
191 .long	0x49a9a9e0,0x49a9a9e0
192 .long	0xd86c6cb4,0xd86c6cb4
193 .long	0xac5656fa,0xac5656fa
194 .long	0xf3f4f407,0xf3f4f407
195 .long	0xcfeaea25,0xcfeaea25
196 .long	0xca6565af,0xca6565af
197 .long	0xf47a7a8e,0xf47a7a8e
198 .long	0x47aeaee9,0x47aeaee9
199 .long	0x10080818,0x10080818
200 .long	0x6fbabad5,0x6fbabad5
201 .long	0xf0787888,0xf0787888
202 .long	0x4a25256f,0x4a25256f
203 .long	0x5c2e2e72,0x5c2e2e72
204 .long	0x381c1c24,0x381c1c24
205 .long	0x57a6a6f1,0x57a6a6f1
206 .long	0x73b4b4c7,0x73b4b4c7
207 .long	0x97c6c651,0x97c6c651
208 .long	0xcbe8e823,0xcbe8e823
209 .long	0xa1dddd7c,0xa1dddd7c
210 .long	0xe874749c,0xe874749c
211 .long	0x3e1f1f21,0x3e1f1f21
212 .long	0x964b4bdd,0x964b4bdd
213 .long	0x61bdbddc,0x61bdbddc
214 .long	0x0d8b8b86,0x0d8b8b86
215 .long	0x0f8a8a85,0x0f8a8a85
216 .long	0xe0707090,0xe0707090
217 .long	0x7c3e3e42,0x7c3e3e42
218 .long	0x71b5b5c4,0x71b5b5c4
219 .long	0xcc6666aa,0xcc6666aa
220 .long	0x904848d8,0x904848d8
221 .long	0x06030305,0x06030305
222 .long	0xf7f6f601,0xf7f6f601
223 .long	0x1c0e0e12,0x1c0e0e12
224 .long	0xc26161a3,0xc26161a3
225 .long	0x6a35355f,0x6a35355f
226 .long	0xae5757f9,0xae5757f9
227 .long	0x69b9b9d0,0x69b9b9d0
228 .long	0x17868691,0x17868691
229 .long	0x99c1c158,0x99c1c158
230 .long	0x3a1d1d27,0x3a1d1d27
231 .long	0x279e9eb9,0x279e9eb9
232 .long	0xd9e1e138,0xd9e1e138
233 .long	0xebf8f813,0xebf8f813
234 .long	0x2b9898b3,0x2b9898b3
235 .long	0x22111133,0x22111133
236 .long	0xd26969bb,0xd26969bb
237 .long	0xa9d9d970,0xa9d9d970
238 .long	0x078e8e89,0x078e8e89
239 .long	0x339494a7,0x339494a7
240 .long	0x2d9b9bb6,0x2d9b9bb6
241 .long	0x3c1e1e22,0x3c1e1e22
242 .long	0x15878792,0x15878792
243 .long	0xc9e9e920,0xc9e9e920
244 .long	0x87cece49,0x87cece49
245 .long	0xaa5555ff,0xaa5555ff
246 .long	0x50282878,0x50282878
247 .long	0xa5dfdf7a,0xa5dfdf7a
248 .long	0x038c8c8f,0x038c8c8f
249 .long	0x59a1a1f8,0x59a1a1f8
250 .long	0x09898980,0x09898980
251 .long	0x1a0d0d17,0x1a0d0d17
252 .long	0x65bfbfda,0x65bfbfda
253 .long	0xd7e6e631,0xd7e6e631
254 .long	0x844242c6,0x844242c6
255 .long	0xd06868b8,0xd06868b8
256 .long	0x824141c3,0x824141c3
257 .long	0x299999b0,0x299999b0
258 .long	0x5a2d2d77,0x5a2d2d77
259 .long	0x1e0f0f11,0x1e0f0f11
260 .long	0x7bb0b0cb,0x7bb0b0cb
261 .long	0xa85454fc,0xa85454fc
262 .long	0x6dbbbbd6,0x6dbbbbd6
263 .long	0x2c16163a,0x2c16163a
# Te4[256]: AES S-box, one byte per entry (begins 2048 bytes past AES_Te)
265 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
266 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
267 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
268 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
269 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
270 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
271 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
272 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
273 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
274 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
275 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
276 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
277 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
278 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
279 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
280 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
281 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
282 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
283 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
284 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
285 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
286 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
287 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
288 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
289 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
290 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
291 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
292 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
293 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
294 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
295 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
296 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
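# rcon[]: key-schedule round constants, one 32-bit word each, zero-padded
#	  (begins 256 bytes past Te4, i.e. at AES_Te+2048+256)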
298 .long	0x01000000, 0x02000000, 0x04000000, 0x08000000
299 .long	0x10000000, 0x20000000, 0x40000000, 0x80000000
300 .long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
301 .align	256
302 .size	AES_Te,.-AES_Te
303 
# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#		   const AES_KEY *key);
#
# Encrypt a single 16-byte block.
#   In:  %r2 = inp, %r3 = out, %r4 = key
#
# The word at key+240 picks the path.  A value below 16 is taken as a
# software round count and the table-driven core is used; any other
# value is passed unchanged in %r0 to the KM instruction.
.globl	AES_encrypt
.type	AES_encrypt,@function
AES_encrypt:
	lhi	%r1,16			# threshold between the two paths
	l	%r0,240(%r4)		# round count or km function code
	clr	%r0,%r1
	jl	.Lesoft			# below 16: software path

	# Hardware path.  The input pointer is already in %r2, so only
	# the key and output pointers have to be moved into place.
	la	%r1,0(%r4)		# %r1 = key
	la	%r4,0(%r3)		# %r4 = out
	lghi	%r3,16			# single block length
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# back to km on cc 3 - can this happen?
	br	%r14
.align	64
.Lesoft:
	stm	%r3,%r14,12(%r15)	# save slots 3..14: out pointer and %r4-%r14

	larl	%r12,AES_Te		# table base for the round core
	llgf	%r8,0(%r2)		# s0..s3 = input block, zero-extended
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)
	bras	%r14,_s390x_AES_encrypt

	l	%r3,12(%r15)		# out pointer back from slot 3
	stm	%r8,%r11,0(%r3)		# out[0..15] = s0..s3

	lm	%r6,%r14,24(%r15)	# restore %r6-%r14 from slots 6..14
	br	%r14
.size	AES_encrypt,.-AES_encrypt
342 
# _s390x_AES_encrypt: table-driven AES encryption core (file-internal).
#
# In:	%r8-%r11  state words s0..s3
#	%r4	  round-key pointer; the word at +240 is the round count
#	%r12	  address of AES_Te
#	%r14	  return address
#	%r15	  stack pointer; slot 15 (offset 60) is used as scratch
# Out:	%r8-%r11  encrypted state
# Uses:	%r0 (index mask, later a key word), %r1-%r3, %r5-%r7,
#	%r13 (loop counter), %r14 (scratch, reloaded before return);
#	%r4 is advanced by 16 for every pass through the loop.
#
# Table layout relied upon: AES_Te consists of 256 entries of 8 bytes,
# each holding the same 32-bit word twice.  A 4-byte load at offset
# 0, 3, 2 or 1 inside an entry therefore returns that word rotated
# right by 0, 8, 16 or 24 bits.  The comments below name these four
# views Te0, Te1, Te2 and Te3.  Byte 2 of an entry is the plain S-box
# value and is what the last round reads as Te4.  Every index is a
# state byte multiplied by 8 and masked with 0xff*8.
.type	_s390x_AES_encrypt,@function
.align	16
_s390x_AES_encrypt:
	st	%r14,60(%r15)		# park return address, %r14 is scratch
	x	%r8,0(%r4)		# initial AddRoundKey: s0..s3 ^= rk[0..3]
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	l	%r13,240(%r4)		# round count
	llill	%r0,0xff*8		# mask for a byte index scaled by 8
	aghi	%r13,-1			# the last round is done after the loop
	j	.Lenc_loop
.align	16
.Lenc_loop:
	# table offsets taken from s0
	sllg	%r1,%r8,3		# (s0>>0)*8
	srlg	%r2,%r8,5		# (s0>>8)*8
	srlg	%r3,%r8,13		# (s0>>16)*8
	srl	%r8,21			# (s0>>24)*8
	nr	%r8,%r0
	ngr	%r1,%r0
	nr	%r2,%r0
	nr	%r3,%r0

	# table offsets taken from s1
	srlg	%r5,%r9,13		# (s1>>16)*8
	sllg	%r6,%r9,3		# (s1>>0)*8
	srlg	%r7,%r9,5		# (s1>>8)*8
	srl	%r9,21			# (s1>>24)*8
	nr	%r5,%r0
	nr	%r9,%r0
	ngr	%r6,%r0
	nr	%r7,%r0

	# %r8 collects new s0; %r1, %r2, %r3 are partial sums that are
	# folded into new s1, s2, s3 further down
	l	%r8,0(%r8,%r12)		# Te0[s0>>24]
	l	%r1,1(%r1,%r12)		# Te3[s0>>0]
	l	%r2,2(%r2,%r12)		# Te2[s0>>8]
	l	%r3,3(%r3,%r12)		# Te1[s0>>16]

	x	%r8,3(%r5,%r12)		# ^ Te1[s1>>16]
	l	%r9,0(%r9,%r12)		# Te0[s1>>24]
	x	%r2,1(%r6,%r12)		# ^ Te3[s1>>0]
	x	%r3,2(%r7,%r12)		# ^ Te2[s1>>8]

	# table offsets taken from s2
	srlg	%r5,%r10,5		# (s2>>8)*8
	srlg	%r6,%r10,13		# (s2>>16)*8
	nr	%r5,%r0
	nr	%r6,%r0
	sllg	%r7,%r10,3		# (s2>>0)*8
	srl	%r10,21			# (s2>>24)*8
	nr	%r10,%r0
	ngr	%r7,%r0

	xr	%r9,%r1			# new s1 picks up Te3[s0>>0]
	srlg	%r14,%r11,5		# (s3>>8)*8
	sllg	%r1,%r11,3		# (s3>>0)*8
	nr	%r14,%r0
	la	%r4,16(%r4)		# step to the next round key
	ngr	%r1,%r0

	x	%r8,2(%r5,%r12)		# ^ Te2[s2>>8]
	x	%r9,3(%r6,%r12)		# ^ Te1[s2>>16]
	l	%r10,0(%r10,%r12)	# Te0[s2>>24]
	x	%r3,1(%r7,%r12)		# ^ Te3[s2>>0]

	srlg	%r7,%r11,13		# (s3>>16)*8
	xr	%r10,%r2		# new s2 picks up its partial sum
	srl	%r11,21			# (s3>>24)*8
	nr	%r7,%r0
	nr	%r11,%r0

	x	%r8,0(%r4)		# AddRoundKey, word 3 goes into the
	x	%r9,4(%r4)		# partial sum kept in %r3
	x	%r10,8(%r4)
	x	%r3,12(%r4)

	x	%r8,1(%r1,%r12)		# ^ Te3[s3>>0]
	x	%r9,2(%r14,%r12)	# ^ Te2[s3>>8]
	x	%r10,3(%r7,%r12)	# ^ Te1[s3>>16]
	l	%r11,0(%r11,%r12)	# Te0[s3>>24]
	xr	%r11,%r3		# new s3 complete

	brct	%r13,.Lenc_loop
	.align	16

	# Last round: S-box bytes only, assembled with shifts and ORs.
	# %r4 is not advanced here, so the final round key is read
	# from offsets 16..28.
	sllg	%r1,%r8,3		# (s0>>0)*8
	srlg	%r2,%r8,5		# (s0>>8)*8
	ngr	%r1,%r0
	srlg	%r3,%r8,13		# (s0>>16)*8
	srl	%r8,21			# (s0>>24)*8
	nr	%r8,%r0
	nr	%r2,%r0
	nr	%r3,%r0

	srlg	%r5,%r9,13		# (s1>>16)*8
	sllg	%r6,%r9,3		# (s1>>0)*8
	ngr	%r6,%r0
	srlg	%r7,%r9,5		# (s1>>8)*8
	srl	%r9,21			# (s1>>24)*8
	nr	%r5,%r0
	nr	%r9,%r0
	nr	%r7,%r0

	llgc	%r8,2(%r8,%r12)		# Te4[s0>>24]
	llgc	%r1,2(%r1,%r12)		# Te4[s0>>0]
	sll	%r8,24
	llgc	%r2,2(%r2,%r12)		# Te4[s0>>8]
	llgc	%r3,2(%r3,%r12)		# Te4[s0>>16]
	sll	%r2,8
	sll	%r3,16

	llgc	%r5,2(%r5,%r12)		# Te4[s1>>16]
	llgc	%r9,2(%r9,%r12)		# Te4[s1>>24]
	llgc	%r6,2(%r6,%r12)		# Te4[s1>>0]
	llgc	%r7,2(%r7,%r12)		# Te4[s1>>8]
	sll	%r5,16
	sll	%r9,24
	sll	%r7,8
	or	%r8,%r5			# s0 |= byte from s1
	or	%r9,%r1			# s1 |= byte from s0
	or	%r2,%r6			# partial s2
	or	%r3,%r7			# partial s3

	srlg	%r5,%r10,5		# (s2>>8)*8
	srlg	%r6,%r10,13		# (s2>>16)*8
	nr	%r5,%r0
	nr	%r6,%r0
	sllg	%r7,%r10,3		# (s2>>0)*8
	srl	%r10,21			# (s2>>24)*8
	ngr	%r7,%r0
	nr	%r10,%r0

	sllg	%r1,%r11,3		# (s3>>0)*8
	srlg	%r14,%r11,5		# (s3>>8)*8
	ngr	%r1,%r0

	llgc	%r5,2(%r5,%r12)		# Te4[s2>>8]
	llgc	%r6,2(%r6,%r12)		# Te4[s2>>16]
	sll	%r5,8
	llgc	%r10,2(%r10,%r12)	# Te4[s2>>24]
	llgc	%r7,2(%r7,%r12)		# Te4[s2>>0]
	sll	%r6,16
	nr	%r14,%r0
	sll	%r10,24
	or	%r8,%r5			# s0 |= byte from s2
	or	%r9,%r6			# s1 |= byte from s2
	or	%r10,%r2		# s2 |= partial s2
	or	%r3,%r7			# partial s3 |= byte from s2

	srlg	%r7,%r11,13		# (s3>>16)*8
	srl	%r11,21			# (s3>>24)*8
	nr	%r7,%r0
	nr	%r11,%r0

	# all masking is done, %r0 is reused for a key word
	l	%r0,16(%r4)		# final round key, words 0 and 1
	l	%r2,20(%r4)

	llgc	%r5,2(%r1,%r12)		# Te4[s3>>0]
	llgc	%r6,2(%r14,%r12)	# Te4[s3>>8]
	llgc	%r7,2(%r7,%r12)		# Te4[s3>>16]
	llgc	%r11,2(%r11,%r12)	# Te4[s3>>24]
	sll	%r6,8
	sll	%r7,16
	sll	%r11,24
	or	%r8,%r5			# s0 |= byte from s3
	or	%r9,%r6			# s1 |= byte from s3
	or	%r10,%r7		# s2 |= byte from s3
	or	%r11,%r3		# s3 |= partial s3

	l	%r14,60(%r15)		# return address back from slot 15
	xr	%r8,%r0			# final AddRoundKey
	xr	%r9,%r2
	x	%r10,24(%r4)
	x	%r11,28(%r4)

	br	%r14
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
518 .type	AES_Td,@object
519 .align	256
520 AES_Td:
521 .long	0x51f4a750,0x51f4a750
522 .long	0x7e416553,0x7e416553
523 .long	0x1a17a4c3,0x1a17a4c3
524 .long	0x3a275e96,0x3a275e96
525 .long	0x3bab6bcb,0x3bab6bcb
526 .long	0x1f9d45f1,0x1f9d45f1
527 .long	0xacfa58ab,0xacfa58ab
528 .long	0x4be30393,0x4be30393
529 .long	0x2030fa55,0x2030fa55
530 .long	0xad766df6,0xad766df6
531 .long	0x88cc7691,0x88cc7691
532 .long	0xf5024c25,0xf5024c25
533 .long	0x4fe5d7fc,0x4fe5d7fc
534 .long	0xc52acbd7,0xc52acbd7
535 .long	0x26354480,0x26354480
536 .long	0xb562a38f,0xb562a38f
537 .long	0xdeb15a49,0xdeb15a49
538 .long	0x25ba1b67,0x25ba1b67
539 .long	0x45ea0e98,0x45ea0e98
540 .long	0x5dfec0e1,0x5dfec0e1
541 .long	0xc32f7502,0xc32f7502
542 .long	0x814cf012,0x814cf012
543 .long	0x8d4697a3,0x8d4697a3
544 .long	0x6bd3f9c6,0x6bd3f9c6
545 .long	0x038f5fe7,0x038f5fe7
546 .long	0x15929c95,0x15929c95
547 .long	0xbf6d7aeb,0xbf6d7aeb
548 .long	0x955259da,0x955259da
549 .long	0xd4be832d,0xd4be832d
550 .long	0x587421d3,0x587421d3
551 .long	0x49e06929,0x49e06929
552 .long	0x8ec9c844,0x8ec9c844
553 .long	0x75c2896a,0x75c2896a
554 .long	0xf48e7978,0xf48e7978
555 .long	0x99583e6b,0x99583e6b
556 .long	0x27b971dd,0x27b971dd
557 .long	0xbee14fb6,0xbee14fb6
558 .long	0xf088ad17,0xf088ad17
559 .long	0xc920ac66,0xc920ac66
560 .long	0x7dce3ab4,0x7dce3ab4
561 .long	0x63df4a18,0x63df4a18
562 .long	0xe51a3182,0xe51a3182
563 .long	0x97513360,0x97513360
564 .long	0x62537f45,0x62537f45
565 .long	0xb16477e0,0xb16477e0
566 .long	0xbb6bae84,0xbb6bae84
567 .long	0xfe81a01c,0xfe81a01c
568 .long	0xf9082b94,0xf9082b94
569 .long	0x70486858,0x70486858
570 .long	0x8f45fd19,0x8f45fd19
571 .long	0x94de6c87,0x94de6c87
572 .long	0x527bf8b7,0x527bf8b7
573 .long	0xab73d323,0xab73d323
574 .long	0x724b02e2,0x724b02e2
575 .long	0xe31f8f57,0xe31f8f57
576 .long	0x6655ab2a,0x6655ab2a
577 .long	0xb2eb2807,0xb2eb2807
578 .long	0x2fb5c203,0x2fb5c203
579 .long	0x86c57b9a,0x86c57b9a
580 .long	0xd33708a5,0xd33708a5
581 .long	0x302887f2,0x302887f2
582 .long	0x23bfa5b2,0x23bfa5b2
583 .long	0x02036aba,0x02036aba
584 .long	0xed16825c,0xed16825c
585 .long	0x8acf1c2b,0x8acf1c2b
586 .long	0xa779b492,0xa779b492
587 .long	0xf307f2f0,0xf307f2f0
588 .long	0x4e69e2a1,0x4e69e2a1
589 .long	0x65daf4cd,0x65daf4cd
590 .long	0x0605bed5,0x0605bed5
591 .long	0xd134621f,0xd134621f
592 .long	0xc4a6fe8a,0xc4a6fe8a
593 .long	0x342e539d,0x342e539d
594 .long	0xa2f355a0,0xa2f355a0
595 .long	0x058ae132,0x058ae132
596 .long	0xa4f6eb75,0xa4f6eb75
597 .long	0x0b83ec39,0x0b83ec39
598 .long	0x4060efaa,0x4060efaa
599 .long	0x5e719f06,0x5e719f06
600 .long	0xbd6e1051,0xbd6e1051
601 .long	0x3e218af9,0x3e218af9
602 .long	0x96dd063d,0x96dd063d
603 .long	0xdd3e05ae,0xdd3e05ae
604 .long	0x4de6bd46,0x4de6bd46
605 .long	0x91548db5,0x91548db5
606 .long	0x71c45d05,0x71c45d05
607 .long	0x0406d46f,0x0406d46f
608 .long	0x605015ff,0x605015ff
609 .long	0x1998fb24,0x1998fb24
610 .long	0xd6bde997,0xd6bde997
611 .long	0x894043cc,0x894043cc
612 .long	0x67d99e77,0x67d99e77
613 .long	0xb0e842bd,0xb0e842bd
614 .long	0x07898b88,0x07898b88
615 .long	0xe7195b38,0xe7195b38
616 .long	0x79c8eedb,0x79c8eedb
617 .long	0xa17c0a47,0xa17c0a47
618 .long	0x7c420fe9,0x7c420fe9
619 .long	0xf8841ec9,0xf8841ec9
620 .long	0x00000000,0x00000000
621 .long	0x09808683,0x09808683
622 .long	0x322bed48,0x322bed48
623 .long	0x1e1170ac,0x1e1170ac
624 .long	0x6c5a724e,0x6c5a724e
625 .long	0xfd0efffb,0xfd0efffb
626 .long	0x0f853856,0x0f853856
627 .long	0x3daed51e,0x3daed51e
628 .long	0x362d3927,0x362d3927
629 .long	0x0a0fd964,0x0a0fd964
630 .long	0x685ca621,0x685ca621
631 .long	0x9b5b54d1,0x9b5b54d1
632 .long	0x24362e3a,0x24362e3a
633 .long	0x0c0a67b1,0x0c0a67b1
634 .long	0x9357e70f,0x9357e70f
635 .long	0xb4ee96d2,0xb4ee96d2
636 .long	0x1b9b919e,0x1b9b919e
637 .long	0x80c0c54f,0x80c0c54f
638 .long	0x61dc20a2,0x61dc20a2
639 .long	0x5a774b69,0x5a774b69
640 .long	0x1c121a16,0x1c121a16
641 .long	0xe293ba0a,0xe293ba0a
642 .long	0xc0a02ae5,0xc0a02ae5
643 .long	0x3c22e043,0x3c22e043
644 .long	0x121b171d,0x121b171d
645 .long	0x0e090d0b,0x0e090d0b
646 .long	0xf28bc7ad,0xf28bc7ad
647 .long	0x2db6a8b9,0x2db6a8b9
648 .long	0x141ea9c8,0x141ea9c8
649 .long	0x57f11985,0x57f11985
650 .long	0xaf75074c,0xaf75074c
651 .long	0xee99ddbb,0xee99ddbb
652 .long	0xa37f60fd,0xa37f60fd
653 .long	0xf701269f,0xf701269f
654 .long	0x5c72f5bc,0x5c72f5bc
655 .long	0x44663bc5,0x44663bc5
656 .long	0x5bfb7e34,0x5bfb7e34
657 .long	0x8b432976,0x8b432976
658 .long	0xcb23c6dc,0xcb23c6dc
659 .long	0xb6edfc68,0xb6edfc68
660 .long	0xb8e4f163,0xb8e4f163
661 .long	0xd731dcca,0xd731dcca
662 .long	0x42638510,0x42638510
663 .long	0x13972240,0x13972240
664 .long	0x84c61120,0x84c61120
665 .long	0x854a247d,0x854a247d
666 .long	0xd2bb3df8,0xd2bb3df8
667 .long	0xaef93211,0xaef93211
668 .long	0xc729a16d,0xc729a16d
669 .long	0x1d9e2f4b,0x1d9e2f4b
670 .long	0xdcb230f3,0xdcb230f3
671 .long	0x0d8652ec,0x0d8652ec
672 .long	0x77c1e3d0,0x77c1e3d0
673 .long	0x2bb3166c,0x2bb3166c
674 .long	0xa970b999,0xa970b999
675 .long	0x119448fa,0x119448fa
676 .long	0x47e96422,0x47e96422
677 .long	0xa8fc8cc4,0xa8fc8cc4
678 .long	0xa0f03f1a,0xa0f03f1a
679 .long	0x567d2cd8,0x567d2cd8
680 .long	0x223390ef,0x223390ef
681 .long	0x87494ec7,0x87494ec7
682 .long	0xd938d1c1,0xd938d1c1
683 .long	0x8ccaa2fe,0x8ccaa2fe
684 .long	0x98d40b36,0x98d40b36
685 .long	0xa6f581cf,0xa6f581cf
686 .long	0xa57ade28,0xa57ade28
687 .long	0xdab78e26,0xdab78e26
688 .long	0x3fadbfa4,0x3fadbfa4
689 .long	0x2c3a9de4,0x2c3a9de4
690 .long	0x5078920d,0x5078920d
691 .long	0x6a5fcc9b,0x6a5fcc9b
692 .long	0x547e4662,0x547e4662
693 .long	0xf68d13c2,0xf68d13c2
694 .long	0x90d8b8e8,0x90d8b8e8
695 .long	0x2e39f75e,0x2e39f75e
696 .long	0x82c3aff5,0x82c3aff5
697 .long	0x9f5d80be,0x9f5d80be
698 .long	0x69d0937c,0x69d0937c
699 .long	0x6fd52da9,0x6fd52da9
700 .long	0xcf2512b3,0xcf2512b3
701 .long	0xc8ac993b,0xc8ac993b
702 .long	0x10187da7,0x10187da7
703 .long	0xe89c636e,0xe89c636e
704 .long	0xdb3bbb7b,0xdb3bbb7b
705 .long	0xcd267809,0xcd267809
706 .long	0x6e5918f4,0x6e5918f4
707 .long	0xec9ab701,0xec9ab701
708 .long	0x834f9aa8,0x834f9aa8
709 .long	0xe6956e65,0xe6956e65
710 .long	0xaaffe67e,0xaaffe67e
711 .long	0x21bccf08,0x21bccf08
712 .long	0xef15e8e6,0xef15e8e6
713 .long	0xbae79bd9,0xbae79bd9
714 .long	0x4a6f36ce,0x4a6f36ce
715 .long	0xea9f09d4,0xea9f09d4
716 .long	0x29b07cd6,0x29b07cd6
717 .long	0x31a4b2af,0x31a4b2af
718 .long	0x2a3f2331,0x2a3f2331
719 .long	0xc6a59430,0xc6a59430
720 .long	0x35a266c0,0x35a266c0
721 .long	0x744ebc37,0x744ebc37
722 .long	0xfc82caa6,0xfc82caa6
723 .long	0xe090d0b0,0xe090d0b0
724 .long	0x33a7d815,0x33a7d815
725 .long	0xf104984a,0xf104984a
726 .long	0x41ecdaf7,0x41ecdaf7
727 .long	0x7fcd500e,0x7fcd500e
728 .long	0x1791f62f,0x1791f62f
729 .long	0x764dd68d,0x764dd68d
730 .long	0x43efb04d,0x43efb04d
731 .long	0xccaa4d54,0xccaa4d54
732 .long	0xe49604df,0xe49604df
733 .long	0x9ed1b5e3,0x9ed1b5e3
734 .long	0x4c6a881b,0x4c6a881b
735 .long	0xc12c1fb8,0xc12c1fb8
736 .long	0x4665517f,0x4665517f
737 .long	0x9d5eea04,0x9d5eea04
738 .long	0x018c355d,0x018c355d
739 .long	0xfa877473,0xfa877473
740 .long	0xfb0b412e,0xfb0b412e
741 .long	0xb3671d5a,0xb3671d5a
742 .long	0x92dbd252,0x92dbd252
743 .long	0xe9105633,0xe9105633
744 .long	0x6dd64713,0x6dd64713
745 .long	0x9ad7618c,0x9ad7618c
746 .long	0x37a10c7a,0x37a10c7a
747 .long	0x59f8148e,0x59f8148e
748 .long	0xeb133c89,0xeb133c89
749 .long	0xcea927ee,0xcea927ee
750 .long	0xb761c935,0xb761c935
751 .long	0xe11ce5ed,0xe11ce5ed
752 .long	0x7a47b13c,0x7a47b13c
753 .long	0x9cd2df59,0x9cd2df59
754 .long	0x55f2733f,0x55f2733f
755 .long	0x1814ce79,0x1814ce79
756 .long	0x73c737bf,0x73c737bf
757 .long	0x53f7cdea,0x53f7cdea
758 .long	0x5ffdaa5b,0x5ffdaa5b
759 .long	0xdf3d6f14,0xdf3d6f14
760 .long	0x7844db86,0x7844db86
761 .long	0xcaaff381,0xcaaff381
762 .long	0xb968c43e,0xb968c43e
763 .long	0x3824342c,0x3824342c
764 .long	0xc2a3405f,0xc2a3405f
765 .long	0x161dc372,0x161dc372
766 .long	0xbce2250c,0xbce2250c
767 .long	0x283c498b,0x283c498b
768 .long	0xff0d9541,0xff0d9541
769 .long	0x39a80171,0x39a80171
770 .long	0x080cb3de,0x080cb3de
771 .long	0xd8b4e49c,0xd8b4e49c
772 .long	0x6456c190,0x6456c190
773 .long	0x7bcb8461,0x7bcb8461
774 .long	0xd532b670,0xd532b670
775 .long	0x486c5c74,0x486c5c74
776 .long	0xd0b85742,0xd0b85742
# Td4[256]: AES inverse S-box, one byte per entry (begins 2048 bytes past AES_Td)
778 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
779 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
780 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
781 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
782 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
783 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
784 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
785 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
786 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
787 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
788 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
789 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
790 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
791 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
792 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
793 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
794 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
795 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
796 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
797 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
798 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
799 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
800 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
801 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
802 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
803 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
804 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
805 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
806 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
807 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
808 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
809 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
810 .size	AES_Td,.-AES_Td
811 
# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#		   const AES_KEY *key);
#
# Decrypt a single 16-byte block.
#   In:  %r2 = inp, %r3 = out, %r4 = key
#
# The word at key+240 picks the path.  A value below 16 is taken as a
# software round count and the table-driven core is used; any other
# value is passed unchanged in %r0 to the KM instruction.
# NOTE(review): on the hardware path the stored code presumably already
# selects decryption; where that is set up is not visible in this
# routine - confirm against the decrypt key setup code.
.globl	AES_decrypt
.type	AES_decrypt,@function
AES_decrypt:
	lhi	%r1,16			# threshold between the two paths
	l	%r0,240(%r4)		# round count or km function code
	clr	%r0,%r1
	jl	.Ldsoft			# below 16: software path

	# Hardware path.  The input pointer is already in %r2, so only
	# the key and output pointers have to be moved into place.
	la	%r1,0(%r4)		# %r1 = key
	la	%r4,0(%r3)		# %r4 = out
	lghi	%r3,16			# single block length
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# back to km on cc 3 - can this happen?
	br	%r14
.align	64
.Ldsoft:
	stm	%r3,%r14,12(%r15)	# save slots 3..14: out pointer and %r4-%r14

	larl	%r12,AES_Td		# table base for the round core
	llgf	%r8,0(%r2)		# s0..s3 = input block, zero-extended
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)
	bras	%r14,_s390x_AES_decrypt

	l	%r3,12(%r15)		# out pointer back from slot 3
	stm	%r8,%r11,0(%r3)		# out[0..15] = s0..s3

	lm	%r6,%r14,24(%r15)	# restore %r6-%r14 from slots 6..14
	br	%r14
.size	AES_decrypt,.-AES_decrypt
850 
# _s390x_AES_decrypt: table-driven AES decryption core (file-internal).
#
# In:	%r8-%r11  state words s0..s3
#	%r4	  round-key pointer; the word at +240 is the round count
#	%r12	  address of AES_Td
#	%r14	  return address
#	%r15	  stack pointer; slot 15 (offset 60) is used as scratch
# Out:	%r8-%r11  decrypted state
# Uses:	%r0 (index mask, later a key word), %r1-%r3, %r5-%r7,
#	%r13 (loop counter), %r14 (scratch, reloaded before return);
#	%r4 is advanced by 16 for every pass through the loop.
#
# Table layout relied upon: AES_Td consists of 256 entries of 8 bytes,
# each holding the same 32-bit word twice.  A 4-byte load at offset
# 0, 3, 2 or 1 inside an entry therefore returns that word rotated
# right by 0, 8, 16 or 24 bits.  The comments below name these four
# views Td0, Td1, Td2 and Td3.  The byte table Td4 used by the last
# round sits 2048 bytes past AES_Td.
.type	_s390x_AES_decrypt,@function
.align	16
_s390x_AES_decrypt:
	st	%r14,60(%r15)		# park return address, %r14 is scratch
	x	%r8,0(%r4)		# initial AddRoundKey: s0..s3 ^= rk[0..3]
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	l	%r13,240(%r4)		# round count
	llill	%r0,0xff*8		# mask for a byte index scaled by 8
	aghi	%r13,-1			# the last round is done after the loop
	j	.Ldec_loop
.align	16
.Ldec_loop:
	# table offsets taken from s0
	srlg	%r1,%r8,13		# (s0>>16)*8
	srlg	%r2,%r8,5		# (s0>>8)*8
	sllg	%r3,%r8,3		# (s0>>0)*8
	srl	%r8,21			# (s0>>24)*8
	nr	%r8,%r0
	nr	%r1,%r0
	nr	%r2,%r0
	ngr	%r3,%r0

	# table offsets taken from s1
	sllg	%r5,%r9,3		# (s1>>0)*8
	srlg	%r6,%r9,13		# (s1>>16)*8
	srlg	%r7,%r9,5		# (s1>>8)*8
	srl	%r9,21			# (s1>>24)*8
	ngr	%r5,%r0
	nr	%r9,%r0
	nr	%r6,%r0
	nr	%r7,%r0

	# %r8 collects new s0; %r1, %r2, %r3 are partial sums that are
	# folded into new s1, s2, s3 further down
	l	%r8,0(%r8,%r12)		# Td0[s0>>24]
	l	%r1,3(%r1,%r12)		# Td1[s0>>16]
	l	%r2,2(%r2,%r12)		# Td2[s0>>8]
	l	%r3,1(%r3,%r12)		# Td3[s0>>0]

	x	%r8,1(%r5,%r12)		# ^ Td3[s1>>0]
	l	%r9,0(%r9,%r12)		# Td0[s1>>24]
	x	%r2,3(%r6,%r12)		# ^ Td1[s1>>16]
	x	%r3,2(%r7,%r12)		# ^ Td2[s1>>8]

	# table offsets taken from s2
	srlg	%r5,%r10,5		# (s2>>8)*8
	sllg	%r6,%r10,3		# (s2>>0)*8
	srlg	%r7,%r10,13		# (s2>>16)*8
	srl	%r10,21			# (s2>>24)*8
	nr	%r5,%r0
	ngr	%r6,%r0
	nr	%r10,%r0
	nr	%r7,%r0

	xr	%r9,%r1			# new s1 picks up Td1[s0>>16]
	srlg	%r14,%r11,5		# (s3>>8)*8
	srlg	%r1,%r11,13		# (s3>>16)*8
	nr	%r14,%r0
	la	%r4,16(%r4)		# step to the next round key
	nr	%r1,%r0

	x	%r8,2(%r5,%r12)		# ^ Td2[s2>>8]
	x	%r9,1(%r6,%r12)		# ^ Td3[s2>>0]
	l	%r10,0(%r10,%r12)	# Td0[s2>>24]
	x	%r3,3(%r7,%r12)		# ^ Td1[s2>>16]

	sllg	%r7,%r11,3		# (s3>>0)*8
	srl	%r11,21			# (s3>>24)*8
	ngr	%r7,%r0
	nr	%r11,%r0

	xr	%r10,%r2		# new s2 picks up its partial sum
	x	%r8,0(%r4)		# AddRoundKey, word 3 goes into the
	x	%r9,4(%r4)		# partial sum kept in %r3
	x	%r10,8(%r4)
	x	%r3,12(%r4)

	x	%r8,3(%r1,%r12)		# ^ Td1[s3>>16]
	x	%r9,2(%r14,%r12)	# ^ Td2[s3>>8]
	x	%r10,1(%r7,%r12)	# ^ Td3[s3>>0]
	l	%r11,0(%r11,%r12)	# Td0[s3>>24]
	xr	%r11,%r3		# new s3 complete

	brct	%r13,.Ldec_loop
	.align	16

	# Last round: Td4 bytes only, assembled with shifts and ORs.
	# The four loads below touch Td4 at 64-byte steps; their results
	# are overwritten before being used (prefetch Td4).
	l	%r1,2048(%r12)
	l	%r2,2048+64(%r12)
	l	%r3,2048+128(%r12)
	l	%r5,2048+192(%r12)
	llill	%r0,0xff		# Td4 is indexed by plain bytes

	srlg	%r7,%r8,24		# s0>>24
	srlg	%r1,%r8,16		# s0>>16
	srlg	%r2,%r8,8		# s0>>8
	nr	%r8,%r0			# s0>>0
	nr	%r1,%r0

	srlg	%r5,%r9,24		# s1>>24
	nr	%r2,%r0
	srlg	%r6,%r9,16		# s1>>16
	srlg	%r14,%r9,8		# s1>>8
	nr	%r9,%r0			# s1>>0
	nr	%r6,%r0
	nr	%r14,%r0

	llgc	%r7,2048(%r7,%r12)	# Td4[s0>>24]
	llgc	%r1,2048(%r1,%r12)	# Td4[s0>>16]
	llgc	%r2,2048(%r2,%r12)	# Td4[s0>>8]
	sll	%r1,16
	llgc	%r3,2048(%r8,%r12)	# Td4[s0>>0]
	sllg	%r8,%r7,24		# new s0 starts with its top byte
	sll	%r2,8

	llgc	%r9,2048(%r9,%r12)	# Td4[s1>>0]
	llgc	%r5,2048(%r5,%r12)	# Td4[s1>>24]
	llgc	%r6,2048(%r6,%r12)	# Td4[s1>>16]
	sll	%r5,24
	llgc	%r7,2048(%r14,%r12)	# Td4[s1>>8]
	sll	%r6,16
	sll	%r7,8
	or	%r8,%r9			# s0 |= byte from s1
	or	%r1,%r5			# partial s1
	or	%r2,%r6			# partial s2
	or	%r3,%r7			# partial s3

	srlg	%r5,%r10,8		# s2>>8
	srlg	%r6,%r10,24		# s2>>24
	srlg	%r7,%r10,16		# s2>>16
	nr	%r10,%r0		# s2>>0
	nr	%r5,%r0
	nr	%r7,%r0
	llgc	%r5,2048(%r5,%r12)	# Td4[s2>>8]
	llgc	%r9,2048(%r10,%r12)	# Td4[s2>>0], new s1 starts here
	llgc	%r6,2048(%r6,%r12)	# Td4[s2>>24]
	llgc	%r7,2048(%r7,%r12)	# Td4[s2>>16]
	sll	%r5,8
	sll	%r6,24
	or	%r8,%r5			# s0 |= byte from s2
	sll	%r7,16
	or	%r2,%r6			# partial s2 |= byte from s2
	or	%r3,%r7			# partial s3 |= byte from s2

	srlg	%r5,%r11,16		# s3>>16
	srlg	%r6,%r11,8		# s3>>8
	srlg	%r7,%r11,24		# s3>>24
	nr	%r11,%r0		# s3>>0
	nr	%r5,%r0
	nr	%r6,%r0

	# all masking is done, %r0 and %r1 are reused for key words
	l	%r14,60(%r15)		# return address back from slot 15
	or	%r9,%r1			# s1 |= partial s1
	l	%r0,16(%r4)		# final round key, words 0 and 1
	l	%r1,20(%r4)

	llgc	%r5,2048(%r5,%r12)	# Td4[s3>>16]
	llgc	%r6,2048(%r6,%r12)	# Td4[s3>>8]
	sll	%r5,16
	llgc	%r10,2048(%r11,%r12)	# Td4[s3>>0], new s2 starts here
	llgc	%r11,2048(%r7,%r12)	# Td4[s3>>24]
	sll	%r6,8
	sll	%r11,24
	or	%r8,%r5			# s0 |= byte from s3
	or	%r9,%r6			# s1 |= byte from s3
	or	%r10,%r2		# s2 |= partial s2
	or	%r11,%r3		# s3 |= partial s3

	xr	%r8,%r0			# final AddRoundKey
	xr	%r9,%r1
	x	%r10,24(%r4)
	x	%r11,28(%r4)

	br	%r14
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
1022 # void AES_set_encrypt_key(const unsigned char *in, int bits,
1023 # 		 AES_KEY *key) {
1024 .globl	AES_set_encrypt_key
1025 .type	AES_set_encrypt_key,@function
1026 .align	16
1027 AES_set_encrypt_key:
1028 _s390x_AES_set_encrypt_key:
1029 	lghi	%r0,0
1030 	clr	%r2,%r0
1031 	je	.Lminus1
1032 	clr	%r4,%r0
1033 	je	.Lminus1
1034 
1035 	lghi	%r0,128
1036 	clr	%r3,%r0
1037 	je	.Lproceed
1038 	lghi	%r0,192
1039 	clr	%r3,%r0
1040 	je	.Lproceed
1041 	lghi	%r0,256
1042 	clr	%r3,%r0
1043 	je	.Lproceed
1044 	lghi	%r2,-2
1045 	br	%r14
1046 
1047 .align	16
1048 .Lproceed:
1049 	# convert bits to km(c) code, [128,192,256]->[18,19,20]
1050 	lhi	%r5,-128
1051 	lhi	%r0,18
1052 	ar	%r5,%r3
1053 	srl	%r5,6
1054 	ar	%r5,%r0
1055 
1056 	larl	%r1,OPENSSL_s390xcap_P
1057 	llihh	%r0,0x8000
1058 	srlg	%r0,%r0,0(%r5)
1059 	ng	%r0,S390X_KM(%r1)  # check availability of both km...
1060 	ng	%r0,S390X_KMC(%r1) # ...and kmc support for given key length
1061 	jz	.Lekey_internal
1062 
1063 	lmg	%r0,%r1,0(%r2)	# just copy 128 bits...
1064 	stmg	%r0,%r1,0(%r4)
1065 	lhi	%r0,192
1066 	cr	%r3,%r0
1067 	jl	1f
1068 	lg	%r1,16(%r2)
1069 	stg	%r1,16(%r4)
1070 	je	1f
1071 	lg	%r1,24(%r2)
1072 	stg	%r1,24(%r4)
1073 1:	st	%r3,236(%r4)	# save bits [for debugging purposes]
1074 	lgr	%r0,%r5
1075 	st	%r5,240(%r4)	# save km(c) code
1076 	lghi	%r2,0
1077 	br	%r14
1078 .align	16
1079 .Lekey_internal:
1080 	stm	%r4,%r13,4*4(%r15)	# all non-volatile regs and %r4
1081 
1082 	larl	%r12,AES_Te+2048
1083 
1084 	llgf	%r8,0(%r2)
1085 	llgf	%r9,4(%r2)
1086 	llgf	%r10,8(%r2)
1087 	llgf	%r11,12(%r2)
1088 	st	%r8,0(%r4)
1089 	st	%r9,4(%r4)
1090 	st	%r10,8(%r4)
1091 	st	%r11,12(%r4)
1092 	lghi	%r0,128
1093 	cr	%r3,%r0
1094 	jne	.Lnot128
1095 
1096 	llill	%r0,0xff
1097 	lghi	%r3,0			# i=0
1098 	lghi	%r13,10
1099 	st	%r13,240(%r4)
1100 
1101 	llgfr	%r2,%r11			# temp=rk[3]
1102 	srlg	%r5,%r11,8
1103 	srlg	%r6,%r11,16
1104 	srlg	%r7,%r11,24
1105 	nr	%r2,%r0
1106 	nr	%r5,%r0
1107 	nr	%r6,%r0
1108 
1109 .align	16
1110 .L128_loop:
1111 	la	%r2,0(%r2,%r12)
1112 	la	%r5,0(%r5,%r12)
1113 	la	%r6,0(%r6,%r12)
1114 	la	%r7,0(%r7,%r12)
1115 	icm	%r2,2,0(%r2)		# Te4[rk[3]>>0]<<8
1116 	icm	%r2,4,0(%r5)		# Te4[rk[3]>>8]<<16
1117 	icm	%r2,8,0(%r6)		# Te4[rk[3]>>16]<<24
1118 	icm	%r2,1,0(%r7)		# Te4[rk[3]>>24]
1119 	x	%r2,256(%r3,%r12)	# rcon[i]
1120 	xr	%r8,%r2			# rk[4]=rk[0]^...
1121 	xr	%r9,%r8			# rk[5]=rk[1]^rk[4]
1122 	xr	%r10,%r9			# rk[6]=rk[2]^rk[5]
1123 	xr	%r11,%r10			# rk[7]=rk[3]^rk[6]
1124 
1125 	llgfr	%r2,%r11			# temp=rk[3]
1126 	srlg	%r5,%r11,8
1127 	srlg	%r6,%r11,16
1128 	nr	%r2,%r0
1129 	nr	%r5,%r0
1130 	srlg	%r7,%r11,24
1131 	nr	%r6,%r0
1132 
1133 	st	%r8,16(%r4)
1134 	st	%r9,20(%r4)
1135 	st	%r10,24(%r4)
1136 	st	%r11,28(%r4)
1137 	la	%r4,16(%r4)		# key+=4
1138 	la	%r3,4(%r3)		# i++
1139 	brct	%r13,.L128_loop
1140 	lghi	%r0,10
1141 	lghi	%r2,0
1142 	lm	%r4,%r13,4*4(%r15)
1143 	br	%r14
1144 
1145 .align	16
1146 .Lnot128:
1147 	llgf	%r0,16(%r2)
1148 	llgf	%r1,20(%r2)
1149 	st	%r0,16(%r4)
1150 	st	%r1,20(%r4)
1151 	lghi	%r0,192
1152 	cr	%r3,%r0
1153 	jne	.Lnot192
1154 
1155 	llill	%r0,0xff
1156 	lghi	%r3,0			# i=0
1157 	lghi	%r13,12
1158 	st	%r13,240(%r4)
1159 	lghi	%r13,8
1160 
1161 	srlg	%r5,%r1,8
1162 	srlg	%r6,%r1,16
1163 	srlg	%r7,%r1,24
1164 	nr	%r1,%r0
1165 	nr	%r5,%r0
1166 	nr	%r6,%r0
1167 
1168 .align	16
1169 .L192_loop:
1170 	la	%r1,0(%r1,%r12)
1171 	la	%r5,0(%r5,%r12)
1172 	la	%r6,0(%r6,%r12)
1173 	la	%r7,0(%r7,%r12)
1174 	icm	%r1,2,0(%r1)		# Te4[rk[5]>>0]<<8
1175 	icm	%r1,4,0(%r5)		# Te4[rk[5]>>8]<<16
1176 	icm	%r1,8,0(%r6)		# Te4[rk[5]>>16]<<24
1177 	icm	%r1,1,0(%r7)		# Te4[rk[5]>>24]
1178 	x	%r1,256(%r3,%r12)	# rcon[i]
1179 	xr	%r8,%r1			# rk[6]=rk[0]^...
1180 	xr	%r9,%r8			# rk[7]=rk[1]^rk[6]
1181 	xr	%r10,%r9			# rk[8]=rk[2]^rk[7]
1182 	xr	%r11,%r10			# rk[9]=rk[3]^rk[8]
1183 
1184 	st	%r8,24(%r4)
1185 	st	%r9,28(%r4)
1186 	st	%r10,32(%r4)
1187 	st	%r11,36(%r4)
1188 	brct	%r13,.L192_continue
1189 	lghi	%r0,12
1190 	lghi	%r2,0
1191 	lm	%r4,%r13,4*4(%r15)
1192 	br	%r14
1193 
1194 .align	16
1195 .L192_continue:
1196 	lgr	%r1,%r11
1197 	x	%r1,16(%r4)		# rk[10]=rk[4]^rk[9]
1198 	st	%r1,40(%r4)
1199 	x	%r1,20(%r4)		# rk[11]=rk[5]^rk[10]
1200 	st	%r1,44(%r4)
1201 
1202 	srlg	%r5,%r1,8
1203 	srlg	%r6,%r1,16
1204 	srlg	%r7,%r1,24
1205 	nr	%r1,%r0
1206 	nr	%r5,%r0
1207 	nr	%r6,%r0
1208 
1209 	la	%r4,24(%r4)		# key+=6
1210 	la	%r3,4(%r3)		# i++
1211 	j	.L192_loop
1212 
1213 .align	16
1214 .Lnot192:
1215 	llgf	%r0,24(%r2)
1216 	llgf	%r1,28(%r2)
1217 	st	%r0,24(%r4)
1218 	st	%r1,28(%r4)
1219 	llill	%r0,0xff
1220 	lghi	%r3,0			# i=0
1221 	lghi	%r13,14
1222 	st	%r13,240(%r4)
1223 	lghi	%r13,7
1224 
1225 	srlg	%r5,%r1,8
1226 	srlg	%r6,%r1,16
1227 	srlg	%r7,%r1,24
1228 	nr	%r1,%r0
1229 	nr	%r5,%r0
1230 	nr	%r6,%r0
1231 
1232 .align	16
1233 .L256_loop:
1234 	la	%r1,0(%r1,%r12)
1235 	la	%r5,0(%r5,%r12)
1236 	la	%r6,0(%r6,%r12)
1237 	la	%r7,0(%r7,%r12)
1238 	icm	%r1,2,0(%r1)		# Te4[rk[7]>>0]<<8
1239 	icm	%r1,4,0(%r5)		# Te4[rk[7]>>8]<<16
1240 	icm	%r1,8,0(%r6)		# Te4[rk[7]>>16]<<24
1241 	icm	%r1,1,0(%r7)		# Te4[rk[7]>>24]
1242 	x	%r1,256(%r3,%r12)	# rcon[i]
1243 	xr	%r8,%r1			# rk[8]=rk[0]^...
1244 	xr	%r9,%r8			# rk[9]=rk[1]^rk[8]
1245 	xr	%r10,%r9			# rk[10]=rk[2]^rk[9]
1246 	xr	%r11,%r10			# rk[11]=rk[3]^rk[10]
1247 	st	%r8,32(%r4)
1248 	st	%r9,36(%r4)
1249 	st	%r10,40(%r4)
1250 	st	%r11,44(%r4)
1251 	brct	%r13,.L256_continue
1252 	lghi	%r0,14
1253 	lghi	%r2,0
1254 	lm	%r4,%r13,4*4(%r15)
1255 	br	%r14
1256 
1257 .align	16
1258 .L256_continue:
1259 	lgr	%r1,%r11			# temp=rk[11]
1260 	srlg	%r5,%r11,8
1261 	srlg	%r6,%r11,16
1262 	srlg	%r7,%r11,24
1263 	nr	%r1,%r0
1264 	nr	%r5,%r0
1265 	nr	%r6,%r0
1266 	la	%r1,0(%r1,%r12)
1267 	la	%r5,0(%r5,%r12)
1268 	la	%r6,0(%r6,%r12)
1269 	la	%r7,0(%r7,%r12)
1270 	llgc	%r1,0(%r1)		# Te4[rk[11]>>0]
1271 	icm	%r1,2,0(%r5)		# Te4[rk[11]>>8]<<8
1272 	icm	%r1,4,0(%r6)		# Te4[rk[11]>>16]<<16
1273 	icm	%r1,8,0(%r7)		# Te4[rk[11]>>24]<<24
1274 	x	%r1,16(%r4)		# rk[12]=rk[4]^...
1275 	st	%r1,48(%r4)
1276 	x	%r1,20(%r4)		# rk[13]=rk[5]^rk[12]
1277 	st	%r1,52(%r4)
1278 	x	%r1,24(%r4)		# rk[14]=rk[6]^rk[13]
1279 	st	%r1,56(%r4)
1280 	x	%r1,28(%r4)		# rk[15]=rk[7]^rk[14]
1281 	st	%r1,60(%r4)
1282 
1283 	srlg	%r5,%r1,8
1284 	srlg	%r6,%r1,16
1285 	srlg	%r7,%r1,24
1286 	nr	%r1,%r0
1287 	nr	%r5,%r0
1288 	nr	%r6,%r0
1289 
1290 	la	%r4,32(%r4)		# key+=8
1291 	la	%r3,4(%r3)		# i++
1292 	j	.L256_loop
1293 
1294 .Lminus1:
1295 	lghi	%r2,-1
1296 	br	%r14
1297 .size	AES_set_encrypt_key,.-AES_set_encrypt_key
1298 
# void AES_set_decrypt_key(const unsigned char *in, int bits,
# 		 AES_KEY *key) {
#
# Derives the decryption key schedule from the encryption one.
#
# The routine first calls _s390x_AES_set_encrypt_key (presumably the
# internal entry of AES_set_encrypt_key above) with the caller's
# arguments untouched.  A non-zero status in %r2 is returned as is.
# On success the code below relies on two things the callee leaves
# behind: %r0 equal to the word stored at 240(key), and %r4 still
# pointing at the key structure.
#
#   %r0 >= 16	km/kmc function code (hardware key): the decrypt bit
#		is OR-ed in, the result is stored at 240(key) and the
#		routine returns with %r2 still zero.
#   %r0 <  16	software round count: the 16-byte round keys are
#		swapped end for end (.Linv), then (rounds-1)*4 words
#		starting at 16(key) are transformed in place (.Lmix).
#
# Only %r14 is saved here; %r6-%r13 are reloaded at the end from the
# save area that the callee filled on its software path.
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,@function
.align	16
AES_set_decrypt_key:
	#st	%r4,4*4(%r15)	# I rely on AES_set_encrypt_key to
	st	%r14,14*4(%r15)	# save non-volatile registers and %r4!
	bras	%r14,_s390x_AES_set_encrypt_key
	#l	%r4,4*4(%r15)
	l	%r14,14*4(%r15)
	ltgr	%r2,%r2		# callee status
	bnzr	%r14		# propagate failure unchanged
	#l	%r0,240(%r4)
	lhi	%r1,16
	cr	%r0,%r1
	jl	.Lgo		# below 16: software round count
	oill	%r0,S390X_DECRYPT	# set "decrypt" bit
	st	%r0,240(%r4)
	br	%r14
.align	16
.Lgo:	lgr	%r13,%r0	#llgf	%r13,240(%r4)
	la	%r5,0(%r4)	# front cursor = first round key
	sllg	%r6,%r13,4
	la	%r6,0(%r6,%r4)	# back cursor = key + rounds*16
	srl	%r13,1		# rounds/2 swaps
	lghi	%r1,-16		# step for the back cursor

	# swap 16-byte round keys, walking inwards from both ends
.align	16
.Linv:	lmg	%r8,%r9,0(%r5)
	lmg	%r10,%r11,0(%r6)
	stmg	%r8,%r9,0(%r6)
	stmg	%r10,%r11,0(%r5)
	la	%r5,16(%r5)
	la	%r6,0(%r1,%r6)
	brct	%r13,.Linv
	llgf	%r13,240(%r4)
	aghi	%r13,-1
	sll	%r13,2	# (rounds-1)*4
	# byte-replicated masks used for the packed doubling below
	llilh	%r5,0x8080
	llilh	%r6,0x1b1b
	llilh	%r7,0xfefe
	oill	%r5,0x8080
	oill	%r6,0x1b1b
	oill	%r7,0xfefe

	# Per word: tp2, tp4 and tp8 are successive byte-wise doublings
	# of tp1 (shift left by one within each byte, 0x1b folded into
	# every byte whose top bit was set); the word is then replaced
	# by the rotate/xor combination spelled out in the comments.
	# NOTE(review): this looks like InvMixColumns applied to the
	# inner round keys -- confirm against the C reference.
.align	16
.Lmix:	l	%r8,16(%r4)	# tp1
	lr	%r9,%r8
	ngr	%r9,%r5
	srlg	%r1,%r9,7
	slr	%r9,%r1
	nr	%r9,%r6
	sllg	%r1,%r8,1
	nr	%r1,%r7
	xr	%r9,%r1		# tp2

	lr	%r10,%r9
	ngr	%r10,%r5
	srlg	%r1,%r10,7
	slr	%r10,%r1
	nr	%r10,%r6
	sllg	%r1,%r9,1
	nr	%r1,%r7
	xr	%r10,%r1		# tp4

	lr	%r11,%r10
	ngr	%r11,%r5
	srlg	%r1,%r11,7
	slr	%r11,%r1
	nr	%r11,%r6
	sllg	%r1,%r10,1
	nr	%r1,%r7
	xr	%r11,%r1		# tp8

	xr	%r9,%r8		# tp2^tp1
	xr	%r10,%r8		# tp4^tp1
	rll	%r8,%r8,24	# = ROTATE(tp1,8)
	xr	%r10,%r11		# ^=tp8
	xr	%r8,%r9		# ^=tp2^tp1
	xr	%r9,%r11		# tp2^tp1^tp8
	xr	%r8,%r10		# ^=tp4^tp1^tp8
	rll	%r9,%r9,8
	rll	%r10,%r10,16
	xr	%r8,%r9		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	%r11,%r11,24
	xr	%r8,%r10    	# ^= ROTATE(tp8^tp4^tp1,16)
	xr	%r8,%r11		# ^= ROTATE(tp8,8)

	st	%r8,16(%r4)
	la	%r4,4(%r4)	# next word; %r4 is not restored afterwards
	brct	%r13,.Lmix

	lm	%r6,%r13,6*4(%r15)# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	%r14
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
# AES_cbc_encrypt -- CBC mode encryption and decryption.
#
# Register use after the entry swap: %r2 = input, %r3 = length in
# bytes, %r4 = output, %r5 = key schedule, %r6 = ivec pointer.
# On the software path the word at 96(%r15) selects the direction
# (zero means decrypt); presumably this is the stack-passed enc flag.
#
# The word at 240(key) picks the implementation: 16 or more is a kmc
# function code (hardware path, direction taken from its decrypt
# bit), anything smaller is a software round count.
#
# Hardware path stack use (caller frame, nothing is allocated):
#   12(%r15)		parked output pointer (truncated decrypt only)
#   16..63(%r15)	kmc parameter block: ivec, then 32 key bytes
#   64..79(%r15)	one-block scratch buffer for a trailing partial
#			block
# A trailing partial block is zero-padded and encrypted as a full
# block (16 bytes are written) when encrypting; when decrypting, one
# more 16-byte block is decrypted into the scratch buffer and only
# the remaining bytes are copied out.  The updated chaining value is
# written back through the ivec pointer on every path.
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
	lhi	%r0,16
	cl	%r0,240(%r5)
	jh	.Lcbc_software	# 16 > key[240]: no kmc code stored

	lg	%r0,0(%r6)	# copy ivec
	lg	%r1,8(%r6)
	stmg	%r0,%r1,16(%r15)
	lmg	%r0,%r1,0(%r5)	# copy key, cover 256 bit
	stmg	%r0,%r1,32(%r15)
	lmg	%r0,%r1,16(%r5)
	stmg	%r0,%r1,48(%r15)
	l	%r0,240(%r5)	# load kmc code
	lghi	%r5,15		# res=len%16, len-=res;
	ngr	%r5,%r3
	slr	%r3,%r5
	la	%r1,16(%r15)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	%r5,%r5
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16(%r15)	# copy ivec to caller
	stg	%r0,0(%r6)
	stg	%r1,8(%r6)
	br	%r14
.align	16
.Lkmc_truncated:
	ahi	%r5,-1		# it's the way it's encoded in mvc
	tmll	%r0,S390X_DECRYPT
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*4(%r15)	# zero the scratch block
	stg	%r1,16*4+8(%r15)
	bras	%r1,1f		# %r1 = address of the mvc template
	mvc	16*4(1,%r15),0(%r2)
1:	ex	%r5,0(%r1)	# copy res bytes of input into scratch
	la	%r1,16(%r15)	# restore parameter block
	la	%r2,16*4(%r15)
	lghi	%r3,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	# Park the output pointer below the parameter block.  The slot
	# at 4*4(%r15) is the first word of the chaining value that the
	# kmc below reads through %r1, so saving there would corrupt
	# the first four bytes of the decrypted tail and of the ivec
	# handed back to the caller.
	st	%r4,3*4(%r15)
	la	%r4,16*4(%r15)	# decrypt into the scratch block
	lghi	%r3,16
	.long	0xb92f0042	# kmc %r4,%r2
	l	%r4,3*4(%r15)
	bras	%r1,2f		# %r1 = address of the mvc template
	mvc	0(1,%r4),16*4(%r15)
2:	ex	%r5,0(%r1)	# copy res bytes of plaintext out
	j	.Lkmc_done
.align	16
.Lcbc_software:
	stm	%r5,%r14,5*4(%r15)
	lhi	%r0,0
	cl	%r0,96(%r15)
	je	.Lcbc_decrypt

	larl	%r12,AES_Te

	llgf	%r8,0(%r6)	# chaining value lives in %r8-%r11
	llgf	%r9,4(%r6)
	llgf	%r10,8(%r6)
	llgf	%r11,12(%r6)

	lghi	%r0,16
	slr	%r3,%r0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm	%r2,%r4,2*4(%r15)
	x	%r8,0(%r2)
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	lgr	%r4,%r5		# key pointer for _s390x_AES_encrypt

	bras	%r14,_s390x_AES_encrypt

	lm	%r2,%r5,2*4(%r15)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)

	la	%r2,16(%r2)
	la	%r4,16(%r4)
	lghi	%r0,16
	ltr	%r3,%r3
	jz	.Lcbc_enc_done
	slr	%r3,%r0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l	%r6,6*4(%r15)
	st	%r8,0(%r6)	# hand the last ciphertext block back as ivec
	st	%r9,4(%r6)
	st	%r10,8(%r6)
	st	%r11,12(%r6)

	lm	%r7,%r14,7*4(%r15)
	br	%r14

.align	16
.Lcbc_enc_tail:
	aghi	%r3,15		# remaining bytes minus one, as mvc wants
	lghi	%r0,0
	stg	%r0,16*4(%r15)	# zero the scratch block
	stg	%r0,16*4+8(%r15)
	bras	%r1,3f		# %r1 = address of the mvc template
	mvc	16*4(1,%r15),0(%r2)
3:	ex	%r3,0(%r1)
	lghi	%r3,0		# one more pass, then done
	la	%r2,16*4(%r15)
	j	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	larl	%r12,AES_Td

	lg	%r0,0(%r6)
	lg	%r1,8(%r6)
	stmg	%r0,%r1,16*4(%r15)	# current chaining value

.Lcbc_dec_loop:
	stm	%r2,%r4,2*4(%r15)
	llgf	%r8,0(%r2)
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)
	lgr	%r4,%r5		# key pointer for _s390x_AES_decrypt

	bras	%r14,_s390x_AES_decrypt

	lm	%r2,%r5,2*4(%r15)
	sllg	%r8,%r8,32	# pack the four words into %r8 and %r10
	sllg	%r10,%r10,32
	lr	%r8,%r9
	lr	%r10,%r11

	lg	%r0,0(%r2)	# this ciphertext block is the next ivec
	lg	%r1,8(%r2)
	xg	%r8,16*4(%r15)
	xg	%r10,16*4+8(%r15)
	lghi	%r9,16
	slr	%r3,%r9
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	%r8,0(%r4)
	stg	%r10,8(%r4)
	stmg	%r0,%r1,16*4(%r15)

	la	%r2,16(%r2)
	la	%r4,16(%r4)
	j	.Lcbc_dec_loop

.Lcbc_dec_done:
	stg	%r8,0(%r4)
	stg	%r10,8(%r4)
.Lcbc_dec_exit:
	lm	%r6,%r14,6*4(%r15)
	stmg	%r0,%r1,0(%r6)

	br	%r14

.align	16
.Lcbc_dec_tail:
	aghi	%r3,15		# remaining bytes minus one, as mvc wants
	stg	%r8,16*4(%r15)
	stg	%r10,16*4+8(%r15)
	bras	%r9,4f		# %r9 = address of the mvc template
	mvc	0(1,%r4),16*4(%r15)
4:	ex	%r3,0(%r9)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
# AES_ctr32_encrypt -- CTR mode with a 32-bit counter in the last
# word of the counter block.
#
# Register use after the entry swap: %r2 = input, %r3 = number of
# 16-byte blocks (zero-extended from 32 bits), %r4 = output,
# %r5 = key schedule, %r6 = pointer to the 16-byte counter block.
#
# Three implementations, chosen from the word at 240(key):
#   below 16		software loop around _s390x_AES_encrypt
#   16 or more		km/kma function code; kma is used when more
#			than 16 blocks are requested and the capability
#			vector has the matching bit, otherwise the
#			km based loop at .Lctr32_nokma runs.
# None of the paths stores an updated counter back through %r6.
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,@function
.align	16
AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3	# safe in ctr32 subroutine even in 64-bit case
	l	%r0,240(%r5)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lctr32_software

	st	%r10,10*4(%r15)
	st	%r11,11*4(%r15)

	clr	%r3,%r1		# does work even in 64-bit mode
	jle	.Lctr32_nokma		# kma is slower for <= 16 blocks

	larl	%r1,OPENSSL_s390xcap_P
	lr	%r10,%r0
	llihh	%r11,0x8000
	srlg	%r11,%r11,0(%r10)	# bit number = function code
	ng	%r11,S390X_KMA(%r1)		# check kma capability vector
	jz	.Lctr32_nokma

	lhi	%r1,-96-112
	lr	%r11,%r15
	la	%r15,0(%r1,%r15)			# prepare parameter block

	lhi	%r1,0x0600
	sllg	%r3,%r3,4			# blocks to bytes
	or	%r0,%r1				# set HS and LAAD flags

	st	%r11,0(%r15)			# backchain
	la	%r1,96(%r15)

	lmg	%r10,%r11,0(%r5)			# copy key
	stg	%r10,96+80(%r15)
	stg	%r11,96+88(%r15)
	lmg	%r10,%r11,16(%r5)
	stg	%r10,96+96(%r15)
	stg	%r11,96+104(%r15)

	lmg	%r10,%r11,0(%r6)			# copy iv
	stg	%r10,96+64(%r15)
	ahi	%r11,-1				# kma requires counter-1
	stg	%r11,96+72(%r15)
	st	%r11,96+12(%r15)		# copy counter

	lghi	%r10,0				# no AAD
	lghi	%r11,0

	.long	0xb929a042	# kma %r4,%r10,%r2
	brc	1,.-4		# pay attention to "partial completion"

	stg	%r0,96+80(%r15)		# wipe key
	stg	%r0,96+88(%r15)
	stg	%r0,96+96(%r15)
	stg	%r0,96+104(%r15)
	la	%r15,96+112(%r15)		# release the parameter block

	lm	%r10,%r11,10*4(%r15)
	br	%r14

.align	16
.Lctr32_nokma:
	stm	%r6,%r9,6*4(%r15)

	slgr	%r4,%r2		# %r4 = out - inp, used as index below
	la	%r1,0(%r5)	# %r1 is permanent copy of %r5
	lg	%r5,0(%r6)	# load ivec
	lg	%r6,8(%r6)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	%r9,-4096
	algr	%r8,%r15
	lgr	%r7,%r15
	ngr	%r8,%r9		# align at page boundary
	slgr	%r7,%r8		# total buffer size
	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is extended-immediate facility
	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	%r15,1024(%r8)	# alloca
	srlg	%r7,%r7,4	# convert bytes to blocks, minimum 16
	st	%r10,0(%r15)	# back-chain
	st	%r7,4(%r15)	# blocks in the buffer, read back by the zap loop

	slgr	%r3,%r7
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	%r7,%r3	# input is shorter than allocated buffer
	lghi	%r3,0
	st	%r7,4(%r15)

.Lctr32_hw_switch:
.Lctr32_km_loop:
	la	%r10,16(%r15)
	lgr	%r11,%r7
	# fill the buffer with %r7 consecutive counter blocks
.Lctr32_km_prepare:
	stg	%r5,0(%r10)
	stg	%r6,8(%r10)
	la	%r10,16(%r10)
	ahi	%r6,1		# 32-bit increment, preserves upper half
	brct	%r11,.Lctr32_km_prepare

	la	%r8,16(%r15)	# inp
	sllg	%r9,%r7,4	# len
	la	%r10,16(%r15)	# out
	.long	0xb92e00a8	# km %r10,%r8
	brc	1,.-4		# pay attention to "partial completion"

	la	%r10,16(%r15)
	lgr	%r11,%r7
	slgr	%r10,%r2	# %r10 = buffer - inp, used as index below
	# out = inp ^ encrypted counter blocks
.Lctr32_km_xor:
	lg	%r8,0(%r2)
	lg	%r9,8(%r2)
	xg	%r8,0(%r10,%r2)
	xg	%r9,8(%r10,%r2)
	stg	%r8,0(%r4,%r2)
	stg	%r9,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lctr32_km_xor

	slgr	%r3,%r7
	brc	1,.Lctr32_km_loop	# not zero, no borrow
	algr	%r7,%r3
	lghi	%r3,0
	brc	4+1,.Lctr32_km_loop	# not zero

	l	%r8,0(%r15)	# back-chain, doubles as the fill value
	l	%r9,4(%r15)	# block count stored at allocation time
	la	%r10,16(%r15)
	# overwrite the key stream left in the stack buffer
.Lctr32_km_zap:
	stg	%r8,0(%r10)
	stg	%r8,8(%r10)
	la	%r10,16(%r10)
	brct	%r9,.Lctr32_km_zap

	la	%r15,0(%r8)	# release the buffer
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
.Lctr32_software:
	stm	%r5,%r14,5*4(%r15)
	slr	%r2,%r4		# %r2 = inp - out, used as index below
	larl	%r12,AES_Te
	llgf	%r1,12(%r6)	# running 32-bit counter

.Lctr32_loop:
	stm	%r2,%r4,2*4(%r15)
	llgf	%r8,0(%r6)
	llgf	%r9,4(%r6)
	llgf	%r10,8(%r6)
	lgr	%r11,%r1
	st	%r1,16*4(%r15)	# counter is kept on the stack across the call
	lgr	%r4,%r5		# key pointer for _s390x_AES_encrypt

	bras	%r14,_s390x_AES_encrypt

	lm	%r2,%r6,2*4(%r15)
	llgf	%r1,16*4(%r15)
	x	%r8,0(%r2,%r4)
	x	%r9,4(%r2,%r4)
	x	%r10,8(%r2,%r4)
	x	%r11,12(%r2,%r4)
	stm	%r8,%r11,0(%r4)

	la	%r4,16(%r4)
	ahi	%r1,1		# 32-bit increment
	brct	%r3,.Lctr32_loop

	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
# _s390x_xts_km -- internal helper shared by AES_xts_encrypt and
# AES_xts_decrypt for the bulk of the data on the hardware path.
#
# Expected on entry, as set up by the two callers in this file:
#   %r0		km function code taken from 240(key1)
#   %r2		input pointer
#   %r3		number of bytes to process, a multiple of 16
#   %r4		output minus input (used as an index off %r2)
#   %r5		key1 pointer
#   4(%r15)	saved copy of the original length
#   80(%r15)	16-byte encrypted tweak
# On return:
#   %r2		advanced past the processed data
#   %r3		original length modulo 16; the condition code tells
#		zero from non-zero, callers branch on it directly
#   %r8,%r9	tweak in byte-reversed form (see the two exit paths)
# Clobbers %r0, %r1, %r5-%r7, %r10, %r11; callers save %r6-%r11.
#
# When the capability vector has bit 32+function code set, a single
# km with function code | 32 does all the work.  Otherwise the
# vanilla path builds a vector of tweaks on the stack and runs plain
# km over tweak-masked data.
.type	_s390x_xts_km,@function
.align	16
_s390x_xts_km:
	llgfr	%r8,%r0			# put aside the function code
	lghi	%r9,0x7f
	nr	%r9,%r0
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,32(%r9)		# check for 32+function code
	ng	%r0,S390X_KM(%r1)	# check km capability vector
	lgr	%r0,%r8			# restore the function code
	la	%r1,0(%r5)		# restore %r5
	jz	.Lxts_km_vanilla	# cc still from ng; lgr and la keep it

	lmg	%r6,%r7,80(%r15)	# put aside the tweak value
	algr	%r4,%r2			# back to an absolute output pointer

	oill	%r0,32			# switch to xts function code
	aghi	%r9,-18			#
	sllg	%r9,%r9,3		# (function code - 18)*8, 0 or 16
	la	%r1,80-16(%r15)
	slgr	%r1,%r9			# parameter block position
	lmg	%r8,%r11,0(%r5)	# load 256 bits of key material,
	stmg	%r8,%r11,0(%r1)		# and copy it to parameter block.
					# yes, it contains junk and overlaps
					# with the tweak in 128-bit case.
					# it's done to avoid conditional
					# branch.
	stmg	%r6,%r7,80(%r15)	# "re-seat" the tweak value

	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"

	lrvg	%r8,80+0(%r15)	# load the last tweak
	lrvg	%r9,80+8(%r15)
	stmg	%r0,%r3,80-32(%r15)	# wipe copy of the key

	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0(%r5)		# restore pointer to %r5
	slgr	%r4,%r2			# back to output minus input

	llgc	%r3,2*4-1(%r15)	# low byte of the saved length
	nill	%r3,0x0f		# %r3%=16
	br	%r14

.align	16
.Lxts_km_vanilla:
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	%r9,-4096
	algr	%r8,%r15
	lgr	%r7,%r15
	ngr	%r8,%r9		# align at page boundary
	slgr	%r7,%r8		# total buffer size
	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is extended-immediate facility
	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	%r15,1024(%r8)	# alloca
	nill	%r7,0xfff0	# round to 16*n
	st	%r10,0(%r15)	# back-chain
	nill	%r3,0xfff0	# redundant
	st	%r7,4(%r15)	# bytes in the buffer, read back by the zap loop

	slgr	%r3,%r7
	brc	1,.Lxts_km_go	# not zero, no borrow
	algr	%r7,%r3	# input is shorter than allocated buffer
	lghi	%r3,0
	st	%r7,4(%r15)

.Lxts_km_go:
	lrvg	%r8,80+0(%r10)	# load the tweak value in little-endian
	lrvg	%r9,80+8(%r10)

	la	%r10,16(%r15)		# vector of ascending tweak values
	slgr	%r10,%r2		# as an index off %r2
	srlg	%r11,%r7,4		# blocks in this pass
	j	.Lxts_km_start		# first block uses the tweak as is

.Lxts_km_loop:
	la	%r10,16(%r15)
	slgr	%r10,%r2
	srlg	%r11,%r7,4
.Lxts_km_prepare:
	# advance the tweak: shift the 128-bit value in %r9:%r8 left by
	# one and fold 0x87 into the low byte when the top bit was set
	lghi	%r5,0x87
	srag	%r6,%r9,63		# broadcast upper bit
	ngr	%r5,%r6			# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5
.Lxts_km_start:
	lrvgr	%r5,%r8			# flip byte order
	lrvgr	%r6,%r9
	stg	%r5,0(%r10,%r2)		# remember the tweak for the second xor
	stg	%r6,8(%r10,%r2)
	xg	%r5,0(%r2)
	xg	%r6,8(%r2)
	stg	%r5,0(%r4,%r2)		# out = inp ^ tweak
	stg	%r6,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lxts_km_prepare

	slgr	%r2,%r7		# rewind %r2
	la	%r10,0(%r4,%r2)
	lgr	%r11,%r7
	.long	0xb92e00aa		# km %r10,%r10
	brc	1,.-4			# pay attention to "partial completion"

	la	%r10,16(%r15)
	slgr	%r10,%r2
	srlg	%r11,%r7,4
	# second pass: out ^= stored tweak
.Lxts_km_xor:
	lg	%r5,0(%r4,%r2)
	lg	%r6,8(%r4,%r2)
	xg	%r5,0(%r10,%r2)
	xg	%r6,8(%r10,%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lxts_km_xor

	slgr	%r3,%r7
	brc	1,.Lxts_km_loop		# not zero, no borrow
	algr	%r7,%r3
	lghi	%r3,0
	brc	4+1,.Lxts_km_loop	# not zero

	l	%r5,0(%r15)		# back-chain
	llgf	%r7,4(%r15)	# bytes used
	la	%r6,16(%r15)
	srlg	%r7,%r7,4
	# overwrite the tweak vector, using the back-chain as fill value
.Lxts_km_zap:
	stg	%r5,0(%r6)
	stg	%r5,8(%r6)
	la	%r6,16(%r6)
	brct	%r7,.Lxts_km_zap

	la	%r15,0(%r5)		# release the buffer
	llgc	%r3,2*4-1(%r5)	# low byte of the saved length
	nill	%r3,0x0f		# %r3%=16
	bzr	%r14

	# generate one more tweak...
	lghi	%r5,0x87
	srag	%r6,%r9,63		# broadcast upper bit
	ngr	%r5,%r6			# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5

	ltr	%r3,%r3		# clear zero flag
	br	%r14
.size	_s390x_xts_km,.-_s390x_xts_km
1914 
# AES_xts_encrypt -- XTS mode encryption with ciphertext stealing.
#
# Register use after the entry swap: %r2 = input, %r3 = length in
# bytes, %r4 = output, %r5 = key1 (data key), %r6 = key2 (tweak key).
# The pointer to the 16-byte iv is read from 96(%r15).
# Returns immediately when the length is below 16.
#
# The word at 240(key2) picks the implementation: 16 or more is a km
# function code (hardware path through _s390x_xts_km), anything
# smaller selects the software path around _s390x_AES_encrypt.
#
# Stack use in the caller frame: 4(%r15) holds a copy of the length,
# 80..95(%r15) holds the current tweak and is overwritten on exit.
.globl	AES_xts_encrypt
.type	AES_xts_encrypt,@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3
	st	%r3,1*4(%r15)	# save copy of %r3
	srag	%r3,%r3,4		# formally wrong, because it expands
					# sign byte, but who can afford asking
					# to process more than 2^63-1 bytes?
					# I use it, because it sets condition
					# code...
	bcr	8,%r14			# abort if zero (i.e. less than 16)
	llgf	%r0,240(%r6)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software

	st	%r14,5*4(%r15)
	stm	%r6,%r11,6*4(%r15)

	sllg	%r3,%r3,4		# %r3&=~15
	slgr	%r4,%r2			# %r4 = out - inp from here on

	# generate the tweak value
	l	%r11,96(%r15)	# pointer to iv
	la	%r10,80(%r15)
	lmg	%r8,%r9,0(%r11)
	lghi	%r11,16
	stmg	%r8,%r9,0(%r10)
	la	%r1,0(%r6)		# %r6 is not needed anymore
	.long	0xb92e00aa		# km %r10,%r10, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240(%r5)
	la	%r1,0(%r5)		# %r5 is not needed anymore
	bras	%r14,_s390x_xts_km
	jz	.Lxts_enc_km_done	# no partial block left over

	aghi	%r2,-16		# take one step back
	la	%r7,0(%r4,%r2)	# put aside real %r4
	# swap the %r3 tail bytes of input with the head of the last
	# ciphertext block; the displaced bytes become the short tail
.Lxts_enc_km_steal:
	llgc	%r5,16(%r2)
	llgc	%r6,0(%r4,%r2)
	stc	%r5,0(%r4,%r2)
	stc	%r6,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_enc_km_steal

	# encrypt the merged block in place under the tweak in %r8,%r9
	la	%r10,0(%r7)
	lghi	%r11,16
	lrvgr	%r5,%r8			# flip byte order
	lrvgr	%r6,%r9
	xg	%r5,0(%r10)
	xg	%r6,8(%r10)
	stg	%r5,0(%r10)
	stg	%r6,8(%r10)
	.long	0xb92e00aa		# km %r10,%r10
	brc	1,.-4			# can this happen?
	lrvgr	%r5,%r8			# flip byte order
	lrvgr	%r6,%r9
	xg	%r5,0(%r7)
	xg	%r6,8(%r7)
	stg	%r5,0(%r7)
	stg	%r6,8(%r7)

.Lxts_enc_km_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	l	%r14,5*4(%r15)
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
.Lxts_enc_software:
	stm	%r6,%r14,6*4(%r15)

	slgr	%r4,%r2			# %r4 = out - inp from here on

	l	%r11,96(%r15)	# ivp
	llgf	%r8,0(%r11)		# load iv
	llgf	%r9,4(%r11)
	llgf	%r10,8(%r11)
	llgf	%r11,12(%r11)
	stm	%r2,%r5,2*4(%r15)
	la	%r4,0(%r6)		# key2 for the tweak encryption
	larl	%r12,AES_Te
	bras	%r14,_s390x_AES_encrypt	# generate the tweak
	lm	%r2,%r5,2*4(%r15)
	stm	%r8,%r11,80(%r15)	# save the tweak
	j	.Lxts_enc_enter

.align	16
.Lxts_enc_loop:
	# advance the tweak: 128-bit shift left by one with 0x87 folded
	# into the low byte when the top bit was set
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9			# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11
	la	%r2,16(%r2)		# %r2+=16
.Lxts_enc_enter:
	x	%r8,0(%r2)		# ^=*(%r2)
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)	# only two registers are changing
	la	%r4,0(%r5)		# key1 for the data encryption
	bras	%r14,_s390x_AES_encrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)
	brct	%r3,.Lxts_enc_loop

	llgc	%r3,7(%r15)		# low byte of the saved length
	nill	%r3,0x0f		# %r3%16
	jz	.Lxts_enc_done

	la	%r7,0(%r2,%r4)	# put aside real %r4
	# swap the %r3 tail bytes of input with the head of the last
	# ciphertext block; the displaced bytes become the short tail
.Lxts_enc_steal:
	llgc	%r0,16(%r2)
	llgc	%r1,0(%r4,%r2)
	stc	%r0,0(%r4,%r2)
	stc	%r1,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_enc_steal
	la	%r4,0(%r7)		# restore real %r4

	# generate last tweak...
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9			# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11

	x	%r8,0(%r4)		# ^=*(inp)|stolen cipher-text
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	st	%r4,4*4(%r15)
	la	%r4,0(%r5)		# key1 for the data encryption
	bras	%r14,_s390x_AES_encrypt
	l	%r4,4*4(%r15)
	x	%r8,80(%r15)	# ^=tweak
	x	%r9,84(%r15)
	x	%r10,88(%r15)
	x	%r11,92(%r15)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)

.Lxts_enc_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_xts_encrypt,.-AES_xts_encrypt
# AES_xts_decrypt -- XTS mode decryption with ciphertext stealing.
#
# Register use after the entry swap: %r2 = input, %r3 = length in
# bytes, %r4 = output, %r5 = key1 (data key), %r6 = key2 (tweak key).
# The pointer to the 16-byte iv is read from 96(%r15).
# Returns immediately when the length is below 16.
#
# Length handling: a length that is not a multiple of 16 stays
# reduced by 16, so the last full block is held back and processed
# together with the short tail.  That pair uses two tweaks in swapped
# order: the held-back block is decrypted under the later tweak, the
# merged final block under the earlier one.
#
# The word at 240(key2) picks the implementation: 16 or more is a km
# function code (hardware path through _s390x_xts_km), anything
# smaller selects the software path.  The tweak itself is always
# produced by encryption, the data blocks by decryption.
#
# Stack use in the caller frame: 4(%r15) holds a copy of the length,
# 80..95(%r15) the current tweak and, on the software path,
# 64..79(%r15) the second tweak; both are overwritten on exit.
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3
	st	%r3,1*4(%r15)	# save copy of %r3
	aghi	%r3,-16
	bcr	4,%r14			# abort if less than zero. formally
					# wrong, because %r3 is unsigned,
					# but who can afford asking to
					# process more than 2^63-1 bytes?
	tmll	%r3,0x0f
	jnz	.Lxts_dec_proceed	# partial tail: keep one block back
	aghi	%r3,16			# whole blocks only: undo the -16
.Lxts_dec_proceed:
	llgf	%r0,240(%r6)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	st	%r14,5*4(%r15)
	stm	%r6,%r11,6*4(%r15)

	nill	%r3,0xfff0		# %r3&=~15
	slgr	%r4,%r2			# %r4 = out - inp from here on

	# generate the tweak value
	l	%r11,96(%r15)	# pointer to iv
	la	%r10,80(%r15)
	lmg	%r8,%r9,0(%r11)
	lghi	%r11,16
	stmg	%r8,%r9,0(%r10)
	la	%r1,0(%r6)		# %r6 is not needed past this point
	.long	0xb92e00aa		# km %r10,%r10, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240(%r5)
	la	%r1,0(%r5)		# %r5 is not needed anymore

	ltgr	%r3,%r3
	jz	.Lxts_dec_km_short	# nothing but the held-back block and tail
	bras	%r14,_s390x_xts_km
	jz	.Lxts_dec_km_done	# no partial block left over

	lrvgr	%r10,%r8			# make copy in reverse byte order
	lrvgr	%r11,%r9
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	%r3,7(%r15)		# low byte of the saved length
	nill	%r3,0x0f		# %r3%=16
	lrvg	%r8,80+0(%r15)	# load the tweak
	lrvg	%r9,80+8(%r15)
	lrvgr	%r10,%r8			# make copy in reverse byte order
	lrvgr	%r11,%r9

.Lxts_dec_km_2ndtweak:
	# %r10,%r11 keep the earlier tweak; %r8,%r9 are advanced by a
	# 128-bit shift left by one with 0x87 folded into the low byte
	# when the top bit was set
	lghi	%r5,0x87
	srag	%r6,%r9,63		# broadcast upper bit
	ngr	%r5,%r6			# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5
	lrvgr	%r5,%r8			# flip byte order
	lrvgr	%r6,%r9

	# held-back block, processed under the later tweak
	xg	%r5,0(%r2)
	xg	%r6,8(%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)
	la	%r6,0(%r4,%r2)
	lghi	%r7,16
	.long	0xb92e0066		# km %r6,%r6
	brc	1,.-4			# can this happen?
	lrvgr	%r5,%r8
	lrvgr	%r6,%r9
	xg	%r5,0(%r4,%r2)
	xg	%r6,8(%r4,%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)

	la	%r7,0(%r4,%r2)	# put aside real %r4
	# swap the %r3 tail bytes of input with the head of the block
	# just produced; the displaced bytes become the short tail
.Lxts_dec_km_steal:
	llgc	%r5,16(%r2)
	llgc	%r6,0(%r4,%r2)
	stc	%r5,0(%r4,%r2)
	stc	%r6,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_dec_km_steal

	# merged block, processed in place under the earlier tweak
	lgr	%r8,%r10
	lgr	%r9,%r11
	xg	%r8,0(%r7)
	xg	%r9,8(%r7)
	stg	%r8,0(%r7)
	stg	%r9,8(%r7)
	la	%r8,0(%r7)
	lghi	%r9,16
	.long	0xb92e0088		# km %r8,%r8
	brc	1,.-4			# can this happen?
	xg	%r10,0(%r7)
	xg	%r11,8(%r7)
	stg	%r10,0(%r7)
	stg	%r11,8(%r7)
.Lxts_dec_km_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	l	%r14,5*4(%r15)
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
.Lxts_dec_software:
	stm	%r6,%r14,6*4(%r15)

	srlg	%r3,%r3,4		# bytes to blocks
	slgr	%r4,%r2			# %r4 = out - inp from here on

	l	%r11,96(%r15)	# ivp
	llgf	%r8,0(%r11)		# load iv
	llgf	%r9,4(%r11)
	llgf	%r10,8(%r11)
	llgf	%r11,12(%r11)
	stm	%r2,%r5,2*4(%r15)
	la	%r4,0(%r6)		# key2 for the tweak encryption
	larl	%r12,AES_Te
	bras	%r14,_s390x_AES_encrypt	# generate the tweak
	lm	%r2,%r5,2*4(%r15)
	larl	%r12,AES_Td		# data blocks use the decrypt tables
	ltr	%r3,%r3
	stm	%r8,%r11,80(%r15)	# save the tweak
	jz	.Lxts_dec_short		# cc still from ltr; stm keeps it
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	# advance the tweak: 128-bit shift left by one with 0x87 folded
	# into the low byte when the top bit was set
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9			# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11
.Lxts_dec_enter:
	x	%r8,0(%r2)		# tweak^=*(inp)
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)	# only two registers are changing
	la	%r4,0(%r5)		# key1 for the data decryption
	bras	%r14,_s390x_AES_decrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r3,.Lxts_dec_loop

	llgc	%r3,7(%r15)		# low byte of the saved length
	nill	%r3,0x0f		# %r3%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r6,%r9			# flip byte order
	lrvgr	%r7,%r11
	stmg	%r6,%r7,80(%r15)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	%r3,7(%r15)		# low byte of the saved length
	nill	%r3,0x0f		# %r3%16
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,%r11,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9			# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
	stg	%r9,80-16+0(%r15)	# save the 2nd tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80-16+8(%r15)
	llgfr	%r11,%r11

	x	%r8,0(%r2)		# tweak_the_2nd^=*(inp)
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)
	la	%r4,0(%r5)		# key1 for the data decryption
	bras	%r14,_s390x_AES_decrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80-16+0(%r15)	# ^=tweak_the_2nd
	x	%r9,80-16+4(%r15)
	x	%r10,80-16+8(%r15)
	x	%r11,80-16+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)

	la	%r7,0(%r4,%r2)	# put aside real %r4
	# swap the %r3 tail bytes of input with the head of the block
	# just produced; the displaced bytes become the short tail
.Lxts_dec_steal:
	llgc	%r0,16(%r2)
	llgc	%r1,0(%r4,%r2)
	stc	%r0,0(%r4,%r2)
	stc	%r1,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_dec_steal
	la	%r4,0(%r7)		# restore real %r4

	lm	%r8,%r11,80(%r15)	# load the 1st tweak
	x	%r8,0(%r4)		# tweak^=*(inp)|stolen cipher-text
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	st	%r4,4*4(%r15)
	la	%r4,0(%r5)		# key1 for the data decryption
	bras	%r14,_s390x_AES_decrypt
	l	%r4,4*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)
	stg	%r15,80-16+0(%r15)	# wipe 2nd tweak
	stg	%r15,80-16+8(%r15)
.Lxts_dec_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_xts_decrypt,.-AES_xts_decrypt
2367 .string	"AES for s390x, CRYPTOGAMS by <appro@openssl.org>"
2368