1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2  #
3  # Accelerated AES-GCM stitched implementation for ppc64le.
4  #
5  # Copyright 2022- IBM Inc. All rights reserved
6  #
7  #===================================================================================
8  # Written by Danny Tsen <dtsen@linux.ibm.com>
9  #
10  # GHASH is based on the Karatsuba multiplication method.
11  #
12  #    Xi xor X1
13  #
 #    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
 #      (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
16  #      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
17  #      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
18  #      (X4.h * H.h + X4.l * H.l + X4 * H)
19  #
20  # Xi = v0
21  # H Poly = v2
22  # Hash keys = v3 - v14
23  #     ( H.l, H, H.h)
24  #     ( H^2.l, H^2, H^2.h)
25  #     ( H^3.l, H^3, H^3.h)
26  #     ( H^4.l, H^4, H^4.h)
27  #
28  # v30 is IV
29  # v31 - counter 1
30  #
31  # AES used,
32  #     vs0 - vs14 for round keys
33  #     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
34  #
 # This implementation uses a stitched AES-GCM approach to improve overall performance.
 # AES is implemented with 8x blocks and GHASH is done with two 4x blocks.
37  #
38  # ===================================================================================
39  #
40 
41 #include <asm/ppc_asm.h>
42 #include <linux/linkage.h>
43 
44 .machine        "any"
45 .text
46 
47  # 4x loops
48  # v15 - v18 - input states
49  # vs1 - vs9 - round keys
50  #
51 .macro Loop_aes_middle4x
52 	xxlor	19+32, 1, 1
53 	xxlor	20+32, 2, 2
54 	xxlor	21+32, 3, 3
55 	xxlor	22+32, 4, 4
56 
57 	vcipher	15, 15, 19
58 	vcipher	16, 16, 19
59 	vcipher	17, 17, 19
60 	vcipher	18, 18, 19
61 
62 	vcipher	15, 15, 20
63 	vcipher	16, 16, 20
64 	vcipher	17, 17, 20
65 	vcipher	18, 18, 20
66 
67 	vcipher	15, 15, 21
68 	vcipher	16, 16, 21
69 	vcipher	17, 17, 21
70 	vcipher	18, 18, 21
71 
72 	vcipher	15, 15, 22
73 	vcipher	16, 16, 22
74 	vcipher	17, 17, 22
75 	vcipher	18, 18, 22
76 
77 	xxlor	19+32, 5, 5
78 	xxlor	20+32, 6, 6
79 	xxlor	21+32, 7, 7
80 	xxlor	22+32, 8, 8
81 
82 	vcipher	15, 15, 19
83 	vcipher	16, 16, 19
84 	vcipher	17, 17, 19
85 	vcipher	18, 18, 19
86 
87 	vcipher	15, 15, 20
88 	vcipher	16, 16, 20
89 	vcipher	17, 17, 20
90 	vcipher	18, 18, 20
91 
92 	vcipher	15, 15, 21
93 	vcipher	16, 16, 21
94 	vcipher	17, 17, 21
95 	vcipher	18, 18, 21
96 
97 	vcipher	15, 15, 22
98 	vcipher	16, 16, 22
99 	vcipher	17, 17, 22
100 	vcipher	18, 18, 22
101 
102 	xxlor	23+32, 9, 9
103 	vcipher	15, 15, 23
104 	vcipher	16, 16, 23
105 	vcipher	17, 17, 23
106 	vcipher	18, 18, 23
107 .endm
108 
109  # 8x loops
110  # v15 - v22 - input states
111  # vs1 - vs9 - round keys
112  #
113 .macro Loop_aes_middle8x
114 	xxlor	23+32, 1, 1
115 	xxlor	24+32, 2, 2
116 	xxlor	25+32, 3, 3
117 	xxlor	26+32, 4, 4
118 
119 	vcipher	15, 15, 23
120 	vcipher	16, 16, 23
121 	vcipher	17, 17, 23
122 	vcipher	18, 18, 23
123 	vcipher	19, 19, 23
124 	vcipher	20, 20, 23
125 	vcipher	21, 21, 23
126 	vcipher	22, 22, 23
127 
128 	vcipher	15, 15, 24
129 	vcipher	16, 16, 24
130 	vcipher	17, 17, 24
131 	vcipher	18, 18, 24
132 	vcipher	19, 19, 24
133 	vcipher	20, 20, 24
134 	vcipher	21, 21, 24
135 	vcipher	22, 22, 24
136 
137 	vcipher	15, 15, 25
138 	vcipher	16, 16, 25
139 	vcipher	17, 17, 25
140 	vcipher	18, 18, 25
141 	vcipher	19, 19, 25
142 	vcipher	20, 20, 25
143 	vcipher	21, 21, 25
144 	vcipher	22, 22, 25
145 
146 	vcipher	15, 15, 26
147 	vcipher	16, 16, 26
148 	vcipher	17, 17, 26
149 	vcipher	18, 18, 26
150 	vcipher	19, 19, 26
151 	vcipher	20, 20, 26
152 	vcipher	21, 21, 26
153 	vcipher	22, 22, 26
154 
155 	xxlor	23+32, 5, 5
156 	xxlor	24+32, 6, 6
157 	xxlor	25+32, 7, 7
158 	xxlor	26+32, 8, 8
159 
160 	vcipher	15, 15, 23
161 	vcipher	16, 16, 23
162 	vcipher	17, 17, 23
163 	vcipher	18, 18, 23
164 	vcipher	19, 19, 23
165 	vcipher	20, 20, 23
166 	vcipher	21, 21, 23
167 	vcipher	22, 22, 23
168 
169 	vcipher	15, 15, 24
170 	vcipher	16, 16, 24
171 	vcipher	17, 17, 24
172 	vcipher	18, 18, 24
173 	vcipher	19, 19, 24
174 	vcipher	20, 20, 24
175 	vcipher	21, 21, 24
176 	vcipher	22, 22, 24
177 
178 	vcipher	15, 15, 25
179 	vcipher	16, 16, 25
180 	vcipher	17, 17, 25
181 	vcipher	18, 18, 25
182 	vcipher	19, 19, 25
183 	vcipher	20, 20, 25
184 	vcipher	21, 21, 25
185 	vcipher	22, 22, 25
186 
187 	vcipher	15, 15, 26
188 	vcipher	16, 16, 26
189 	vcipher	17, 17, 26
190 	vcipher	18, 18, 26
191 	vcipher	19, 19, 26
192 	vcipher	20, 20, 26
193 	vcipher	21, 21, 26
194 	vcipher	22, 22, 26
195 
196 	xxlor	23+32, 9, 9
197 	vcipher	15, 15, 23
198 	vcipher	16, 16, 23
199 	vcipher	17, 17, 23
200 	vcipher	18, 18, 23
201 	vcipher	19, 19, 23
202 	vcipher	20, 20, 23
203 	vcipher	21, 21, 23
204 	vcipher	22, 22, 23
205 .endm
206 
207 .macro Loop_aes_middle_1x
208 	xxlor	19+32, 1, 1
209 	xxlor	20+32, 2, 2
210 	xxlor	21+32, 3, 3
211 	xxlor	22+32, 4, 4
212 
213 	vcipher 15, 15, 19
214 	vcipher 15, 15, 20
215 	vcipher 15, 15, 21
216 	vcipher 15, 15, 22
217 
218 	xxlor	19+32, 5, 5
219 	xxlor	20+32, 6, 6
220 	xxlor	21+32, 7, 7
221 	xxlor	22+32, 8, 8
222 
223 	vcipher 15, 15, 19
224 	vcipher 15, 15, 20
225 	vcipher 15, 15, 21
226 	vcipher 15, 15, 22
227 
228 	xxlor	19+32, 9, 9
229 	vcipher 15, 15, 19
230 .endm
231 
232  #
 # Compute 4x hash values based on the Karatsuba method.
234  #
235 .macro ppc_aes_gcm_ghash
236 	vxor		15, 15, 0
237 
238 	vpmsumd		23, 12, 15		# H4.L * X.L
239 	vpmsumd		24, 9, 16
240 	vpmsumd		25, 6, 17
241 	vpmsumd		26, 3, 18
242 
243 	vxor		23, 23, 24
244 	vxor		23, 23, 25
245 	vxor		23, 23, 26		# L
246 
247 	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
248 	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
249 	vpmsumd		26, 7, 17
250 	vpmsumd		27, 4, 18
251 
252 	vxor		24, 24, 25
253 	vxor		24, 24, 26
254 	vxor		24, 24, 27		# M
255 
256 	# sum hash and reduction with H Poly
257 	vpmsumd		28, 23, 2		# reduction
258 
259 	vxor		29, 29, 29
260 	vsldoi		26, 24, 29, 8		# mL
261 	vsldoi		29, 29, 24, 8		# mH
262 	vxor		23, 23, 26		# mL + L
263 
264 	vsldoi		23, 23, 23, 8		# swap
265 	vxor		23, 23, 28
266 
267 	vpmsumd		24, 14, 15		# H4.H * X.H
268 	vpmsumd		25, 11, 16
269 	vpmsumd		26, 8, 17
270 	vpmsumd		27, 5, 18
271 
272 	vxor		24, 24, 25
273 	vxor		24, 24, 26
274 	vxor		24, 24, 27
275 
276 	vxor		24, 24, 29
277 
278 	# sum hash and reduction with H Poly
279 	vsldoi		27, 23, 23, 8		# swap
280 	vpmsumd		23, 23, 2
281 	vxor		27, 27, 24
282 	vxor		23, 23, 27
283 
284 	xxlor		32, 23+32, 23+32		# update hash
285 
286 .endm
287 
288  #
289  # Combine two 4x ghash
290  # v15 - v22 - input blocks
291  #
292 .macro ppc_aes_gcm_ghash2_4x
293 	# first 4x hash
294 	vxor		15, 15, 0		# Xi + X
295 
296 	vpmsumd		23, 12, 15		# H4.L * X.L
297 	vpmsumd		24, 9, 16
298 	vpmsumd		25, 6, 17
299 	vpmsumd		26, 3, 18
300 
301 	vxor		23, 23, 24
302 	vxor		23, 23, 25
303 	vxor		23, 23, 26		# L
304 
305 	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
306 	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
307 	vpmsumd		26, 7, 17
308 	vpmsumd		27, 4, 18
309 
310 	vxor		24, 24, 25
311 	vxor		24, 24, 26
312 
313 	# sum hash and reduction with H Poly
314 	vpmsumd		28, 23, 2		# reduction
315 
316 	vxor		29, 29, 29
317 
318 	vxor		24, 24, 27		# M
319 	vsldoi		26, 24, 29, 8		# mL
320 	vsldoi		29, 29, 24, 8		# mH
321 	vxor		23, 23, 26		# mL + L
322 
323 	vsldoi		23, 23, 23, 8		# swap
324 	vxor		23, 23, 28
325 
326 	vpmsumd		24, 14, 15		# H4.H * X.H
327 	vpmsumd		25, 11, 16
328 	vpmsumd		26, 8, 17
329 	vpmsumd		27, 5, 18
330 
331 	vxor		24, 24, 25
332 	vxor		24, 24, 26
333 	vxor		24, 24, 27		# H
334 
335 	vxor		24, 24, 29		# H + mH
336 
337 	# sum hash and reduction with H Poly
338 	vsldoi		27, 23, 23, 8		# swap
339 	vpmsumd		23, 23, 2
340 	vxor		27, 27, 24
341 	vxor		27, 23, 27		# 1st Xi
342 
343 	# 2nd 4x hash
344 	vpmsumd		24, 9, 20
345 	vpmsumd		25, 6, 21
346 	vpmsumd		26, 3, 22
347 	vxor		19, 19, 27		# Xi + X
348 	vpmsumd		23, 12, 19		# H4.L * X.L
349 
350 	vxor		23, 23, 24
351 	vxor		23, 23, 25
352 	vxor		23, 23, 26		# L
353 
354 	vpmsumd		24, 13, 19		# H4.L * X.H + H4.H * X.L
355 	vpmsumd		25, 10, 20		# H3.L * X1.H + H3.H * X1.L
356 	vpmsumd		26, 7, 21
357 	vpmsumd		27, 4, 22
358 
359 	vxor		24, 24, 25
360 	vxor		24, 24, 26
361 
362 	# sum hash and reduction with H Poly
363 	vpmsumd		28, 23, 2		# reduction
364 
365 	vxor		29, 29, 29
366 
367 	vxor		24, 24, 27		# M
368 	vsldoi		26, 24, 29, 8		# mL
369 	vsldoi		29, 29, 24, 8		# mH
370 	vxor		23, 23, 26		# mL + L
371 
372 	vsldoi		23, 23, 23, 8		# swap
373 	vxor		23, 23, 28
374 
375 	vpmsumd		24, 14, 19		# H4.H * X.H
376 	vpmsumd		25, 11, 20
377 	vpmsumd		26, 8, 21
378 	vpmsumd		27, 5, 22
379 
380 	vxor		24, 24, 25
381 	vxor		24, 24, 26
382 	vxor		24, 24, 27		# H
383 
384 	vxor		24, 24, 29		# H + mH
385 
386 	# sum hash and reduction with H Poly
387 	vsldoi		27, 23, 23, 8		# swap
388 	vpmsumd		23, 23, 2
389 	vxor		27, 27, 24
390 	vxor		23, 23, 27
391 
392 	xxlor		32, 23+32, 23+32		# update hash
393 
394 .endm
395 
396  #
 # Compute and update a single hash value
398  #
399 .macro ppc_update_hash_1x
400 	vxor		28, 28, 0
401 
402 	vxor		19, 19, 19
403 
404 	vpmsumd		22, 3, 28		# L
405 	vpmsumd		23, 4, 28		# M
406 	vpmsumd		24, 5, 28		# H
407 
408 	vpmsumd		27, 22, 2		# reduction
409 
410 	vsldoi		25, 23, 19, 8		# mL
411 	vsldoi		26, 19, 23, 8		# mH
	vxor		22, 22, 25		# L + mL
	vxor		24, 24, 26		# H + mH
414 
415 	vsldoi		22, 22, 22, 8		# swap
416 	vxor		22, 22, 27
417 
418 	vsldoi		20, 22, 22, 8		# swap
419 	vpmsumd		22, 22, 2		# reduction
420 	vxor		20, 20, 24
421 	vxor		22, 22, 20
422 
423 	vmr		0, 22			# update hash
424 
425 .endm
426 
427 .macro SAVE_REGS
428 	stdu 1,-640(1)
429 	mflr 0
430 
431 	std	14,112(1)
432 	std	15,120(1)
433 	std	16,128(1)
434 	std	17,136(1)
435 	std	18,144(1)
436 	std	19,152(1)
437 	std	20,160(1)
438 	std	21,168(1)
439 	li	9, 256
440 	stvx	20, 9, 1
441 	addi	9, 9, 16
442 	stvx	21, 9, 1
443 	addi	9, 9, 16
444 	stvx	22, 9, 1
445 	addi	9, 9, 16
446 	stvx	23, 9, 1
447 	addi	9, 9, 16
448 	stvx	24, 9, 1
449 	addi	9, 9, 16
450 	stvx	25, 9, 1
451 	addi	9, 9, 16
452 	stvx	26, 9, 1
453 	addi	9, 9, 16
454 	stvx	27, 9, 1
455 	addi	9, 9, 16
456 	stvx	28, 9, 1
457 	addi	9, 9, 16
458 	stvx	29, 9, 1
459 	addi	9, 9, 16
460 	stvx	30, 9, 1
461 	addi	9, 9, 16
462 	stvx	31, 9, 1
463 	stxv	14, 464(1)
464 	stxv	15, 480(1)
465 	stxv	16, 496(1)
466 	stxv	17, 512(1)
467 	stxv	18, 528(1)
468 	stxv	19, 544(1)
469 	stxv	20, 560(1)
470 	stxv	21, 576(1)
471 	stxv	22, 592(1)
472 	std	0, 656(1)
473 .endm
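
 #
 # Stack frame used by SAVE_REGS/RESTORE_REGS (as laid out above):
 #   stdu creates a 640-byte frame
 #   112(r1)-168(r1)  non-volatile GPRs r14 - r21
 #   256(r1)-432(r1)  non-volatile VRs  v20 - v31 (16 bytes each)
 #   464(r1)-592(r1)  non-volatile VSRs vs14 - vs22
 #   656(r1)          saved LR (640 + 16, the LR slot in the caller's frame)
 #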
474 
475 .macro RESTORE_REGS
476 	lxv	14, 464(1)
477 	lxv	15, 480(1)
478 	lxv	16, 496(1)
479 	lxv	17, 512(1)
480 	lxv	18, 528(1)
481 	lxv	19, 544(1)
482 	lxv	20, 560(1)
483 	lxv	21, 576(1)
484 	lxv	22, 592(1)
485 	li	9, 256
486 	lvx	20, 9, 1
487 	addi	9, 9, 16
488 	lvx	21, 9, 1
489 	addi	9, 9, 16
490 	lvx	22, 9, 1
491 	addi	9, 9, 16
492 	lvx	23, 9, 1
493 	addi	9, 9, 16
494 	lvx	24, 9, 1
495 	addi	9, 9, 16
496 	lvx	25, 9, 1
497 	addi	9, 9, 16
498 	lvx	26, 9, 1
499 	addi	9, 9, 16
500 	lvx	27, 9, 1
501 	addi	9, 9, 16
502 	lvx	28, 9, 1
503 	addi	9, 9, 16
504 	lvx	29, 9, 1
505 	addi	9, 9, 16
506 	lvx	30, 9, 1
507 	addi	9, 9, 16
508 	lvx	31, 9, 1
509 
510 	ld	0, 656(1)
511 	ld      14,112(1)
512 	ld      15,120(1)
513 	ld      16,128(1)
514 	ld      17,136(1)
515 	ld      18,144(1)
516 	ld      19,152(1)
517 	ld      20,160(1)
518 	ld	21,168(1)
519 
520 	mtlr	0
521 	addi	1, 1, 640
522 .endm
523 
524 .macro LOAD_HASH_TABLE
525 	# Load Xi
526 	lxvb16x	32, 0, 8	# load Xi
527 
	# load hash keys - h, h^2, h^3, h^4
529 	li	10, 32
530 	lxvd2x	2+32, 10, 8	# H Poli
531 	li	10, 48
532 	lxvd2x	3+32, 10, 8	# Hl
533 	li	10, 64
534 	lxvd2x	4+32, 10, 8	# H
535 	li	10, 80
536 	lxvd2x	5+32, 10, 8	# Hh
537 
538 	li	10, 96
539 	lxvd2x	6+32, 10, 8	# H^2l
540 	li	10, 112
541 	lxvd2x	7+32, 10, 8	# H^2
542 	li	10, 128
543 	lxvd2x	8+32, 10, 8	# H^2h
544 
545 	li	10, 144
546 	lxvd2x	9+32, 10, 8	# H^3l
547 	li	10, 160
548 	lxvd2x	10+32, 10, 8	# H^3
549 	li	10, 176
550 	lxvd2x	11+32, 10, 8	# H^3h
551 
552 	li	10, 192
553 	lxvd2x	12+32, 10, 8	# H^4l
554 	li	10, 208
555 	lxvd2x	13+32, 10, 8	# H^4
556 	li	10, 224
557 	lxvd2x	14+32, 10, 8	# H^4h
558 .endm
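
 #
 # Illustrative layout of the table at r8 (Xip), matching the offsets used
 # above (the authoritative definition lives in the C glue code):
 #
 #   0:   Xi (current hash value)
 #   32:  H Poly (reduction constant)
 #   48:  H.l     64:  H      80:  H.h
 #   96:  H^2.l   112: H^2    128: H^2.h
 #   144: H^3.l   160: H^3    176: H^3.h
 #   192: H^4.l   208: H^4    224: H^4.h
 #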
559 
560  #
561  # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
562  #               const char *rk, unsigned char iv[16], void *Xip);
563  #
564  #    r3 - inp
565  #    r4 - out
566  #    r5 - len
567  #    r6 - AES round keys
568  #    r7 - iv and other data
569  #    r8 - Xi, HPoli, hash keys
570  #
571  #    rounds is at offset 240 in rk
572  #    Xi is at 0 in gcm_table (Xip).
573  #
574 _GLOBAL(aes_p10_gcm_encrypt)
575 .align 5
576 
577 	SAVE_REGS
578 
579 	LOAD_HASH_TABLE
580 
581 	# initialize ICB: GHASH( IV ), IV - r7
582 	lxvb16x	30+32, 0, 7	# load IV  - v30
583 
584 	mr	12, 5		# length
585 	li	11, 0		# block index
586 
587 	# counter 1
588 	vxor	31, 31, 31
589 	vspltisb 22, 1
	vsldoi	31, 31, 22, 1	# counter 1
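	# v31 now has a 1 in its low-order byte only, so each vaddudm of
	# v30 below increments the counter portion of the ICB by one.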
591 
592 	# load round key to VSR
593 	lxv	0, 0(6)
594 	lxv	1, 0x10(6)
595 	lxv	2, 0x20(6)
596 	lxv	3, 0x30(6)
597 	lxv	4, 0x40(6)
598 	lxv	5, 0x50(6)
599 	lxv	6, 0x60(6)
600 	lxv	7, 0x70(6)
601 	lxv	8, 0x80(6)
602 	lxv	9, 0x90(6)
603 	lxv	10, 0xa0(6)
604 
605 	# load rounds - 10 (128), 12 (192), 14 (256)
606 	lwz	9,240(6)
607 
608 	#
609 	# vxor	state, state, w # addroundkey
610 	xxlor	32+29, 0, 0
611 	vxor	15, 30, 29	# IV + round key - add round key 0
612 
613 	cmpdi	9, 10
614 	beq	Loop_aes_gcm_8x
615 
616 	# load 2 more round keys (v11, v12)
617 	lxv	11, 0xb0(6)
618 	lxv	12, 0xc0(6)
619 
620 	cmpdi	9, 12
621 	beq	Loop_aes_gcm_8x
622 
	# load 2 more round keys (v13, v14)
624 	lxv	13, 0xd0(6)
625 	lxv	14, 0xe0(6)
626 	cmpdi	9, 14
627 	beq	Loop_aes_gcm_8x
628 
629 	b	aes_gcm_out
630 
631 .align 5
632 Loop_aes_gcm_8x:
633 	mr	14, 3
634 	mr	9, 4
635 
636 	#
637 	# check partial block
638 	#
639 Continue_partial_check:
640 	ld	15, 56(7)
641 	cmpdi	15, 0
642 	beq	Continue
643 	bgt	Final_block
644 	cmpdi	15, 16
645 	blt	Final_block
646 
647 Continue:
	# n blocks
649 	li	10, 128
	divdu	10, 12, 10	# n 128-byte blocks
651 	cmpdi	10, 0
652 	beq	Loop_last_block
653 
654 	vaddudm	30, 30, 31	# IV + counter
655 	vxor	16, 30, 29
656 	vaddudm	30, 30, 31
657 	vxor	17, 30, 29
658 	vaddudm	30, 30, 31
659 	vxor	18, 30, 29
660 	vaddudm	30, 30, 31
661 	vxor	19, 30, 29
662 	vaddudm	30, 30, 31
663 	vxor	20, 30, 29
664 	vaddudm	30, 30, 31
665 	vxor	21, 30, 29
666 	vaddudm	30, 30, 31
667 	vxor	22, 30, 29
668 
669 	mtctr	10
670 
671 	li	15, 16
672 	li	16, 32
673 	li	17, 48
674 	li	18, 64
675 	li	19, 80
676 	li	20, 96
677 	li	21, 112
678 
679 	lwz	10, 240(6)
680 
681 Loop_8x_block:
682 
683 	lxvb16x		15, 0, 14	# load block
684 	lxvb16x		16, 15, 14	# load block
685 	lxvb16x		17, 16, 14	# load block
686 	lxvb16x		18, 17, 14	# load block
687 	lxvb16x		19, 18, 14	# load block
688 	lxvb16x		20, 19, 14	# load block
689 	lxvb16x		21, 20, 14	# load block
690 	lxvb16x		22, 21, 14	# load block
691 	addi		14, 14, 128
692 
693 	Loop_aes_middle8x
694 
695 	xxlor	23+32, 10, 10
696 
697 	cmpdi	10, 10
698 	beq	Do_next_ghash
699 
700 	# 192 bits
701 	xxlor	24+32, 11, 11
702 
703 	vcipher	15, 15, 23
704 	vcipher	16, 16, 23
705 	vcipher	17, 17, 23
706 	vcipher	18, 18, 23
707 	vcipher	19, 19, 23
708 	vcipher	20, 20, 23
709 	vcipher	21, 21, 23
710 	vcipher	22, 22, 23
711 
712 	vcipher	15, 15, 24
713 	vcipher	16, 16, 24
714 	vcipher	17, 17, 24
715 	vcipher	18, 18, 24
716 	vcipher	19, 19, 24
717 	vcipher	20, 20, 24
718 	vcipher	21, 21, 24
719 	vcipher	22, 22, 24
720 
721 	xxlor	23+32, 12, 12
722 
723 	cmpdi	10, 12
724 	beq	Do_next_ghash
725 
726 	# 256 bits
727 	xxlor	24+32, 13, 13
728 
729 	vcipher	15, 15, 23
730 	vcipher	16, 16, 23
731 	vcipher	17, 17, 23
732 	vcipher	18, 18, 23
733 	vcipher	19, 19, 23
734 	vcipher	20, 20, 23
735 	vcipher	21, 21, 23
736 	vcipher	22, 22, 23
737 
738 	vcipher	15, 15, 24
739 	vcipher	16, 16, 24
740 	vcipher	17, 17, 24
741 	vcipher	18, 18, 24
742 	vcipher	19, 19, 24
743 	vcipher	20, 20, 24
744 	vcipher	21, 21, 24
745 	vcipher	22, 22, 24
746 
747 	xxlor	23+32, 14, 14
748 
749 	cmpdi	10, 14
750 	beq	Do_next_ghash
751 	b	aes_gcm_out
752 
753 Do_next_ghash:
754 
755 	#
756 	# last round
757 	vcipherlast     15, 15, 23
758 	vcipherlast     16, 16, 23
759 
760 	xxlxor		47, 47, 15
761 	stxvb16x        47, 0, 9	# store output
762 	xxlxor		48, 48, 16
763 	stxvb16x        48, 15, 9	# store output
764 
765 	vcipherlast     17, 17, 23
766 	vcipherlast     18, 18, 23
767 
768 	xxlxor		49, 49, 17
769 	stxvb16x        49, 16, 9	# store output
770 	xxlxor		50, 50, 18
771 	stxvb16x        50, 17, 9	# store output
772 
773 	vcipherlast     19, 19, 23
774 	vcipherlast     20, 20, 23
775 
776 	xxlxor		51, 51, 19
777 	stxvb16x        51, 18, 9	# store output
778 	xxlxor		52, 52, 20
779 	stxvb16x        52, 19, 9	# store output
780 
781 	vcipherlast     21, 21, 23
782 	vcipherlast     22, 22, 23
783 
784 	xxlxor		53, 53, 21
785 	stxvb16x        53, 20, 9	# store output
786 	xxlxor		54, 54, 22
787 	stxvb16x        54, 21, 9	# store output
788 
789 	addi		9, 9, 128
790 
791 	# ghash here
792 	ppc_aes_gcm_ghash2_4x
793 
794 	xxlor	27+32, 0, 0
795 	vaddudm 30, 30, 31		# IV + counter
796 	vmr	29, 30
797 	vxor    15, 30, 27		# add round key
798 	vaddudm 30, 30, 31
799 	vxor    16, 30, 27
800 	vaddudm 30, 30, 31
801 	vxor    17, 30, 27
802 	vaddudm 30, 30, 31
803 	vxor    18, 30, 27
804 	vaddudm 30, 30, 31
805 	vxor    19, 30, 27
806 	vaddudm 30, 30, 31
807 	vxor    20, 30, 27
808 	vaddudm 30, 30, 31
809 	vxor    21, 30, 27
810 	vaddudm 30, 30, 31
811 	vxor    22, 30, 27
812 
813 	addi    12, 12, -128
814 	addi    11, 11, 128
815 
816 	bdnz	Loop_8x_block
817 
818 	vmr	30, 29
819 	stxvb16x 30+32, 0, 7		# update IV
820 
821 Loop_last_block:
822 	cmpdi   12, 0
823 	beq     aes_gcm_out
824 
825 	# loop last few blocks
826 	li      10, 16
827 	divdu   10, 12, 10
828 
829 	mtctr   10
830 
831 	lwz	10, 240(6)
832 
833 	cmpdi   12, 16
834 	blt     Final_block
835 
836 Next_rem_block:
837 	lxvb16x 15, 0, 14		# load block
838 
839 	Loop_aes_middle_1x
840 
841 	xxlor	23+32, 10, 10
842 
843 	cmpdi	10, 10
844 	beq	Do_next_1x
845 
846 	# 192 bits
847 	xxlor	24+32, 11, 11
848 
849 	vcipher	15, 15, 23
850 	vcipher	15, 15, 24
851 
852 	xxlor	23+32, 12, 12
853 
854 	cmpdi	10, 12
855 	beq	Do_next_1x
856 
857 	# 256 bits
858 	xxlor	24+32, 13, 13
859 
860 	vcipher	15, 15, 23
861 	vcipher	15, 15, 24
862 
863 	xxlor	23+32, 14, 14
864 
865 	cmpdi	10, 14
866 	beq	Do_next_1x
867 
868 Do_next_1x:
869 	vcipherlast     15, 15, 23
870 
871 	xxlxor		47, 47, 15
872 	stxvb16x	47, 0, 9	# store output
873 	addi		14, 14, 16
874 	addi		9, 9, 16
875 
876 	vmr		28, 15
877 	ppc_update_hash_1x
878 
879 	addi		12, 12, -16
880 	addi		11, 11, 16
881 	xxlor		19+32, 0, 0
882 	vaddudm		30, 30, 31		# IV + counter
883 	vxor		15, 30, 19		# add round key
884 
885 	bdnz	Next_rem_block
886 
887 	li	15, 0
888 	std	15, 56(7)		# clear partial?
889 	stxvb16x 30+32, 0, 7		# update IV
890 	cmpdi	12, 0
891 	beq	aes_gcm_out
892 
893 Final_block:
894 	lwz	10, 240(6)
895 	Loop_aes_middle_1x
896 
897 	xxlor	23+32, 10, 10
898 
899 	cmpdi	10, 10
900 	beq	Do_final_1x
901 
902 	# 192 bits
903 	xxlor	24+32, 11, 11
904 
905 	vcipher	15, 15, 23
906 	vcipher	15, 15, 24
907 
908 	xxlor	23+32, 12, 12
909 
910 	cmpdi	10, 12
911 	beq	Do_final_1x
912 
913 	# 256 bits
914 	xxlor	24+32, 13, 13
915 
916 	vcipher	15, 15, 23
917 	vcipher	15, 15, 24
918 
919 	xxlor	23+32, 14, 14
920 
921 	cmpdi	10, 14
922 	beq	Do_final_1x
923 
924 Do_final_1x:
925 	vcipherlast     15, 15, 23
926 
927 	# check partial block
928 	li	21, 0			# encrypt
929 	ld	15, 56(7)		# partial?
930 	cmpdi	15, 0
931 	beq	Normal_block
932 	bl	Do_partial_block
933 
934 	cmpdi	12, 0
935 	ble aes_gcm_out
936 
937 	b Continue_partial_check
938 
939 Normal_block:
940 	lxvb16x	15, 0, 14		# load last block
941 	xxlxor	47, 47, 15
942 
943 	# create partial block mask
944 	li	15, 16
945 	sub	15, 15, 12		# index to the mask
946 
947 	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
948 	vspltisb	17, 0		# second 16 bytes - 0x0000...00
949 	li	10, 192
950 	stvx	16, 10, 1
951 	addi	10, 10, 16
952 	stvx	17, 10, 1
953 
954 	addi	10, 1, 192
955 	lxvb16x	16, 15, 10		# load partial block mask
956 	xxland	47, 47, 16
957 
958 	vmr	28, 15
959 	ppc_update_hash_1x
960 
	# should store only the remaining bytes.
962 	bl	Write_partial_block
963 
964 	stxvb16x 30+32, 0, 7		# update IV
965 	std	12, 56(7)		# update partial?
966 	li	16, 16
967 
968 	stxvb16x	32, 0, 8		# write out Xi
969 	stxvb16x	32, 16, 8		# write out Xi
970 	b aes_gcm_out
971 
972  #
973  # Compute data mask
974  #
975 .macro GEN_MASK _mask _start _end
976 	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
977 	vspltisb	17, 0		# second 16 bytes - 0x0000...00
978 	li	10, 192
979 	stxvb16x	17+32, 10, 1
980 	add	10, 10, \_start
981 	stxvb16x	16+32, 10, 1
982 	add	10, 10, \_end
983 	stxvb16x	17+32, 10, 1
984 
985 	addi	10, 1, 192
986 	lxvb16x	\_mask, 0, 10		# load partial block mask
987 .endm
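
 #
 # The mask built by GEN_MASK has _start leading zero bytes, then _end
 # bytes of 0xff, then zeroes again - i.e. it selects bytes
 # [_start, _start + _end) of a 16-byte block.  Scratch space at 192(r1)
 # is reused to build it.
 #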
988 
989  #
990  # Handle multiple partial blocks for encrypt and decrypt
991  #   operations.
992  #
993 SYM_FUNC_START_LOCAL(Do_partial_block)
994 	add	17, 15, 5
995 	cmpdi	17, 16
996 	bgt	Big_block
997 	GEN_MASK 18, 15, 5
998 	b	_Partial
999 SYM_FUNC_END(Do_partial_block)
1000 Big_block:
1001 	li	16, 16
1002 	GEN_MASK 18, 15, 16
1003 
1004 _Partial:
1005 	lxvb16x	17+32, 0, 14		# load last block
1006 	sldi	16, 15, 3
1007 	mtvsrdd	32+16, 0, 16
1008 	vsro	17, 17, 16
1009 	xxlxor	47, 47, 17+32
1010 	xxland	47, 47, 18
1011 
1012 	vxor	0, 0, 0			# clear Xi
1013 	vmr	28, 15
1014 
1015 	cmpdi	21, 0			# encrypt/decrypt ops?
1016 	beq	Skip_decrypt
1017 	xxland	32+28, 32+17, 18
1018 
1019 Skip_decrypt:
1020 
1021 	ppc_update_hash_1x
1022 
1023 	li	16, 16
1024 	lxvb16x 32+29, 16, 8
1025 	vxor	0, 0, 29
1026 	stxvb16x 32, 0, 8		# save Xi
1027 	stxvb16x 32, 16, 8		# save Xi
1028 
1029 	# store partial block
1030 	# loop the rest of the stream if any
1031 	sldi	16, 15, 3
1032 	mtvsrdd	32+16, 0, 16
1033 	vslo	15, 15, 16
1034 	#stxvb16x 15+32, 0, 9		# last block
1035 
1036 	li	16, 16
1037 	sub	17, 16, 15		# 16 - partial
1038 
1039 	add	16, 15, 5
1040 	cmpdi	16, 16
1041 	bgt	Larger_16
1042 	mr	17, 5
1043 Larger_16:
1044 
1045 	# write partial
1046 	li		10, 192
1047 	stxvb16x	15+32, 10, 1	# save current block
1048 
1049 	addi		10, 9, -1
1050 	addi		16, 1, 191
1051 	mtctr		17		# move partial byte count
1052 
1053 Write_last_partial:
	lbzu		18, 1(16)
	stbu		18, 1(10)
	bdnz		Write_last_partial
1057 	# Complete loop partial
1058 
1059 	add	14, 14, 17
1060 	add	9, 9, 17
1061 	sub	12, 12, 17
1062 	add	11, 11, 17
1063 
1064 	add	15, 15, 5
1065 	cmpdi	15, 16
1066 	blt	Save_partial
1067 
1068 	vaddudm	30, 30, 31
1069 	stxvb16x 30+32, 0, 7		# update IV
1070 	xxlor	32+29, 0, 0
1071 	vxor	15, 30, 29		# IV + round key - add round key 0
1072 	li	15, 0
1073 	std	15, 56(7)		# partial done - clear
1074 	b	Partial_done
1075 Save_partial:
1076 	std	15, 56(7)		# partial
1077 
1078 Partial_done:
1079 	blr
1080 
1081  #
1082  # Write partial block
1083  # r9 - output
1084  # r12 - remaining bytes
1085  # v15 - partial input data
1086  #
1087 SYM_FUNC_START_LOCAL(Write_partial_block)
1088 	li		10, 192
1089 	stxvb16x	15+32, 10, 1		# last block
1090 
1091 	addi		10, 9, -1
1092 	addi		16, 1, 191
1093 
	mtctr		12			# remaining bytes
1095 	li		15, 0
1096 
1097 Write_last_byte:
	lbzu		14, 1(16)
	stbu		14, 1(10)
	bdnz		Write_last_byte
1101 	blr
1102 SYM_FUNC_END(Write_partial_block)
1103 
1104 aes_gcm_out:
1105 	# out = state
1106 	stxvb16x	32, 0, 8		# write out Xi
1107 	add	3, 11, 12		# return count
1108 
1109 	RESTORE_REGS
1110 	blr
1111 
1112  #
1113  # 8x Decrypt
1114  #
1115 _GLOBAL(aes_p10_gcm_decrypt)
1116 .align 5
1117 
1118 	SAVE_REGS
1119 
1120 	LOAD_HASH_TABLE
1121 
1122 	# initialize ICB: GHASH( IV ), IV - r7
1123 	lxvb16x	30+32, 0, 7	# load IV  - v30
1124 
1125 	mr	12, 5		# length
1126 	li	11, 0		# block index
1127 
1128 	# counter 1
1129 	vxor	31, 31, 31
1130 	vspltisb 22, 1
	vsldoi	31, 31, 22, 1	# counter 1
1132 
1133 	# load round key to VSR
1134 	lxv	0, 0(6)
1135 	lxv	1, 0x10(6)
1136 	lxv	2, 0x20(6)
1137 	lxv	3, 0x30(6)
1138 	lxv	4, 0x40(6)
1139 	lxv	5, 0x50(6)
1140 	lxv	6, 0x60(6)
1141 	lxv	7, 0x70(6)
1142 	lxv	8, 0x80(6)
1143 	lxv	9, 0x90(6)
1144 	lxv	10, 0xa0(6)
1145 
1146 	# load rounds - 10 (128), 12 (192), 14 (256)
1147 	lwz	9,240(6)
1148 
1149 	#
1150 	# vxor	state, state, w # addroundkey
1151 	xxlor	32+29, 0, 0
1152 	vxor	15, 30, 29	# IV + round key - add round key 0
1153 
1154 	cmpdi	9, 10
1155 	beq	Loop_aes_gcm_8x_dec
1156 
1157 	# load 2 more round keys (v11, v12)
1158 	lxv	11, 0xb0(6)
1159 	lxv	12, 0xc0(6)
1160 
1161 	cmpdi	9, 12
1162 	beq	Loop_aes_gcm_8x_dec
1163 
	# load 2 more round keys (v13, v14)
1165 	lxv	13, 0xd0(6)
1166 	lxv	14, 0xe0(6)
1167 	cmpdi	9, 14
1168 	beq	Loop_aes_gcm_8x_dec
1169 
1170 	b	aes_gcm_out
1171 
1172 .align 5
1173 Loop_aes_gcm_8x_dec:
1174 	mr	14, 3
1175 	mr	9, 4
1176 
1177 	#
1178 	# check partial block
1179 	#
1180 Continue_partial_check_dec:
1181 	ld	15, 56(7)
1182 	cmpdi	15, 0
1183 	beq	Continue_dec
1184 	bgt	Final_block_dec
1185 	cmpdi	15, 16
1186 	blt	Final_block_dec
1187 
1188 Continue_dec:
	# n blocks
1190 	li	10, 128
	divdu	10, 12, 10	# n 128-byte blocks
1192 	cmpdi	10, 0
1193 	beq	Loop_last_block_dec
1194 
1195 	vaddudm	30, 30, 31	# IV + counter
1196 	vxor	16, 30, 29
1197 	vaddudm	30, 30, 31
1198 	vxor	17, 30, 29
1199 	vaddudm	30, 30, 31
1200 	vxor	18, 30, 29
1201 	vaddudm	30, 30, 31
1202 	vxor	19, 30, 29
1203 	vaddudm	30, 30, 31
1204 	vxor	20, 30, 29
1205 	vaddudm	30, 30, 31
1206 	vxor	21, 30, 29
1207 	vaddudm	30, 30, 31
1208 	vxor	22, 30, 29
1209 
1210 	mtctr	10
1211 
1212 	li	15, 16
1213 	li	16, 32
1214 	li	17, 48
1215 	li	18, 64
1216 	li	19, 80
1217 	li	20, 96
1218 	li	21, 112
1219 
1220 	lwz	10, 240(6)
1221 
1222 Loop_8x_block_dec:
1223 
1224 	lxvb16x		15, 0, 14	# load block
1225 	lxvb16x		16, 15, 14	# load block
1226 	lxvb16x		17, 16, 14	# load block
1227 	lxvb16x		18, 17, 14	# load block
1228 	lxvb16x		19, 18, 14	# load block
1229 	lxvb16x		20, 19, 14	# load block
1230 	lxvb16x		21, 20, 14	# load block
1231 	lxvb16x		22, 21, 14	# load block
1232 	addi		14, 14, 128
1233 
1234 	Loop_aes_middle8x
1235 
1236 	xxlor	23+32, 10, 10
1237 
1238 	cmpdi	10, 10
1239 	beq	Do_next_ghash_dec
1240 
1241 	# 192 bits
1242 	xxlor	24+32, 11, 11
1243 
1244 	vcipher	15, 15, 23
1245 	vcipher	16, 16, 23
1246 	vcipher	17, 17, 23
1247 	vcipher	18, 18, 23
1248 	vcipher	19, 19, 23
1249 	vcipher	20, 20, 23
1250 	vcipher	21, 21, 23
1251 	vcipher	22, 22, 23
1252 
1253 	vcipher	15, 15, 24
1254 	vcipher	16, 16, 24
1255 	vcipher	17, 17, 24
1256 	vcipher	18, 18, 24
1257 	vcipher	19, 19, 24
1258 	vcipher	20, 20, 24
1259 	vcipher	21, 21, 24
1260 	vcipher	22, 22, 24
1261 
1262 	xxlor	23+32, 12, 12
1263 
1264 	cmpdi	10, 12
1265 	beq	Do_next_ghash_dec
1266 
1267 	# 256 bits
1268 	xxlor	24+32, 13, 13
1269 
1270 	vcipher	15, 15, 23
1271 	vcipher	16, 16, 23
1272 	vcipher	17, 17, 23
1273 	vcipher	18, 18, 23
1274 	vcipher	19, 19, 23
1275 	vcipher	20, 20, 23
1276 	vcipher	21, 21, 23
1277 	vcipher	22, 22, 23
1278 
1279 	vcipher	15, 15, 24
1280 	vcipher	16, 16, 24
1281 	vcipher	17, 17, 24
1282 	vcipher	18, 18, 24
1283 	vcipher	19, 19, 24
1284 	vcipher	20, 20, 24
1285 	vcipher	21, 21, 24
1286 	vcipher	22, 22, 24
1287 
1288 	xxlor	23+32, 14, 14
1289 
1290 	cmpdi	10, 14
1291 	beq	Do_next_ghash_dec
1292 	b	aes_gcm_out
1293 
1294 Do_next_ghash_dec:
1295 
1296 	#
1297 	# last round
1298 	vcipherlast     15, 15, 23
1299 	vcipherlast     16, 16, 23
1300 
1301 	xxlxor		47, 47, 15
1302 	stxvb16x        47, 0, 9	# store output
1303 	xxlxor		48, 48, 16
1304 	stxvb16x        48, 15, 9	# store output
1305 
1306 	vcipherlast     17, 17, 23
1307 	vcipherlast     18, 18, 23
1308 
1309 	xxlxor		49, 49, 17
1310 	stxvb16x        49, 16, 9	# store output
1311 	xxlxor		50, 50, 18
1312 	stxvb16x        50, 17, 9	# store output
1313 
1314 	vcipherlast     19, 19, 23
1315 	vcipherlast     20, 20, 23
1316 
1317 	xxlxor		51, 51, 19
1318 	stxvb16x        51, 18, 9	# store output
1319 	xxlxor		52, 52, 20
1320 	stxvb16x        52, 19, 9	# store output
1321 
1322 	vcipherlast     21, 21, 23
1323 	vcipherlast     22, 22, 23
1324 
1325 	xxlxor		53, 53, 21
1326 	stxvb16x        53, 20, 9	# store output
1327 	xxlxor		54, 54, 22
1328 	stxvb16x        54, 21, 9	# store output
1329 
1330 	addi		9, 9, 128
1331 
1332 	xxlor           15+32, 15, 15
1333 	xxlor           16+32, 16, 16
1334 	xxlor           17+32, 17, 17
1335 	xxlor           18+32, 18, 18
1336 	xxlor           19+32, 19, 19
1337 	xxlor           20+32, 20, 20
1338 	xxlor           21+32, 21, 21
1339 	xxlor           22+32, 22, 22
1340 
1341 	# ghash here
1342 	ppc_aes_gcm_ghash2_4x
1343 
1344 	xxlor	27+32, 0, 0
1345 	vaddudm 30, 30, 31		# IV + counter
1346 	vmr	29, 30
1347 	vxor    15, 30, 27		# add round key
1348 	vaddudm 30, 30, 31
1349 	vxor    16, 30, 27
1350 	vaddudm 30, 30, 31
1351 	vxor    17, 30, 27
1352 	vaddudm 30, 30, 31
1353 	vxor    18, 30, 27
1354 	vaddudm 30, 30, 31
1355 	vxor    19, 30, 27
1356 	vaddudm 30, 30, 31
1357 	vxor    20, 30, 27
1358 	vaddudm 30, 30, 31
1359 	vxor    21, 30, 27
1360 	vaddudm 30, 30, 31
1361 	vxor    22, 30, 27
1362 
1363 	addi    12, 12, -128
1364 	addi    11, 11, 128
1365 
1366 	bdnz	Loop_8x_block_dec
1367 
1368 	vmr	30, 29
1369 	stxvb16x 30+32, 0, 7		# update IV
1370 
1371 Loop_last_block_dec:
1372 	cmpdi   12, 0
1373 	beq     aes_gcm_out
1374 
1375 	# loop last few blocks
1376 	li      10, 16
1377 	divdu   10, 12, 10
1378 
1379 	mtctr   10
1380 
1381 	lwz	10, 240(6)
1382 
1383 	cmpdi   12, 16
1384 	blt     Final_block_dec
1385 
1386 Next_rem_block_dec:
1387 	lxvb16x 15, 0, 14		# load block
1388 
1389 	Loop_aes_middle_1x
1390 
1391 	xxlor	23+32, 10, 10
1392 
1393 	cmpdi	10, 10
1394 	beq	Do_next_1x_dec
1395 
1396 	# 192 bits
1397 	xxlor	24+32, 11, 11
1398 
1399 	vcipher	15, 15, 23
1400 	vcipher	15, 15, 24
1401 
1402 	xxlor	23+32, 12, 12
1403 
1404 	cmpdi	10, 12
1405 	beq	Do_next_1x_dec
1406 
1407 	# 256 bits
1408 	xxlor	24+32, 13, 13
1409 
1410 	vcipher	15, 15, 23
1411 	vcipher	15, 15, 24
1412 
1413 	xxlor	23+32, 14, 14
1414 
1415 	cmpdi	10, 14
1416 	beq	Do_next_1x_dec
1417 
1418 Do_next_1x_dec:
1419 	vcipherlast     15, 15, 23
1420 
1421 	xxlxor		47, 47, 15
1422 	stxvb16x	47, 0, 9	# store output
1423 	addi		14, 14, 16
1424 	addi		9, 9, 16
1425 
1426 	xxlor           28+32, 15, 15
1427 	#vmr		28, 15
1428 	ppc_update_hash_1x
1429 
1430 	addi		12, 12, -16
1431 	addi		11, 11, 16
1432 	xxlor		19+32, 0, 0
1433 	vaddudm		30, 30, 31		# IV + counter
1434 	vxor		15, 30, 19		# add round key
1435 
1436 	bdnz	Next_rem_block_dec
1437 
1438 	li	15, 0
1439 	std	15, 56(7)		# clear partial?
1440 	stxvb16x 30+32, 0, 7		# update IV
1441 	cmpdi	12, 0
1442 	beq	aes_gcm_out
1443 
1444 Final_block_dec:
1445 	lwz	10, 240(6)
1446 	Loop_aes_middle_1x
1447 
1448 	xxlor	23+32, 10, 10
1449 
1450 	cmpdi	10, 10
1451 	beq	Do_final_1x_dec
1452 
1453 	# 192 bits
1454 	xxlor	24+32, 11, 11
1455 
1456 	vcipher	15, 15, 23
1457 	vcipher	15, 15, 24
1458 
1459 	xxlor	23+32, 12, 12
1460 
1461 	cmpdi	10, 12
1462 	beq	Do_final_1x_dec
1463 
1464 	# 256 bits
1465 	xxlor	24+32, 13, 13
1466 
1467 	vcipher	15, 15, 23
1468 	vcipher	15, 15, 24
1469 
1470 	xxlor	23+32, 14, 14
1471 
1472 	cmpdi	10, 14
1473 	beq	Do_final_1x_dec
1474 
1475 Do_final_1x_dec:
1476 	vcipherlast     15, 15, 23
1477 
1478 	# check partial block
1479 	li	21, 1			# decrypt
1480 	ld	15, 56(7)		# partial?
1481 	cmpdi	15, 0
1482 	beq	Normal_block_dec
1483 	bl	Do_partial_block
1484 	cmpdi	12, 0
1485 	ble aes_gcm_out
1486 
1487 	b Continue_partial_check_dec
1488 
1489 Normal_block_dec:
1490 	lxvb16x	15, 0, 14		# load last block
1491 	xxlxor	47, 47, 15
1492 
1493 	# create partial block mask
1494 	li	15, 16
1495 	sub	15, 15, 12		# index to the mask
1496 
1497 	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
1498 	vspltisb	17, 0		# second 16 bytes - 0x0000...00
1499 	li	10, 192
1500 	stvx	16, 10, 1
1501 	addi	10, 10, 16
1502 	stvx	17, 10, 1
1503 
1504 	addi	10, 1, 192
1505 	lxvb16x	16, 15, 10		# load partial block mask
1506 	xxland	47, 47, 16
1507 
1508 	xxland	32+28, 15, 16
1509 	#vmr	28, 15
1510 	ppc_update_hash_1x
1511 
	# should store only the remaining bytes.
1513 	bl	Write_partial_block
1514 
1515 	stxvb16x 30+32, 0, 7		# update IV
1516 	std	12, 56(7)		# update partial?
1517 	li	16, 16
1518 
1519 	stxvb16x	32, 0, 8		# write out Xi
1520 	stxvb16x	32, 16, 8		# write out Xi
1521 	b aes_gcm_out
1522