/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated chacha20 implementation for ppc64le.
#
# Copyright 2023- IBM Corp. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
#				 size_t len, int nrounds);
#
# Each loop iteration does 8 quarter rounds (one column round followed by
# one diagonal round).  A quarter round is:
# 1.  a += b; d ^= a; d <<<= 16;
# 2.  c += d; b ^= c; b <<<= 12;
# 3.  a += b; d ^= a; d <<<= 8;
# 4.  c += d; b ^= c; b <<<= 7
#
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 16
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 12
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 8
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 7
#
# 4 blocks (a b c d)
#
# a0 b0 c0 d0
# a1 b1 c1 d1
# ...
# a4 b4 c4 d4
# ...
# a8 b8 c8 d8
# ...
# a12 b12 c12 d12
# a13 ...
# a14 ...
# a15 b15 c15 d15
#
# Column round   (v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
# Diagonal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
#
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci#include <asm/ppc_asm.h>
4362306a36Sopenharmony_ci#include <asm/asm-offsets.h>
4462306a36Sopenharmony_ci#include <asm/asm-compat.h>
4562306a36Sopenharmony_ci#include <linux/linkage.h>
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci.machine	"any"
4862306a36Sopenharmony_ci.text
4962306a36Sopenharmony_ci
#
# SAVE_GPR GPR OFFSET FRAME
#   Store the 64-bit general-purpose register GPR at OFFSET bytes from
#   the base register FRAME (std, D-form displacement).
#
.macro	SAVE_GPR GPR OFFSET FRAME
	std	\GPR,\OFFSET(\FRAME)
.endm
5362306a36Sopenharmony_ci
#
# SAVE_VRS VRS OFFSET FRAME
#   Store vector register VRS with stvx at FRAME + OFFSET.
#   stvx is indexed-only, so OFFSET is first loaded into r16.
#   Clobbers: r16.
#
.macro	SAVE_VRS VRS OFFSET FRAME
	li	16, \OFFSET		# r16 = index for the X-form store
	stvx	\VRS, 16, \FRAME
.endm
5862306a36Sopenharmony_ci
#
# SAVE_VSX VSX OFFSET FRAME
#   Store vector-scalar register VSX with stxvx at FRAME + OFFSET.
#   OFFSET is first loaded into r16 to serve as the index register.
#   Clobbers: r16.
#
.macro	SAVE_VSX VSX OFFSET FRAME
	li	16, \OFFSET		# r16 = index for the X-form store
	stxvx	\VSX, 16, \FRAME
.endm
6362306a36Sopenharmony_ci
#
# RESTORE_GPR GPR OFFSET FRAME
#   Reload the 64-bit general-purpose register GPR from OFFSET bytes
#   past the base register FRAME (counterpart of SAVE_GPR).
#
.macro	RESTORE_GPR GPR OFFSET FRAME
	ld	\GPR,\OFFSET(\FRAME)
.endm
6762306a36Sopenharmony_ci
#
# RESTORE_VRS VRS OFFSET FRAME
#   Reload vector register VRS with lvx from FRAME + OFFSET
#   (counterpart of SAVE_VRS).
#   Clobbers: r16, which carries OFFSET as the index register.
#
.macro	RESTORE_VRS VRS OFFSET FRAME
	li	16, \OFFSET		# r16 = index for the X-form load
	lvx	\VRS, 16, \FRAME
.endm
7262306a36Sopenharmony_ci
#
# RESTORE_VSX VSX OFFSET FRAME
#   Reload vector-scalar register VSX with lxvx from FRAME + OFFSET
#   (counterpart of SAVE_VSX).
#   Clobbers: r16, which carries OFFSET as the index register.
#
.macro	RESTORE_VSX VSX OFFSET FRAME
	li	16, \OFFSET		# r16 = index for the X-form load
	lxvx	\VSX, 16, \FRAME
.endm
7762306a36Sopenharmony_ci
#
# SAVE_REGS
#   Function prologue: saves LR, allocates a 752-byte stack frame and
#   spills the registers this file uses.
#
#   Frame layout (offsets from the new r1):
#     112 .. 248	r14 - r31		(18 x 8 bytes)
#     256 .. 432	v20 - v31		(12 x 16 bytes, r9 + 0 .. 176)
#     448 .. 720	vs14 - vs31		(18 x 16 bytes, r9 + 192 .. 464)
#   The last slot ends at 736, inside the 752-byte frame.
#   LR is stored at 16(r1) of the caller frame before the frame is pushed.
#
#   Clobbers: r0 (LR copy), r9 (save-area base), r16 (index register used
#   by SAVE_VRS / SAVE_VSX).  r16 is spilled by SAVE_GPR before the vector
#   saves overwrite it, so that ordering must be kept.
#
.macro SAVE_REGS
	mflr 0
	std 0, 16(1)			# LR into the caller frame
	stdu 1,-752(1)			# push frame, store back chain

	SAVE_GPR 14, 112, 1
	SAVE_GPR 15, 120, 1
	SAVE_GPR 16, 128, 1
	SAVE_GPR 17, 136, 1
	SAVE_GPR 18, 144, 1
	SAVE_GPR 19, 152, 1
	SAVE_GPR 20, 160, 1
	SAVE_GPR 21, 168, 1
	SAVE_GPR 22, 176, 1
	SAVE_GPR 23, 184, 1
	SAVE_GPR 24, 192, 1
	SAVE_GPR 25, 200, 1
	SAVE_GPR 26, 208, 1
	SAVE_GPR 27, 216, 1
	SAVE_GPR 28, 224, 1
	SAVE_GPR 29, 232, 1
	SAVE_GPR 30, 240, 1
	SAVE_GPR 31, 248, 1

	addi	9, 1, 256		# r9 = base of the vector save area
	SAVE_VRS 20, 0, 9
	SAVE_VRS 21, 16, 9
	SAVE_VRS 22, 32, 9
	SAVE_VRS 23, 48, 9
	SAVE_VRS 24, 64, 9
	SAVE_VRS 25, 80, 9
	SAVE_VRS 26, 96, 9
	SAVE_VRS 27, 112, 9
	SAVE_VRS 28, 128, 9
	SAVE_VRS 29, 144, 9
	SAVE_VRS 30, 160, 9
	SAVE_VRS 31, 176, 9

	SAVE_VSX 14, 192, 9
	SAVE_VSX 15, 208, 9
	SAVE_VSX 16, 224, 9
	SAVE_VSX 17, 240, 9
	SAVE_VSX 18, 256, 9
	SAVE_VSX 19, 272, 9
	SAVE_VSX 20, 288, 9
	SAVE_VSX 21, 304, 9
	SAVE_VSX 22, 320, 9
	SAVE_VSX 23, 336, 9
	SAVE_VSX 24, 352, 9
	SAVE_VSX 25, 368, 9
	SAVE_VSX 26, 384, 9
	SAVE_VSX 27, 400, 9
	SAVE_VSX 28, 416, 9
	SAVE_VSX 29, 432, 9
	SAVE_VSX 30, 448, 9
	SAVE_VSX 31, 464, 9
.endm # SAVE_REGS
13562306a36Sopenharmony_ci
#
# RESTORE_REGS
#   Function epilogue matching SAVE_REGS: reloads v20-v31, vs14-vs31 and
#   r14-r31 from the 752-byte frame, pops the frame and restores LR from
#   16(r1) of the caller frame.
#
#   The vector reloads come first because RESTORE_VRS / RESTORE_VSX use
#   r16 as their index register; r16 itself is reloaded afterwards by
#   RESTORE_GPR.
#
#   Clobbers: r0, r9.  Does not return; the caller issues blr.
#
.macro RESTORE_REGS
	addi	9, 1, 256		# r9 = base of the vector save area
	RESTORE_VRS 20, 0, 9
	RESTORE_VRS 21, 16, 9
	RESTORE_VRS 22, 32, 9
	RESTORE_VRS 23, 48, 9
	RESTORE_VRS 24, 64, 9
	RESTORE_VRS 25, 80, 9
	RESTORE_VRS 26, 96, 9
	RESTORE_VRS 27, 112, 9
	RESTORE_VRS 28, 128, 9
	RESTORE_VRS 29, 144, 9
	RESTORE_VRS 30, 160, 9
	RESTORE_VRS 31, 176, 9

	RESTORE_VSX 14, 192, 9
	RESTORE_VSX 15, 208, 9
	RESTORE_VSX 16, 224, 9
	RESTORE_VSX 17, 240, 9
	RESTORE_VSX 18, 256, 9
	RESTORE_VSX 19, 272, 9
	RESTORE_VSX 20, 288, 9
	RESTORE_VSX 21, 304, 9
	RESTORE_VSX 22, 320, 9
	RESTORE_VSX 23, 336, 9
	RESTORE_VSX 24, 352, 9
	RESTORE_VSX 25, 368, 9
	RESTORE_VSX 26, 384, 9
	RESTORE_VSX 27, 400, 9
	RESTORE_VSX 28, 416, 9
	RESTORE_VSX 29, 432, 9
	RESTORE_VSX 30, 448, 9
	RESTORE_VSX 31, 464, 9

	RESTORE_GPR 14, 112, 1
	RESTORE_GPR 15, 120, 1
	RESTORE_GPR 16, 128, 1
	RESTORE_GPR 17, 136, 1
	RESTORE_GPR 18, 144, 1
	RESTORE_GPR 19, 152, 1
	RESTORE_GPR 20, 160, 1
	RESTORE_GPR 21, 168, 1
	RESTORE_GPR 22, 176, 1
	RESTORE_GPR 23, 184, 1
	RESTORE_GPR 24, 192, 1
	RESTORE_GPR 25, 200, 1
	RESTORE_GPR 26, 208, 1
	RESTORE_GPR 27, 216, 1
	RESTORE_GPR 28, 224, 1
	RESTORE_GPR 29, 232, 1
	RESTORE_GPR 30, 240, 1
	RESTORE_GPR 31, 248, 1

	addi    1, 1, 752		# pop the frame
	ld 0, 16(1)			# LR was saved in the caller frame
	mtlr 0
.endm # RESTORE_REGS
19362306a36Sopenharmony_ci
#
# QT_loop_8x
#   One column round plus one diagonal round (8 quarter rounds) applied
#   to eight blocks at once.  Working state: blocks 0-3 in v0-v15 and
#   blocks 4-7 in v16-v31 (indented lines below are the second group).
#
#   All 32 vector registers hold state, so the rotate constants are kept
#   in vs20-vs23 and swapped in when needed, with vs0 as the spill slot:
#     vs20 -> v25	vpermxor control for the "d ^= a; d <<<= 16" step
#     vs21 -> v25	vrlw shift count for the "<<<= 12" step
#     vs22 -> v25	vpermxor control for the "d ^= a; d <<<= 8" step
#     vs23 -> v28	vrlw shift count for the "<<<= 7" step
#   vs20-vs23 are expected to be set up by chacha_p10le_8x (permx table
#   entries and splatted 12 / 7); the permx contents are assumed, not
#   checked here.  The swap/restore pairs must stay bracketed around
#   exactly the instructions shown, since v25 and v28 are live state.
#
#   Clobbers: vs0.
#
.macro QT_loop_8x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 20, 20		# v25 = vs20
	vadduwm 0, 0, 4			# a += b
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	  vadduwm 16, 16, 20
	  vadduwm 17, 17, 21
	  vadduwm 18, 18, 22
	  vadduwm 19, 19, 23

	  vpermxor 12, 12, 0, 25	# d ^= a, permuted through v25
	  vpermxor 13, 13, 1, 25
	  vpermxor 14, 14, 2, 25
	  vpermxor 15, 15, 3, 25
	  vpermxor 28, 28, 16, 25
	  vpermxor 29, 29, 17, 25
	  vpermxor 30, 30, 18, 25
	  vpermxor 31, 31, 19, 25
	xxlor	32+25, 0, 0		# restore v25
	vadduwm 8, 8, 12		# c += d
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	  vadduwm 24, 24, 28
	  vadduwm 25, 25, 29
	  vadduwm 26, 26, 30
	  vadduwm 27, 27, 31
	vxor 4, 4, 8			# b ^= c
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	  vxor 20, 20, 24
	  vxor 21, 21, 25
	  vxor 22, 22, 26
	  vxor 23, 23, 27

	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 21, 21		# v25 = vs21
	vrlw 4, 4, 25  			# b <<<= v25
	vrlw 5, 5, 25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	  vrlw 20, 20, 25  		# b <<<= v25
	  vrlw 21, 21, 25
	  vrlw 22, 22, 25
	  vrlw 23, 23, 25
	xxlor	32+25, 0, 0		# restore v25
	vadduwm 0, 0, 4			# a += b
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	  vadduwm 16, 16, 20
	  vadduwm 17, 17, 21
	  vadduwm 18, 18, 22
	  vadduwm 19, 19, 23

	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 22, 22		# v25 = vs22
	  vpermxor 12, 12, 0, 25	# d ^= a, permuted through v25
	  vpermxor 13, 13, 1, 25
	  vpermxor 14, 14, 2, 25
	  vpermxor 15, 15, 3, 25
	  vpermxor 28, 28, 16, 25
	  vpermxor 29, 29, 17, 25
	  vpermxor 30, 30, 18, 25
	  vpermxor 31, 31, 19, 25
	xxlor	32+25, 0, 0		# restore v25
	vadduwm 8, 8, 12		# c += d
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	  vadduwm 24, 24, 28
	  vadduwm 25, 25, 29
	  vadduwm 26, 26, 30
	  vadduwm 27, 27, 31
	xxlor	0, 32+28, 32+28		# spill v28 to vs0
	xxlor	32+28, 23, 23		# v28 = vs23
	vxor 4, 4, 8			# b ^= c
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	  vxor 20, 20, 24
	  vxor 21, 21, 25
	  vxor 22, 22, 26
	  vxor 23, 23, 27
	vrlw 4, 4, 28  			# b <<<= v28
	vrlw 5, 5, 28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	  vrlw 20, 20, 28  		# b <<<= v28
	  vrlw 21, 21, 28
	  vrlw 22, 22, 28
	  vrlw 23, 23, 28
	xxlor	32+28, 0, 0		# restore v28

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 20, 20		# v25 = vs20
	vadduwm 0, 0, 5			# a += b
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	  vadduwm 16, 16, 21
	  vadduwm 17, 17, 22
	  vadduwm 18, 18, 23
	  vadduwm 19, 19, 20

	  vpermxor 15, 15, 0, 25	# d ^= a, permuted through v25
	  vpermxor 12, 12, 1, 25
	  vpermxor 13, 13, 2, 25
	  vpermxor 14, 14, 3, 25
	  vpermxor 31, 31, 16, 25
	  vpermxor 28, 28, 17, 25
	  vpermxor 29, 29, 18, 25
	  vpermxor 30, 30, 19, 25

	xxlor	32+25, 0, 0		# restore v25
	vadduwm 10, 10, 15		# c += d
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	  vadduwm 26, 26, 31
	  vadduwm 27, 27, 28
	  vadduwm 24, 24, 29
	  vadduwm 25, 25, 30
	vxor 5, 5, 10			# b ^= c
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	  vxor 21, 21, 26
	  vxor 22, 22, 27
	  vxor 23, 23, 24
	  vxor 20, 20, 25

	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 21, 21		# v25 = vs21
	vrlw 5, 5, 25			# b <<<= v25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	vrlw 4, 4, 25
	  vrlw 21, 21, 25
	  vrlw 22, 22, 25
	  vrlw 23, 23, 25
	  vrlw 20, 20, 25
	xxlor	32+25, 0, 0		# restore v25

	vadduwm 0, 0, 5			# a += b
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	  vadduwm 16, 16, 21
	  vadduwm 17, 17, 22
	  vadduwm 18, 18, 23
	  vadduwm 19, 19, 20

	xxlor	0, 32+25, 32+25		# spill v25 to vs0
	xxlor	32+25, 22, 22		# v25 = vs22
	  vpermxor 15, 15, 0, 25	# d ^= a, permuted through v25
	  vpermxor 12, 12, 1, 25
	  vpermxor 13, 13, 2, 25
	  vpermxor 14, 14, 3, 25
	  vpermxor 31, 31, 16, 25
	  vpermxor 28, 28, 17, 25
	  vpermxor 29, 29, 18, 25
	  vpermxor 30, 30, 19, 25
	xxlor	32+25, 0, 0		# restore v25

	vadduwm 10, 10, 15		# c += d
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	  vadduwm 26, 26, 31
	  vadduwm 27, 27, 28
	  vadduwm 24, 24, 29
	  vadduwm 25, 25, 30

	xxlor	0, 32+28, 32+28		# spill v28 to vs0
	xxlor	32+28, 23, 23		# v28 = vs23
	vxor 5, 5, 10			# b ^= c
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	  vxor 21, 21, 26
	  vxor 22, 22, 27
	  vxor 23, 23, 24
	  vxor 20, 20, 25
	vrlw 5, 5, 28			# b <<<= v28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	vrlw 4, 4, 28
	  vrlw 21, 21, 28
	  vrlw 22, 22, 28
	  vrlw 23, 23, 28
	  vrlw 20, 20, 28
	xxlor	32+28, 0, 0		# restore v28
.endm
39362306a36Sopenharmony_ci
#
# QT_loop_4x
#   One column round plus one diagonal round (8 quarter rounds) applied
#   to four blocks whose working state is in v0-v15.
#
#   Constants are read directly from vector registers (no swapping):
#     v20	vpermxor control for the "d ^= a; d <<<= 16" step
#     v21	vrlw shift count for the "<<<= 12" step
#     v22	vpermxor control for the "d ^= a; d <<<= 8" step
#     v23	vrlw shift count for the "<<<= 7" step
#   These are expected to be set up by the caller (permx table entries
#   and splatted 12 / 7); the permx contents are assumed, not checked
#   here.
#
.macro QT_loop_4x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	vadduwm 0, 0, 4			# a += b
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	  vpermxor 12, 12, 0, 20	# d ^= a, permuted through v20
	  vpermxor 13, 13, 1, 20
	  vpermxor 14, 14, 2, 20
	  vpermxor 15, 15, 3, 20
	vadduwm 8, 8, 12		# c += d
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8			# b ^= c
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 21			# b <<<= v21
	vrlw 5, 5, 21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vadduwm 0, 0, 4			# a += b
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	  vpermxor 12, 12, 0, 22	# d ^= a, permuted through v22
	  vpermxor 13, 13, 1, 22
	  vpermxor 14, 14, 2, 22
	  vpermxor 15, 15, 3, 22
	vadduwm 8, 8, 12		# c += d
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8			# b ^= c
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 23			# b <<<= v23
	vrlw 5, 5, 23
	vrlw 6, 6, 23
	vrlw 7, 7, 23

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	vadduwm 0, 0, 5			# a += b
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	  vpermxor 15, 15, 0, 20	# d ^= a, permuted through v20
	  vpermxor 12, 12, 1, 20
	  vpermxor 13, 13, 2, 20
	  vpermxor 14, 14, 3, 20
	vadduwm 10, 10, 15		# c += d
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10			# b ^= c
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 21			# b <<<= v21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vrlw 4, 4, 21
	vadduwm 0, 0, 5			# a += b
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	  vpermxor 15, 15, 0, 22	# d ^= a, permuted through v22
	  vpermxor 12, 12, 1, 22
	  vpermxor 13, 13, 2, 22
	  vpermxor 14, 14, 3, 22
	vadduwm 10, 10, 15		# c += d
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10			# b ^= c
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 23			# b <<<= v23
	vrlw 6, 6, 23
	vrlw 7, 7, 23
	vrlw 4, 4, 23
.endm
47962306a36Sopenharmony_ci
#
# TP_4x a0 a1 a2 a3
#   Transpose the 4x4 matrix of 32-bit words held in vector registers
#   a0..a3 (given as VR numbers; the macro adds 32 to address them as
#   VSX registers).  The result is written back to the same registers.
#   Clobbers: vs10 - vs13 (merge temporaries).
#
.macro TP_4x a0 a1 a2 a3
	xxmrghw  10, 32+\a0, 32+\a1	# a0, a1, b0, b1
	xxmrghw  11, 32+\a2, 32+\a3	# a2, a3, b2, b3
	xxmrglw  12, 32+\a0, 32+\a1	# c0, c1, d0, d1
	xxmrglw  13, 32+\a2, 32+\a3	# c2, c3, d2, d3
	xxpermdi	32+\a0, 10, 11, 0	# a0, a1, a2, a3
	xxpermdi	32+\a1, 10, 11, 3	# b0, b1, b2, b3
	xxpermdi	32+\a2, 12, 13, 0	# c0, c1, c2, c3
	xxpermdi	32+\a3, 12, 13, 3	# d0, d1, d2, d3
.endm
49162306a36Sopenharmony_ci
#
# Add_state S
#   key stream = working state + state
#
#   S is the first vector register of a 16-register working-state group
#   (0 or 16 in this file).  The four state rows are taken from the
#   registers numbered 16-S .. 19-S:
#     S = 0	adds v16 - v19 to v0  - v15
#     S = 16	adds v0  - v3  to v16 - v31
#   so the caller must have placed the original state rows in those
#   registers beforehand.  Registers S+k, S+k+4, S+k+8 and S+k+12
#   receive rows 0, 1, 2 and 3 respectively, for k = 0..3.
#
.macro Add_state S
	vadduwm \S+0, \S+0, 16-\S
	vadduwm \S+4, \S+4, 17-\S
	vadduwm \S+8, \S+8, 18-\S
	vadduwm \S+12, \S+12, 19-\S

	vadduwm \S+1, \S+1, 16-\S
	vadduwm \S+5, \S+5, 17-\S
	vadduwm \S+9, \S+9, 18-\S
	vadduwm \S+13, \S+13, 19-\S

	vadduwm \S+2, \S+2, 16-\S
	vadduwm \S+6, \S+6, 17-\S
	vadduwm \S+10, \S+10, 18-\S
	vadduwm \S+14, \S+14, 19-\S

	vadduwm	\S+3, \S+3, 16-\S
	vadduwm	\S+7, \S+7, 17-\S
	vadduwm	\S+11, \S+11, 18-\S
	vadduwm	\S+15, \S+15, 19-\S
.endm
51462306a36Sopenharmony_ci
#
# write 256 bytes
#
# Write_256 S
#   XOR 256 bytes of input with the key stream held in the 16 vector
#   registers starting at VR number S, and store the result.
#
#   Inputs:
#     r5 + r14	source address (r14 is the running byte offset)
#     r4 + r14	destination address
#     r17 - r31	index offsets; expected to hold 16, 32, ..., 240 as
#		loaded by chacha_p10le_8x
#     VR S+k, S+k+4, S+k+8, S+k+12	the four 16-byte rows of block k
#
#   Clobbers: r9 (source pointer), r16 (destination pointer),
#   vs0 - vs15 (input data), and the key-stream registers themselves,
#   which end up holding the output.
#
.macro Write_256 S
	add 9, 14, 5			# r9  = src + offset
	add 16, 14, 4			# r16 = dst + offset
	lxvw4x 0, 0, 9
	lxvw4x 1, 17, 9
	lxvw4x 2, 18, 9
	lxvw4x 3, 19, 9
	lxvw4x 4, 20, 9
	lxvw4x 5, 21, 9
	lxvw4x 6, 22, 9
	lxvw4x 7, 23, 9
	lxvw4x 8, 24, 9
	lxvw4x 9, 25, 9
	lxvw4x 10, 26, 9
	lxvw4x 11, 27, 9
	lxvw4x 12, 28, 9
	lxvw4x 13, 29, 9
	lxvw4x 14, 30, 9
	lxvw4x 15, 31, 9

	# S+32 addresses VR S as a VSX register.
	xxlxor \S+32, \S+32, 0
	xxlxor \S+36, \S+36, 1
	xxlxor \S+40, \S+40, 2
	xxlxor \S+44, \S+44, 3
	xxlxor \S+33, \S+33, 4
	xxlxor \S+37, \S+37, 5
	xxlxor \S+41, \S+41, 6
	xxlxor \S+45, \S+45, 7
	xxlxor \S+34, \S+34, 8
	xxlxor \S+38, \S+38, 9
	xxlxor \S+42, \S+42, 10
	xxlxor \S+46, \S+46, 11
	xxlxor \S+35, \S+35, 12
	xxlxor \S+39, \S+39, 13
	xxlxor \S+43, \S+43, 14
	xxlxor \S+47, \S+47, 15

	stxvw4x \S+32, 0, 16
	stxvw4x \S+36, 17, 16
	stxvw4x \S+40, 18, 16
	stxvw4x \S+44, 19, 16

	stxvw4x \S+33, 20, 16
	stxvw4x \S+37, 21, 16
	stxvw4x \S+41, 22, 16
	stxvw4x \S+45, 23, 16

	stxvw4x \S+34, 24, 16
	stxvw4x \S+38, 25, 16
	stxvw4x \S+42, 26, 16
	stxvw4x \S+46, 27, 16

	stxvw4x \S+35, 28, 16
	stxvw4x \S+39, 29, 16
	stxvw4x \S+43, 30, 16
	stxvw4x \S+47, 31, 16

.endm
57662306a36Sopenharmony_ci
#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
#
58062306a36Sopenharmony_ciSYM_FUNC_START(chacha_p10le_8x)
58162306a36Sopenharmony_ci.align 5
58262306a36Sopenharmony_ci	cmpdi	6, 0
58362306a36Sopenharmony_ci	ble	Out_no_chacha
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	SAVE_REGS
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	# r17 - r31 mainly for Write_256 macro.
58862306a36Sopenharmony_ci	li	17, 16
58962306a36Sopenharmony_ci	li	18, 32
59062306a36Sopenharmony_ci	li	19, 48
59162306a36Sopenharmony_ci	li	20, 64
59262306a36Sopenharmony_ci	li	21, 80
59362306a36Sopenharmony_ci	li	22, 96
59462306a36Sopenharmony_ci	li	23, 112
59562306a36Sopenharmony_ci	li	24, 128
59662306a36Sopenharmony_ci	li	25, 144
59762306a36Sopenharmony_ci	li	26, 160
59862306a36Sopenharmony_ci	li	27, 176
59962306a36Sopenharmony_ci	li	28, 192
60062306a36Sopenharmony_ci	li	29, 208
60162306a36Sopenharmony_ci	li	30, 224
60262306a36Sopenharmony_ci	li	31, 240
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	mr 15, 6			# len
60562306a36Sopenharmony_ci	li 14, 0			# offset to inp and outp
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci        lxvw4x	48, 0, 3		#  vr16, constants
60862306a36Sopenharmony_ci	lxvw4x	49, 17, 3		#  vr17, key 1
60962306a36Sopenharmony_ci	lxvw4x	50, 18, 3		#  vr18, key 2
61062306a36Sopenharmony_ci	lxvw4x	51, 19, 3		#  vr19, counter, nonce
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	# create (0, 1, 2, 3) counters
61362306a36Sopenharmony_ci	vspltisw 0, 0
61462306a36Sopenharmony_ci	vspltisw 1, 1
61562306a36Sopenharmony_ci	vspltisw 2, 2
61662306a36Sopenharmony_ci	vspltisw 3, 3
61762306a36Sopenharmony_ci	vmrghw	4, 0, 1
61862306a36Sopenharmony_ci	vmrglw	5, 2, 3
61962306a36Sopenharmony_ci	vsldoi	30, 4, 5, 8		# vr30 counter, 4 (0, 1, 2, 3)
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	vspltisw 21, 12
62262306a36Sopenharmony_ci	vspltisw 23, 7
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	addis	11, 2, permx@toc@ha
62562306a36Sopenharmony_ci	addi	11, 11, permx@toc@l
62662306a36Sopenharmony_ci	lxvw4x	32+20, 0, 11
62762306a36Sopenharmony_ci	lxvw4x	32+22, 17, 11
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	sradi	8, 7, 1
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci	mtctr 8
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	# save constants to vsx
63462306a36Sopenharmony_ci	xxlor	16, 48, 48
63562306a36Sopenharmony_ci	xxlor	17, 49, 49
63662306a36Sopenharmony_ci	xxlor	18, 50, 50
63762306a36Sopenharmony_ci	xxlor	19, 51, 51
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	vspltisw 25, 4
64062306a36Sopenharmony_ci	vspltisw 26, 8
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	xxlor	25, 32+26, 32+26
64362306a36Sopenharmony_ci	xxlor	24, 32+25, 32+25
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	vadduwm	31, 30, 25		# counter = (0, 1, 2, 3) + (4, 4, 4, 4)
64662306a36Sopenharmony_ci	xxlor	30, 32+30, 32+30
64762306a36Sopenharmony_ci	xxlor	31, 32+31, 32+31
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	xxlor	20, 32+20, 32+20
65062306a36Sopenharmony_ci	xxlor	21, 32+21, 32+21
65162306a36Sopenharmony_ci	xxlor	22, 32+22, 32+22
65262306a36Sopenharmony_ci	xxlor	23, 32+23, 32+23
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	cmpdi	6, 512
65562306a36Sopenharmony_ci	blt	Loop_last
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ciLoop_8x:
65862306a36Sopenharmony_ci	xxspltw  32+0, 16, 0
65962306a36Sopenharmony_ci	xxspltw  32+1, 16, 1
66062306a36Sopenharmony_ci	xxspltw  32+2, 16, 2
66162306a36Sopenharmony_ci	xxspltw  32+3, 16, 3
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	xxspltw  32+4, 17, 0
66462306a36Sopenharmony_ci	xxspltw  32+5, 17, 1
66562306a36Sopenharmony_ci	xxspltw  32+6, 17, 2
66662306a36Sopenharmony_ci	xxspltw  32+7, 17, 3
66762306a36Sopenharmony_ci	xxspltw  32+8, 18, 0
66862306a36Sopenharmony_ci	xxspltw  32+9, 18, 1
66962306a36Sopenharmony_ci	xxspltw  32+10, 18, 2
67062306a36Sopenharmony_ci	xxspltw  32+11, 18, 3
67162306a36Sopenharmony_ci	xxspltw  32+12, 19, 0
67262306a36Sopenharmony_ci	xxspltw  32+13, 19, 1
67362306a36Sopenharmony_ci	xxspltw  32+14, 19, 2
67462306a36Sopenharmony_ci	xxspltw  32+15, 19, 3
67562306a36Sopenharmony_ci	vadduwm	12, 12, 30	# increase counter
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci	xxspltw  32+16, 16, 0
67862306a36Sopenharmony_ci	xxspltw  32+17, 16, 1
67962306a36Sopenharmony_ci	xxspltw  32+18, 16, 2
68062306a36Sopenharmony_ci	xxspltw  32+19, 16, 3
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	xxspltw  32+20, 17, 0
68362306a36Sopenharmony_ci	xxspltw  32+21, 17, 1
68462306a36Sopenharmony_ci	xxspltw  32+22, 17, 2
68562306a36Sopenharmony_ci	xxspltw  32+23, 17, 3
68662306a36Sopenharmony_ci	xxspltw  32+24, 18, 0
68762306a36Sopenharmony_ci	xxspltw  32+25, 18, 1
68862306a36Sopenharmony_ci	xxspltw  32+26, 18, 2
68962306a36Sopenharmony_ci	xxspltw  32+27, 18, 3
69062306a36Sopenharmony_ci	xxspltw  32+28, 19, 0
69162306a36Sopenharmony_ci	xxspltw  32+29, 19, 1
69262306a36Sopenharmony_ci	vadduwm	28, 28, 31	# increase counter
69362306a36Sopenharmony_ci	xxspltw  32+30, 19, 2
69462306a36Sopenharmony_ci	xxspltw  32+31, 19, 3
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci.align 5	# align the entry of the round loop
69762306a36Sopenharmony_ciquarter_loop_8x:	# round loop over vr0-vr31; iteration count is in CTR
69862306a36Sopenharmony_ci	QT_loop_8x
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci	bdnz	quarter_loop_8x	# CTR -= 1, repeat while CTR != 0
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	xxlor	0, 32+30, 32+30	# vs0 = vr30 (stash the live working register)
70362306a36Sopenharmony_ci	xxlor	32+30, 30, 30	# vr30 = vs30 (copy that the end of this pass refreshes from vr30)
70462306a36Sopenharmony_ci	vadduwm	12, 12, 30	# add it to the vr12 lanes, as was done before the rounds
70562306a36Sopenharmony_ci	xxlor	32+30, 0, 0	# restore vr30 from vs0
70662306a36Sopenharmony_ci	TP_4x 0, 1, 2, 3
70762306a36Sopenharmony_ci	TP_4x 4, 5, 6, 7
70862306a36Sopenharmony_ci	TP_4x 8, 9, 10, 11
70962306a36Sopenharmony_ci	TP_4x 12, 13, 14, 15
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	xxlor	0, 48, 48	# vs0-vs3 = vr16-vr19 (stash second-group registers)
71262306a36Sopenharmony_ci	xxlor	1, 49, 49
71362306a36Sopenharmony_ci	xxlor	2, 50, 50
71462306a36Sopenharmony_ci	xxlor	3, 51, 51
71562306a36Sopenharmony_ci	xxlor	48, 16, 16	# vr16-vr19 = vs16-vs19; presumably the operand Add_state 0 expects -- TODO confirm
71662306a36Sopenharmony_ci	xxlor	49, 17, 17
71762306a36Sopenharmony_ci	xxlor	50, 18, 18
71862306a36Sopenharmony_ci	xxlor	51, 19, 19
71962306a36Sopenharmony_ci	Add_state 0
72062306a36Sopenharmony_ci	xxlor	48, 0, 0	# restore vr16-vr19 from vs0-vs3
72162306a36Sopenharmony_ci	xxlor	49, 1, 1
72262306a36Sopenharmony_ci	xxlor	50, 2, 2
72362306a36Sopenharmony_ci	xxlor	51, 3, 3
72462306a36Sopenharmony_ci	Write_256 0
72562306a36Sopenharmony_ci	addi	14, 14, 256	# offset +=256
72662306a36Sopenharmony_ci	addi	15, 15, -256	# len -=256
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	xxlor	5, 32+31, 32+31	# vs5 = vr31 (stash), same pattern as the vr30 swap above
72962306a36Sopenharmony_ci	xxlor	32+31, 31, 31	# vr31 = vs31
73062306a36Sopenharmony_ci	vadduwm	28, 28, 31	# add it to the vr28 lanes, as was done before the rounds
73162306a36Sopenharmony_ci	xxlor	32+31, 5, 5	# restore vr31 from vs5
73262306a36Sopenharmony_ci	TP_4x 16+0, 16+1, 16+2, 16+3
73362306a36Sopenharmony_ci	TP_4x 16+4, 16+5, 16+6, 16+7
73462306a36Sopenharmony_ci	TP_4x 16+8, 16+9, 16+10, 16+11
73562306a36Sopenharmony_ci	TP_4x 16+12, 16+13, 16+14, 16+15
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	xxlor	32, 16, 16	# vr0-vr3 = vs16-vs19; presumably the operand Add_state 16 expects -- TODO confirm
73862306a36Sopenharmony_ci	xxlor	33, 17, 17
73962306a36Sopenharmony_ci	xxlor	34, 18, 18
74062306a36Sopenharmony_ci	xxlor	35, 19, 19
74162306a36Sopenharmony_ci	Add_state 16
74262306a36Sopenharmony_ci	Write_256 16
74362306a36Sopenharmony_ci	addi	14, 14, 256	# offset +=256
74462306a36Sopenharmony_ci	addi	15, 15, -256	# len -=256
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	xxlor	32+24, 24, 24	# vr24 = vs24 (value set before this part of the file)
74762306a36Sopenharmony_ci	xxlor	32+25, 25, 25	# vr25 = vs25 (value set before this part of the file)
74862306a36Sopenharmony_ci	xxlor	32+30, 30, 30	# vr30 = vs30
74962306a36Sopenharmony_ci	vadduwm	30, 30, 25	# vr30 += vr25 (advance for the next pass)
75062306a36Sopenharmony_ci	vadduwm	31, 30, 24	# vr31 = vr30 + vr24
75162306a36Sopenharmony_ci	xxlor	30, 32+30, 32+30	# vs30 = vr30, kept for the next pass
75262306a36Sopenharmony_ci	xxlor	31, 32+31, 32+31	# vs31 = vr31, kept for the next pass
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	cmpdi	15, 0	# len == 0: nothing left, leave
75562306a36Sopenharmony_ci	beq	Out_loop
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	cmpdi	15, 512	# fewer than 512 bytes left (signed compare): switch to the 4x path
75862306a36Sopenharmony_ci	blt	Loop_last
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	mtctr 8	# reload the round-loop count from r8
76162306a36Sopenharmony_ci	b Loop_8x
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ciLoop_last:	# set-up for the 4x path: reload the state rows and the rotate helpers
76462306a36Sopenharmony_ci        lxvw4x	48, 0, 3		#  vr16, constants
76562306a36Sopenharmony_ci	lxvw4x	49, 17, 3		#  vr17, key 1
76662306a36Sopenharmony_ci	lxvw4x	50, 18, 3		#  vr18, key 2
76762306a36Sopenharmony_ci	lxvw4x	51, 19, 3		#  vr19, counter, nonce
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	vspltisw 21, 12	# vr21 = 12 in every word; presumably the count for the <<<12 step
77062306a36Sopenharmony_ci	vspltisw 23, 7	# vr23 = 7 in every word; presumably the count for the <<<7 step
77162306a36Sopenharmony_ci	addis	11, 2, permx@toc@ha	# r11 = address of permx, computed relative to the TOC pointer r2
77262306a36Sopenharmony_ci	addi	11, 11, permx@toc@l
77362306a36Sopenharmony_ci	lxvw4x	32+20, 0, 11	# vr20 = 16 bytes at permx
77462306a36Sopenharmony_ci	lxvw4x	32+22, 17, 11	# vr22 = 16 bytes at permx + r17 (second row if r17 == 16 -- TODO confirm)
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	sradi	8, 7, 1	# r8 = r7 >> 1; r7 is presumably nrounds, so r8 = nrounds / 2 -- TODO confirm
77762306a36Sopenharmony_ci	mtctr 8	# CTR = r8, the round-loop iteration count
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ciLoop_4x:	# start of one 256-byte pass: fill vr0-vr15 from the state rows in vr16-vr19
78062306a36Sopenharmony_ci	vspltw  0, 16, 0	# vr0-vr3 = words 0-3 of vr16, each copied to all four lanes
78162306a36Sopenharmony_ci	vspltw  1, 16, 1
78262306a36Sopenharmony_ci	vspltw  2, 16, 2
78362306a36Sopenharmony_ci	vspltw  3, 16, 3
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	vspltw  4, 17, 0	# vr4-vr7 = words 0-3 of vr17
78662306a36Sopenharmony_ci	vspltw  5, 17, 1
78762306a36Sopenharmony_ci	vspltw  6, 17, 2
78862306a36Sopenharmony_ci	vspltw  7, 17, 3
78962306a36Sopenharmony_ci	vspltw  8, 18, 0	# vr8-vr11 = words 0-3 of vr18
79062306a36Sopenharmony_ci	vspltw  9, 18, 1
79162306a36Sopenharmony_ci	vspltw  10, 18, 2
79262306a36Sopenharmony_ci	vspltw  11, 18, 3
79362306a36Sopenharmony_ci	vspltw  12, 19, 0	# vr12-vr15 = words 0-3 of vr19
79462306a36Sopenharmony_ci	vadduwm	12, 12, 30	# increase counter
79562306a36Sopenharmony_ci	vspltw  13, 19, 1
79662306a36Sopenharmony_ci	vspltw  14, 19, 2
79762306a36Sopenharmony_ci	vspltw  15, 19, 3
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci.align 5	# align the entry of the round loop
80062306a36Sopenharmony_ciquarter_loop:	# round loop over vr0-vr15; iteration count is in CTR
80162306a36Sopenharmony_ci	QT_loop_4x
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	bdnz	quarter_loop	# CTR -= 1, repeat while CTR != 0
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	vadduwm	12, 12, 30	# add vr30 to the vr12 lanes again; presumably Add_state adds only the base counter -- TODO confirm
80662306a36Sopenharmony_ci	TP_4x 0, 1, 2, 3
80762306a36Sopenharmony_ci	TP_4x 4, 5, 6, 7
80862306a36Sopenharmony_ci	TP_4x 8, 9, 10, 11
80962306a36Sopenharmony_ci	TP_4x 12, 13, 14, 15
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	Add_state 0
81262306a36Sopenharmony_ci	Write_256 0
81362306a36Sopenharmony_ci	addi	14, 14, 256	# offset += 256
81462306a36Sopenharmony_ci	addi	15, 15, -256	# len -= 256
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	# Update state counter
81762306a36Sopenharmony_ci	vspltisw 25, 4	# vr25 = 4 in every word
81862306a36Sopenharmony_ci	vadduwm	30, 30, 25	# vr30 += 4 per lane, matching the 256 bytes just produced
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	cmpdi	15, 0	# len == 0: leave; NOTE(review): subsumed by the < 256 test below
82162306a36Sopenharmony_ci	beq	Out_loop
82262306a36Sopenharmony_ci	cmpdi	15, 256	# fewer than 256 bytes left (signed compare): leave, the tail is not processed here
82362306a36Sopenharmony_ci	blt	Out_loop
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	mtctr 8	# reload the round-loop count from r8
82662306a36Sopenharmony_ci	b Loop_4x
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ciOut_loop:	# exit reached from both the 8x and the 4x paths
82962306a36Sopenharmony_ci	RESTORE_REGS
83062306a36Sopenharmony_ci	blr	# return to the caller
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ciOut_no_chacha:	# exit without RESTORE_REGS; NOTE(review): the branch to this label is not in this part of the file
83362306a36Sopenharmony_ci	li	3, 0	# r3 = 0
83462306a36Sopenharmony_ci	blr	# return to the caller
83562306a36Sopenharmony_ciSYM_FUNC_END(chacha_p10le_8x)
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ciSYM_DATA_START_LOCAL(PERMX)
83862306a36Sopenharmony_ci.align 5	# 2**5 = 32-byte alignment on this target; the table below is 32 bytes
83962306a36Sopenharmony_cipermx:	# two 16-byte rows, loaded into vr20 and vr22 at Loop_last
84062306a36Sopenharmony_ci.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd	# row 0 -> vr20; presumably the byte-permute mask for the rotate by 16 -- TODO confirm
84162306a36Sopenharmony_ci.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc	# row 1 -> vr22; presumably the byte-permute mask for the rotate by 8 -- TODO confirm
84262306a36Sopenharmony_ciSYM_DATA_END(PERMX)
843