162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Original implementation written by Andy Polyakov, @dot-asm.
462306a36Sopenharmony_ci * This is an adaptation of the original code for kernel use.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <linux/linkage.h>
1062306a36Sopenharmony_ci#include <asm/nospec-insn.h>
1162306a36Sopenharmony_ci#include <asm/vx-insn.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#define SP	%r15
1462306a36Sopenharmony_ci#define FRAME	(16 * 8 + 4 * 8)
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci	.data
1762306a36Sopenharmony_ci	.balign	32
1862306a36Sopenharmony_ci
/*
 * Constant table used by chacha20_vx_4x and chacha20_vx.  Every row is
 * 16 bytes (four .long values); the code addresses rows by byte offset
 * from the start of sigma.
 */
1962306a36Sopenharmony_ciSYM_DATA_START_LOCAL(sigma)
/* offset 0x00: sigma row, loaded as K0 by both functions */
2062306a36Sopenharmony_ci	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
/* offsets 0x10..0x3f: increments 1, 2, 3; chacha20_vx loads them into T1..T3 */
2162306a36Sopenharmony_ci	.long	1,0,0,0
2262306a36Sopenharmony_ci	.long	2,0,0,0
2362306a36Sopenharmony_ci	.long	3,0,0,0
/* offset 0x40: VPERM mask, loaded as BEPERM by both functions */
2462306a36Sopenharmony_ci	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
2562306a36Sopenharmony_ci
/* offset 0x50: per-lane offsets, loaded as CTR by chacha20_vx_4x */
2662306a36Sopenharmony_ci	.long	0,1,2,3
/* offsets 0x60..0x9f: each sigma word repeated in all four lanes (XA0..XA3) */
2762306a36Sopenharmony_ci	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
2862306a36Sopenharmony_ci	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
2962306a36Sopenharmony_ci	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
3062306a36Sopenharmony_ci	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
3162306a36Sopenharmony_ciSYM_DATA_END(sigma)
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	.previous
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	GEN_BR_THUNK %r14
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	.text
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci#############################################################################
4062306a36Sopenharmony_ci# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
4162306a36Sopenharmony_ci#		      const u32 *key, const u32 *counter)
4262306a36Sopenharmony_ci
/* Argument registers, in prototype order: out, inp, len, key, counter. */
4362306a36Sopenharmony_ci#define	OUT		%r2
4462306a36Sopenharmony_ci#define	INP		%r3
4562306a36Sopenharmony_ci#define	LEN		%r4
4662306a36Sopenharmony_ci#define	KEY		%r5
4762306a36Sopenharmony_ci#define	COUNTER		%r6
4862306a36Sopenharmony_ci
/* Constants from the sigma table: byte swap mask and per-lane offsets 0,1,2,3. */
4962306a36Sopenharmony_ci#define BEPERM		%v31
5062306a36Sopenharmony_ci#define CTR		%v26
5162306a36Sopenharmony_ci
/* Input state rows: K0 = sigma, K1/K2 = key at KEY+0 and KEY+16, K3 = counter. */
5262306a36Sopenharmony_ci#define K0		%v16
5362306a36Sopenharmony_ci#define K1		%v17
5462306a36Sopenharmony_ci#define K2		%v18
5562306a36Sopenharmony_ci#define K3		%v19
5662306a36Sopenharmony_ci
/*
 * Working state.  During the rounds each register holds one state word
 * replicated per lane (XAn = word n of the sigma row, XBn/XCn = key words,
 * XDn = counter words).  After the transpose XAn..XDn hold the four rows of
 * block n.
 */
5762306a36Sopenharmony_ci#define XA0		%v0
5862306a36Sopenharmony_ci#define XA1		%v1
5962306a36Sopenharmony_ci#define XA2		%v2
6062306a36Sopenharmony_ci#define XA3		%v3
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci#define XB0		%v4
6362306a36Sopenharmony_ci#define XB1		%v5
6462306a36Sopenharmony_ci#define XB2		%v6
6562306a36Sopenharmony_ci#define XB3		%v7
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci#define XC0		%v8
6862306a36Sopenharmony_ci#define XC1		%v9
6962306a36Sopenharmony_ci#define XC2		%v10
7062306a36Sopenharmony_ci#define XC3		%v11
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci#define XD0		%v12
7362306a36Sopenharmony_ci#define XD1		%v13
7462306a36Sopenharmony_ci#define XD2		%v14
7562306a36Sopenharmony_ci#define XD3		%v15
7662306a36Sopenharmony_ci
/* Temporaries: transpose scratch, then input data for the xor. */
7762306a36Sopenharmony_ci#define XT0		%v27
7862306a36Sopenharmony_ci#define XT1		%v28
7962306a36Sopenharmony_ci#define XT2		%v29
8062306a36Sopenharmony_ci#define XT3		%v30
8162306a36Sopenharmony_ci
# chacha20_vx_4x: xor ChaCha keystream into up to four 64-byte blocks of
# input.  Four blocks are computed in parallel, one block per 32-bit lane,
# then transposed and written out one block at a time.
#
# Register roles:
#   OUT/INP/LEN  - output pointer, input pointer, remaining byte count
#   KEY/COUNTER  - pointers to the key words and to the counter row
#   %r7          - address of the sigma constant table
#   %r0          - iteration count of .Loop_4x
#   %r1          - byte index in .Loop_tail_4x
#
# %r6 and %r7 are saved at entry and restored before each return.  SP is
# never adjusted here; the tail path uses the 64 bytes at 8*8(SP) as
# scratch for the keystream of a partial last block.
#
# NOTE(review): the first block is processed without a length check and
# LEN is not tested for zero afterwards, so this routine appears to
# require LEN > 0x40 on entry - confirm against the callers.
8262306a36Sopenharmony_ciSYM_FUNC_START(chacha20_vx_4x)
8362306a36Sopenharmony_ci	stmg	%r6,%r7,6*8(SP)		# save %r6/%r7 (restored by lmg below)
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	larl	%r7,sigma		# %r7 = address of the constant table
8662306a36Sopenharmony_ci	lhi	%r0,10			# .Loop_4x runs 10 times
8762306a36Sopenharmony_ci	lhi	%r1,0			# %r1 is reloaded in .Ltail_4x before it is read
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	VL	K0,0,,%r7		# load sigma
9062306a36Sopenharmony_ci	VL	K1,0,,KEY		# load key
9162306a36Sopenharmony_ci	VL	K2,16,,KEY
9262306a36Sopenharmony_ci	VL	K3,0,,COUNTER		# load counter
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	VL	BEPERM,0x40,,%r7	# byte swap mask (table offset 0x40)
9562306a36Sopenharmony_ci	VL	CTR,0x50,,%r7		# lane offsets 0,1,2,3 (table offset 0x50)
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	VREPF	XB0,K1,0		# smash the key
10062306a36Sopenharmony_ci	VREPF	XB1,K1,1
10162306a36Sopenharmony_ci	VREPF	XB2,K1,2
10262306a36Sopenharmony_ci	VREPF	XB3,K1,3
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	VREPF	XD0,K3,0		# smash the counter row
10562306a36Sopenharmony_ci	VREPF	XD1,K3,1
10662306a36Sopenharmony_ci	VREPF	XD2,K3,2
10762306a36Sopenharmony_ci	VREPF	XD3,K3,3
10862306a36Sopenharmony_ci	VAF	XD0,XD0,CTR		# lane i = first counter word + i
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	VREPF	XC0,K2,0		# smash the second key row
11162306a36Sopenharmony_ci	VREPF	XC1,K2,1
11262306a36Sopenharmony_ci	VREPF	XC2,K2,2
11362306a36Sopenharmony_ci	VREPF	XC3,K2,3
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci.Loop_4x:
# Column pass: quarter rounds on (XAn, XBn, XCn, XDn) for n = 0..3, with the
# four independent quarter rounds interleaved step by step.
11662306a36Sopenharmony_ci	VAF	XA0,XA0,XB0		# a += b
11762306a36Sopenharmony_ci	VX	XD0,XD0,XA0		# d ^= a
11862306a36Sopenharmony_ci	VERLLF	XD0,XD0,16		# d = rotate left by 16
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	VAF	XA1,XA1,XB1
12162306a36Sopenharmony_ci	VX	XD1,XD1,XA1
12262306a36Sopenharmony_ci	VERLLF	XD1,XD1,16
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	VAF	XA2,XA2,XB2
12562306a36Sopenharmony_ci	VX	XD2,XD2,XA2
12662306a36Sopenharmony_ci	VERLLF	XD2,XD2,16
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	VAF	XA3,XA3,XB3
12962306a36Sopenharmony_ci	VX	XD3,XD3,XA3
13062306a36Sopenharmony_ci	VERLLF	XD3,XD3,16
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	VAF	XC0,XC0,XD0		# c += d
13362306a36Sopenharmony_ci	VX	XB0,XB0,XC0		# b ^= c
13462306a36Sopenharmony_ci	VERLLF	XB0,XB0,12		# b = rotate left by 12
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	VAF	XC1,XC1,XD1
13762306a36Sopenharmony_ci	VX	XB1,XB1,XC1
13862306a36Sopenharmony_ci	VERLLF	XB1,XB1,12
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	VAF	XC2,XC2,XD2
14162306a36Sopenharmony_ci	VX	XB2,XB2,XC2
14262306a36Sopenharmony_ci	VERLLF	XB2,XB2,12
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	VAF	XC3,XC3,XD3
14562306a36Sopenharmony_ci	VX	XB3,XB3,XC3
14662306a36Sopenharmony_ci	VERLLF	XB3,XB3,12
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	VAF	XA0,XA0,XB0		# a += b
14962306a36Sopenharmony_ci	VX	XD0,XD0,XA0		# d ^= a
15062306a36Sopenharmony_ci	VERLLF	XD0,XD0,8		# d = rotate left by 8
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	VAF	XA1,XA1,XB1
15362306a36Sopenharmony_ci	VX	XD1,XD1,XA1
15462306a36Sopenharmony_ci	VERLLF	XD1,XD1,8
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	VAF	XA2,XA2,XB2
15762306a36Sopenharmony_ci	VX	XD2,XD2,XA2
15862306a36Sopenharmony_ci	VERLLF	XD2,XD2,8
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	VAF	XA3,XA3,XB3
16162306a36Sopenharmony_ci	VX	XD3,XD3,XA3
16262306a36Sopenharmony_ci	VERLLF	XD3,XD3,8
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	VAF	XC0,XC0,XD0		# c += d
16562306a36Sopenharmony_ci	VX	XB0,XB0,XC0		# b ^= c
16662306a36Sopenharmony_ci	VERLLF	XB0,XB0,7		# b = rotate left by 7
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	VAF	XC1,XC1,XD1
16962306a36Sopenharmony_ci	VX	XB1,XB1,XC1
17062306a36Sopenharmony_ci	VERLLF	XB1,XB1,7
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	VAF	XC2,XC2,XD2
17362306a36Sopenharmony_ci	VX	XB2,XB2,XC2
17462306a36Sopenharmony_ci	VERLLF	XB2,XB2,7
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	VAF	XC3,XC3,XD3
17762306a36Sopenharmony_ci	VX	XB3,XB3,XC3
17862306a36Sopenharmony_ci	VERLLF	XB3,XB3,7
17962306a36Sopenharmony_ci
# Diagonal pass: the same quarter round steps on (XA0,XB1,XC2,XD3),
# (XA1,XB2,XC3,XD0), (XA2,XB3,XC0,XD1) and (XA3,XB0,XC1,XD2).
18062306a36Sopenharmony_ci	VAF	XA0,XA0,XB1
18162306a36Sopenharmony_ci	VX	XD3,XD3,XA0
18262306a36Sopenharmony_ci	VERLLF	XD3,XD3,16
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	VAF	XA1,XA1,XB2
18562306a36Sopenharmony_ci	VX	XD0,XD0,XA1
18662306a36Sopenharmony_ci	VERLLF	XD0,XD0,16
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	VAF	XA2,XA2,XB3
18962306a36Sopenharmony_ci	VX	XD1,XD1,XA2
19062306a36Sopenharmony_ci	VERLLF	XD1,XD1,16
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	VAF	XA3,XA3,XB0
19362306a36Sopenharmony_ci	VX	XD2,XD2,XA3
19462306a36Sopenharmony_ci	VERLLF	XD2,XD2,16
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	VAF	XC2,XC2,XD3
19762306a36Sopenharmony_ci	VX	XB1,XB1,XC2
19862306a36Sopenharmony_ci	VERLLF	XB1,XB1,12
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	VAF	XC3,XC3,XD0
20162306a36Sopenharmony_ci	VX	XB2,XB2,XC3
20262306a36Sopenharmony_ci	VERLLF	XB2,XB2,12
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	VAF	XC0,XC0,XD1
20562306a36Sopenharmony_ci	VX	XB3,XB3,XC0
20662306a36Sopenharmony_ci	VERLLF	XB3,XB3,12
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	VAF	XC1,XC1,XD2
20962306a36Sopenharmony_ci	VX	XB0,XB0,XC1
21062306a36Sopenharmony_ci	VERLLF	XB0,XB0,12
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	VAF	XA0,XA0,XB1
21362306a36Sopenharmony_ci	VX	XD3,XD3,XA0
21462306a36Sopenharmony_ci	VERLLF	XD3,XD3,8
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	VAF	XA1,XA1,XB2
21762306a36Sopenharmony_ci	VX	XD0,XD0,XA1
21862306a36Sopenharmony_ci	VERLLF	XD0,XD0,8
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	VAF	XA2,XA2,XB3
22162306a36Sopenharmony_ci	VX	XD1,XD1,XA2
22262306a36Sopenharmony_ci	VERLLF	XD1,XD1,8
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	VAF	XA3,XA3,XB0
22562306a36Sopenharmony_ci	VX	XD2,XD2,XA3
22662306a36Sopenharmony_ci	VERLLF	XD2,XD2,8
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	VAF	XC2,XC2,XD3
22962306a36Sopenharmony_ci	VX	XB1,XB1,XC2
23062306a36Sopenharmony_ci	VERLLF	XB1,XB1,7
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	VAF	XC3,XC3,XD0
23362306a36Sopenharmony_ci	VX	XB2,XB2,XC3
23462306a36Sopenharmony_ci	VERLLF	XB2,XB2,7
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	VAF	XC0,XC0,XD1
23762306a36Sopenharmony_ci	VX	XB3,XB3,XC0
23862306a36Sopenharmony_ci	VERLLF	XB3,XB3,7
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	VAF	XC1,XC1,XD2
24162306a36Sopenharmony_ci	VX	XB0,XB0,XC1
24262306a36Sopenharmony_ci	VERLLF	XB0,XB0,7
24362306a36Sopenharmony_ci	brct	%r0,.Loop_4x		# decrement %r0, loop while it is nonzero
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	VAF	XD0,XD0,CTR		# add lane offsets 0..3; K3 itself is added per block below
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	VMRHF	XT0,XA0,XA1		# transpose data
24862306a36Sopenharmony_ci	VMRHF	XT1,XA2,XA3
24962306a36Sopenharmony_ci	VMRLF	XT2,XA0,XA1
25062306a36Sopenharmony_ci	VMRLF	XT3,XA2,XA3
25162306a36Sopenharmony_ci	VPDI	XA0,XT0,XT1,0b0000
25262306a36Sopenharmony_ci	VPDI	XA1,XT0,XT1,0b0101
25362306a36Sopenharmony_ci	VPDI	XA2,XT2,XT3,0b0000
25462306a36Sopenharmony_ci	VPDI	XA3,XT2,XT3,0b0101
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	VMRHF	XT0,XB0,XB1
25762306a36Sopenharmony_ci	VMRHF	XT1,XB2,XB3
25862306a36Sopenharmony_ci	VMRLF	XT2,XB0,XB1
25962306a36Sopenharmony_ci	VMRLF	XT3,XB2,XB3
26062306a36Sopenharmony_ci	VPDI	XB0,XT0,XT1,0b0000
26162306a36Sopenharmony_ci	VPDI	XB1,XT0,XT1,0b0101
26262306a36Sopenharmony_ci	VPDI	XB2,XT2,XT3,0b0000
26362306a36Sopenharmony_ci	VPDI	XB3,XT2,XT3,0b0101
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	VMRHF	XT0,XC0,XC1
26662306a36Sopenharmony_ci	VMRHF	XT1,XC2,XC3
26762306a36Sopenharmony_ci	VMRLF	XT2,XC0,XC1
26862306a36Sopenharmony_ci	VMRLF	XT3,XC2,XC3
26962306a36Sopenharmony_ci	VPDI	XC0,XT0,XT1,0b0000
27062306a36Sopenharmony_ci	VPDI	XC1,XT0,XT1,0b0101
27162306a36Sopenharmony_ci	VPDI	XC2,XT2,XT3,0b0000
27262306a36Sopenharmony_ci	VPDI	XC3,XT2,XT3,0b0101
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	VMRHF	XT0,XD0,XD1
27562306a36Sopenharmony_ci	VMRHF	XT1,XD2,XD3
27662306a36Sopenharmony_ci	VMRLF	XT2,XD0,XD1
27762306a36Sopenharmony_ci	VMRLF	XT3,XD2,XD3
27862306a36Sopenharmony_ci	VPDI	XD0,XT0,XT1,0b0000
27962306a36Sopenharmony_ci	VPDI	XD1,XT0,XT1,0b0101
28062306a36Sopenharmony_ci	VPDI	XD2,XT2,XT3,0b0000
28162306a36Sopenharmony_ci	VPDI	XD3,XT2,XT3,0b0101
28262306a36Sopenharmony_ci
# Block 0: add the input state rows, byte swap the words, xor with input.
28362306a36Sopenharmony_ci	VAF	XA0,XA0,K0
28462306a36Sopenharmony_ci	VAF	XB0,XB0,K1
28562306a36Sopenharmony_ci	VAF	XC0,XC0,K2
28662306a36Sopenharmony_ci	VAF	XD0,XD0,K3
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci	VPERM	XA0,XA0,XA0,BEPERM	# reorder bytes with the byte swap mask
28962306a36Sopenharmony_ci	VPERM	XB0,XB0,XB0,BEPERM
29062306a36Sopenharmony_ci	VPERM	XC0,XC0,XC0,BEPERM
29162306a36Sopenharmony_ci	VPERM	XD0,XD0,XD0,BEPERM
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	VLM	XT0,XT3,0,INP,0		# load 64 input bytes
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	VX	XT0,XT0,XA0
29662306a36Sopenharmony_ci	VX	XT1,XT1,XB0
29762306a36Sopenharmony_ci	VX	XT2,XT2,XC0
29862306a36Sopenharmony_ci	VX	XT3,XT3,XD0
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	VSTM	XT0,XT3,0,OUT,0		# store 64 output bytes
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	la	INP,0x40(INP)		# advance both pointers by one block
30362306a36Sopenharmony_ci	la	OUT,0x40(OUT)
30462306a36Sopenharmony_ci	aghi	LEN,-0x40		# no zero test here, see the header note
30562306a36Sopenharmony_ci
# Block 1: keystream is built in XA0..XD0 from XA1..XD1 so that the tail
# path always finds the pending keystream in XA0..XD0.
30662306a36Sopenharmony_ci	VAF	XA0,XA1,K0
30762306a36Sopenharmony_ci	VAF	XB0,XB1,K1
30862306a36Sopenharmony_ci	VAF	XC0,XC1,K2
30962306a36Sopenharmony_ci	VAF	XD0,XD1,K3
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	VPERM	XA0,XA0,XA0,BEPERM
31262306a36Sopenharmony_ci	VPERM	XB0,XB0,XB0,BEPERM
31362306a36Sopenharmony_ci	VPERM	XC0,XC0,XC0,BEPERM
31462306a36Sopenharmony_ci	VPERM	XD0,XD0,XD0,BEPERM
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	clgfi	LEN,0x40
31762306a36Sopenharmony_ci	jl	.Ltail_4x		# fewer than 64 bytes left: finish bytewise
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	VLM	XT0,XT3,0,INP,0
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	VX	XT0,XT0,XA0
32262306a36Sopenharmony_ci	VX	XT1,XT1,XB0
32362306a36Sopenharmony_ci	VX	XT2,XT2,XC0
32462306a36Sopenharmony_ci	VX	XT3,XT3,XD0
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	VSTM	XT0,XT3,0,OUT,0
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	la	INP,0x40(INP)
32962306a36Sopenharmony_ci	la	OUT,0x40(OUT)
33062306a36Sopenharmony_ci	aghi	LEN,-0x40
33162306a36Sopenharmony_ci	je	.Ldone_4x		# nothing left
33262306a36Sopenharmony_ci
# Block 2, same pattern with XA2..XD2.
33362306a36Sopenharmony_ci	VAF	XA0,XA2,K0
33462306a36Sopenharmony_ci	VAF	XB0,XB2,K1
33562306a36Sopenharmony_ci	VAF	XC0,XC2,K2
33662306a36Sopenharmony_ci	VAF	XD0,XD2,K3
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	VPERM	XA0,XA0,XA0,BEPERM
33962306a36Sopenharmony_ci	VPERM	XB0,XB0,XB0,BEPERM
34062306a36Sopenharmony_ci	VPERM	XC0,XC0,XC0,BEPERM
34162306a36Sopenharmony_ci	VPERM	XD0,XD0,XD0,BEPERM
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	clgfi	LEN,0x40
34462306a36Sopenharmony_ci	jl	.Ltail_4x
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	VLM	XT0,XT3,0,INP,0
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	VX	XT0,XT0,XA0
34962306a36Sopenharmony_ci	VX	XT1,XT1,XB0
35062306a36Sopenharmony_ci	VX	XT2,XT2,XC0
35162306a36Sopenharmony_ci	VX	XT3,XT3,XD0
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	VSTM	XT0,XT3,0,OUT,0
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	la	INP,0x40(INP)
35662306a36Sopenharmony_ci	la	OUT,0x40(OUT)
35762306a36Sopenharmony_ci	aghi	LEN,-0x40
35862306a36Sopenharmony_ci	je	.Ldone_4x
35962306a36Sopenharmony_ci
# Block 3, same pattern with XA3..XD3; no pointer update after the store.
36062306a36Sopenharmony_ci	VAF	XA0,XA3,K0
36162306a36Sopenharmony_ci	VAF	XB0,XB3,K1
36262306a36Sopenharmony_ci	VAF	XC0,XC3,K2
36362306a36Sopenharmony_ci	VAF	XD0,XD3,K3
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	VPERM	XA0,XA0,XA0,BEPERM
36662306a36Sopenharmony_ci	VPERM	XB0,XB0,XB0,BEPERM
36762306a36Sopenharmony_ci	VPERM	XC0,XC0,XC0,BEPERM
36862306a36Sopenharmony_ci	VPERM	XD0,XD0,XD0,BEPERM
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	clgfi	LEN,0x40
37162306a36Sopenharmony_ci	jl	.Ltail_4x
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	VLM	XT0,XT3,0,INP,0
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	VX	XT0,XT0,XA0
37662306a36Sopenharmony_ci	VX	XT1,XT1,XB0
37762306a36Sopenharmony_ci	VX	XT2,XT2,XC0
37862306a36Sopenharmony_ci	VX	XT3,XT3,XD0
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	VSTM	XT0,XT3,0,OUT,0
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci.Ldone_4x:
38362306a36Sopenharmony_ci	lmg	%r6,%r7,6*8(SP)		# restore %r6/%r7 saved at entry
38462306a36Sopenharmony_ci	BR_EX	%r14
38562306a36Sopenharmony_ci
# Partial last block: XA0..XD0 hold 64 bytes of keystream.  They are written
# to the scratch area at 8*8(SP) and xored into the input one byte at a time.
38662306a36Sopenharmony_ci.Ltail_4x:
38762306a36Sopenharmony_ci	VLR	XT0,XC0			# copy XC0/XD0 into XT0/XT1 for the stores below
38862306a36Sopenharmony_ci	VLR	XT1,XD0
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	VST	XA0,8*8+0x00,,SP	# keystream bytes 0x00..0x3f at 8*8(SP)
39162306a36Sopenharmony_ci	VST	XB0,8*8+0x10,,SP
39262306a36Sopenharmony_ci	VST	XT0,8*8+0x20,,SP
39362306a36Sopenharmony_ci	VST	XT1,8*8+0x30,,SP
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	lghi	%r1,0			# %r1 = byte index
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci.Loop_tail_4x:
39862306a36Sopenharmony_ci	llgc	%r5,0(%r1,INP)		# %r5 = input byte (KEY is not read after setup)
39962306a36Sopenharmony_ci	llgc	%r6,8*8(%r1,SP)		# %r6 = keystream byte (COUNTER is not read after setup)
40062306a36Sopenharmony_ci	xr	%r6,%r5
40162306a36Sopenharmony_ci	stc	%r6,0(%r1,OUT)		# store output byte
40262306a36Sopenharmony_ci	la	%r1,1(%r1)
40362306a36Sopenharmony_ci	brct	LEN,.Loop_tail_4x	# one byte per pass until LEN counts down to zero
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	lmg	%r6,%r7,6*8(SP)		# restore %r6/%r7 saved at entry
40662306a36Sopenharmony_ci	BR_EX	%r14
40762306a36Sopenharmony_ciSYM_FUNC_END(chacha20_vx_4x)
40862306a36Sopenharmony_ci
/*
 * Drop the chacha20_vx_4x names that are defined again, partly with
 * different registers, for chacha20_vx.
 * NOTE(review): CTR and the XA*, XB*, XC*, XD*, XT* names are not undefined
 * here and stay visible to the code that follows.
 */
40962306a36Sopenharmony_ci#undef	OUT
41062306a36Sopenharmony_ci#undef	INP
41162306a36Sopenharmony_ci#undef	LEN
41262306a36Sopenharmony_ci#undef	KEY
41362306a36Sopenharmony_ci#undef	COUNTER
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci#undef BEPERM
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci#undef K0
41862306a36Sopenharmony_ci#undef K1
41962306a36Sopenharmony_ci#undef K2
42062306a36Sopenharmony_ci#undef K3
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci#############################################################################
42462306a36Sopenharmony_ci# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
42562306a36Sopenharmony_ci#		   const u32 *key, const u32 *counter)
42662306a36Sopenharmony_ci
/* Argument registers, in prototype order: out, inp, len, key, counter. */
42762306a36Sopenharmony_ci#define	OUT		%r2
42862306a36Sopenharmony_ci#define	INP		%r3
42962306a36Sopenharmony_ci#define	LEN		%r4
43062306a36Sopenharmony_ci#define	KEY		%r5
43162306a36Sopenharmony_ci#define	COUNTER		%r6
43262306a36Sopenharmony_ci
/* Byte swap mask from the sigma table. */
43362306a36Sopenharmony_ci#define BEPERM		%v31
43462306a36Sopenharmony_ci
/*
 * Input state rows: K0 = sigma, K1/K2 = key, K3 = counter.
 * K0 shares %v27 with T0 below, so K0 has to be reloaded from the sigma
 * table once T0 has been used as a temporary.
 */
43562306a36Sopenharmony_ci#define K0		%v27
43662306a36Sopenharmony_ci#define K1		%v24
43762306a36Sopenharmony_ci#define K2		%v25
43862306a36Sopenharmony_ci#define K3		%v26
43962306a36Sopenharmony_ci
/* Six working states; An..Dn are the four rows of state n (n = 0..5). */
44062306a36Sopenharmony_ci#define A0		%v0
44162306a36Sopenharmony_ci#define B0		%v1
44262306a36Sopenharmony_ci#define C0		%v2
44362306a36Sopenharmony_ci#define D0		%v3
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci#define A1		%v4
44662306a36Sopenharmony_ci#define B1		%v5
44762306a36Sopenharmony_ci#define C1		%v6
44862306a36Sopenharmony_ci#define D1		%v7
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci#define A2		%v8
45162306a36Sopenharmony_ci#define B2		%v9
45262306a36Sopenharmony_ci#define C2		%v10
45362306a36Sopenharmony_ci#define D2		%v11
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci#define A3		%v12
45662306a36Sopenharmony_ci#define B3		%v13
45762306a36Sopenharmony_ci#define C3		%v14
45862306a36Sopenharmony_ci#define D3		%v15
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci#define A4		%v16
46162306a36Sopenharmony_ci#define B4		%v17
46262306a36Sopenharmony_ci#define C4		%v18
46362306a36Sopenharmony_ci#define D4		%v19
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci#define A5		%v20
46662306a36Sopenharmony_ci#define B5		%v21
46762306a36Sopenharmony_ci#define C5		%v22
46862306a36Sopenharmony_ci#define D5		%v23
46962306a36Sopenharmony_ci
/*
 * Temporaries.  T1..T3 first receive the increment rows 1, 2, 3 of the sigma
 * table and are then overwritten with the counter rows K3+1..K3+3.
 */
47062306a36Sopenharmony_ci#define T0		%v27
47162306a36Sopenharmony_ci#define T1		%v28
47262306a36Sopenharmony_ci#define T2		%v29
47362306a36Sopenharmony_ci#define T3		%v30
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ciSYM_FUNC_START(chacha20_vx)
47662306a36Sopenharmony_ci	clgfi	LEN,256
47762306a36Sopenharmony_ci	jle	chacha20_vx_4x
47862306a36Sopenharmony_ci	stmg	%r6,%r7,6*8(SP)
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci	lghi	%r1,-FRAME
48162306a36Sopenharmony_ci	lgr	%r0,SP
48262306a36Sopenharmony_ci	la	SP,0(%r1,SP)
48362306a36Sopenharmony_ci	stg	%r0,0(SP)		# back-chain
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	larl	%r7,sigma
48662306a36Sopenharmony_ci	lhi	%r0,10
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	VLM	K1,K2,0,KEY,0		# load key
48962306a36Sopenharmony_ci	VL	K3,0,,COUNTER		# load counter
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci.Loop_outer_vx:
49462306a36Sopenharmony_ci	VLR	A0,K0
49562306a36Sopenharmony_ci	VLR	B0,K1
49662306a36Sopenharmony_ci	VLR	A1,K0
49762306a36Sopenharmony_ci	VLR	B1,K1
49862306a36Sopenharmony_ci	VLR	A2,K0
49962306a36Sopenharmony_ci	VLR	B2,K1
50062306a36Sopenharmony_ci	VLR	A3,K0
50162306a36Sopenharmony_ci	VLR	B3,K1
50262306a36Sopenharmony_ci	VLR	A4,K0
50362306a36Sopenharmony_ci	VLR	B4,K1
50462306a36Sopenharmony_ci	VLR	A5,K0
50562306a36Sopenharmony_ci	VLR	B5,K1
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	VLR	D0,K3
50862306a36Sopenharmony_ci	VAF	D1,K3,T1		# K[3]+1
50962306a36Sopenharmony_ci	VAF	D2,K3,T2		# K[3]+2
51062306a36Sopenharmony_ci	VAF	D3,K3,T3		# K[3]+3
51162306a36Sopenharmony_ci	VAF	D4,D2,T2		# K[3]+4
51262306a36Sopenharmony_ci	VAF	D5,D2,T3		# K[3]+5
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	VLR	C0,K2
51562306a36Sopenharmony_ci	VLR	C1,K2
51662306a36Sopenharmony_ci	VLR	C2,K2
51762306a36Sopenharmony_ci	VLR	C3,K2
51862306a36Sopenharmony_ci	VLR	C4,K2
51962306a36Sopenharmony_ci	VLR	C5,K2
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	VLR	T1,D1
52262306a36Sopenharmony_ci	VLR	T2,D2
52362306a36Sopenharmony_ci	VLR	T3,D3
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci.Loop_vx:
52662306a36Sopenharmony_ci	VAF	A0,A0,B0
52762306a36Sopenharmony_ci	VAF	A1,A1,B1
52862306a36Sopenharmony_ci	VAF	A2,A2,B2
52962306a36Sopenharmony_ci	VAF	A3,A3,B3
53062306a36Sopenharmony_ci	VAF	A4,A4,B4
53162306a36Sopenharmony_ci	VAF	A5,A5,B5
53262306a36Sopenharmony_ci	VX	D0,D0,A0
53362306a36Sopenharmony_ci	VX	D1,D1,A1
53462306a36Sopenharmony_ci	VX	D2,D2,A2
53562306a36Sopenharmony_ci	VX	D3,D3,A3
53662306a36Sopenharmony_ci	VX	D4,D4,A4
53762306a36Sopenharmony_ci	VX	D5,D5,A5
53862306a36Sopenharmony_ci	VERLLF	D0,D0,16
53962306a36Sopenharmony_ci	VERLLF	D1,D1,16
54062306a36Sopenharmony_ci	VERLLF	D2,D2,16
54162306a36Sopenharmony_ci	VERLLF	D3,D3,16
54262306a36Sopenharmony_ci	VERLLF	D4,D4,16
54362306a36Sopenharmony_ci	VERLLF	D5,D5,16
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	VAF	C0,C0,D0
54662306a36Sopenharmony_ci	VAF	C1,C1,D1
54762306a36Sopenharmony_ci	VAF	C2,C2,D2
54862306a36Sopenharmony_ci	VAF	C3,C3,D3
54962306a36Sopenharmony_ci	VAF	C4,C4,D4
55062306a36Sopenharmony_ci	VAF	C5,C5,D5
55162306a36Sopenharmony_ci	VX	B0,B0,C0
55262306a36Sopenharmony_ci	VX	B1,B1,C1
55362306a36Sopenharmony_ci	VX	B2,B2,C2
55462306a36Sopenharmony_ci	VX	B3,B3,C3
55562306a36Sopenharmony_ci	VX	B4,B4,C4
55662306a36Sopenharmony_ci	VX	B5,B5,C5
55762306a36Sopenharmony_ci	VERLLF	B0,B0,12
55862306a36Sopenharmony_ci	VERLLF	B1,B1,12
55962306a36Sopenharmony_ci	VERLLF	B2,B2,12
56062306a36Sopenharmony_ci	VERLLF	B3,B3,12
56162306a36Sopenharmony_ci	VERLLF	B4,B4,12
56262306a36Sopenharmony_ci	VERLLF	B5,B5,12
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	VAF	A0,A0,B0
56562306a36Sopenharmony_ci	VAF	A1,A1,B1
56662306a36Sopenharmony_ci	VAF	A2,A2,B2
56762306a36Sopenharmony_ci	VAF	A3,A3,B3
56862306a36Sopenharmony_ci	VAF	A4,A4,B4
56962306a36Sopenharmony_ci	VAF	A5,A5,B5
57062306a36Sopenharmony_ci	VX	D0,D0,A0
57162306a36Sopenharmony_ci	VX	D1,D1,A1
57262306a36Sopenharmony_ci	VX	D2,D2,A2
57362306a36Sopenharmony_ci	VX	D3,D3,A3
57462306a36Sopenharmony_ci	VX	D4,D4,A4
57562306a36Sopenharmony_ci	VX	D5,D5,A5
57662306a36Sopenharmony_ci	VERLLF	D0,D0,8
57762306a36Sopenharmony_ci	VERLLF	D1,D1,8
57862306a36Sopenharmony_ci	VERLLF	D2,D2,8
57962306a36Sopenharmony_ci	VERLLF	D3,D3,8
58062306a36Sopenharmony_ci	VERLLF	D4,D4,8
58162306a36Sopenharmony_ci	VERLLF	D5,D5,8
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	VAF	C0,C0,D0
58462306a36Sopenharmony_ci	VAF	C1,C1,D1
58562306a36Sopenharmony_ci	VAF	C2,C2,D2
58662306a36Sopenharmony_ci	VAF	C3,C3,D3
58762306a36Sopenharmony_ci	VAF	C4,C4,D4
58862306a36Sopenharmony_ci	VAF	C5,C5,D5
58962306a36Sopenharmony_ci	VX	B0,B0,C0
59062306a36Sopenharmony_ci	VX	B1,B1,C1
59162306a36Sopenharmony_ci	VX	B2,B2,C2
59262306a36Sopenharmony_ci	VX	B3,B3,C3
59362306a36Sopenharmony_ci	VX	B4,B4,C4
59462306a36Sopenharmony_ci	VX	B5,B5,C5
59562306a36Sopenharmony_ci	VERLLF	B0,B0,7
59662306a36Sopenharmony_ci	VERLLF	B1,B1,7
59762306a36Sopenharmony_ci	VERLLF	B2,B2,7
59862306a36Sopenharmony_ci	VERLLF	B3,B3,7
59962306a36Sopenharmony_ci	VERLLF	B4,B4,7
60062306a36Sopenharmony_ci	VERLLF	B5,B5,7
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	VSLDB	C0,C0,C0,8
60362306a36Sopenharmony_ci	VSLDB	C1,C1,C1,8
60462306a36Sopenharmony_ci	VSLDB	C2,C2,C2,8
60562306a36Sopenharmony_ci	VSLDB	C3,C3,C3,8
60662306a36Sopenharmony_ci	VSLDB	C4,C4,C4,8
60762306a36Sopenharmony_ci	VSLDB	C5,C5,C5,8
60862306a36Sopenharmony_ci	VSLDB	B0,B0,B0,4
60962306a36Sopenharmony_ci	VSLDB	B1,B1,B1,4
61062306a36Sopenharmony_ci	VSLDB	B2,B2,B2,4
61162306a36Sopenharmony_ci	VSLDB	B3,B3,B3,4
61262306a36Sopenharmony_ci	VSLDB	B4,B4,B4,4
61362306a36Sopenharmony_ci	VSLDB	B5,B5,B5,4
61462306a36Sopenharmony_ci	VSLDB	D0,D0,D0,12
61562306a36Sopenharmony_ci	VSLDB	D1,D1,D1,12
61662306a36Sopenharmony_ci	VSLDB	D2,D2,D2,12
61762306a36Sopenharmony_ci	VSLDB	D3,D3,D3,12
61862306a36Sopenharmony_ci	VSLDB	D4,D4,D4,12
61962306a36Sopenharmony_ci	VSLDB	D5,D5,D5,12
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	VAF	A0,A0,B0
62262306a36Sopenharmony_ci	VAF	A1,A1,B1
62362306a36Sopenharmony_ci	VAF	A2,A2,B2
62462306a36Sopenharmony_ci	VAF	A3,A3,B3
62562306a36Sopenharmony_ci	VAF	A4,A4,B4
62662306a36Sopenharmony_ci	VAF	A5,A5,B5
62762306a36Sopenharmony_ci	VX	D0,D0,A0
62862306a36Sopenharmony_ci	VX	D1,D1,A1
62962306a36Sopenharmony_ci	VX	D2,D2,A2
63062306a36Sopenharmony_ci	VX	D3,D3,A3
63162306a36Sopenharmony_ci	VX	D4,D4,A4
63262306a36Sopenharmony_ci	VX	D5,D5,A5
63362306a36Sopenharmony_ci	VERLLF	D0,D0,16
63462306a36Sopenharmony_ci	VERLLF	D1,D1,16
63562306a36Sopenharmony_ci	VERLLF	D2,D2,16
63662306a36Sopenharmony_ci	VERLLF	D3,D3,16
63762306a36Sopenharmony_ci	VERLLF	D4,D4,16
63862306a36Sopenharmony_ci	VERLLF	D5,D5,16
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci	VAF	C0,C0,D0
64162306a36Sopenharmony_ci	VAF	C1,C1,D1
64262306a36Sopenharmony_ci	VAF	C2,C2,D2
64362306a36Sopenharmony_ci	VAF	C3,C3,D3
64462306a36Sopenharmony_ci	VAF	C4,C4,D4
64562306a36Sopenharmony_ci	VAF	C5,C5,D5
64662306a36Sopenharmony_ci	VX	B0,B0,C0
64762306a36Sopenharmony_ci	VX	B1,B1,C1
64862306a36Sopenharmony_ci	VX	B2,B2,C2
64962306a36Sopenharmony_ci	VX	B3,B3,C3
65062306a36Sopenharmony_ci	VX	B4,B4,C4
65162306a36Sopenharmony_ci	VX	B5,B5,C5
65262306a36Sopenharmony_ci	VERLLF	B0,B0,12
65362306a36Sopenharmony_ci	VERLLF	B1,B1,12
65462306a36Sopenharmony_ci	VERLLF	B2,B2,12
65562306a36Sopenharmony_ci	VERLLF	B3,B3,12
65662306a36Sopenharmony_ci	VERLLF	B4,B4,12
65762306a36Sopenharmony_ci	VERLLF	B5,B5,12
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	VAF	A0,A0,B0
66062306a36Sopenharmony_ci	VAF	A1,A1,B1
66162306a36Sopenharmony_ci	VAF	A2,A2,B2
66262306a36Sopenharmony_ci	VAF	A3,A3,B3
66362306a36Sopenharmony_ci	VAF	A4,A4,B4
66462306a36Sopenharmony_ci	VAF	A5,A5,B5
66562306a36Sopenharmony_ci	VX	D0,D0,A0
66662306a36Sopenharmony_ci	VX	D1,D1,A1
66762306a36Sopenharmony_ci	VX	D2,D2,A2
66862306a36Sopenharmony_ci	VX	D3,D3,A3
66962306a36Sopenharmony_ci	VX	D4,D4,A4
67062306a36Sopenharmony_ci	VX	D5,D5,A5
67162306a36Sopenharmony_ci	VERLLF	D0,D0,8
67262306a36Sopenharmony_ci	VERLLF	D1,D1,8
67362306a36Sopenharmony_ci	VERLLF	D2,D2,8
67462306a36Sopenharmony_ci	VERLLF	D3,D3,8
67562306a36Sopenharmony_ci	VERLLF	D4,D4,8
67662306a36Sopenharmony_ci	VERLLF	D5,D5,8
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	VAF	C0,C0,D0
67962306a36Sopenharmony_ci	VAF	C1,C1,D1
68062306a36Sopenharmony_ci	VAF	C2,C2,D2
68162306a36Sopenharmony_ci	VAF	C3,C3,D3
68262306a36Sopenharmony_ci	VAF	C4,C4,D4
68362306a36Sopenharmony_ci	VAF	C5,C5,D5
68462306a36Sopenharmony_ci	VX	B0,B0,C0
68562306a36Sopenharmony_ci	VX	B1,B1,C1
68662306a36Sopenharmony_ci	VX	B2,B2,C2
68762306a36Sopenharmony_ci	VX	B3,B3,C3
68862306a36Sopenharmony_ci	VX	B4,B4,C4
68962306a36Sopenharmony_ci	VX	B5,B5,C5
69062306a36Sopenharmony_ci	VERLLF	B0,B0,7
69162306a36Sopenharmony_ci	VERLLF	B1,B1,7
69262306a36Sopenharmony_ci	VERLLF	B2,B2,7
69362306a36Sopenharmony_ci	VERLLF	B3,B3,7
69462306a36Sopenharmony_ci	VERLLF	B4,B4,7
69562306a36Sopenharmony_ci	VERLLF	B5,B5,7
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	VSLDB	C0,C0,C0,8
69862306a36Sopenharmony_ci	VSLDB	C1,C1,C1,8
69962306a36Sopenharmony_ci	VSLDB	C2,C2,C2,8
70062306a36Sopenharmony_ci	VSLDB	C3,C3,C3,8
70162306a36Sopenharmony_ci	VSLDB	C4,C4,C4,8
70262306a36Sopenharmony_ci	VSLDB	C5,C5,C5,8
70362306a36Sopenharmony_ci	VSLDB	B0,B0,B0,12
70462306a36Sopenharmony_ci	VSLDB	B1,B1,B1,12
70562306a36Sopenharmony_ci	VSLDB	B2,B2,B2,12
70662306a36Sopenharmony_ci	VSLDB	B3,B3,B3,12
70762306a36Sopenharmony_ci	VSLDB	B4,B4,B4,12
70862306a36Sopenharmony_ci	VSLDB	B5,B5,B5,12
70962306a36Sopenharmony_ci	VSLDB	D0,D0,D0,4
71062306a36Sopenharmony_ci	VSLDB	D1,D1,D1,4
71162306a36Sopenharmony_ci	VSLDB	D2,D2,D2,4
71262306a36Sopenharmony_ci	VSLDB	D3,D3,D3,4
71362306a36Sopenharmony_ci	VSLDB	D4,D4,D4,4
71462306a36Sopenharmony_ci	VSLDB	D5,D5,D5,4
71562306a36Sopenharmony_ci	brct	%r0,.Loop_vx
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	VAF	A0,A0,K0
71862306a36Sopenharmony_ci	VAF	B0,B0,K1
71962306a36Sopenharmony_ci	VAF	C0,C0,K2
72062306a36Sopenharmony_ci	VAF	D0,D0,K3
72162306a36Sopenharmony_ci	VAF	A1,A1,K0
72262306a36Sopenharmony_ci	VAF	D1,D1,T1		# +K[3]+1
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	VPERM	A0,A0,A0,BEPERM
72562306a36Sopenharmony_ci	VPERM	B0,B0,B0,BEPERM
72662306a36Sopenharmony_ci	VPERM	C0,C0,C0,BEPERM
72762306a36Sopenharmony_ci	VPERM	D0,D0,D0,BEPERM
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	clgfi	LEN,0x40
73062306a36Sopenharmony_ci	jl	.Ltail_vx
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	VAF	D2,D2,T2		# +K[3]+2
73362306a36Sopenharmony_ci	VAF	D3,D3,T3		# +K[3]+3
73462306a36Sopenharmony_ci	VLM	T0,T3,0,INP,0
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	VX	A0,A0,T0
73762306a36Sopenharmony_ci	VX	B0,B0,T1
73862306a36Sopenharmony_ci	VX	C0,C0,T2
73962306a36Sopenharmony_ci	VX	D0,D0,T3
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	VLM	K0,T3,0,%r7,4		# re-load sigma and increments
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	la	INP,0x40(INP)
74662306a36Sopenharmony_ci	la	OUT,0x40(OUT)
74762306a36Sopenharmony_ci	aghi	LEN,-0x40
74862306a36Sopenharmony_ci	je	.Ldone_vx
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	VAF	B1,B1,K1
75162306a36Sopenharmony_ci	VAF	C1,C1,K2
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	VPERM	A0,A1,A1,BEPERM
75462306a36Sopenharmony_ci	VPERM	B0,B1,B1,BEPERM
75562306a36Sopenharmony_ci	VPERM	C0,C1,C1,BEPERM
75662306a36Sopenharmony_ci	VPERM	D0,D1,D1,BEPERM
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci	clgfi	LEN,0x40
75962306a36Sopenharmony_ci	jl	.Ltail_vx
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	VLM	A1,D1,0,INP,0
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	VX	A0,A0,A1
76462306a36Sopenharmony_ci	VX	B0,B0,B1
76562306a36Sopenharmony_ci	VX	C0,C0,C1
76662306a36Sopenharmony_ci	VX	D0,D0,D1
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	la	INP,0x40(INP)
77162306a36Sopenharmony_ci	la	OUT,0x40(OUT)
77262306a36Sopenharmony_ci	aghi	LEN,-0x40
77362306a36Sopenharmony_ci	je	.Ldone_vx
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	VAF	A2,A2,K0
77662306a36Sopenharmony_ci	VAF	B2,B2,K1
77762306a36Sopenharmony_ci	VAF	C2,C2,K2
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	VPERM	A0,A2,A2,BEPERM
78062306a36Sopenharmony_ci	VPERM	B0,B2,B2,BEPERM
78162306a36Sopenharmony_ci	VPERM	C0,C2,C2,BEPERM
78262306a36Sopenharmony_ci	VPERM	D0,D2,D2,BEPERM
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	clgfi	LEN,0x40
78562306a36Sopenharmony_ci	jl	.Ltail_vx
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	VLM	A1,D1,0,INP,0
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	VX	A0,A0,A1
79062306a36Sopenharmony_ci	VX	B0,B0,B1
79162306a36Sopenharmony_ci	VX	C0,C0,C1
79262306a36Sopenharmony_ci	VX	D0,D0,D1
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	la	INP,0x40(INP)
79762306a36Sopenharmony_ci	la	OUT,0x40(OUT)
79862306a36Sopenharmony_ci	aghi	LEN,-0x40
79962306a36Sopenharmony_ci	je	.Ldone_vx
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	VAF	A3,A3,K0
80262306a36Sopenharmony_ci	VAF	B3,B3,K1
80362306a36Sopenharmony_ci	VAF	C3,C3,K2
80462306a36Sopenharmony_ci	VAF	D2,K3,T3		# K[3]+3
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	VPERM	A0,A3,A3,BEPERM
80762306a36Sopenharmony_ci	VPERM	B0,B3,B3,BEPERM
80862306a36Sopenharmony_ci	VPERM	C0,C3,C3,BEPERM
80962306a36Sopenharmony_ci	VPERM	D0,D3,D3,BEPERM
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	clgfi	LEN,0x40
81262306a36Sopenharmony_ci	jl	.Ltail_vx
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci	VAF	D3,D2,T1		# K[3]+4
81562306a36Sopenharmony_ci	VLM	A1,D1,0,INP,0
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	VX	A0,A0,A1
81862306a36Sopenharmony_ci	VX	B0,B0,B1
81962306a36Sopenharmony_ci	VX	C0,C0,C1
82062306a36Sopenharmony_ci	VX	D0,D0,D1
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	la	INP,0x40(INP)
82562306a36Sopenharmony_ci	la	OUT,0x40(OUT)
82662306a36Sopenharmony_ci	aghi	LEN,-0x40
82762306a36Sopenharmony_ci	je	.Ldone_vx
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	VAF	A4,A4,K0
83062306a36Sopenharmony_ci	VAF	B4,B4,K1
83162306a36Sopenharmony_ci	VAF	C4,C4,K2
83262306a36Sopenharmony_ci	VAF	D4,D4,D3		# +K[3]+4
83362306a36Sopenharmony_ci	VAF	D3,D3,T1		# K[3]+5
83462306a36Sopenharmony_ci	VAF	K3,D2,T3		# K[3]+=6
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	VPERM	A0,A4,A4,BEPERM
83762306a36Sopenharmony_ci	VPERM	B0,B4,B4,BEPERM
83862306a36Sopenharmony_ci	VPERM	C0,C4,C4,BEPERM
83962306a36Sopenharmony_ci	VPERM	D0,D4,D4,BEPERM
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	clgfi	LEN,0x40
84262306a36Sopenharmony_ci	jl	.Ltail_vx
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	VLM	A1,D1,0,INP,0
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	VX	A0,A0,A1
84762306a36Sopenharmony_ci	VX	B0,B0,B1
84862306a36Sopenharmony_ci	VX	C0,C0,C1
84962306a36Sopenharmony_ci	VX	D0,D0,D1
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	la	INP,0x40(INP)
85462306a36Sopenharmony_ci	la	OUT,0x40(OUT)
85562306a36Sopenharmony_ci	aghi	LEN,-0x40
85662306a36Sopenharmony_ci	je	.Ldone_vx
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	VAF	A5,A5,K0
85962306a36Sopenharmony_ci	VAF	B5,B5,K1
86062306a36Sopenharmony_ci	VAF	C5,C5,K2
86162306a36Sopenharmony_ci	VAF	D5,D5,D3		# +K[3]+5
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	VPERM	A0,A5,A5,BEPERM
86462306a36Sopenharmony_ci	VPERM	B0,B5,B5,BEPERM
86562306a36Sopenharmony_ci	VPERM	C0,C5,C5,BEPERM
86662306a36Sopenharmony_ci	VPERM	D0,D5,D5,BEPERM
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci	clgfi	LEN,0x40
86962306a36Sopenharmony_ci	jl	.Ltail_vx
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	VLM	A1,D1,0,INP,0
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	VX	A0,A0,A1
87462306a36Sopenharmony_ci	VX	B0,B0,B1
87562306a36Sopenharmony_ci	VX	C0,C0,C1
87662306a36Sopenharmony_ci	VX	D0,D0,D1
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	VSTM	A0,D0,0,OUT,0
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	la	INP,0x40(INP)
88162306a36Sopenharmony_ci	la	OUT,0x40(OUT)
88262306a36Sopenharmony_ci	lhi	%r0,10
88362306a36Sopenharmony_ci	aghi	LEN,-0x40
88462306a36Sopenharmony_ci	jne	.Loop_outer_vx
88562306a36Sopenharmony_ci
# Common exit path. Reached from the je .Ldone_vx checks once the
# aghi LEN,-0x40 step has brought LEN down to zero, and by fall-through
# when the jne .Loop_outer_vx at the end of the outer loop is not taken.
88662306a36Sopenharmony_ci.Ldone_vx:
88762306a36Sopenharmony_ci	lmg	%r6,%r7,FRAME+6*8(SP)	# reload %r6/%r7 from SP+FRAME+48 (presumably saved there by the prologue - confirm)
88862306a36Sopenharmony_ci	la	SP,FRAME(SP)		# release the FRAME-byte stack frame
88962306a36Sopenharmony_ci	BR_EX	%r14			# branch through %r14 using the BR_EX macro (presumably the return address)
89062306a36Sopenharmony_ci
# Tail path for a final partial block. Entered through the
# clgfi LEN,0x40 / jl .Ltail_vx checks, so fewer than 0x40 bytes remain.
# At each jump site visible in this part of the function, an earlier
# je .Ldone_vx has already taken the LEN == 0 case, so the brct below
# starts from a non-zero count (jump sites further up are not shown here).
# A0..D0 hold the 0x40-byte block that the full-size path would combine
# with the input using VX; here that block is spilled to the stack and
# applied one byte at a time instead.
89162306a36Sopenharmony_ci.Ltail_vx:
89262306a36Sopenharmony_ci	VSTM	A0,D0,8*8,SP,3		# spill A0..D0 to SP+64; trailing 3 is presumably the alignment hint - confirm in asm/vx-insn.h
89362306a36Sopenharmony_ci	lghi	%r1,0			# %r1 = byte index, starting at 0
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci.Loop_tail_vx:
89662306a36Sopenharmony_ci	llgc	%r5,0(%r1,INP)		# %r5 = input byte at INP + %r1, zero-extended
89762306a36Sopenharmony_ci	llgc	%r6,8*8(%r1,SP)		# %r6 = spilled byte at SP + 64 + %r1, zero-extended
89862306a36Sopenharmony_ci	xr	%r6,%r5			# XOR the input byte into the spilled byte
89962306a36Sopenharmony_ci	stc	%r6,0(%r1,OUT)		# store the low byte of %r6 to OUT + %r1
90062306a36Sopenharmony_ci	la	%r1,1(%r1)		# advance the byte index
90162306a36Sopenharmony_ci	brct	LEN,.Loop_tail_vx	# decrement LEN and repeat while it is non-zero
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	lmg	%r6,%r7,FRAME+6*8(SP)	# %r6 was scratch in the loop; reload %r6/%r7 as on the .Ldone_vx path
90462306a36Sopenharmony_ci	la	SP,FRAME(SP)		# release the FRAME-byte stack frame
90562306a36Sopenharmony_ci	BR_EX	%r14			# branch through %r14 using the BR_EX macro (presumably the return address)
90662306a36Sopenharmony_ciSYM_FUNC_END(chacha20_vx)
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci.previous
909