/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Lookup helpers shared by ppc_encrypt_block and ppc_decrypt_block.
 *
 * "bpos" selects one byte of the 32 bit value in "in": 0 is the least
 * significant byte, 3 the most significant one. The rotate count handed to
 * rlwimi is derived from bpos so that the selected byte lands exactly in the
 * bit window that rlwimi inserts into the pointer register.
 */

/*
 * EAD: insert byte bpos of "in" into bits 20-27 of rT0, which amounts to
 * rT0 = (rT0 & ~0xff0) | (byte << 4). The upper 20 and the lower 4 bits of
 * rT0 are preserved, so rT0 addresses one of 256 slots that lie 16 bytes
 * apart.
 */
#define	EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD: insert byte bpos of "in" into bits 24-31 of rT1, which amounts to
 * rT1 = (rT1 & ~0xff) | byte. The upper 24 bits of rT1 are preserved, so rT1
 * addresses one of 256 single byte slots.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/*
 * LWH: load the word at off(rT0) with evlwwsplat, which places it in both
 * 32 bit halves of the 64 bit register "out". A later LWL to the same
 * register supplies the final low half.
 */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

/*
 * LWL: load the word at off(rT0) with a plain lwz. The code below pairs it
 * with a preceding LWH on the same register and therefore assumes that lwz
 * leaves the upper half of "out" untouched - TODO confirm against the core
 * manual.
 */
#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

/* LBZ: load the byte at off(tab), zero extended, into "out". */
#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/* LAH: select the slot for byte bpos of "in" (EAD), then LWH from it. */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

/* LAL: select the slot for byte bpos of "in" (EAD), then LWL from it. */
#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

/*
 * LAE: select the slot for byte bpos of "in" (EAD), then load the single
 * byte stored at offset 8 of that slot.
 */
#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

/* LBE: byte load from offset 8 of the slot rT0 already points to. */
#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

/* LAD: select the byte slot for byte bpos of "in" (DAD), then load it. */
#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

/* LBD: byte load from the slot rT1 already points to. */
#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr.
 *
 * Expected on entry:
 *   rD0-rD3  input data, pre-xored by the caller with the first 4 words of
 *            the encryption key
 *   rT0      pointer into the lookup table; bits 20-27 are rewritten by EAD
 *            for every lookup, all other bits are used as set up by the
 *            caller
 *   rKP      key pointer; key words are read starting at 16(rKP)
 *   CTR      number of passes through the main loop; each pass handles two
 *            rounds and advances rKP by 32 bytes
 *
 * After the loop one more table based round and the final byte based round
 * are executed.
 *
 * On return the output is stored in rD0-rD3 and rW0-rW3 and the caller must
 * execute a final xor on the output registers: rW0-rW3 hold the words that
 * were assembled from the final round byte lookups, rD0-rD3 hold the four
 * key words loaded from 32(rKP)-44(rKP).
 *
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 */
_GLOBAL(ppc_encrypt_block)
	/*
	 * Prime the software pipeline: issue the first three lookups of the
	 * first round. Every table based round below ends by issuing the same
	 * three lookups (rW4, rW6, rW3) for the round that follows it.
	 */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first round of this pass: lookups on the current rD0-rD3 */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/*
	 * Fold the lookups into the key words at 16(rKP) and 24(rKP). rD1 and
	 * rD3 each receive two new words; evmergehi copies their high words
	 * into the low words of rD0 and rD2. The interleaved EAD/LWL pairs
	 * finish this round's lookups (rW7, rW1) on the old rD3/rD2, the
	 * EAD/LWH pairs start the next round on the new rD1/rD0.
	 */
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second round of this pass: same lookup pattern */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/* same folding as above, with the key words at 32(rKP) and 40(rKP) */
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* step over the 32 key bytes consumed by this pass, loop on CTR */
	addi		rKP,rKP,32
	bdnz		ppc_encrypt_block_loop
	/* last table based round: lookups on the current rD0-rD3 */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/*
	 * Fold into the key words at 16(rKP) and 24(rKP) as before. Instead of
	 * word lookups for a following table round, the interleaved EAD/LBE
	 * pairs now start the byte lookups of the final round.
	 */
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBE(rW1)
	/*
	 * Final round: one byte lookup per state byte. The first byte of each
	 * output word is loaded directly into rW0-rW3, the other three are
	 * loaded into rW4-rW7 and merged in with rlwimi (shift 8 -> bits 16-23,
	 * shift 16 -> bits 8-15, shift 24 -> bits 0-7).
	 *
	 * NOTE(review): LAE(rW1, rD0, 0) repeats the EAD(rD0, 0)/LBE(rW1)
	 * lookup issued just above; rD0 is not modified in between, so both
	 * load the same byte. Left as is to keep the instruction schedule.
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/*
	 * Each rDn is overwritten with a key word from 32(rKP)-44(rKP) only
	 * after its last use as lookup input above.
	 */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr.
 *
 * Expected on entry:
 *   rD0-rD3  input data, pre-xored by the caller with the first 4 words of
 *            the key schedule
 *   rT0      pointer into the word lookup table; bits 20-27 are rewritten
 *            by EAD for every lookup, all other bits are used as set up by
 *            the caller
 *   rT1      pointer into the byte lookup table used by the final round;
 *            bits 24-31 are rewritten by DAD for every lookup, all other
 *            bits are used as set up by the caller
 *   rKP      key pointer; key words are read starting at 16(rKP)
 *   CTR      number of passes through the main loop; each pass handles two
 *            rounds and advances rKP by 32 bytes
 *
 * After the loop one more table based round and the final byte based round
 * are executed.
 *
 * On return the output is stored in rD0-rD3 and rW0-rW3 and the caller must
 * execute a final xor on the output registers: rW0-rW3 hold the words that
 * were assembled from the final round byte lookups, rD0-rD3 hold the four
 * key words loaded from 32(rKP)-44(rKP).
 *
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 */
_GLOBAL(ppc_decrypt_block)
	/*
	 * Prime the software pipeline: issue the first three lookups of the
	 * first round. Every table based round below ends by issuing the same
	 * three lookups (rW0, rW6, rW3) for the round that follows it.
	 */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first round of this pass: lookups on the current rD0-rD3 */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/*
	 * Fold the lookups into the key words at 16(rKP) and 24(rKP). rD1 and
	 * rD3 each receive two new words; evmergehi copies their high words
	 * into the low words of rD0 and rD2. The interleaved EAD/LWL pairs
	 * finish this round's lookups (rW1, rW5) on the old rD0/rD2, the
	 * EAD/LWH pairs start the next round on the new rD1/rD0.
	 */
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second round of this pass: same lookup pattern */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/* same folding as above, with the key words at 32(rKP) and 40(rKP) */
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* step over the 32 key bytes consumed by this pass, loop on CTR */
	addi		rKP,rKP,32
	bdnz		ppc_decrypt_block_loop
	/* last table based round: lookups on the current rD0-rD3 */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/*
	 * Fold into the key words at 16(rKP) and 24(rKP) as before. Instead of
	 * word lookups for a following table round, the interleaved DAD/LBD
	 * pairs now start the byte lookups of the final round through rT1.
	 */
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	/*
	 * Final round: one byte lookup per state byte. The first byte of each
	 * output word is loaded directly into rW0-rW3, the other three are
	 * loaded into rW4-rW7 and merged in with rlwimi (shift 8 -> bits 16-23,
	 * shift 16 -> bits 8-15, shift 24 -> bits 0-7).
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/*
	 * Each rDn is overwritten with a key word from 32(rKP)-44(rKP) only
	 * after its last use as lookup input above.
	 */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr