/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * EAD(in, bpos): select an encryption table entry through rT0.
 *
 * Byte number bpos of the 32 bit word "in" (0 = least significant byte,
 * 3 = most significant byte) is rotated into bits 20-27 of rT0, i.e. the
 * byte value is inserted multiplied by 16. All other bits of rT0 are kept,
 * so rT0 must already contain the table base address.
 * NOTE(review): presumably the table base has bits 20-27 clear (for example
 * a 4K aligned table) - confirm against the table definition.
 */
#define	EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD(in, bpos): select a decryption byte table entry through rT1.
 *
 * Same byte numbering as EAD, but the byte is inserted unscaled into
 * bits 24-31 (the lowest byte) of rT1. All other bits of rT1 are kept, so
 * rT1 must already contain the table base address.
 * NOTE(review): presumably the table is 256 byte aligned - confirm.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/*
 * Plain loads relative to a previously calculated table address.
 *
 * LWH(out, off):      evlwwsplat the word at off(rT0) into out. The
 *                     instruction fills both words of the 64 bit register;
 *                     callers that need a different low word overwrite it
 *                     afterwards with LWL.
 * LWL(out, off):      lwz the word at off(rT0) into out.
 * LBZ(out, tab, off): lbz the byte at off(tab) into out.
 */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/*
 * Combined "calculate address, then load" helpers.
 *
 * LAH/LAL(out, in, bpos, off): EAD(in, bpos) followed by LWH/LWL(out, off).
 * LAE(out, in, bpos):          EAD(in, bpos) followed by a byte load from
 *                              offset 8 of the selected entry.
 * LBE(out):                    byte load from offset 8 of the entry that
 *                              rT0 currently selects (no address update).
 * LAD(out, in, bpos):          DAD(in, bpos) followed by a byte load from
 *                              the address in rT1.
 * LBD(out):                    byte load from the address that rT1
 *                              currently selects (no address update).
 *
 * The address calculating variants modify rT0 (LAH, LAL, LAE) or rT1 (LAD).
 */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 * The table index bits of rT0 and the key pointer rKP are modified as well.
 *
 * Structure:
 *  - each pass through ppc_encrypt_block_loop performs two table driven
 *    rounds, consumes the key material at 16(rKP)..47(rKP) and advances
 *    rKP by 32. CTR holds the number of passes (bdnz).
 *  - one more table driven round follows the loop, using the key material
 *    at 16(rKP)..31(rKP).
 *  - the last round looks up single bytes, packs them into rW0-rW3 and
 *    loads the key words at 32(rKP)..47(rKP) into rD0-rD3.
 *
 * The code is software pipelined: the tail of every round already issues
 * three lookups for the following round, which is why the same three
 * lookups are issued once before the loop is entered.
 */
_GLOBAL(ppc_encrypt_block)
	/* lookups that the end of each round normally issues for the next */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first round of this pass: table lookups */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/*
	 * Combine the lookups with the key at 16(rKP)..31(rKP). The last
	 * lookups on the old rD2/rD3 and the first lookups on the new
	 * rD0/rD1 are interleaved with the xor chain.
	 */
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1	/* low word of rD0 = high word of rD1 */
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3	/* low word of rD2 = high word of rD3 */
	LWH(rW3, 8)
	/* second round of this pass: table lookups */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/* combine with the key at 32(rKP)..47(rKP) */
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* step over the 32 key bytes just used, repeat while --CTR != 0 */
	addi		rKP,rKP,32
	bdnz		ppc_encrypt_block_loop
	/* table driven round after the loop: lookups */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	/*
	 * Combine with the key at 16(rKP)..31(rKP). From here on the
	 * interleaved lookups on the new state are byte loads (LBE) that
	 * already belong to the last round.
	 */
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	/* NOTE(review): the same lookup into rW1 is repeated two lines below */
	LBE(rW1)
	/*
	 * Last round: byte lookups. rW0-rW3 start with the byte for their
	 * lowest position; the rlwimi instructions insert the bytes for
	 * bits 16-23, 8-15 and 0-7.
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/*
	 * rD0-rD3 are reloaded with the key words at 32(rKP)..47(rKP) as
	 * soon as their old value has been used for the last time.
	 */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of the key into rD0-rD3. Pointer/counter registers must have
 * also been set up before (rT0, rT1, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 * The table index bits of rT0 and rT1 and the key pointer rKP are modified
 * as well.
 *
 * Structure:
 *  - each pass through ppc_decrypt_block_loop performs two table driven
 *    rounds (word lookups through rT0), consumes the key material at
 *    16(rKP)..47(rKP) and advances rKP by 32. CTR holds the number of
 *    passes (bdnz).
 *  - one more table driven round follows the loop, using the key material
 *    at 16(rKP)..31(rKP).
 *  - the last round looks up single bytes through rT1, packs them into
 *    rW0-rW3 and loads the key words at 32(rKP)..47(rKP) into rD0-rD3.
 *
 * The code is software pipelined: the tail of every round already issues
 * three lookups for the following round, which is why the same three
 * lookups are issued once before the loop is entered.
 */
_GLOBAL(ppc_decrypt_block)
	/* lookups that the end of each round normally issues for the next */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first round of this pass: table lookups */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/*
	 * Combine the lookups with the key at 16(rKP)..31(rKP). The last
	 * lookups on the old rD0/rD2 and the first lookups on the new
	 * rD0/rD1 are interleaved with the xor chain.
	 */
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1	/* low word of rD0 = high word of rD1 */
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3	/* low word of rD2 = high word of rD3 */
	LWH(rW3, 8)
	/* second round of this pass: table lookups */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/* combine with the key at 32(rKP)..47(rKP) */
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* step over the 32 key bytes just used, repeat while --CTR != 0 */
	addi		rKP,rKP,32
	bdnz		ppc_decrypt_block_loop
	/* table driven round after the loop: lookups */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	/*
	 * Combine with the key at 16(rKP)..31(rKP). From here on the
	 * interleaved lookups on the new state are byte loads through rT1
	 * (DAD/LBD) that already belong to the last round.
	 */
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	/*
	 * Last round: byte lookups. rW0-rW3 start with the byte for their
	 * lowest position; the rlwimi instructions insert the bytes for
	 * bits 16-23, 8-15 and 0-7.
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/*
	 * rD0-rD3 are reloaded with the key words at 32(rKP)..47(rKP) as
	 * soon as their old value has been used for the last time.
	 */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr