18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Fast SHA-1 implementation for SPE instruction set (PPC)
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This code makes use of the SPE SIMD instruction set as defined in
68c2ecf20Sopenharmony_ci * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
78c2ecf20Sopenharmony_ci * Implementation is based on optimization guide notes from
88c2ecf20Sopenharmony_ci * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
118c2ecf20Sopenharmony_ci */
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
148c2ecf20Sopenharmony_ci#include <asm/asm-offsets.h>
158c2ecf20Sopenharmony_ci
/*
 * Register aliases used by the macros and by ppc_spe_sha1_transform below.
 *
 * rHP (r3) and rWP (r4) are read by the routine without being set up first,
 * i.e. they are inputs. r5 is consumed by "mtctr r5" on entry and only after
 * that reused as rKP, the pointer to the PPC_SPE_SHA1_K table.
 *
 * r14-r23 (rW0-rW7, rT0, rK) are exactly the registers that INITIALIZE
 * stores and FINALIZE reloads. All other aliases (r0, r6-r12) are used
 * without being saved in this file.
 */
168c2ecf20Sopenharmony_ci#define rHP	r3	/* pointer to hash value			*/
178c2ecf20Sopenharmony_ci#define rWP	r4	/* pointer to input				*/
188c2ecf20Sopenharmony_ci#define rKP	r5	/* pointer to constants				*/
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci#define rW0	r14	/* 64 bit round words				*/
218c2ecf20Sopenharmony_ci#define rW1	r15
228c2ecf20Sopenharmony_ci#define rW2	r16
238c2ecf20Sopenharmony_ci#define rW3	r17
248c2ecf20Sopenharmony_ci#define rW4	r18
258c2ecf20Sopenharmony_ci#define rW5	r19
268c2ecf20Sopenharmony_ci#define rW6	r20
278c2ecf20Sopenharmony_ci#define rW7	r21
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci#define rH0	r6	/* 32 bit hash values 				*/
308c2ecf20Sopenharmony_ci#define rH1	r7
318c2ecf20Sopenharmony_ci#define rH2	r8
328c2ecf20Sopenharmony_ci#define rH3	r9
338c2ecf20Sopenharmony_ci#define rH4	r10
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci#define rT0	r22	/* 64 bit temporary				*/
368c2ecf20Sopenharmony_ci#define rT1	r0	/* 32 bit temporaries				*/
378c2ecf20Sopenharmony_ci#define rT2	r11
388c2ecf20Sopenharmony_ci#define rT3	r12
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci#define rK	r23	/* 64 bit constant in non-volatile register	*/
418c2ecf20Sopenharmony_ci
/*
 * Round constant loaders. The round macros expand LOAD_K##k##1 with their
 * "k" argument:
 *   k = 0     selects the empty LOAD_K01, rK keeps its current value
 *   k = 1..4  reloads rK with evlwwsplat (SPE load word and splat) from
 *             the table rKP points to, at byte offset 0, 4, 8 or 12
 */
428c2ecf20Sopenharmony_ci#define LOAD_K01
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci#define LOAD_K11 \
458c2ecf20Sopenharmony_ci	evlwwsplat	rK,0(rKP);
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci#define LOAD_K21 \
488c2ecf20Sopenharmony_ci	evlwwsplat	rK,4(rKP);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci#define LOAD_K31 \
518c2ecf20Sopenharmony_ci	evlwwsplat	rK,8(rKP);
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci#define LOAD_K41 \
548c2ecf20Sopenharmony_ci	evlwwsplat	rK,12(rKP);
558c2ecf20Sopenharmony_ci
/*
 * INITIALIZE: function prologue.
 *
 * Allocates a 128 byte stack frame with stwu and stores r14-r23 with evstdw
 * (the 64 bit SPE store) into ten consecutive 8 byte slots at 8(r1) through
 * 80(r1). FINALIZE reads the same slots back.
 */
568c2ecf20Sopenharmony_ci#define INITIALIZE \
578c2ecf20Sopenharmony_ci	stwu		r1,-128(r1);	/* create stack frame		*/ \
588c2ecf20Sopenharmony_ci	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
598c2ecf20Sopenharmony_ci	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
608c2ecf20Sopenharmony_ci	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
618c2ecf20Sopenharmony_ci	evstdw		r17,32(r1);					   \
628c2ecf20Sopenharmony_ci	evstdw		r18,40(r1);					   \
638c2ecf20Sopenharmony_ci	evstdw		r19,48(r1);					   \
648c2ecf20Sopenharmony_ci	evstdw		r20,56(r1);					   \
658c2ecf20Sopenharmony_ci	evstdw		r21,64(r1);					   \
668c2ecf20Sopenharmony_ci	evstdw		r22,72(r1);					   \
678c2ecf20Sopenharmony_ci	evstdw		r23,80(r1);
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci
/*
 * FINALIZE: function epilogue, the counterpart of INITIALIZE.
 *
 * 1. Reloads r14-r23 with evldw from the slots at 8(r1) through 80(r1).
 * 2. Clears r0 and stores that zero word at the first word of each of the
 *    ten slots (offsets 8, 16, ..., 80). The word at offset+4 of every slot
 *    is not overwritten.
 * 3. Releases the 128 byte stack frame.
 *
 * Clobbers r0.
 */
708c2ecf20Sopenharmony_ci#define FINALIZE \
718c2ecf20Sopenharmony_ci	evldw		r14,8(r1);	/* restore SPE registers	*/ \
728c2ecf20Sopenharmony_ci	evldw		r15,16(r1);					   \
738c2ecf20Sopenharmony_ci	evldw		r16,24(r1);					   \
748c2ecf20Sopenharmony_ci	evldw		r17,32(r1);					   \
758c2ecf20Sopenharmony_ci	evldw		r18,40(r1);					   \
768c2ecf20Sopenharmony_ci	evldw		r19,48(r1);					   \
778c2ecf20Sopenharmony_ci	evldw		r20,56(r1);					   \
788c2ecf20Sopenharmony_ci	evldw		r21,64(r1);					   \
798c2ecf20Sopenharmony_ci	evldw		r22,72(r1);					   \
808c2ecf20Sopenharmony_ci	evldw		r23,80(r1);					   \
818c2ecf20Sopenharmony_ci	xor		r0,r0,r0;					   \
828c2ecf20Sopenharmony_ci	stw		r0,8(r1);	/* Delete sensitive data	*/ \
838c2ecf20Sopenharmony_ci	stw		r0,16(r1);	/* that we might have pushed	*/ \
848c2ecf20Sopenharmony_ci	stw		r0,24(r1);	/* from other context that runs	*/ \
858c2ecf20Sopenharmony_ci	stw		r0,32(r1);	/* the same code. Assume that	*/ \
868c2ecf20Sopenharmony_ci	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
878c2ecf20Sopenharmony_ci	stw		r0,48(r1);	/* were already overwritten on	*/ \
888c2ecf20Sopenharmony_ci	stw		r0,56(r1);	/* the way down to here		*/ \
898c2ecf20Sopenharmony_ci	stw		r0,64(r1);					   \
908c2ecf20Sopenharmony_ci	stw		r0,72(r1);					   \
918c2ecf20Sopenharmony_ci	stw		r0,80(r1);					   \
928c2ecf20Sopenharmony_ci	addi		r1,r1,128;	/* cleanup stack frame		*/
938c2ecf20Sopenharmony_ci
/*
 * LOAD_DATA(reg, off) loads one 32 bit input word through rWP into reg.
 * NEXT_BLOCK is expanded once per block by ppc_spe_sha1_transform.
 *
 * With __BIG_ENDIAN__ defined:
 *   plain lwz at displacement "off"; rWP is not changed while a block is
 *   read and NEXT_BLOCK advances it by 64 bytes afterwards.
 * Otherwise:
 *   byte-reversed load (lwbrx) at rWP, which is then advanced by 4 after
 *   every word; "off" is ignored and NEXT_BLOCK expands to nothing.
 */
948c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
958c2ecf20Sopenharmony_ci#define LOAD_DATA(reg, off) \
968c2ecf20Sopenharmony_ci	lwz		reg,off(rWP);	/* load data			*/
978c2ecf20Sopenharmony_ci#define NEXT_BLOCK \
988c2ecf20Sopenharmony_ci	addi		rWP,rWP,64;	/* increment per block		*/
998c2ecf20Sopenharmony_ci#else
1008c2ecf20Sopenharmony_ci#define LOAD_DATA(reg, off) \
1018c2ecf20Sopenharmony_ci	lwbrx		reg,0,rWP;	/* load data			*/ \
1028c2ecf20Sopenharmony_ci	addi		rWP,rWP,4;	/* increment per word		*/
1038c2ecf20Sopenharmony_ci#define NEXT_BLOCK			/* nothing to do		*/
1048c2ecf20Sopenharmony_ci#endif
1058c2ecf20Sopenharmony_ci
/*
 * R_00_15(a, b, c, d, e, w0, w1, k, off): two SHA-1 rounds that consume two
 * freshly loaded input words. The "1:" / "2:" tags in the line comments mark
 * which of the two interleaved rounds an instruction belongs to.
 *
 *   a..e    the five 32 bit working variables. Round 1 accumulates into e
 *           and rotates b; round 2 accumulates into d and rotates a. No
 *           registers are moved between rounds, the caller permutes the
 *           argument order from one invocation to the next instead.
 *   w0, w1  receive the words loaded by LOAD_DATA at off and off+4. On exit
 *           w1 holds both words combined by evmergelo; w0 still holds its
 *           own word.
 *   k       pasted into LOAD_K##k##1. The load is issued before rK is
 *           added, so a non-zero k takes effect for these two rounds.
 *   off     byte offset of the first word, forwarded to LOAD_DATA.
 *
 * Round function: F = (B and C) or (~B and D).
 * Clobbers rT0, rT1, rT2 (and rK when k is not 0).
 */
1068c2ecf20Sopenharmony_ci#define	R_00_15(a, b, c, d, e, w0, w1, k, off) \
1078c2ecf20Sopenharmony_ci	LOAD_DATA(w0, off)		/* 1: W				*/ \
1088c2ecf20Sopenharmony_ci	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
1098c2ecf20Sopenharmony_ci	LOAD_K##k##1							   \
1108c2ecf20Sopenharmony_ci	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
1118c2ecf20Sopenharmony_ci	rotrwi		rT0,a,27;	/* 1: A' = A rotl 5		*/ \
1128c2ecf20Sopenharmony_ci	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
1138c2ecf20Sopenharmony_ci	add		e,e,rT0;	/* 1: E = E + A'		*/ \
1148c2ecf20Sopenharmony_ci	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
1158c2ecf20Sopenharmony_ci	add		e,e,w0;		/* 1: E = E + W			*/ \
1168c2ecf20Sopenharmony_ci	LOAD_DATA(w1, off+4)		/* 2: W				*/ \
1178c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + F			*/ \
1188c2ecf20Sopenharmony_ci	and		rT1,a,b;	/* 2: F' = B and C 		*/ \
1198c2ecf20Sopenharmony_ci	add		e,e,rK;		/* 1: E = E + K			*/ \
1208c2ecf20Sopenharmony_ci	andc		rT2,c,a;	/* 2: F" = ~B and D 		*/ \
1218c2ecf20Sopenharmony_ci	add		d,d,rK;		/* 2: E = E + K			*/ \
1228c2ecf20Sopenharmony_ci	or		rT2,rT2,rT1;	/* 2: F = F' or F"		*/ \
1238c2ecf20Sopenharmony_ci	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
1248c2ecf20Sopenharmony_ci	add		d,d,w1;		/* 2: E = E + W			*/ \
1258c2ecf20Sopenharmony_ci	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
1268c2ecf20Sopenharmony_ci	add		d,d,rT0;	/* 2: E = E + A'		*/ \
1278c2ecf20Sopenharmony_ci	evmergelo	w1,w1,w0;	/*    mix W[0]/W[1]		*/ \
1288c2ecf20Sopenharmony_ci	add		d,d,rT2		/* 2: E = E + F			*/
1298c2ecf20Sopenharmony_ci
/*
 * R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k): two SHA-1 rounds using
 * F = (B and C) or (~B and D), with the two message words computed by SPE
 * instructions instead of being loaded from memory.
 *
 *   a..e    working variables; round 1 accumulates into e and rotates b,
 *           round 2 accumulates into d and rotates a.
 *   w0      in: the 64 bit word pair commented as W[-16]
 *           out: rotl1(w0 xor W[-3] xor w4 xor w1), the new word pair
 *   w1, w4  word pairs commented as W[-14] and W[-8]
 *   w6, w7  combined by evmergelohi into the W[-3] pair
 *   k       pasted into LOAD_K##k##1. The load is placed after evaddw has
 *           read rK, so a non-zero k only affects the macros that follow.
 *
 * evaddw forms W + K in rT0 and evmergehi places a copy for round 2 in rT1
 * (line comment: WK1/WK2).
 * Clobbers rT0, rT1, rT2 (and rK when k is not 0).
 */
1308c2ecf20Sopenharmony_ci#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
1318c2ecf20Sopenharmony_ci	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
1328c2ecf20Sopenharmony_ci	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
1338c2ecf20Sopenharmony_ci	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
1348c2ecf20Sopenharmony_ci	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
1358c2ecf20Sopenharmony_ci	or		rT1,rT1,rT2;	/* 1: F = F' or F"		*/ \
1368c2ecf20Sopenharmony_ci	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
1378c2ecf20Sopenharmony_ci	add		e,e,rT1;	/* 1: E = E + F			*/ \
1388c2ecf20Sopenharmony_ci	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
1398c2ecf20Sopenharmony_ci	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
1408c2ecf20Sopenharmony_ci	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
1418c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + A'		*/ \
1428c2ecf20Sopenharmony_ci	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
1438c2ecf20Sopenharmony_ci	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
1448c2ecf20Sopenharmony_ci	LOAD_K##k##1							   \
1458c2ecf20Sopenharmony_ci	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
1468c2ecf20Sopenharmony_ci	add		e,e,rT0;	/* 1: E = E + WK		*/ \
1478c2ecf20Sopenharmony_ci	add		d,d,rT1;	/* 2: E = E + WK		*/ \
1488c2ecf20Sopenharmony_ci	and		rT2,a,b;	/* 2: F' = B and C 		*/ \
1498c2ecf20Sopenharmony_ci	andc		rT1,c,a;	/* 2: F" = ~B and D 		*/ \
1508c2ecf20Sopenharmony_ci	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
1518c2ecf20Sopenharmony_ci	or		rT1,rT1,rT2;	/* 2: F = F' or F"		*/ \
1528c2ecf20Sopenharmony_ci	add		d,d,rT0;	/* 2: E = E + A'		*/ \
1538c2ecf20Sopenharmony_ci	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
1548c2ecf20Sopenharmony_ci	add		d,d,rT1		/* 2: E = E + F			*/
1558c2ecf20Sopenharmony_ci
/*
 * R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k): two SHA-1 rounds using
 * F = B xor C xor D.
 *
 *   a..e    working variables; round 1 accumulates into e and rotates b,
 *           round 2 accumulates into d and rotates a.
 *   w0      in: the 64 bit word pair commented as W[-16]
 *           out: rotl1(w0 xor W[-3] xor w4 xor w1), the new word pair
 *   w1, w4  word pairs commented as W[-14] and W[-8]
 *   w6, w7  combined by evmergelohi into the W[-3] pair
 *   k       pasted into LOAD_K##k##1. The load is placed after evaddw has
 *           read rK, so a non-zero k only affects the macros that follow.
 *
 * Clobbers rT0, rT1, rT2 (and rK when k is not 0).
 */
1568c2ecf20Sopenharmony_ci#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
1578c2ecf20Sopenharmony_ci	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
1588c2ecf20Sopenharmony_ci	xor		rT2,b,c;	/* 1: F' = B xor C		*/ \
1598c2ecf20Sopenharmony_ci	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
1608c2ecf20Sopenharmony_ci	xor		rT2,rT2,d;	/* 1: F = F' xor D		*/ \
1618c2ecf20Sopenharmony_ci	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
1628c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + F			*/ \
1638c2ecf20Sopenharmony_ci	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
1648c2ecf20Sopenharmony_ci	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
1658c2ecf20Sopenharmony_ci	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
1668c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + A'		*/ \
1678c2ecf20Sopenharmony_ci	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
1688c2ecf20Sopenharmony_ci	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
1698c2ecf20Sopenharmony_ci	LOAD_K##k##1							   \
1708c2ecf20Sopenharmony_ci	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
1718c2ecf20Sopenharmony_ci	add		e,e,rT0;	/* 1: E = E + WK		*/ \
1728c2ecf20Sopenharmony_ci	xor		rT2,a,b;	/* 2: F' = B xor C		*/ \
1738c2ecf20Sopenharmony_ci	add		d,d,rT1;	/* 2: E = E + WK		*/ \
1748c2ecf20Sopenharmony_ci	xor		rT2,rT2,c;	/* 2: F = F' xor D		*/ \
1758c2ecf20Sopenharmony_ci	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
1768c2ecf20Sopenharmony_ci	add		d,d,rT2;	/* 2: E = E + F			*/ \
1778c2ecf20Sopenharmony_ci	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
1788c2ecf20Sopenharmony_ci	add		d,d,rT0		/* 2: E = E + A'		*/
1798c2ecf20Sopenharmony_ci
/*
 * R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k): two SHA-1 rounds using
 * F = (B and C) or ((B or C) and D).
 *
 *   a..e    working variables; round 1 accumulates into e and rotates b,
 *           round 2 accumulates into d and rotates a.
 *   w0      in: the 64 bit word pair commented as W[-16]
 *           out: rotl1(w0 xor W[-3] xor w4 xor w1), the new word pair
 *   w1, w4  word pairs commented as W[-14] and W[-8]
 *   w6, w7  combined by evmergelohi into the W[-3] pair
 *   k       pasted into LOAD_K##k##1. The load is placed after evaddw has
 *           read rK, so a non-zero k only affects the macros that follow.
 *
 * Clobbers rT0, rT1, rT2 (and rK when k is not 0).
 */
1808c2ecf20Sopenharmony_ci#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
1818c2ecf20Sopenharmony_ci	and		rT2,b,c;	/* 1: F' = B and C		*/ \
1828c2ecf20Sopenharmony_ci	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
1838c2ecf20Sopenharmony_ci	or		rT1,b,c;	/* 1: F" = B or C		*/ \
1848c2ecf20Sopenharmony_ci	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
1858c2ecf20Sopenharmony_ci	and		rT1,d,rT1;	/* 1: F" = F" and D		*/ \
1868c2ecf20Sopenharmony_ci	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
1878c2ecf20Sopenharmony_ci	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
1888c2ecf20Sopenharmony_ci	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
1898c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + F			*/ \
1908c2ecf20Sopenharmony_ci	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
1918c2ecf20Sopenharmony_ci	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
1928c2ecf20Sopenharmony_ci	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
1938c2ecf20Sopenharmony_ci	add		e,e,rT2;	/* 1: E = E + A'		*/ \
1948c2ecf20Sopenharmony_ci	LOAD_K##k##1							   \
1958c2ecf20Sopenharmony_ci	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
1968c2ecf20Sopenharmony_ci	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
1978c2ecf20Sopenharmony_ci	add		e,e,rT0;	/* 1: E = E + WK		*/ \
1988c2ecf20Sopenharmony_ci	and		rT2,a,b;	/* 2: F' = B and C		*/ \
1998c2ecf20Sopenharmony_ci	or		rT0,a,b;	/* 2: F" = B or C		*/ \
2008c2ecf20Sopenharmony_ci	add		d,d,rT1;	/* 2: E = E + WK		*/ \
2018c2ecf20Sopenharmony_ci	and		rT0,c,rT0;	/* 2: F" = F" and D		*/ \
2028c2ecf20Sopenharmony_ci	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
2038c2ecf20Sopenharmony_ci	or		rT2,rT2,rT0;	/* 2: F = F' or F"		*/ \
2048c2ecf20Sopenharmony_ci	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
2058c2ecf20Sopenharmony_ci	add		d,d,rT2;	/* 2: E = E + F			*/ \
2068c2ecf20Sopenharmony_ci	add		d,d,rT0		/* 2: E = E + A'		*/
2078c2ecf20Sopenharmony_ci
/*
 * R_60_79: rounds 60-79 expand to exactly the same instruction sequence as
 * R_20_39 (F = B xor C xor D). Only the value held in rK differs; it is
 * reloaded by the last R_40_59 invocation in ppc_spe_sha1_transform (k = 4).
 */
2088c2ecf20Sopenharmony_ci#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
2098c2ecf20Sopenharmony_ci	R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
2108c2ecf20Sopenharmony_ci
/*
 * ppc_spe_sha1_transform: run the SHA-1 block function over consecutive
 * 64 byte input blocks.
 *
 * Inputs as used by the code (no C prototype is visible in this file):
 *   r3 (rHP)  pointer to the five 32 bit hash words; they are loaded once
 *             on entry and written back after every block
 *   r4 (rWP)  pointer to the input; 16 words are read per block
 *   r5        number of blocks, copied to CTR before r5 is reused as rKP
 *
 * r14-r23 are saved and restored by INITIALIZE/FINALIZE. r0, r4-r12 and CTR
 * are modified without being saved.
 *
 * NOTE(review): the block loop is tested at the bottom (bdnz), so one block
 * is processed before the count is examined - confirm that callers never
 * pass a block count of 0.
 */
2118c2ecf20Sopenharmony_ci_GLOBAL(ppc_spe_sha1_transform)
2128c2ecf20Sopenharmony_ci	INITIALIZE
2138c2ecf20Sopenharmony_ci
	/*
	 * Load the five hash words into rH0-rH4. Interleaved with that:
	 * move the block count to CTR and build the address of
	 * PPC_SPE_SHA1_K in rKP (lis/ori with the @h and @l halves).
	 */
2148c2ecf20Sopenharmony_ci	lwz		rH0,0(rHP)
2158c2ecf20Sopenharmony_ci	lwz		rH1,4(rHP)
2168c2ecf20Sopenharmony_ci	mtctr		r5
2178c2ecf20Sopenharmony_ci	lwz		rH2,8(rHP)
2188c2ecf20Sopenharmony_ci	lis		rKP,PPC_SPE_SHA1_K@h
2198c2ecf20Sopenharmony_ci	lwz		rH3,12(rHP)
2208c2ecf20Sopenharmony_ci	ori		rKP,rKP,PPC_SPE_SHA1_K@l
2218c2ecf20Sopenharmony_ci	lwz		rH4,16(rHP)
2228c2ecf20Sopenharmony_ci
	/* Loop head: one iteration per 64 byte block. */
2238c2ecf20Sopenharmony_cippc_spe_sha1_main:
	/*
	 * Rounds 0-15: two input words per invocation, leaving the word
	 * pairs in rW0-rW7. The first invocation (k = 1) loads the first
	 * constant into rK. The last two invocations use rT3 as the scratch
	 * register for the first word of the pair.
	 */
2248c2ecf20Sopenharmony_ci	R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
2258c2ecf20Sopenharmony_ci	R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
2268c2ecf20Sopenharmony_ci	R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
2278c2ecf20Sopenharmony_ci	R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
2288c2ecf20Sopenharmony_ci	R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
2298c2ecf20Sopenharmony_ci	R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
2308c2ecf20Sopenharmony_ci	R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
2318c2ecf20Sopenharmony_ci	R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
2328c2ecf20Sopenharmony_ci
	/* Rounds 16-19; k = 2 on the second call reloads rK for rounds 20-39. */
2338c2ecf20Sopenharmony_ci	R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
2348c2ecf20Sopenharmony_ci	R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
2358c2ecf20Sopenharmony_ci
	/* Rounds 20-39; k = 3 on the last call reloads rK for rounds 40-59. */
2368c2ecf20Sopenharmony_ci	R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
2378c2ecf20Sopenharmony_ci	R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
2388c2ecf20Sopenharmony_ci	R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
2398c2ecf20Sopenharmony_ci	R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
2408c2ecf20Sopenharmony_ci	R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
2418c2ecf20Sopenharmony_ci	R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
2428c2ecf20Sopenharmony_ci	R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
2438c2ecf20Sopenharmony_ci	R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
2448c2ecf20Sopenharmony_ci	R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
2458c2ecf20Sopenharmony_ci	R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
2468c2ecf20Sopenharmony_ci
	/* Rounds 40-59; k = 4 on the last call reloads rK for rounds 60-79. */
2478c2ecf20Sopenharmony_ci	R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
2488c2ecf20Sopenharmony_ci	R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
2498c2ecf20Sopenharmony_ci	R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
2508c2ecf20Sopenharmony_ci	R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
2518c2ecf20Sopenharmony_ci	R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
2528c2ecf20Sopenharmony_ci	R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
2538c2ecf20Sopenharmony_ci	R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
2548c2ecf20Sopenharmony_ci	R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
2558c2ecf20Sopenharmony_ci	R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
2568c2ecf20Sopenharmony_ci	R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
2578c2ecf20Sopenharmony_ci
	/*
	 * Rounds 60-79. Between the last invocations the previous hash
	 * words are reloaded from memory into rT3 and rW1-rW4; each lwz is
	 * placed after the last round macro that still reads its target
	 * register as a message word.
	 */
2588c2ecf20Sopenharmony_ci	R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
2598c2ecf20Sopenharmony_ci	R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
2608c2ecf20Sopenharmony_ci	R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
2618c2ecf20Sopenharmony_ci	R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
2628c2ecf20Sopenharmony_ci	R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
2638c2ecf20Sopenharmony_ci	R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
2648c2ecf20Sopenharmony_ci	R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
2658c2ecf20Sopenharmony_ci	lwz		rT3,0(rHP)
2668c2ecf20Sopenharmony_ci	R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
2678c2ecf20Sopenharmony_ci	lwz		rW1,4(rHP)
2688c2ecf20Sopenharmony_ci	R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
2698c2ecf20Sopenharmony_ci	lwz		rW2,8(rHP)
2708c2ecf20Sopenharmony_ci	R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
2718c2ecf20Sopenharmony_ci	lwz		rW3,12(rHP)
2728c2ecf20Sopenharmony_ci	NEXT_BLOCK
2738c2ecf20Sopenharmony_ci	lwz		rW4,16(rHP)
2748c2ecf20Sopenharmony_ci
	/*
	 * Add the previous hash words to the working variables and store
	 * the sums back; rH0-rH4 then carry the hash into the next block.
	 */
2758c2ecf20Sopenharmony_ci	add		rH0,rH0,rT3
2768c2ecf20Sopenharmony_ci	stw		rH0,0(rHP)
2778c2ecf20Sopenharmony_ci	add		rH1,rH1,rW1
2788c2ecf20Sopenharmony_ci	stw		rH1,4(rHP)
2798c2ecf20Sopenharmony_ci	add		rH2,rH2,rW2
2808c2ecf20Sopenharmony_ci	stw		rH2,8(rHP)
2818c2ecf20Sopenharmony_ci	add		rH3,rH3,rW3
2828c2ecf20Sopenharmony_ci	stw		rH3,12(rHP)
2838c2ecf20Sopenharmony_ci	add		rH4,rH4,rW4
2848c2ecf20Sopenharmony_ci	stw		rH4,16(rHP)
2858c2ecf20Sopenharmony_ci
	/* Decrement CTR and loop while blocks remain. */
2868c2ecf20Sopenharmony_ci	bdnz		ppc_spe_sha1_main
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	FINALIZE
2898c2ecf20Sopenharmony_ci	blr
2908c2ecf20Sopenharmony_ci
/*
 * The four SHA-1 round constants, one for each group of 20 rounds, in the
 * order in which they are used. LOAD_K11..LOAD_K41 read them through rKP at
 * byte offsets 0, 4, 8 and 12. Nothing in this file writes to the table.
 */
2918c2ecf20Sopenharmony_ci.data
2928c2ecf20Sopenharmony_ci.align 4
2938c2ecf20Sopenharmony_ciPPC_SPE_SHA1_K:
2948c2ecf20Sopenharmony_ci	.long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
295