/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast SHA-256 implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Register allocation used by every macro in this file.
 *
 * - rH0 aliases r5, which also carries the incoming block count; the
 *   transform routine moves that count to CTR before loading rH0.
 * - rT2 aliases r0.  It is only ever used as a data register here,
 *   never as the base register of a load/store.
 * - r14-r25 are saved by INITIALIZE and restored by FINALIZE.
 */
#define rHP	r3	/* pointer to hash values in memory		*/
#define rKP	r24	/* pointer to round constants			*/
#define rWP	r4	/* pointer to input data			*/

#define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
#define rH1	r6
#define rH2	r7
#define rH3	r8
#define rH4	r9
#define rH5	r10
#define rH6	r11
#define rH7	r12

#define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
#define rW1	r15
#define rW2	r16
#define rW3	r17
#define rW4	r18
#define rW5	r19
#define rW6	r20
#define rW7	r21

#define rT0	r22	/* 64 bit temporaries				*/
#define rT1	r23
#define rT2	r0	/* 32 bit temporaries				*/
#define rT3	r25

/*
 * Loop-control hooks pasted into R_CALC_W through CMP_K##k##_LOOP.
 *
 * CMP_KN_LOOP ("no compare") expands to nothing.
 * CMP_KC_LOOP ("compare") tests the low word of rT1 against 0.  At the
 * point where R_CALC_W expands the hook, rT1 still holds the pair of
 * round constants just fetched with evldw and not yet added to W, so
 * the compare sees the second constant of that pair.  The transform
 * routine uses the hook only in the last R_CALC_W of each 16-round pass
 * and loops while the result is "gt": in PPC_SPE_SHA256_K the last
 * constant of rows 8 and 12 is positive and only the final constant
 * (0xc67178f2) has its sign bit set, so the loop ends after the pass
 * that consumes the end of the table.  Clobbers cr0.
 */
#define CMP_KN_LOOP
#define CMP_KC_LOOP \
	cmpwi		rT1,0;

/*
 * INITIALIZE - function prologue.
 *
 * Allocates a 128 byte stack frame and saves every non-volatile
 * register this file uses:
 *   8(r1) .. 87(r1)	r14-r23, one 8 byte slot each (full 64 bit SPE
 *			register, stored with evstdw)
 *   88(r1), 92(r1)	r24, r25 (32 bit, stored with stw)
 * FINALIZE is the matching epilogue and relies on this exact layout.
 */
#define INITIALIZE \
	stwu		r1,-128(r1);	/* create stack frame		*/ \
	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
	evstdw		r17,32(r1);					   \
	evstdw		r18,40(r1);					   \
	evstdw		r19,48(r1);					   \
	evstdw		r20,56(r1);					   \
	evstdw		r21,64(r1);					   \
	evstdw		r22,72(r1);					   \
	evstdw		r23,80(r1);					   \
	stw		r24,88(r1);	/* save normal registers	*/ \
	stw		r25,92(r1);


/*
 * FINALIZE - function epilogue, counterpart of INITIALIZE.
 *
 * Reloads r14-r25 from the frame, then overwrites the first word of
 * each 8 byte SPE save slot (offsets 8, 16, ... 80) with zero before
 * releasing the frame.  Only that one word per slot is wiped; the
 * second word of each slot and the r24/r25 slots are left as they are
 * (see the in-line note for the original author's reasoning).
 * Leaves r0 = 0.
 */
#define FINALIZE \
	evldw		r14,8(r1);	/* restore SPE registers	*/ \
	evldw		r15,16(r1);					   \
	evldw		r16,24(r1);					   \
	evldw		r17,32(r1);					   \
	evldw		r18,40(r1);					   \
	evldw		r19,48(r1);					   \
	evldw		r20,56(r1);					   \
	evldw		r21,64(r1);					   \
	evldw		r22,72(r1);					   \
	evldw		r23,80(r1);					   \
	lwz		r24,88(r1);	/* restore normal registers	*/ \
	lwz		r25,92(r1);					   \
	xor		r0,r0,r0;	/* r0 = 0, used as wipe value	*/ \
	stw		r0,8(r1);	/* Delete sensitive data	*/ \
	stw		r0,16(r1);	/* that we might have pushed	*/ \
	stw		r0,24(r1);	/* from other context that runs	*/ \
	stw		r0,32(r1);	/* the same code. Assume that	*/ \
	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
	stw		r0,48(r1);	/* was already overwritten on	*/ \
	stw		r0,56(r1);	/* the way down to here		*/ \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	addi		r1,r1,128;	/* cleanup stack frame		*/

/*
 * LOAD_DATA(reg, off) - fetch one 32 bit message word into reg.
 * NEXT_BLOCK          - advance rWP to the next 64 byte block.
 *
 * Big endian:    words are read with lwz at off(rWP); rWP stays fixed
 *                during a block and NEXT_BLOCK adds 64 afterwards.
 * Little endian: words are read byte-reversed with lwbrx at 0(rWP) and
 *                rWP is bumped by 4 after every word, so "off" is not
 *                used and NEXT_BLOCK has nothing left to do.
 * Either way rWP has moved by 64 bytes once a block is done.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data (byte reversed)	*/ \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#endif

/*
 * R_LOAD_W - two interleaved SHA-256 rounds that take W straight from
 * the input block (used for rounds 0..15).
 *
 *   a..h	working variables as seen by the first round
 *   w		64 bit register that receives the two message words
 *   off	byte offset of the first of the two words / constants,
 *		relative to rWP (big endian LOAD_DATA) and rKP
 *
 * Lines tagged "1:" belong to the first round, "2:" to the second.
 * The second round works on the rotated state without moving
 * registers; its comments keep the canonical SHA-256 names, which map
 * to the macro operands as
 *	a=h  b=a  c=b  d=c  e=d  f=e  g=f  h=g
 * After the macro, h and g hold the two new "a" values and d and c the
 * two new "e" values, so the caller rotates its argument list by two
 * for the next invocation.
 *
 * evmergelo copies the first word into the upper half of w before the
 * second word is loaded into the lower half, leaving both in w.
 *
 * Clobbers rT0-rT3 (and rWP in the little endian build).  The
 * instruction order is hand-scheduled; do not reorder.
 */
#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
	LOAD_DATA(w, off)		/* 1: W				*/ \
	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
	and		rT3,e,f;	/* 1: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
	lwz		rT2,off(rKP);	/* 1: K				*/ \
	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelo	w,w,w;		/*    shift W			*/ \
	or		rT2,a,b;	/* 1: maj = a or b		*/ \
	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
	LOAD_DATA(w, off+4)		/* 2: W				*/ \
	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * R_CALC_W - two interleaved SHA-256 rounds that first extend the
 * message schedule (used for rounds 16..63).
 *
 *   a..h	working variables as seen by the first round
 *   w0		holds W[-16],W[-15] on entry; receives the two new W
 *   w1		supplies W[-14] (merged with w0 to form W[-15],W[-14])
 *   w4, w5	merged to form W[-7],W[-6]
 *   w7		holds W[-2],W[-1]
 *   k		N or C: selects CMP_KN_LOOP / CMP_KC_LOOP
 *   off	byte offset of the constant pair relative to rKP
 *
 * The ev* instructions compute both new schedule words at once in the
 * 64 bit registers while the scalar instructions between them run the
 * first round ("1:").  evrlwi rotates left, so the rotate counts are
 * 32 minus the rotr amount named in the comment; the second s0 rotate
 * is applied to the already rotated value (25 + 21 = 46 = 14 mod 32,
 * i.e. rotr 18).  W + K is formed once for the pair: the upper word is
 * moved down with evmergehi for round one, the lower word feeds round
 * two ("2:").
 *
 * In round one maj is accumulated as (a and b) + ((a xor b) and c); the
 * two terms never share a set bit, so the sum equals the usual maj.
 * Round two uses the rotated operand mapping
 *	a=h  b=a  c=b  d=c  e=d  f=e  g=f  h=g
 * with its comments written in canonical SHA-256 names.
 *
 * Clobbers rT0-rT3, and cr0 when k is C.  The instruction order is
 * hand-scheduled; do not reorder.
 */
#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr	7	*/ \
	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25		*/ \
	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1"		*/ \
	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
	and		rT2,e,f;	/* 1: ch = e and f		*/ \
	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
	evldw		rT1,off(rKP);	/*    k				*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
	and		rT3,a,b;	/* 1: maj = a and b		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
	CMP_K##k##_LOOP							   \
	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
	evmergehi	rT0,rT1,rT1;	/*    wk1/wk2			*/ \
	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
	add		g,g,rT1;	/* 2: temp1 = temp1 + wk	*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * ppc_spe_sha256_transform - run the SHA-256 compression function over
 * consecutive 64 byte blocks and update the hash state in place.
 *
 * In:	r3 (rHP)  pointer to the eight 32 bit hash state words
 *	r4 (rWP)  pointer to the input data
 *	r5	  number of 64 byte blocks to process
 *		  (presumably the C side declares this as
 *		  (u32 *state, const u8 *src, u32 blocks) - confirm
 *		  against the glue code)
 * Out:	state at 0..28(r3) updated after every block
 * Clobbers: r0, r4-r12, ctr, cr0.  r14-r25 are used but preserved by
 *	INITIALIZE/FINALIZE.
 *
 * A block count of 0 returns immediately without touching anything.
 * The main loop is bottom-tested with bdnz, so entering it with
 * ctr = 0 would process a block and then wrap the counter around
 * instead of stopping.
 */
_GLOBAL(ppc_spe_sha256_transform)
	cmpwi		r5,0		/* nothing to hash?		*/
	beqlr				/* yes: return, no frame yet	*/

	INITIALIZE

	mtctr		r5		/* block count; r5 becomes rH0	*/
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)
	lwz		rH4,16(rHP)
	lwz		rH5,20(rHP)
	lwz		rH6,24(rHP)
	lwz		rH7,28(rHP)

ppc_spe_sha256_main:
	/* rKP is advanced while a block is processed: reset per block	*/
	lis		rKP,PPC_SPE_SHA256_K@ha
	addi		rKP,rKP,PPC_SPE_SHA256_K@l

	/* rounds 0..15: W comes directly from the input block		*/
	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
ppc_spe_sha256_16_rounds:
	/* 16 rounds per pass; step to the next 16 round constants	*/
	addi		rKP,rKP,64
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW0, rW1, rW4, rW5, rW7, N, 0)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW1, rW2, rW5, rW6, rW0, N, 8)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW2, rW3, rW6, rW7, rW1, N, 16)
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW3, rW4, rW7, rW0, rW2, N, 24)
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW4, rW5, rW0, rW1, rW3, N, 32)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW5, rW6, rW1, rW2, rW4, N, 40)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW6, rW7, rW2, rW3, rW5, N, 48)
	/* the C variant sets cr0 from the last constant of this pass	*/
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW7, rW0, rW3, rW4, rW6, C, 56)
	bt		gt,ppc_spe_sha256_16_rounds

	/* W registers are free now: reuse them for the previous state	*/
	lwz		rW0,0(rHP)
	NEXT_BLOCK
	lwz		rW1,4(rHP)
	lwz		rW2,8(rHP)
	lwz		rW3,12(rHP)
	lwz		rW4,16(rHP)
	lwz		rW5,20(rHP)
	lwz		rW6,24(rHP)
	lwz		rW7,28(rHP)

	/* state += working variables; keep the sums live in rH0-rH7	*/
	add		rH0,rH0,rW0
	stw		rH0,0(rHP)
	add		rH1,rH1,rW1
	stw		rH1,4(rHP)
	add		rH2,rH2,rW2
	stw		rH2,8(rHP)
	add		rH3,rH3,rW3
	stw		rH3,12(rHP)
	add		rH4,rH4,rW4
	stw		rH4,16(rHP)
	add		rH5,rH5,rW5
	stw		rH5,20(rHP)
	add		rH6,rH6,rW6
	stw		rH6,24(rHP)
	add		rH7,rH7,rW7
	stw		rH7,28(rHP)

	bdnz		ppc_spe_sha256_main

	FINALIZE
	blr

/*
 * SHA-256 round constants, 64 words in round order (16 bytes per row).
 * The table is only ever read (lwz/evldw through rKP), so it lives in
 * .rodata rather than in writable .data.  ".align 5" is a power-of-two
 * alignment on PowerPC GNU as, i.e. 32 bytes.
 * The sign of the last word in rows 8, 12 and 16 doubles as the loop
 * condition of the round loop (see CMP_KC_LOOP): do not reorder.
 */
	.section	.rodata
	.align		5
PPC_SPE_SHA256_K:
	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2