/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast SHA-256 implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Register allocation.
 *
 * rHP/rWP are r3/r4. r5 is read once as the loop count (moved to CTR) by
 * the transform routine before it is reused as rH0.
 * r14-r25 are saved by INITIALIZE and restored by FINALIZE.
 * rT2 is r0: it is only ever used as a data operand, never as the base
 * register of a load/store, where r0 would be read as a literal zero.
 */
#define rHP	r3	/* pointer to hash values in memory		*/
#define rKP	r24	/* pointer to round constants			*/
#define rWP	r4	/* pointer to input data			*/

#define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
#define rH1	r6
#define rH2	r7
#define rH3	r8
#define rH4	r9
#define rH5	r10
#define rH6	r11
#define rH7	r12

#define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
#define rW1	r15
#define rW2	r16
#define rW3	r17
#define rW4	r18
#define rW5	r19
#define rW6	r20
#define rW7	r21

#define rT0	r22	/* 64 bit temporaries				*/
#define rT1	r23
#define rT2	r0	/* 32 bit temporaries				*/
#define rT3	r25

/*
 * Hooks pasted into R_CALC_W through CMP_K##k##_LOOP.
 *
 * k = N expands to nothing. k = C compares rT1 with zero; at the point of
 * expansion rT1 still holds the round constant pair fetched by evldw, so
 * the branch that follows the last R_CALC_W of a pass can test the sign of
 * that constant to decide whether another pass is needed.
 */
#define CMP_KN_LOOP
#define CMP_KC_LOOP \
	cmpwi		rT1,0;

/*
 * Prologue: open a 128 byte stack frame and save the non volatile registers
 * used by this file.
 *
 *   8(r1) .. 87(r1)	r14-r23, 8 bytes each (evstdw stores all 64 bits)
 *  88(r1), 92(r1)	r24, r25, 4 bytes each
 */
#define INITIALIZE \
	stwu		r1,-128(r1);	/* create stack frame		*/ \
	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
	evstdw		r17,32(r1);					   \
	evstdw		r18,40(r1);					   \
	evstdw		r19,48(r1);					   \
	evstdw		r20,56(r1);					   \
	evstdw		r21,64(r1);					   \
	evstdw		r22,72(r1);					   \
	evstdw		r23,80(r1);					   \
	stw		r24,88(r1);	/* save normal registers	*/ \
	stw		r25,92(r1);

/*
 * Epilogue: mirror of INITIALIZE.
 *
 * Reloads r14-r25 from the frame, then overwrites the first word of each of
 * the ten 8 byte save slots (offsets 8, 16, ..., 80) with zero before the
 * frame is released. The second word of each slot is left as is; see the
 * in-line comment for the reasoning. Clobbers r0 (left as zero).
 */
#define FINALIZE \
	evldw		r14,8(r1);	/* restore SPE registers	*/ \
	evldw		r15,16(r1);					   \
	evldw		r16,24(r1);					   \
	evldw		r17,32(r1);					   \
	evldw		r18,40(r1);					   \
	evldw		r19,48(r1);					   \
	evldw		r20,56(r1);					   \
	evldw		r21,64(r1);					   \
	evldw		r22,72(r1);					   \
	evldw		r23,80(r1);					   \
	lwz		r24,88(r1);	/* restore normal registers	*/ \
	lwz		r25,92(r1);					   \
	xor		r0,r0,r0;					   \
	stw		r0,8(r1);	/* Delete sensitive data	*/ \
	stw		r0,16(r1);	/* that we might have pushed	*/ \
	stw		r0,24(r1);	/* from other context that runs	*/ \
	stw		r0,32(r1);	/* the same code. Assume that	*/ \
	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
	stw		r0,48(r1);	/* was already overwritten on	*/ \
	stw		r0,56(r1);	/* the way down to here		*/ \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	addi		r1,r1,128;	/* cleanup stack frame		*/

/*
 * Message word access.
 *
 * Big endian:    plain lwz at a fixed offset from rWP; rWP is advanced once
 *                per block by NEXT_BLOCK.
 * Little endian: byte reversed load (lwbrx) at rWP, which is then advanced
 *                by 4 after every word. The "off" argument is unused and
 *                NEXT_BLOCK is empty because rWP has already moved 64 bytes
 *                after the 16 loads of a block.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/ \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#endif

/*
 * R_LOAD_W - two SHA-256 rounds that take W directly from the input data.
 *
 * a..h	registers holding the working variables for the first round
 * w	64 bit register that receives the two message words
 * off	byte offset of the first word, used for both the input data
 *	(big endian LOAD_DATA only) and the round constants at rKP
 *
 * The first word is loaded into w, evmergelo w,w,w copies it into the high
 * half, and the second 32 bit load replaces the low half again.
 *
 * The first round writes its results to d and h, the second round to c and
 * g (it sees the state shifted by one position: d acts as "e", h as "a").
 * Callers therefore rotate the register arguments by two positions between
 * consecutive invocations instead of moving values between registers.
 *
 * The "1:" / "2:" tags in the comments name the round an instruction belongs
 * to; comments use the logical names a..h of that round. The instructions of
 * both rounds are interleaved on purpose, do not reorder them.
 *
 * Clobbers rT0-rT3.
 */
#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
	LOAD_DATA(w, off)		/* 1: W				*/ \
	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
	and		rT3,e,f;	/* 1: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
	lwz		rT2,off(rKP);	/* 1: K				*/ \
	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelo	w,w,w;		/*    shift W (low -> high)	*/ \
	or		rT2,a,b;	/* 1: maj = a or b		*/ \
	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
	LOAD_DATA(w, off+4)		/* 2: W				*/ \
	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * R_CALC_W - two SHA-256 rounds with on-the-fly message schedule expansion.
 *
 * a..h	registers holding the working variables for the first round
 * w0	holds w[-16]/w[-15] on entry, receives the two new words
 * w1	high half supplies w[-14]
 * w4,w5	supply w[-7]/w[-6] (low half of w4, high half of w5)
 * w7	holds w[-2]/w[-1]
 * k	N or C, selects CMP_KN_LOOP or CMP_KC_LOOP
 * off	byte offset of the round constant pair relative to rKP
 *
 * The ev* instructions work on both 32 bit halves of a register at once, so
 * one pass through the SPE instructions produces two schedule words. evrlwi
 * rotates left: rotl 25 is rotr 7, a further rotl 21 makes rotr 18, rotl 15
 * is rotr 17 and rotl 13 is rotr 19. After "wk = w + k", evmergehi copies
 * the high half of wk into the low half of rT0 for the first round; the low
 * half of rT1 serves the second round.
 *
 * In the first round maj is built as (a and b) + ((a xor b) and c); the two
 * terms never have a bit in common, so the add acts as an or.
 *
 * As in R_LOAD_W the first round updates d and h, the second c and g, and
 * the interleaving of scalar and SPE instructions is intentional.
 *
 * Clobbers rT0-rT3; with k = C also cr0.
 */
#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr 7	*/ \
	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25		*/ \
	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1"		*/ \
	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
	and		rT2,e,f;	/* 1: ch = e and f		*/ \
	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
	evldw		rT1,off(rKP);	/*    k				*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
	and		rT3,a,b;	/* 1: maj = a and b		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
	CMP_K##k##_LOOP							   \
	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
	evmergehi	rT0,rT1,rT1;	/*    wk1/wk2			*/ \
	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
	add		g,g,rT1;	/* 2: temp1 = temp1 + wk	*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * ppc_spe_sha256_transform - SHA-256 compression over consecutive blocks.
 *
 * In:	r3 (rHP)  pointer to the eight 32 bit hash words; loaded once at
 *		  entry, then re-read, added to and stored back per block
 *	r4 (rWP)  pointer to the input data, 64 bytes consumed per block
 *	r5	  number of blocks; copied to CTR before r5 is reused as
 *		  rH0. A count of 0 returns immediately.
 * Out:	hash words at rHP updated in memory
 * Clobbers: r0, r4-r12, CTR, cr0. r14-r25 are preserved by
 *	INITIALIZE/FINALIZE.
 *
 * NOTE(review): the SPE unit presumably has to be enabled by the caller;
 * nothing in this file does so - confirm against the calling code.
 *
 * Per block:
 *  - rKP is reset to the start of PPC_SPE_SHA256_K.
 *  - 8 x R_LOAD_W run rounds 0-15 and fill rW0-rW7 from the input.
 *  - ppc_spe_sha256_16_rounds advances rKP by 64 bytes and runs 16 more
 *    rounds with 8 x R_CALC_W. The last invocation uses the "C" variant,
 *    which compares the second constant of the pair at offset 56 with zero.
 *    With the table in this file that constant is 0x14292967, then
 *    0x106aa070 (both positive, loop again) and finally 0xc67178f2
 *    (negative as a signed word, fall through), giving three passes and
 *    64 rounds in total without a separate round counter.
 *  - rW0-rW7 are no longer needed and are reused to fetch the previous hash
 *    words, which are added to the working variables and stored back.
 */
_GLOBAL(ppc_spe_sha256_transform)
	cmpwi		r5,0		/* zero blocks: CTR would wrap	*/
	beqlr				/* in bdnz below, so leave now	*/

	INITIALIZE

	mtctr		r5		/* block count, r5 becomes rH0	*/
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)
	lwz		rH4,16(rHP)
	lwz		rH5,20(rHP)
	lwz		rH6,24(rHP)
	lwz		rH7,28(rHP)

ppc_spe_sha256_main:
	lis		rKP,PPC_SPE_SHA256_K@ha
	addi		rKP,rKP,PPC_SPE_SHA256_K@l

	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
ppc_spe_sha256_16_rounds:
	addi		rKP,rKP,64	/* next 16 round constants	*/
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW0, rW1, rW4, rW5, rW7, N, 0)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW1, rW2, rW5, rW6, rW0, N, 8)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW2, rW3, rW6, rW7, rW1, N, 16)
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW3, rW4, rW7, rW0, rW2, N, 24)
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW4, rW5, rW0, rW1, rW3, N, 32)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW5, rW6, rW1, rW2, rW4, N, 40)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW6, rW7, rW2, rW3, rW5, N, 48)
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW7, rW0, rW3, rW4, rW6, C, 56)
	bt		gt,ppc_spe_sha256_16_rounds	/* last K > 0	*/

	lwz		rW0,0(rHP)	/* previous hash values		*/
	NEXT_BLOCK
	lwz		rW1,4(rHP)
	lwz		rW2,8(rHP)
	lwz		rW3,12(rHP)
	lwz		rW4,16(rHP)
	lwz		rW5,20(rHP)
	lwz		rW6,24(rHP)
	lwz		rW7,28(rHP)

	add		rH0,rH0,rW0
	stw		rH0,0(rHP)
	add		rH1,rH1,rW1
	stw		rH1,4(rHP)
	add		rH2,rH2,rW2
	stw		rH2,8(rHP)
	add		rH3,rH3,rW3
	stw		rH3,12(rHP)
	add		rH4,rH4,rW4
	stw		rH4,16(rHP)
	add		rH5,rH5,rW5
	stw		rH5,20(rHP)
	add		rH6,rH6,rW6
	stw		rH6,24(rHP)
	add		rH7,rH7,rW7
	stw		rH7,28(rHP)

	bdnz		ppc_spe_sha256_main

	FINALIZE
	blr

/*
 * SHA-256 round constants, four per line, 64 in total.
 *
 * The table is only ever read (lwz/evldw relative to rKP), so it lives in
 * .rodata rather than in writable .data.
 *
 * The round loop in ppc_spe_sha256_transform has no counter: it tests the
 * sign of the last constant of each group of 16 from the second group on
 * (0x14292967, 0x106aa070: positive, continue; 0xc67178f2: negative, stop).
 * Do not reorder or pad this table.
 */
.section .rodata
.align 5
PPC_SPE_SHA256_K:
	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2