1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Fast MD5 implementation for PPC
4  *
5  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6  */
7 #include <asm/ppc_asm.h>
8 #include <asm/asm-offsets.h>
9 #include <asm/asm-compat.h>
10 
/*
 * Register allocation
 *
 * r3 and r4 are used as base addresses throughout.  r5 holds the loop
 * count on entry; it is copied to CTR before being reused as rH3.
 */

/* base addresses */
#define rHP	r3		/* hash words at 0, 4, 8, 12		*/
#define rWP	r4		/* message data				*/

/*
 * Working copies of the four hash words.
 *
 * rH0 lives in r0: it must only ever appear as a source/target of
 * register-register operations, never as the base operand of
 * addi/addis or of a load/store, where r0 reads as the value zero.
 */
#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

/* the sixteen 32-bit message words of the current block */
#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

/* scratch registers for the round functions */
#define rT0	r25
#define rT1	r26
38 
/*
 * Prologue: open a stack frame of INT_FRAME_SIZE bytes and store
 * r14-r26 in it.  These are the registers behind rW05-rW15, rT0 and rT1.
 * FINALIZE undoes this.
 */
#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1);	/* allocate the frame	*/ \
	SAVE_8GPRS(14, r1);			/* eight GPRs from r14	*/ \
	SAVE_4GPRS(22, r1);			/* four GPRs from r22	*/ \
	SAVE_GPR(26, r1)			/* and r26		*/
44 
/*
 * Epilogue: reload r14-r26 from the frame opened by INITIALIZE and
 * release the frame by moving r1 back up by INT_FRAME_SIZE.
 */
#define FINALIZE \
	REST_8GPRS(14, r1);			/* eight GPRs from r14	*/ \
	REST_4GPRS(22, r1);			/* four GPRs from r22	*/ \
	REST_GPR(26, r1);			/* and r26		*/ \
	addi	r1,r1,INT_FRAME_SIZE;		/* drop the frame	*/
50 
/*
 * Message word access
 *
 * MD5 works on little-endian 32-bit words.
 *
 * Little-endian build: words are read with a plain displacement load
 * from rWP; the pointer is moved once per 64-byte block (NEXT_BLOCK)
 * and INC_PTR expands to nothing.
 *
 * Big-endian build: words are read byte-reversed with lwbrx.  That
 * instruction only has an indexed form, so the `off' argument is not
 * used; instead rWP is advanced by 4 after every load (INC_PTR) and
 * NEXT_BLOCK expands to nothing.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* word at rWP + off		*/
#define INC_PTR				/* empty: displacement is used	*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* step over the whole block	*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* byte-reversed word at rWP	*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* step over the word just read	*/
#define NEXT_BLOCK			/* empty: rWP already advanced	*/
#endif
64 
/*
 * R_00_15 - two consecutive steps of round 1
 *
 *   F(x,y,z) = (x & y) | (~x & z)
 *
 *   step 1:  a = b + rotr32(a + F(b,c,d) + w0 + k0, ror0)
 *   step 2:  d = a + rotr32(d + F(a,b,c) + w1 + k1, ror1)
 *
 * w0/w1 are loaded from the message (LOAD_DATA at off and off+4) and are
 * left holding "word + constant".  Each constant is passed as two halves:
 * kNh is added with addis, kNl with addi.  ror0/ror1 are right-rotate
 * amounts.  rT0 and rT1 are clobbered.
 *
 * The "1:" / "2:" tags show which step an instruction belongs to; the two
 * steps are interleaved (presumably for instruction scheduling), so keep
 * the instruction order as it is.
 */
#define R_00_15(a, b, c, d, w0, w1, ror0, ror1, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/*    fetch first word		*/ \
	and		rT0,b,c;	/* 1: t0 = b & c		*/ \
	INC_PTR				/*    advance ptr (BE only)	*/ \
	andc		rT1,d,b;	/* 1: t1 = d & ~b		*/ \
	LOAD_DATA(w1, off+4)		/*    fetch second word		*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = F(b,c,d)		*/ \
	addi		w0,w0,k0l;	/* 1: w0 += low half of k0	*/ \
	add		a,a,rT0;	/* 1: a += F			*/ \
	addis		w0,w0,k0h;	/* 1: w0 += high half of k0	*/ \
	addis		w1,w1,k1h;	/* 2: w1 += high half of k1	*/ \
	add		a,a,w0;		/* 1: a += w0			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += low half of k1	*/ \
	rotrwi		a,a,ror0;	/* 1: a = a rotr ror0		*/ \
	add		d,d,w1;		/* 2: d += w1			*/ \
	add		a,a,b;		/* 1: a += b			*/ \
	and		rT0,a,b;	/* 2: t0 = a & b		*/ \
	andc		rT1,c,a;	/* 2: t1 = c & ~a		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = F(a,b,c)		*/ \
	add		d,d,rT0;	/* 2: d += F			*/ \
	INC_PTR				/*    advance ptr (BE only)	*/ \
	rotrwi		d,d,ror1;	/* 2: d = d rotr ror1		*/ \
	add		d,d,a;		/* 2: d += a			*/
88 
/*
 * R_16_31 - two consecutive steps of round 2
 *
 *   G(x,y,z) = (x & z) | (y & ~z)
 *
 *   step 1:  a = b + rotr32(a + G(b,c,d) + w0 + k0, ror0)
 *   step 2:  d = a + rotr32(d + G(a,b,c) + w1 + k1, ror1)
 *
 * w0/w1 are message-word registers that are updated in place: kNl is
 * added with addi and kNh with addis before the register is consumed.
 * ror0/ror1 are right-rotate amounts.  rT0 and rT1 are clobbered.
 *
 * The "1:" / "2:" tags show which step an instruction belongs to; the two
 * steps are interleaved, so keep the instruction order as it is.
 */
#define R_16_31(a, b, c, d, w0, w1, ror0, ror1, k0h, k0l, k1h, k1l) \
	andc		rT0,c,d;	/* 1: t0 = c & ~d		*/ \
	and		rT1,b,d;	/* 1: t1 = b & d		*/ \
	addi		w0,w0,k0l;	/* 1: w0 += low half of k0	*/ \
	or		rT0,rT0,rT1;	/* 1: t0 = G(b,c,d)		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += high half of k0	*/ \
	add		a,a,rT0;	/* 1: a += G			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += low half of k1	*/ \
	add		a,a,w0;		/* 1: a += w0			*/ \
	addis		w1,w1,k1h;	/* 2: w1 += high half of k1	*/ \
	andc		rT0,b,c;	/* 2: t0 = b & ~c		*/ \
	rotrwi		a,a,ror0;	/* 1: a = a rotr ror0		*/ \
	add		a,a,b;		/* 1: a += b			*/ \
	add		d,d,w1;		/* 2: d += w1			*/ \
	and		rT1,a,c;	/* 2: t1 = a & c		*/ \
	or		rT0,rT0,rT1;	/* 2: t0 = G(a,b,c)		*/ \
	add		d,d,rT0;	/* 2: d += G			*/ \
	rotrwi		d,d,ror1;	/* 2: d = d rotr ror1		*/ \
	add		d,d,a;		/* 2: d += a			*/
108 
/*
 * R_32_47 - two consecutive steps of round 3
 *
 *   H(x,y,z) = x ^ y ^ z
 *
 *   step 1:  a = b + rotr32(a + H(b,c,d) + w0 + k0, ror0)
 *   step 2:  d = a + rotr32(d + H(a,b,c) + w1 + k1, ror1)
 *
 * rT0 = b ^ c is computed once and shared by both steps: step 1 xors it
 * with d, step 2 with the freshly computed a.
 *
 * w0/w1 are message-word registers that are updated in place: kNl is
 * added with addi and kNh with addis before the register is consumed.
 * ror0/ror1 are right-rotate amounts.  rT0 and rT1 are clobbered.
 */
#define R_32_47(a, b, c, d, w0, w1, ror0, ror1, k0h, k0l, k1h, k1l) \
	xor		rT0,b,c;	/*    t0 = b ^ c (both steps)	*/ \
	addi		w0,w0,k0l;	/* 1: w0 += low half of k0	*/ \
	xor		rT1,rT0,d;	/* 1: t1 = H(b,c,d)		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += high half of k0	*/ \
	add		a,a,rT1;	/* 1: a += H			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += low half of k1	*/ \
	add		a,a,w0;		/* 1: a += w0			*/ \
	addis		w1,w1,k1h;	/* 2: w1 += high half of k1	*/ \
	rotrwi		a,a,ror0;	/* 1: a = a rotr ror0		*/ \
	add		d,d,w1;		/* 2: d += w1			*/ \
	add		a,a,b;		/* 1: a += b			*/ \
	xor		rT1,rT0,a;	/* 2: t1 = H(a,b,c)		*/ \
	add		d,d,rT1;	/* 2: d += H			*/ \
	rotrwi		d,d,ror1;	/* 2: d = d rotr ror1		*/ \
	add		d,d,a;		/* 2: d += a			*/
125 
/*
 * R_48_63 - two consecutive steps of round 4
 *
 *   I(x,y,z) = y ^ (x | ~z)
 *
 *   step 1:  a = b + rotr32(a + I(b,c,d) + w0 + k0, ror0)
 *   step 2:  d = a + rotr32(d + I(a,b,c) + w1 + k1, ror1)
 *
 * w0/w1 are message-word registers that are updated in place: kNl is
 * added with addi and kNh with addis before the register is consumed.
 * ror0/ror1 are right-rotate amounts.  Only rT0 is used as scratch.
 *
 * The "1:" / "2:" tags show which step an instruction belongs to; the two
 * steps are interleaved, so keep the instruction order as it is.
 */
#define R_48_63(a, b, c, d, w0, w1, ror0, ror1, k0h, k0l, k1h, k1l) \
	addi		w0,w0,k0l;	/* 1: w0 += low half of k0	*/ \
	orc		rT0,b,d;	/* 1: t0 = b | ~d		*/ \
	addis		w0,w0,k0h;	/* 1: w0 += high half of k0	*/ \
	xor		rT0,rT0,c;	/* 1: t0 = I(b,c,d)		*/ \
	add		a,a,w0;		/* 1: a += w0			*/ \
	addi		w1,w1,k1l;	/* 2: w1 += low half of k1	*/ \
	add		a,a,rT0;	/* 1: a += I			*/ \
	addis		w1,w1,k1h;	/* 2: w1 += high half of k1	*/ \
	rotrwi		a,a,ror0;	/* 1: a = a rotr ror0		*/ \
	add		a,a,b;		/* 1: a += b			*/ \
	orc		rT0,a,c;	/* 2: t0 = a | ~c		*/ \
	add		d,d,w1;		/* 2: d += w1			*/ \
	xor		rT0,rT0,b;	/* 2: t0 = I(a,b,c)		*/ \
	add		d,d,rT0;	/* 2: d += I			*/ \
	rotrwi		d,d,ror1;	/* 2: d = d rotr ror1		*/ \
	add		d,d,a;		/* 2: d += a			*/
143 
/*
 * ppc_md5_transform - apply the MD5 block function to consecutive blocks
 *
 * In:	r3 (rHP)  address of the four 32-bit hash words; read on entry and
 *		  rewritten after every block
 *	r4 (rWP)  address of the message data, consumed 64 bytes per block
 *	r5	  number of blocks; copied to CTR, then reused as rH3
 *
 * Returns at once, without touching memory, when r5 is zero.  The loop is
 * bottom-tested with bdnz, so a zero count would otherwise process one
 * block and then wrap CTR around instead of terminating.
 *
 * Clobbers r0, r5-r12, cr0 and CTR; r14-r26 are saved and restored by
 * INITIALIZE/FINALIZE.
 *
 * Notes on the round arguments:
 *  - Rotate amounts are right rotations, i.e. 32 minus the MD5 left
 *    rotation (25 stands for "rotate left by 7").
 *  - A constant is split into a half for addis and a signed half for
 *    addi.  addi sign-extends, so the addis half is one higher whenever
 *    the addi half is negative (0xd76aa478 -> 0xd76b, -23432).
 *  - Round 1 adds the MD5 constant itself to each message word.  The
 *    rW registers keep that sum, so rounds 2-4 add only the difference
 *    between the constant now needed and the one previously added to
 *    the same register.
 */
_GLOBAL(ppc_md5_transform)
	PPC_LCMPI	0,r5,0		/* zero blocks requested?	*/
	beqlr				/* yes: nothing to do		*/

	INITIALIZE

	mtctr		r5		/* CTR = block count		*/
	lwz		rH0,0(rHP)	/* load the current hash	*/
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)

ppc_md5_main:
	/* round 1: message words 0..15 in order, loaded on the fly */
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45,  -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	/* round 2: words 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12 */
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee,  -8608, 0xf258,  -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	/* round 3: words 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2 */
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218,  -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
		0x4701, -27017, 0xc7bd, -19859)

	/*
	 * round 4: words 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9
	 *
	 * rW00, rW14, rW12 and rW10 have had their last use once the first
	 * four invocations are done; they are then refilled with the hash
	 * words from memory, spread between the remaining invocations, for
	 * the final addition.
	 */
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988,  -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf,  -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz		rW00,0(rHP)	/* previous hash word 0		*/
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz		rW14,4(rHP)	/* previous hash word 1		*/
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz		rW12,8(rHP)	/* previous hash word 2		*/
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz		rW10,12(rHP)	/* previous hash word 3		*/
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/* new hash = previous hash + result of the 64 steps */
	add		rH0,rH0,rW00
	stw		rH0,0(rHP)
	add		rH1,rH1,rW14
	stw		rH1,4(rHP)
	add		rH2,rH2,rW12
	stw		rH2,8(rHP)
	add		rH3,rH3,rW10
	stw		rH3,12(rHP)
	NEXT_BLOCK

	bdnz		ppc_md5_main	/* loop while blocks remain	*/

	FINALIZE
	blr
240