/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

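/*
 * A note on the three macros below (register usage follows from the code):
 * COPY_16_BYTES copies 16 bytes through r7-r10, from 4(r4) to 4(r6), and the
 * trailing lwzu/stwu advance both r4 and r6 by 16.  COPY_16_BYTES_WITHEX(n)
 * is the same copy with each load/store carrying a numeric label 8n0..8n7 so
 * that exception-table entries can be attached to every access.
 * COPY_16_BYTES_EXCODE(n) provides the matching fixups (labels 9n0 for the
 * loads, 9n1 for the stores): they adjust r5 by -(16 * n), in effect
 * crediting the bytes of the current cache line already copied by the
 * earlier unrolled blocks, then branch to the common read-fault (104) or
 * write-fault (105) handler; the EX_TABLE lines wire the 8nX labels to them.
 */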
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
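/*
 * memset16: roughly void *memset16(u16 *s, u16 v, size_t n), with the usual
 * 32-bit ABI register assignment: r3 = destination, r4 = 16-bit value,
 * r5 = number of halfwords.  The value is replicated into the upper half of
 * r4 and stored a word at a time, with a single sth for an odd trailing
 * halfword.  r3 is left untouched and so is returned unchanged.
 */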
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 */
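/*
 * memset: void *memset(void *s, int c, size_t n), i.e. r3 = destination,
 * r4 = fill byte, r5 = length in bytes; the destination pointer is returned
 * unchanged in r3.  The fill byte is replicated across r4 with the two
 * rlwimi instructions so whole words can be stored, and dcbz is used to
 * clear full cache lines when the value is zero and the cache is enabled.
 */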
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip optimised block until cache is enabled. Will be replaced
	 * by 'bne' during boot to use normal procedure if r4 is not zero
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 */
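/*
 * memcpy / memmove: void *memcpy(void *to, const void *from, size_t n) and
 * void *memmove(void *to, const void *from, size_t n), so r3 = destination,
 * r4 = source, r5 = length; r3 is returned unchanged.  memmove falls through
 * into memcpy when the destination is not above the source, and branches to
 * backwards_memcpy (a descending copy) otherwise, which is what makes
 * overlapping moves safe.
 */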
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

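/*
 * generic_memcpy is the plain word-at-a-time copy: it is used while the
 * cache is still off (via the patched branch above) and whenever the source
 * and destination regions overlap, since it never touches dcbz.  Same
 * register convention as memcpy: r3 = destination, r4 = source, r5 = length.
 */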
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

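/*
 * backwards_memcpy takes the same r3/r4/r5 arguments as memcpy but starts
 * at the end of both buffers and works downwards, which is what memmove
 * relies on when the destination lies above an overlapping source.
 */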
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

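/*
 * __copy_tofrom_user: roughly
 *   unsigned long __copy_tofrom_user(void __user *to, const void __user *from,
 *                                    unsigned long size)
 * with r3 = destination, r4 = source, r5 = size; either pointer may be a
 * user address.  It returns, in r3, the number of bytes that could NOT be
 * copied (0 on complete success); the exception-table fixups further down
 * compute that count when a load or store faults.
 */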
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

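/*
 * Exception-table entries for the alignment prologue above: a fault at
 * 70/71 (byte loop) is fixed up at 100/101, a fault at 72/73 (word loop)
 * at 102/103, distinguishing read faults from write faults.
 */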
	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

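/*
 * At this point r7 holds the chosen prefetch distance in cache lines
 * (0, 1 or MAX_COPY_PREFETCH), r3 is the byte offset from r4 of the next
 * line to touch with dcbt, and r0 holds the number of complete cache lines
 * left to copy.
 */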
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * This code handles faults in the cacheline loop and branches to either
 * 104f (if in the read part) or 105f (if in the write part), after
 * updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
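/*
 * The shift amount r3 encodes the unit of the loop that faulted: it was set
 * to LG_CACHELINE_BYTES at 92 (cacheline loop), to 2 at 91/93 (word loops)
 * and to 0 at 90/94 (byte loops), so ctr << r3 converts the remaining loop
 * count into bytes before the leftover r5 is added.
 */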
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* r3 = number of bytes still not copied (0 if the retry completed) */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)