18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright (C) IBM Corporation, 2012
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Author: Anton Blanchard <anton@au.ibm.com>
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
108c2ecf20Sopenharmony_ci#include <asm/linkage.h>
118c2ecf20Sopenharmony_ci#include <asm/asm-offsets.h>
128c2ecf20Sopenharmony_ci#include <asm/export.h>
138c2ecf20Sopenharmony_ci
/*
 * TOC entry for ppc64_caches. The .Llong_clear path of __arch_clear_user
 * loads this entry with "ld r5,PPC64_CACHES@toc(r2)" and then reads the
 * DCACHEL1LOGBLOCKSIZE / DCACHEL1BLOCKSIZE fields through r5.
 */
148c2ecf20Sopenharmony_ci	.section	".toc","aw"
158c2ecf20Sopenharmony_ciPPC64_CACHES:
168c2ecf20Sopenharmony_ci	.tc		ppc64_caches[TC],ppc64_caches
178c2ecf20Sopenharmony_ci	.section	".text"	/* back to code for everything that follows */
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci/**
208c2ecf20Sopenharmony_ci * __arch_clear_user: - Zero a block of memory in user space, with less checking.
218c2ecf20Sopenharmony_ci * @to:   Destination address, in user space.
228c2ecf20Sopenharmony_ci * @n:    Number of bytes to zero.
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci * Zero a block of memory in user space.  Caller must check
258c2ecf20Sopenharmony_ci * the specified block with access_ok() before calling this function.
268c2ecf20Sopenharmony_ci *
278c2ecf20Sopenharmony_ci * Returns number of bytes that could not be cleared.
288c2ecf20Sopenharmony_ci * On success, this will be zero.
298c2ecf20Sopenharmony_ci */
308c2ecf20Sopenharmony_ci
/*
 * err1 - tag the instruction that follows with fixup handler .Ldo_err1.
 *
 * Emits local label 100 at the point of use and hands it, together with
 * .Ldo_err1, to EX_TABLE. EX_TABLE is defined outside this file; presumably
 * it records an exception-table entry so that a fault at label 100 resumes
 * at .Ldo_err1 (confirm against the header). Written as "err1; <insn>".
 * Used where r8 holds the pointer to restart from.
 */
318c2ecf20Sopenharmony_ci	.macro err1
328c2ecf20Sopenharmony_ci100:
338c2ecf20Sopenharmony_ci	EX_TABLE(100b,.Ldo_err1)
348c2ecf20Sopenharmony_ci	.endm
358c2ecf20Sopenharmony_ci
/*
 * err2 - tag the instruction that follows with fixup handler .Ldo_err2.
 *
 * Emits local label 200 at the point of use and hands it, together with
 * .Ldo_err2, to EX_TABLE (defined outside this file; presumably registers
 * the fault fixup address - confirm). Used on stores where r3 and r4
 * already describe the range still to be cleared, so no pointer needs to
 * be restored.
 */
368c2ecf20Sopenharmony_ci	.macro err2
378c2ecf20Sopenharmony_ci200:
388c2ecf20Sopenharmony_ci	EX_TABLE(200b,.Ldo_err2)
398c2ecf20Sopenharmony_ci	.endm
408c2ecf20Sopenharmony_ci
/*
 * err3 - tag the instruction that follows with fixup handler .Ldo_err3.
 *
 * Emits local label 300 at the point of use and hands it, together with
 * .Ldo_err3, to EX_TABLE (defined outside this file; presumably registers
 * the fault fixup address - confirm). Only used on the byte store inside
 * the fixup loop at .Ldo_err2.
 */
418c2ecf20Sopenharmony_ci	.macro err3
428c2ecf20Sopenharmony_ci300:
438c2ecf20Sopenharmony_ci	EX_TABLE(300b,.Ldo_err3)
448c2ecf20Sopenharmony_ci	.endm
458c2ecf20Sopenharmony_ci
/*
 * Fault fixup paths for __arch_clear_user. The three labels fall through
 * into one another (assuming EX_TABLE registers its second argument as the
 * address to resume at after a fault):
 *
 *  .Ldo_err1 - for err1-tagged instructions: reload r3 from r8, the pointer
 *              the main code saved before the current group of stores, then
 *              continue as .Ldo_err2.
 *  .Ldo_err2 - for err2-tagged instructions, where r3/r4 already describe
 *              the outstanding range: retry that range one byte at a time.
 *  .Ldo_err3 - for the byte store in the retry loop, and also the normal
 *              exit of that loop: return r4 in r3.
 *
 * r4 is decremented once per byte stored, so the value returned is the
 * number of bytes that were not cleared (0 if the byte loop ran to the end).
 * Relies on r0 still being zero, as set by "li r0,0" at function entry.
 */
468c2ecf20Sopenharmony_ci.Ldo_err1:
478c2ecf20Sopenharmony_ci	mr	r3,r8	/* r3 = saved restart pointer */
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci.Ldo_err2:
508c2ecf20Sopenharmony_ci	mtctr	r4	/* CTR = number of bytes still to clear */
518c2ecf20Sopenharmony_ci1:
528c2ecf20Sopenharmony_cierr3;	stb	r0,0(r3)	/* zero one byte; tagged so a fault ends the retry */
538c2ecf20Sopenharmony_ci	addi	r3,r3,1	/* advance destination */
548c2ecf20Sopenharmony_ci	addi	r4,r4,-1	/* one byte fewer outstanding */
558c2ecf20Sopenharmony_ci	bdnz	1b	/* loop until CTR reaches zero */
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci.Ldo_err3:
588c2ecf20Sopenharmony_ci	mr	r3,r4	/* return value = bytes left uncleared */
598c2ecf20Sopenharmony_ci	blr
608c2ecf20Sopenharmony_ci
/*
 * __arch_clear_user - entry point. The caller-visible contract is given in
 * the kernel-doc comment earlier in this file; this comment describes the
 * implementation.
 *
 * Register roles as used by the code below:
 *   r0  - constant zero, the source of every store
 *   r3  - in: destination pointer; running write pointer; out: return value
 *   r4  - in: byte count; running count of bytes still to clear
 *   r5  - pointer loaded from the PPC64_CACHES TOC entry, later reused for
 *         the byte distance to the next block boundary
 *   r6  - scratch: alignment byte count and loop trip counts
 *   r7  - shift count loaded from DCACHEL1LOGBLOCKSIZE
 *   r8  - restart pointer consumed by the .Ldo_err1 fixup
 *   r9  - block size in bytes loaded from DCACHEL1BLOCKSIZE
 *   r10 - scratch: size threshold, then block-size mask
 *   cr7 - four flag bits copied from the low nibble of a register
 *         (cr7*4+0 = 8, +1 = 4, +2 = 2, +3 = 1)
 *   CTR - loop counter
 *
 * Flow: counts below 32 go straight to .Lshort_clear. Otherwise the pointer
 * is brought to 8-byte alignment, then the remaining count selects
 * .Lshort_clear (< 32), .Lmedium_clear (32..512, 32-byte store loop) or
 * .Llong_clear (> 512, dcbz per block). Every path finishes in
 * .Lshort_clear, which returns 0.
 *
 * Fixup invariants: err2-tagged stores are only issued while r3/r4 exactly
 * describe the outstanding range; err1-tagged stores are issued while r8
 * holds the pointer that corresponds to the current value of r4.
 *
 * _GLOBAL_TOC is defined outside this file; presumably it declares the
 * global symbol and establishes r2 as the TOC pointer needed by the ld in
 * .Llong_clear - confirm against the header.
 */
618c2ecf20Sopenharmony_ci_GLOBAL_TOC(__arch_clear_user)
628c2ecf20Sopenharmony_ci	cmpdi	r4,32	/* cr0 = n vs 32; NOTE(review): signed compare, assumes n < 2^63 - confirm caller guarantees this */
638c2ecf20Sopenharmony_ci	neg	r6,r3	/* r6 = -to; its low bits are the distance to the next aligned address */
648c2ecf20Sopenharmony_ci	li	r0,0	/* r0 = 0, stored by every st* below and by the fixup loop */
658c2ecf20Sopenharmony_ci	blt	.Lshort_clear	/* fewer than 32 bytes: skip the alignment step */
668c2ecf20Sopenharmony_ci	mr	r8,r3	/* restart pointer for the err1 stores below */
678c2ecf20Sopenharmony_ci	mtocrf	0x01,r6	/* cr7 = low 4 bits of -to */
688c2ecf20Sopenharmony_ci	clrldi	r6,r6,(64-3)	/* r6 = (-to) & 7 = bytes needed to reach 8-byte alignment */
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	/* Get the destination 8 byte aligned */
718c2ecf20Sopenharmony_ci	bf	cr7*4+3,1f	/* bit 1 clear: no single byte needed */
728c2ecf20Sopenharmony_cierr1;	stb	r0,0(r3)
738c2ecf20Sopenharmony_ci	addi	r3,r3,1
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f	/* bit 2 clear: no halfword needed */
768c2ecf20Sopenharmony_cierr1;	sth	r0,0(r3)
778c2ecf20Sopenharmony_ci	addi	r3,r3,2
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci2:	bf	cr7*4+1,3f	/* bit 4 clear: no word needed */
808c2ecf20Sopenharmony_cierr1;	stw	r0,0(r3)
818c2ecf20Sopenharmony_ci	addi	r3,r3,4
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci3:	sub	r4,r4,r6	/* account for the alignment bytes only now, so r4 matched r8 above */
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	cmpdi	r4,32	/* cr0: remaining vs 32 */
868c2ecf20Sopenharmony_ci	cmpdi	cr1,r4,512	/* cr1: remaining vs 512; cr7 is left untouched for .Llong_clear */
878c2ecf20Sopenharmony_ci	blt	.Lshort_clear
888c2ecf20Sopenharmony_ci	bgt	cr1,.Llong_clear
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci.Lmedium_clear:
918c2ecf20Sopenharmony_ci	srdi	r6,r4,5	/* r6 = remaining / 32 = number of 32-byte chunks */
928c2ecf20Sopenharmony_ci	mtctr	r6
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	/* Do 32 byte chunks */
958c2ecf20Sopenharmony_ci4:
968c2ecf20Sopenharmony_cierr2;	std	r0,0(r3)
978c2ecf20Sopenharmony_cierr2;	std	r0,8(r3)
988c2ecf20Sopenharmony_cierr2;	std	r0,16(r3)
998c2ecf20Sopenharmony_cierr2;	std	r0,24(r3)
1008c2ecf20Sopenharmony_ci	addi	r3,r3,32	/* r3/r4 are updated only after all four stores, keeping them valid for err2 */
1018c2ecf20Sopenharmony_ci	addi	r4,r4,-32
1028c2ecf20Sopenharmony_ci	bdnz	4b
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci.Lshort_clear:
1058c2ecf20Sopenharmony_ci	/* up to 31 bytes to go */
1068c2ecf20Sopenharmony_ci	cmpdi	r4,16
1078c2ecf20Sopenharmony_ci	blt	6f	/* fewer than 16: go to the sub-16 tail */
1088c2ecf20Sopenharmony_cierr2;	std	r0,0(r3)
1098c2ecf20Sopenharmony_cierr2;	std	r0,8(r3)
1108c2ecf20Sopenharmony_ci	addi	r3,r3,16
1118c2ecf20Sopenharmony_ci	addi	r4,r4,-16
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	/* Up to 15 bytes to go */
1148c2ecf20Sopenharmony_ci6:	mr	r8,r3	/* restart pointer for the err1 stores of the tail */
1158c2ecf20Sopenharmony_ci	clrldi	r4,r4,(64-4)	/* r4 &= 15 */
1168c2ecf20Sopenharmony_ci	mtocrf	0x01,r4	/* cr7 = remaining count; each bit selects one store size */
1178c2ecf20Sopenharmony_ci	bf	cr7*4+0,7f	/* bit 8 clear: no doubleword */
1188c2ecf20Sopenharmony_cierr1;	std	r0,0(r3)
1198c2ecf20Sopenharmony_ci	addi	r3,r3,8
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci7:	bf	cr7*4+1,8f	/* bit 4 clear: no word */
1228c2ecf20Sopenharmony_cierr1;	stw	r0,0(r3)
1238c2ecf20Sopenharmony_ci	addi	r3,r3,4
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci8:	bf	cr7*4+2,9f	/* bit 2 clear: no halfword */
1268c2ecf20Sopenharmony_cierr1;	sth	r0,0(r3)
1278c2ecf20Sopenharmony_ci	addi	r3,r3,2
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci9:	bf	cr7*4+3,10f	/* bit 1 clear: no final byte */
1308c2ecf20Sopenharmony_cierr1;	stb	r0,0(r3)
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci10:	li	r3,0	/* everything cleared: return 0 */
1338c2ecf20Sopenharmony_ci	blr
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci.Llong_clear:
1368c2ecf20Sopenharmony_ci	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = contents of the PPC64_CACHES TOC entry (address of ppc64_caches) */
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	bf	cr7*4+0,11f	/* cr7 still holds the low bits of -to: bit 8 clear means already 16-byte aligned */
1398c2ecf20Sopenharmony_cierr2;	std	r0,0(r3)
1408c2ecf20Sopenharmony_ci	addi	r3,r3,8
1418c2ecf20Sopenharmony_ci	addi	r4,r4,-8
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	/* Destination is 16 byte aligned, need to get it cache block aligned */
1448c2ecf20Sopenharmony_ci11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)	/* r7 = shift count (by name: log2 of the L1 dcache block size) */
1458c2ecf20Sopenharmony_ci	lwz	r9,DCACHEL1BLOCKSIZE(r5)	/* r9 = block size in bytes (by name: L1 dcache block size) */
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	/*
1488c2ecf20Sopenharmony_ci	 * With worst case alignment the long clear loop takes a minimum
1498c2ecf20Sopenharmony_ci	 * of 1 byte less than 2 cachelines.
1508c2ecf20Sopenharmony_ci	 */
1518c2ecf20Sopenharmony_ci	sldi	r10,r9,2	/* r10 = 4 * block size */
1528c2ecf20Sopenharmony_ci	cmpd	r4,r10
1538c2ecf20Sopenharmony_ci	blt	.Lmedium_clear	/* fewer than 4 blocks left: use the 32-byte loop; NOTE(review): comment above says 2 cachelines, threshold is 4 - confirm intent */
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	neg	r6,r3
1568c2ecf20Sopenharmony_ci	addi	r10,r9,-1	/* r10 = block size - 1, used as a mask (assumes a power-of-two block size) */
1578c2ecf20Sopenharmony_ci	and.	r5,r6,r10	/* r5 = bytes up to the next block boundary; sets cr0 */
1588c2ecf20Sopenharmony_ci	beq	13f	/* already block aligned */
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	srdi	r6,r5,4	/* r6 = r5 / 16 iterations of the 16-byte loop */
1618c2ecf20Sopenharmony_ci	mtctr	r6
1628c2ecf20Sopenharmony_ci	mr	r8,r3	/* restart pointer for the err1 stores in loop 12 */
1638c2ecf20Sopenharmony_ci12:
1648c2ecf20Sopenharmony_cierr1;	std	r0,0(r3)
1658c2ecf20Sopenharmony_cierr1;	std	r0,8(r3)
1668c2ecf20Sopenharmony_ci	addi	r3,r3,16
1678c2ecf20Sopenharmony_ci	bdnz	12b
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	sub	r4,r4,r5	/* r4 is adjusted only after the loop, so it matched r8 throughout */
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci13:	srd	r6,r4,r7	/* r6 = number of whole blocks remaining */
1728c2ecf20Sopenharmony_ci	mtctr	r6
1738c2ecf20Sopenharmony_ci	mr	r8,r3	/* restart pointer for the err1-tagged dcbz loop */
1748c2ecf20Sopenharmony_ci14:
1758c2ecf20Sopenharmony_cierr1;	dcbz	0,r3	/* zero the whole data cache block at r3 */
1768c2ecf20Sopenharmony_ci	add	r3,r3,r9	/* step to the next block */
1778c2ecf20Sopenharmony_ci	bdnz	14b
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	and	r4,r4,r10	/* r4 = leftover bytes, less than one block */
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	cmpdi	r4,32
1828c2ecf20Sopenharmony_ci	blt	.Lshort_clear	/* under 32 bytes left: finish in the tail */
1838c2ecf20Sopenharmony_ci	b	.Lmedium_clear	/* otherwise finish with the 32-byte loop */
1848c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__arch_clear_user)
185