18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2002 Paul Mackerras, IBM Corp.
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci#include <asm/processor.h>
68c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
78c2ecf20Sopenharmony_ci#include <asm/export.h>
88c2ecf20Sopenharmony_ci#include <asm/asm-compat.h>
98c2ecf20Sopenharmony_ci#include <asm/feature-fixups.h>
108c2ecf20Sopenharmony_ci#include <asm/kasan.h>
118c2ecf20Sopenharmony_ci
/*
 * SELFTEST_CASE selects which CPU-feature variant is exercised: the
 * test_feature assignments further down compare it against 1 and 0.
 * It defaults to 0 unless it was already defined before this point.
 * NOTE(review): nothing in the visible part of this file compares it
 * against 2 (the Cell value listed below) - confirm whether that case
 * is still meaningful.
 */
128c2ecf20Sopenharmony_ci#ifndef SELFTEST_CASE
138c2ecf20Sopenharmony_ci/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
148c2ecf20Sopenharmony_ci#define SELFTEST_CASE	0
158c2ecf20Sopenharmony_ci#endif
168c2ecf20Sopenharmony_ci
/*
 * memcpy entry point.
 *
 * Register use, as established by the code below:
 *   r3 = destination pointer (also the value handed back to the caller)
 *   r4 = source pointer
 *   r5 = number of bytes to copy
 *
 * The first feature section keeps the generic code when CPU_FTR_VMX_COPY
 * is clear and otherwise branches to memcpy_power7 (that branch is only
 * assembled when CONFIG_PPC_BOOK3S_64 is defined).
 * NOTE(review): the keep-or-replace semantics are read off the macro
 * names, the macros themselves are defined outside this file - confirm
 * in asm/feature-fixups.h.
 */
178c2ecf20Sopenharmony_ci	.align	7
188c2ecf20Sopenharmony_ci_GLOBAL_TOC_KASAN(memcpy)
198c2ecf20Sopenharmony_ciBEGIN_FTR_SECTION
208c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
	/* cr7 = length compared with 0, consumed by the beqlr cr7 below */
218c2ecf20Sopenharmony_ci	cmpdi	cr7,r5,0
228c2ecf20Sopenharmony_ci#else
238c2ecf20Sopenharmony_ci	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
248c2ecf20Sopenharmony_ci#endif
258c2ecf20Sopenharmony_ciFTR_SECTION_ELSE
268c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_BOOK3S_64
278c2ecf20Sopenharmony_ci	b	memcpy_power7
288c2ecf20Sopenharmony_ci#endif
298c2ecf20Sopenharmony_ciALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
308c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
318c2ecf20Sopenharmony_ci	/* dumb little-endian memcpy that will get replaced at runtime */
	/*
	 * Byte-at-a-time loop. r9 (a copy of dest) and r4 are moved back
	 * by one because lbzu/stbu add their displacement of 1 before each
	 * access. r3 is never modified on this path, so it still holds the
	 * destination pointer at the blr. A zero length returns early,
	 * before the count is loaded into CTR.
	 */
328c2ecf20Sopenharmony_ci	addi r9,r3,-1
338c2ecf20Sopenharmony_ci	addi r4,r4,-1
348c2ecf20Sopenharmony_ci	beqlr cr7
358c2ecf20Sopenharmony_ci	mtctr r5
368c2ecf20Sopenharmony_ci1:	lbzu r10,1(r4)
378c2ecf20Sopenharmony_ci	stbu r10,1(r9)
388c2ecf20Sopenharmony_ci	bdnz 1b
398c2ecf20Sopenharmony_ci	blr
408c2ecf20Sopenharmony_ci#else
	/*
	 * Big-endian path set-up:
	 *   cr7 = low four bits of the length. The later code tests
	 *         cr7*4+0, +1, +2 and +3 to decide whether an 8-, 4-, 2-
	 *         and 1-byte piece has to be copied.
	 *   cr1 = length compared (unsigned) with 16.
	 *   r6  = number of bytes up to the next 8-byte destination
	 *         boundary, and cr0 (set by andi.) tells whether that
	 *         number is zero.
	 * dcbt touches the first source cache block ahead of the loads.
	 * Lengths below 16 are handled by .Lshort_copy.
	 */
418c2ecf20Sopenharmony_ci	PPC_MTOCRF(0x01,r5)
428c2ecf20Sopenharmony_ci	cmpldi	cr1,r5,16
438c2ecf20Sopenharmony_ci	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
448c2ecf20Sopenharmony_ci	andi.	r6,r6,7
458c2ecf20Sopenharmony_ci	dcbt	0,r4
468c2ecf20Sopenharmony_ci	blt	cr1,.Lshort_copy
478c2ecf20Sopenharmony_ci/* Below we want to nop out the bne if we're on a CPU that has the
488c2ecf20Sopenharmony_ci   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
498c2ecf20Sopenharmony_ci   cleared.
508c2ecf20Sopenharmony_ci   At the time of writing the only CPU that has this combination of bits
518c2ecf20Sopenharmony_ci   set is Power6. */
528c2ecf20Sopenharmony_citest_feature = (SELFTEST_CASE == 1)
538c2ecf20Sopenharmony_ciBEGIN_FTR_SECTION
548c2ecf20Sopenharmony_ci	nop
558c2ecf20Sopenharmony_ciFTR_SECTION_ELSE
	/*
	 * cr0 still comes from the andi. on r6 above (nothing in between
	 * writes cr0): not-equal means the destination is not yet 8-byte
	 * aligned.
	 */
568c2ecf20Sopenharmony_ci	bne	.Ldst_unaligned
578c2ecf20Sopenharmony_ciALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
588c2ecf20Sopenharmony_ci                    CPU_FTR_UNALIGNED_LD_STD)
/* Execution falls through into .Ldst_aligned. */
/*
 * .Ldst_aligned: main doubleword copy. Reached by fall-through from the
 * entry code or by the branch at the end of .Ldst_unaligned.
 *
 * In:  r3 = dest, r4 = src, r5 = bytes left to copy,
 *      cr7 = low four bits of r5,
 *      cr1 = r5 compared (unsigned) with 16.
 *
 * The loop moves 16 bytes per iteration and is software pipelined: each
 * doubleword is loaded before the previously loaded one is stored, so
 * one loaded value (r8) is always still pending when the loop ends.
 */
598c2ecf20Sopenharmony_ci.Ldst_aligned:
	/* Bias dest by -16 so the stores below can use displacements 8 and 16. */
608c2ecf20Sopenharmony_ci	addi	r3,r3,-16
618c2ecf20Sopenharmony_citest_feature = (SELFTEST_CASE == 0)
628c2ecf20Sopenharmony_ciBEGIN_FTR_SECTION
	/*
	 * r0 = src & 7. A non-zero value diverts to .Lsrc_unaligned, which
	 * consumes r0. Going by the IFCLR macro name this check is kept
	 * only on CPUs without CPU_FTR_UNALIGNED_LD_STD - confirm in
	 * asm/feature-fixups.h.
	 */
638c2ecf20Sopenharmony_ci	andi.	r0,r4,7
648c2ecf20Sopenharmony_ci	bne	.Lsrc_unaligned
658c2ecf20Sopenharmony_ciEND_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	/*
	 * CTR = number of 16-byte chunks, r9 = first source doubleword,
	 * src biased by -8 to match the loop displacements, and
	 * r5 = trailing byte count (0-7) with cr0.eq set when it is zero.
	 */
668c2ecf20Sopenharmony_ci	srdi	r7,r5,4
678c2ecf20Sopenharmony_ci	ld	r9,0(r4)
688c2ecf20Sopenharmony_ci	addi	r4,r4,-8
698c2ecf20Sopenharmony_ci	mtctr	r7
708c2ecf20Sopenharmony_ci	andi.	r5,r5,7
	/*
	 * Bit 8 of the length clear: an even number of doublewords, enter
	 * the loop at 2 with the first doubleword in r9. Otherwise undo
	 * half of the bias, move the first doubleword to r8 and enter at 1,
	 * or skip the loop altogether when fewer than 16 bytes are left
	 * (cr1 less-than), in which case that doubleword is the only one.
	 */
718c2ecf20Sopenharmony_ci	bf	cr7*4+0,2f
728c2ecf20Sopenharmony_ci	addi	r3,r3,8
738c2ecf20Sopenharmony_ci	addi	r4,r4,8
748c2ecf20Sopenharmony_ci	mr	r8,r9
758c2ecf20Sopenharmony_ci	blt	cr1,3f
768c2ecf20Sopenharmony_ci1:	ld	r9,8(r4)
778c2ecf20Sopenharmony_ci	std	r8,8(r3)
788c2ecf20Sopenharmony_ci2:	ldu	r8,16(r4)
798c2ecf20Sopenharmony_ci	stdu	r9,16(r3)
808c2ecf20Sopenharmony_ci	bdnz	1b
	/*
	 * Store the doubleword still pending in r8. cr0 is still the
	 * result of the andi. on r5 above: with no trailing bytes jump
	 * straight to the return sequence. Otherwise advance r3 past the
	 * doubleword just stored.
	 */
818c2ecf20Sopenharmony_ci3:	std	r8,8(r3)
828c2ecf20Sopenharmony_ci	beq	3f
838c2ecf20Sopenharmony_ci	addi	r3,r3,16
	/*
	 * Copy the remaining 4, 2 and 1 byte pieces selected by cr7. The
	 * loads use displacement 8 because r4 still points at the last
	 * doubleword that was loaded, not at the next unread byte.
	 */
848c2ecf20Sopenharmony_ci.Ldo_tail:
858c2ecf20Sopenharmony_ci	bf	cr7*4+1,1f
868c2ecf20Sopenharmony_ci	lwz	r9,8(r4)
878c2ecf20Sopenharmony_ci	addi	r4,r4,4
888c2ecf20Sopenharmony_ci	stw	r9,0(r3)
898c2ecf20Sopenharmony_ci	addi	r3,r3,4
908c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
918c2ecf20Sopenharmony_ci	lhz	r9,8(r4)
928c2ecf20Sopenharmony_ci	addi	r4,r4,2
938c2ecf20Sopenharmony_ci	sth	r9,0(r3)
948c2ecf20Sopenharmony_ci	addi	r3,r3,2
958c2ecf20Sopenharmony_ci2:	bf	cr7*4+3,3f
968c2ecf20Sopenharmony_ci	lbz	r9,8(r4)
978c2ecf20Sopenharmony_ci	stb	r9,0(r3)
	/* Reload the destination pointer that the entry code saved. */
988c2ecf20Sopenharmony_ci3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
998c2ecf20Sopenharmony_ci	blr
1008c2ecf20Sopenharmony_ci
/*
 * .Lsrc_unaligned: copy from a source that is not 8-byte aligned using
 * only aligned doubleword loads. Reached from .Ldst_aligned.
 *
 * In:  r0 = src & 7 (non-zero), r3 = dest - 16, r4 = src,
 *      r5 = bytes left to copy, cr7 = low four bits of r5.
 *
 * The source pointer is rounded down to an 8-byte boundary. Every
 * destination doubleword is then assembled from two neighbouring source
 * doublewords:
 *     d[i] = (s[i] << r10) | (s[i+1] >> r11)
 * where r10 = 8 * (src & 7) and r11 = 64 - r10.
 */
1018c2ecf20Sopenharmony_ci.Lsrc_unaligned:
	/*
	 * Set-up:
	 *   r6  = number of whole doublewords in the length, and
	 *         cr6 = that number compared with 3
	 *   r4  = source rounded down to an 8-byte boundary
	 *   CTR = (length - 16) / 16
	 *   r10 = left shift count in bits, r11 = matching right shift
	 *   r5  = (length & 7) + (src & 7). cr0 is set by the andi. from
	 *         length & 7 alone, because the following add does not
	 *         record.
	 */
1028c2ecf20Sopenharmony_ci	srdi	r6,r5,3
1038c2ecf20Sopenharmony_ci	addi	r5,r5,-16
1048c2ecf20Sopenharmony_ci	subf	r4,r0,r4
1058c2ecf20Sopenharmony_ci	srdi	r7,r5,4
1068c2ecf20Sopenharmony_ci	sldi	r10,r0,3
1078c2ecf20Sopenharmony_ci	cmpdi	cr6,r6,3
1088c2ecf20Sopenharmony_ci	andi.	r5,r5,7
1098c2ecf20Sopenharmony_ci	mtctr	r7
1108c2ecf20Sopenharmony_ci	subfic	r11,r10,64
1118c2ecf20Sopenharmony_ci	add	r5,r5,r0
1128c2ecf20Sopenharmony_ci
	/* Bit 8 of the length set: odd number of doublewords, start at 0. */
1138c2ecf20Sopenharmony_ci	bt	cr7*4+0,0f
1148c2ecf20Sopenharmony_ci
	/*
	 * Even number of doublewords. With fewer than three of them
	 * (cr6 less-than) the loop is skipped and the stores at 4 finish
	 * the job. Otherwise the loop is entered at 2.
	 */
1158c2ecf20Sopenharmony_ci	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
1168c2ecf20Sopenharmony_ci	ld	r0,8(r4)
1178c2ecf20Sopenharmony_ci	sld	r6,r9,r10
1188c2ecf20Sopenharmony_ci	ldu	r9,16(r4)
1198c2ecf20Sopenharmony_ci	srd	r7,r0,r11
1208c2ecf20Sopenharmony_ci	sld	r8,r0,r10
1218c2ecf20Sopenharmony_ci	or	r7,r7,r6
1228c2ecf20Sopenharmony_ci	blt	cr6,4f
1238c2ecf20Sopenharmony_ci	ld	r0,8(r4)
1248c2ecf20Sopenharmony_ci	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
1258c2ecf20Sopenharmony_ci	b	2f
1268c2ecf20Sopenharmony_ci
	/*
	 * Odd number of doublewords. dest is moved back by a further 8 so
	 * that the shared stores at 3, 4 and 5 land on the right addresses.
	 * Fewer than three doublewords goes straight to 5, exactly three
	 * (cr6 equal) skips the loop and continues at 3.
	 */
1278c2ecf20Sopenharmony_ci0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
1288c2ecf20Sopenharmony_ci	ldu	r9,8(r4)
1298c2ecf20Sopenharmony_ci	sld	r8,r0,r10
1308c2ecf20Sopenharmony_ci	addi	r3,r3,-8
1318c2ecf20Sopenharmony_ci	blt	cr6,5f
1328c2ecf20Sopenharmony_ci	ld	r0,8(r4)
1338c2ecf20Sopenharmony_ci	srd	r12,r9,r11
1348c2ecf20Sopenharmony_ci	sld	r6,r9,r10
1358c2ecf20Sopenharmony_ci	ldu	r9,16(r4)
1368c2ecf20Sopenharmony_ci	or	r12,r8,r12
1378c2ecf20Sopenharmony_ci	srd	r7,r0,r11
1388c2ecf20Sopenharmony_ci	sld	r8,r0,r10
1398c2ecf20Sopenharmony_ci	addi	r3,r3,16
1408c2ecf20Sopenharmony_ci	beq	cr6,3f
1418c2ecf20Sopenharmony_ci
	/* Main loop: two source doublewords in, two merged doublewords out. */
1428c2ecf20Sopenharmony_ci	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1438c2ecf20Sopenharmony_ci1:	or	r7,r7,r6
1448c2ecf20Sopenharmony_ci	ld	r0,8(r4)
1458c2ecf20Sopenharmony_ci	std	r12,8(r3)
1468c2ecf20Sopenharmony_ci2:	srd	r12,r9,r11
1478c2ecf20Sopenharmony_ci	sld	r6,r9,r10
1488c2ecf20Sopenharmony_ci	ldu	r9,16(r4)
1498c2ecf20Sopenharmony_ci	or	r12,r8,r12
1508c2ecf20Sopenharmony_ci	stdu	r7,16(r3)
1518c2ecf20Sopenharmony_ci	srd	r7,r0,r11
1528c2ecf20Sopenharmony_ci	sld	r8,r0,r10
1538c2ecf20Sopenharmony_ci	bdnz	1b
1548c2ecf20Sopenharmony_ci
	/* Drain the pipeline: store the doublewords still held in registers. */
1558c2ecf20Sopenharmony_ci3:	std	r12,8(r3)
1568c2ecf20Sopenharmony_ci	or	r7,r7,r6
1578c2ecf20Sopenharmony_ci4:	std	r7,16(r3)
1588c2ecf20Sopenharmony_ci5:	srd	r12,r9,r11
1598c2ecf20Sopenharmony_ci	or	r12,r8,r12
1608c2ecf20Sopenharmony_ci	std	r12,24(r3)
	/*
	 * cr0 still holds the result of the andi. on r5 in the set-up
	 * code: with no trailing bytes the copy is complete. The forward
	 * reference 4f resolves to the 4: label at the end of
	 * .Lshort_copy, which reloads r3 and returns.
	 */
1618c2ecf20Sopenharmony_ci	beq	4f
	/*
	 * r5 = trailing bytes + source misalignment. When that is more
	 * than 8 the tail reaches into the next source doubleword, which
	 * is loaded and merged in. Otherwise the bytes left over in r9 are
	 * enough. Either way the tail ends up left-justified in r9.
	 */
1628c2ecf20Sopenharmony_ci	cmpwi	cr1,r5,8
1638c2ecf20Sopenharmony_ci	addi	r3,r3,32
1648c2ecf20Sopenharmony_ci	sld	r9,r9,r10
1658c2ecf20Sopenharmony_ci	ble	cr1,6f
1668c2ecf20Sopenharmony_ci	ld	r0,8(r4)
1678c2ecf20Sopenharmony_ci	srd	r7,r0,r11
1688c2ecf20Sopenharmony_ci	or	r9,r7,r9
	/*
	 * Store the 4, 2 and 1 byte pieces selected by cr7. Each rotldi
	 * rotates the next leading bytes of r9 into the low-order end,
	 * which is the part that stw, sth and stb write.
	 */
1698c2ecf20Sopenharmony_ci6:
1708c2ecf20Sopenharmony_ci	bf	cr7*4+1,1f
1718c2ecf20Sopenharmony_ci	rotldi	r9,r9,32
1728c2ecf20Sopenharmony_ci	stw	r9,0(r3)
1738c2ecf20Sopenharmony_ci	addi	r3,r3,4
1748c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
1758c2ecf20Sopenharmony_ci	rotldi	r9,r9,16
1768c2ecf20Sopenharmony_ci	sth	r9,0(r3)
1778c2ecf20Sopenharmony_ci	addi	r3,r3,2
1788c2ecf20Sopenharmony_ci2:	bf	cr7*4+3,3f
1798c2ecf20Sopenharmony_ci	rotldi	r9,r9,8
1808c2ecf20Sopenharmony_ci	stb	r9,0(r3)
	/* Reload the destination pointer that the entry code saved. */
1818c2ecf20Sopenharmony_ci3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
1828c2ecf20Sopenharmony_ci	blr
1838c2ecf20Sopenharmony_ci
/*
 * .Ldst_unaligned: copy the 1-7 leading bytes needed to bring the
 * destination up to an 8-byte boundary, then rejoin .Ldst_aligned.
 *
 * In:  r6 = (-dest) & 7 (non-zero), r3 = dest, r4 = src, r5 = length.
 * Out: r3 and r4 advanced by r6, r5 reduced by r6,
 *      cr7 = low four bits of the new r5,
 *      cr1 = new r5 compared (unsigned) with 16.
 */
1848c2ecf20Sopenharmony_ci.Ldst_unaligned:
1858c2ecf20Sopenharmony_ci	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	/*
	 * r5 = bytes that remain once the leading piece is done, and cr1
	 * is refreshed for that new length. r7 is the running offset used
	 * by the indexed loads and stores below.
	 */
1868c2ecf20Sopenharmony_ci	subf	r5,r6,r5
1878c2ecf20Sopenharmony_ci	li	r7,0
1888c2ecf20Sopenharmony_ci	cmpldi	cr1,r5,16
	/* Copy 1, then 2, then 4 bytes according to the bits of r6 in cr7. */
1898c2ecf20Sopenharmony_ci	bf	cr7*4+3,1f
1908c2ecf20Sopenharmony_ci	lbz	r0,0(r4)
1918c2ecf20Sopenharmony_ci	stb	r0,0(r3)
1928c2ecf20Sopenharmony_ci	addi	r7,r7,1
1938c2ecf20Sopenharmony_ci1:	bf	cr7*4+2,2f
1948c2ecf20Sopenharmony_ci	lhzx	r0,r7,r4
1958c2ecf20Sopenharmony_ci	sthx	r0,r7,r3
1968c2ecf20Sopenharmony_ci	addi	r7,r7,2
1978c2ecf20Sopenharmony_ci2:	bf	cr7*4+1,3f
1988c2ecf20Sopenharmony_ci	lwzx	r0,r7,r4
1998c2ecf20Sopenharmony_ci	stwx	r0,r7,r3
	/*
	 * Reload cr7 with the low bits of the remaining length, step both
	 * pointers over the bytes just copied and continue with the
	 * doubleword copy.
	 */
2008c2ecf20Sopenharmony_ci3:	PPC_MTOCRF(0x01,r5)
2018c2ecf20Sopenharmony_ci	add	r4,r6,r4
2028c2ecf20Sopenharmony_ci	add	r3,r6,r3
2038c2ecf20Sopenharmony_ci	b	.Ldst_aligned
2048c2ecf20Sopenharmony_ci
/*
 * .Lshort_copy: copy fewer than 16 bytes. Reached from the entry code
 * when the length compares below 16.
 *
 * In:  r3 = dest, r4 = src, cr7 = low four bits of the length.
 * The four bits of cr7 select an 8-byte piece (moved as two words), a
 * 4-byte piece, a 2-byte piece and a 1-byte piece, in that order.
 */
2058c2ecf20Sopenharmony_ci.Lshort_copy:
2068c2ecf20Sopenharmony_ci	bf	cr7*4+0,1f
2078c2ecf20Sopenharmony_ci	lwz	r0,0(r4)
2088c2ecf20Sopenharmony_ci	lwz	r9,4(r4)
2098c2ecf20Sopenharmony_ci	addi	r4,r4,8
2108c2ecf20Sopenharmony_ci	stw	r0,0(r3)
2118c2ecf20Sopenharmony_ci	stw	r9,4(r3)
2128c2ecf20Sopenharmony_ci	addi	r3,r3,8
2138c2ecf20Sopenharmony_ci1:	bf	cr7*4+1,2f
2148c2ecf20Sopenharmony_ci	lwz	r0,0(r4)
2158c2ecf20Sopenharmony_ci	addi	r4,r4,4
2168c2ecf20Sopenharmony_ci	stw	r0,0(r3)
2178c2ecf20Sopenharmony_ci	addi	r3,r3,4
2188c2ecf20Sopenharmony_ci2:	bf	cr7*4+2,3f
2198c2ecf20Sopenharmony_ci	lhz	r0,0(r4)
2208c2ecf20Sopenharmony_ci	addi	r4,r4,2
2218c2ecf20Sopenharmony_ci	sth	r0,0(r3)
2228c2ecf20Sopenharmony_ci	addi	r3,r3,2
2238c2ecf20Sopenharmony_ci3:	bf	cr7*4+3,4f
2248c2ecf20Sopenharmony_ci	lbz	r0,0(r4)
2258c2ecf20Sopenharmony_ci	stb	r0,0(r3)
	/*
	 * Common return sequence: reload the destination pointer that the
	 * entry code saved. This 4: label is also the target of the
	 * forward branch beq 4f in .Lsrc_unaligned, so it must remain the
	 * first 4: label after that branch.
	 */
2268c2ecf20Sopenharmony_ci4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
2278c2ecf20Sopenharmony_ci	blr
2288c2ecf20Sopenharmony_ci#endif
/*
 * Export the memcpy symbol.
 * NOTE(review): EXPORT_SYMBOL and EXPORT_SYMBOL_KASAN are macros defined
 * outside this file (presumably in asm/export.h and asm/kasan.h) -
 * confirm what the KASAN variant expands to.
 */
2298c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy)
2308c2ecf20Sopenharmony_ciEXPORT_SYMBOL_KASAN(memcpy)
231