162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2002 Paul Mackerras, IBM Corp.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci#include <linux/export.h>
662306a36Sopenharmony_ci#include <asm/processor.h>
762306a36Sopenharmony_ci#include <asm/ppc_asm.h>
862306a36Sopenharmony_ci#include <asm/asm-compat.h>
962306a36Sopenharmony_ci#include <asm/feature-fixups.h>
1062306a36Sopenharmony_ci#include <asm/kasan.h>
1162306a36Sopenharmony_ci
/*
 * Default SELFTEST_CASE to 0 unless it has already been defined.  The value
 * is compared with 0 and 1 further down to set test_feature ahead of the
 * CPU-feature sections.
 * NOTE(review): presumably a selftest build overrides this to exercise the
 * other variants -- confirm against the selftest build rules.
 */
1262306a36Sopenharmony_ci#ifndef SELFTEST_CASE
1362306a36Sopenharmony_ci/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
1462306a36Sopenharmony_ci#define SELFTEST_CASE	0
1562306a36Sopenharmony_ci#endif
1662306a36Sopenharmony_ci
/*
 * memcpy: copy r5 bytes from the source at r4 to the destination at r3 and
 * return the original destination pointer in r3.
 *
 * The first instruction sits in an alternative CPU-feature section keyed on
 * CPU_FTR_VMX_COPY.
 * NOTE(review): going by the macro names, the first arm is used when the
 * feature is clear and the "b memcpy_power7" arm otherwise -- confirm against
 * asm/feature-fixups.h.
 *
 * Little-endian builds assemble only a byte-at-a-time loop here.  Big-endian
 * builds classify the request (short, destination unaligned, destination
 * aligned) and dispatch to the labelled sections that follow.
 *
 * NOTE(review): ".align 7" is 2^7 = 128-byte alignment if this assembler
 * treats the operand as a power of two -- confirm for the target.
 */
1762306a36Sopenharmony_ci	.align	7
1862306a36Sopenharmony_ci_GLOBAL_TOC_KASAN(memcpy)
1962306a36Sopenharmony_ciBEGIN_FTR_SECTION
2062306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
2162306a36Sopenharmony_ci	cmpdi	cr7,r5,0	/* cr7 = length compared with 0, used by beqlr below */
2262306a36Sopenharmony_ci#else
2362306a36Sopenharmony_ci	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
2462306a36Sopenharmony_ci#endif
2562306a36Sopenharmony_ciFTR_SECTION_ELSE
2662306a36Sopenharmony_ci#ifdef CONFIG_PPC_BOOK3S_64
2762306a36Sopenharmony_ci	b	memcpy_power7
2862306a36Sopenharmony_ci#endif
2962306a36Sopenharmony_ciALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
3062306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
3162306a36Sopenharmony_ci	/* dumb little-endian memcpy that will get replaced at runtime */
3262306a36Sopenharmony_ci	addi r9,r3,-1	/* r9 = dest - 1; r3 itself is never modified, so it is the return value */
3362306a36Sopenharmony_ci	addi r4,r4,-1	/* src - 1: lbzu/stbu below pre-increment by 1 */
3462306a36Sopenharmony_ci	beqlr cr7	/* zero length: return straight away */
3562306a36Sopenharmony_ci	mtctr r5	/* CTR = number of bytes to copy */
3662306a36Sopenharmony_ci1:	lbzu r10,1(r4)	/* load next source byte, advancing r4 */
3762306a36Sopenharmony_ci	stbu r10,1(r9)	/* store it, advancing r9 */
3862306a36Sopenharmony_ci	bdnz 1b
3962306a36Sopenharmony_ci	blr
4062306a36Sopenharmony_ci#else
4162306a36Sopenharmony_ci	PPC_MTOCRF(0x01,r5)	/* low bits of the length into cr7; the 8/4/2/1 bits are tested as cr7*4+0..3 */
4262306a36Sopenharmony_ci	cmpldi	cr1,r5,16	/* cr1 = length compared with 16 (unsigned) */
4362306a36Sopenharmony_ci	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
4462306a36Sopenharmony_ci	andi.	r6,r6,7	/* r6 = bytes to the boundary; cr0.eq set when dest is already 8-byte aligned */
4562306a36Sopenharmony_ci	dcbt	0,r4	/* cache-touch hint for the start of the source */
4662306a36Sopenharmony_ci	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
4762306a36Sopenharmony_ci/* Below we want to nop out the bne if we're on a CPU that has the
4862306a36Sopenharmony_ci   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
4962306a36Sopenharmony_ci   cleared.
5062306a36Sopenharmony_ci   At the time of writing the only CPU that has this combination of bits
5162306a36Sopenharmony_ci   set is Power6. */
/* NOTE(review): test_feature is presumably read by the feature-section
   macros in selftest builds -- confirm against asm/feature-fixups.h. */
5262306a36Sopenharmony_citest_feature = (SELFTEST_CASE == 1)
5362306a36Sopenharmony_ciBEGIN_FTR_SECTION
5462306a36Sopenharmony_ci	nop
5562306a36Sopenharmony_ciFTR_SECTION_ELSE
5662306a36Sopenharmony_ci	bne	.Ldst_unaligned	/* tests cr0 from the andi. above: dest not 8-byte aligned */
5762306a36Sopenharmony_ciALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
5862306a36Sopenharmony_ci                    CPU_FTR_UNALIGNED_LD_STD)
/*
 * .Ldst_aligned: main doubleword copy (big-endian build only).
 * Reached by fall-through from the entry code and by branch from
 * .Ldst_unaligned.
 * In:  r3 = dest, r4 = src, r5 = bytes left, cr7 = low bits of r5,
 *      cr1 = r5 compared with 16.
 * Moves 16 bytes per loop iteration (plus one extra doubleword when the
 * 8s bit of the length is set), then .Ldo_tail copies the last 0-7 bytes
 * and the saved destination pointer is returned in r3.
 */
5962306a36Sopenharmony_ci.Ldst_aligned:
6062306a36Sopenharmony_ci	addi	r3,r3,-16	/* bias dest: the stores below address 8(r3) and 16(r3) */
6162306a36Sopenharmony_citest_feature = (SELFTEST_CASE == 0)
6262306a36Sopenharmony_ciBEGIN_FTR_SECTION
6362306a36Sopenharmony_ci	andi.	r0,r4,7	/* r0 = src & 7 */
6462306a36Sopenharmony_ci	bne	.Lsrc_unaligned	/* source is not 8-byte aligned */
6562306a36Sopenharmony_ciEND_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
6662306a36Sopenharmony_ci	srdi	r7,r5,4	/* r7 = number of 16-byte loop iterations */
6762306a36Sopenharmony_ci	ld	r9,0(r4)	/* preload the first source doubleword */
6862306a36Sopenharmony_ci	addi	r4,r4,-8	/* bias src to match the 8(r4)/16(r4) offsets below */
6962306a36Sopenharmony_ci	mtctr	r7
7062306a36Sopenharmony_ci	andi.	r5,r5,7	/* r5 = tail bytes (0-7); cr0.eq set when there are none */
7162306a36Sopenharmony_ci	bf	cr7*4+0,2f	/* 8s bit of length clear: enter the loop at 2 with r9 loaded */
7262306a36Sopenharmony_ci	addi	r3,r3,8	/* 8s bit set: one extra doubleword, carried in r8 */
7362306a36Sopenharmony_ci	addi	r4,r4,8
7462306a36Sopenharmony_ci	mr	r8,r9
7562306a36Sopenharmony_ci	blt	cr1,3f	/* fewer than 16 bytes left: store that doubleword only */
7662306a36Sopenharmony_ci1:	ld	r9,8(r4)
7762306a36Sopenharmony_ci	std	r8,8(r3)
7862306a36Sopenharmony_ci2:	ldu	r8,16(r4)	/* load the following doubleword, advancing r4 by 16 */
7962306a36Sopenharmony_ci	stdu	r9,16(r3)	/* store the earlier one, advancing r3 by 16 */
8062306a36Sopenharmony_ci	bdnz	1b
8162306a36Sopenharmony_ci3:	std	r8,8(r3)	/* last whole doubleword */
8262306a36Sopenharmony_ci	beq	3f	/* cr0 still from "andi. r5,r5,7": no tail bytes, return */
8362306a36Sopenharmony_ci	addi	r3,r3,16	/* r3 = first destination byte not yet written */
/* Tail: r4 is 8 bytes behind the next unread source byte, hence 8(r4). */
8462306a36Sopenharmony_ci.Ldo_tail:
8562306a36Sopenharmony_ci	bf	cr7*4+1,1f	/* 4s bit of length */
8662306a36Sopenharmony_ci	lwz	r9,8(r4)
8762306a36Sopenharmony_ci	addi	r4,r4,4
8862306a36Sopenharmony_ci	stw	r9,0(r3)
8962306a36Sopenharmony_ci	addi	r3,r3,4
9062306a36Sopenharmony_ci1:	bf	cr7*4+2,2f	/* 2s bit of length */
9162306a36Sopenharmony_ci	lhz	r9,8(r4)
9262306a36Sopenharmony_ci	addi	r4,r4,2
9362306a36Sopenharmony_ci	sth	r9,0(r3)
9462306a36Sopenharmony_ci	addi	r3,r3,2
9562306a36Sopenharmony_ci2:	bf	cr7*4+3,3f	/* 1s bit of length */
9662306a36Sopenharmony_ci	lbz	r9,8(r4)
9762306a36Sopenharmony_ci	stb	r9,0(r3)
9862306a36Sopenharmony_ci3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
9962306a36Sopenharmony_ci	blr
10062306a36Sopenharmony_ci
/*
 * .Lsrc_unaligned: doubleword copy when the source is not 8-byte aligned.
 * In:  r0 = src & 7 (non-zero), r3 = dest - 16, r4 = src, r5 = bytes left,
 *      cr7 = low bits of r5.
 * Source doublewords are loaded from the rounded-down address and each
 * destination doubleword is assembled as (s[k] << r10) | (s[k+1] >> r11).
 * The loop is software-pipelined; the register-state comments inside the
 * code list what is live at each entry point.  The final 0-7 bytes are
 * peeled off the top of r9 by rotating them down into the low bits.
 */
10162306a36Sopenharmony_ci.Lsrc_unaligned:
10262306a36Sopenharmony_ci	srdi	r6,r5,3	/* r6 = number of whole doublewords to store */
10362306a36Sopenharmony_ci	addi	r5,r5,-16
10462306a36Sopenharmony_ci	subf	r4,r0,r4	/* r4 = src rounded down to an 8-byte boundary */
10562306a36Sopenharmony_ci	srdi	r7,r5,4	/* r7 = (len - 16) / 16, loaded into CTR as the loop count */
10662306a36Sopenharmony_ci	sldi	r10,r0,3	/* r10 = left shift in bits = 8 * (src & 7) */
10762306a36Sopenharmony_ci	cmpdi	cr6,r6,3	/* cr6 = doubleword count compared with 3 */
10862306a36Sopenharmony_ci	andi.	r5,r5,7	/* r5 = tail bytes (0-7); cr0.eq set when there are none */
10962306a36Sopenharmony_ci	mtctr	r7
11062306a36Sopenharmony_ci	subfic	r11,r10,64	/* r11 = 64 - r10, the complementary right shift */
11162306a36Sopenharmony_ci	add	r5,r5,r0	/* r5 = tail bytes + src offset, compared with 8 at the tail */
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	bt	cr7*4+0,0f	/* 8s bit of length set: odd doubleword count */
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
11662306a36Sopenharmony_ci	ld	r0,8(r4)
11762306a36Sopenharmony_ci	sld	r6,r9,r10
11862306a36Sopenharmony_ci	ldu	r9,16(r4)
11962306a36Sopenharmony_ci	srd	r7,r0,r11
12062306a36Sopenharmony_ci	sld	r8,r0,r10
12162306a36Sopenharmony_ci	or	r7,r7,r6
12262306a36Sopenharmony_ci	blt	cr6,4f	/* fewer than 3 doublewords: skip the loop */
12362306a36Sopenharmony_ci	ld	r0,8(r4)
12462306a36Sopenharmony_ci	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
12562306a36Sopenharmony_ci	b	2f
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
12862306a36Sopenharmony_ci	ldu	r9,8(r4)
12962306a36Sopenharmony_ci	sld	r8,r0,r10
13062306a36Sopenharmony_ci	addi	r3,r3,-8
13162306a36Sopenharmony_ci	blt	cr6,5f	/* fewer than 3 doublewords: one store only */
13262306a36Sopenharmony_ci	ld	r0,8(r4)
13362306a36Sopenharmony_ci	srd	r12,r9,r11
13462306a36Sopenharmony_ci	sld	r6,r9,r10
13562306a36Sopenharmony_ci	ldu	r9,16(r4)
13662306a36Sopenharmony_ci	or	r12,r8,r12
13762306a36Sopenharmony_ci	srd	r7,r0,r11
13862306a36Sopenharmony_ci	sld	r8,r0,r10
13962306a36Sopenharmony_ci	addi	r3,r3,16
14062306a36Sopenharmony_ci	beq	cr6,3f	/* exactly 3 doublewords: skip the loop */
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
14362306a36Sopenharmony_ci1:	or	r7,r7,r6
14462306a36Sopenharmony_ci	ld	r0,8(r4)
14562306a36Sopenharmony_ci	std	r12,8(r3)
14662306a36Sopenharmony_ci2:	srd	r12,r9,r11
14762306a36Sopenharmony_ci	sld	r6,r9,r10
14862306a36Sopenharmony_ci	ldu	r9,16(r4)
14962306a36Sopenharmony_ci	or	r12,r8,r12
15062306a36Sopenharmony_ci	stdu	r7,16(r3)
15162306a36Sopenharmony_ci	srd	r7,r0,r11
15262306a36Sopenharmony_ci	sld	r8,r0,r10
15362306a36Sopenharmony_ci	bdnz	1b
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci3:	std	r12,8(r3)
15662306a36Sopenharmony_ci	or	r7,r7,r6
15762306a36Sopenharmony_ci4:	std	r7,16(r3)
15862306a36Sopenharmony_ci5:	srd	r12,r9,r11
15962306a36Sopenharmony_ci	or	r12,r8,r12
16062306a36Sopenharmony_ci	std	r12,24(r3)	/* last whole doubleword */
16162306a36Sopenharmony_ci	beq	4f	/* cr0 from "andi. r5,r5,7": no tail; 4f is the return sequence at the end of .Lshort_copy */
16262306a36Sopenharmony_ci	cmpwi	cr1,r5,8	/* do the tail bytes extend past the doubleword already in r9? */
16362306a36Sopenharmony_ci	addi	r3,r3,32	/* r3 = first destination byte not yet written */
16462306a36Sopenharmony_ci	sld	r9,r9,r10	/* move the unconsumed bytes of r9 to the top */
16562306a36Sopenharmony_ci	ble	cr1,6f	/* r9 already holds every tail byte */
16662306a36Sopenharmony_ci	ld	r0,8(r4)	/* otherwise merge in the top of one more source doubleword */
16762306a36Sopenharmony_ci	srd	r7,r0,r11
16862306a36Sopenharmony_ci	or	r9,r7,r9
16962306a36Sopenharmony_ci6:
17062306a36Sopenharmony_ci	bf	cr7*4+1,1f	/* 4s bit of length */
17162306a36Sopenharmony_ci	rotldi	r9,r9,32	/* rotate the next 4 bytes into the low word */
17262306a36Sopenharmony_ci	stw	r9,0(r3)
17362306a36Sopenharmony_ci	addi	r3,r3,4
17462306a36Sopenharmony_ci1:	bf	cr7*4+2,2f	/* 2s bit of length */
17562306a36Sopenharmony_ci	rotldi	r9,r9,16	/* rotate the next 2 bytes into the low halfword */
17662306a36Sopenharmony_ci	sth	r9,0(r3)
17762306a36Sopenharmony_ci	addi	r3,r3,2
17862306a36Sopenharmony_ci2:	bf	cr7*4+3,3f	/* 1s bit of length */
17962306a36Sopenharmony_ci	rotldi	r9,r9,8	/* rotate the next byte into the low byte */
18062306a36Sopenharmony_ci	stb	r9,0(r3)
18162306a36Sopenharmony_ci3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
18262306a36Sopenharmony_ci	blr
18362306a36Sopenharmony_ci
/*
 * .Ldst_unaligned: copy the bytes needed to bring dest up to an 8-byte
 * boundary, then rejoin .Ldst_aligned.
 * In:  r6 = bytes to the boundary, r3 = dest, r4 = src, r5 = length.
 * Out: r3 and r4 advanced by r6, r5 reduced by r6, cr7 = low bits of the
 *      new r5, cr1 = new r5 compared with 16.
 * r7 is the running byte offset applied to both pointers during the copy.
 */
18462306a36Sopenharmony_ci.Ldst_unaligned:
18562306a36Sopenharmony_ci	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
18662306a36Sopenharmony_ci	subf	r5,r6,r5	/* r5 = bytes left after the alignment copy */
18762306a36Sopenharmony_ci	li	r7,0	/* r7 = offset of the next byte to copy */
18862306a36Sopenharmony_ci	cmpldi	cr1,r5,16	/* refresh cr1 for the reduced length */
18962306a36Sopenharmony_ci	bf	cr7*4+3,1f	/* 1s bit of r6 */
19062306a36Sopenharmony_ci	lbz	r0,0(r4)
19162306a36Sopenharmony_ci	stb	r0,0(r3)
19262306a36Sopenharmony_ci	addi	r7,r7,1
19362306a36Sopenharmony_ci1:	bf	cr7*4+2,2f	/* 2s bit of r6 */
19462306a36Sopenharmony_ci	lhzx	r0,r7,r4
19562306a36Sopenharmony_ci	sthx	r0,r7,r3
19662306a36Sopenharmony_ci	addi	r7,r7,2
19762306a36Sopenharmony_ci2:	bf	cr7*4+1,3f	/* 4s bit of r6 */
19862306a36Sopenharmony_ci	lwzx	r0,r7,r4
19962306a36Sopenharmony_ci	stwx	r0,r7,r3
20062306a36Sopenharmony_ci3:	PPC_MTOCRF(0x01,r5)	/* reload cr7 with the low bits of the remaining length */
20162306a36Sopenharmony_ci	add	r4,r6,r4	/* step src past the bytes just copied */
20262306a36Sopenharmony_ci	add	r3,r6,r3	/* step dest up to the 8-byte boundary */
20362306a36Sopenharmony_ci	b	.Ldst_aligned
20462306a36Sopenharmony_ci
/*
 * .Lshort_copy: lengths below 16 (branched to from the entry code).
 * In:  r3 = dest, r4 = src, cr7 = low bits of the length.
 * Copies 8, 4, 2 and 1 bytes in turn, each step taken only when the
 * matching bit of the length is set, then returns the saved destination
 * pointer.  The 8-byte step is done as two word loads and stores.
 * Label 4 is also the target of the "beq 4f" in .Lsrc_unaligned.
 */
20562306a36Sopenharmony_ci.Lshort_copy:
20662306a36Sopenharmony_ci	bf	cr7*4+0,1f	/* 8s bit of length */
20762306a36Sopenharmony_ci	lwz	r0,0(r4)
20862306a36Sopenharmony_ci	lwz	r9,4(r4)
20962306a36Sopenharmony_ci	addi	r4,r4,8
21062306a36Sopenharmony_ci	stw	r0,0(r3)
21162306a36Sopenharmony_ci	stw	r9,4(r3)
21262306a36Sopenharmony_ci	addi	r3,r3,8
21362306a36Sopenharmony_ci1:	bf	cr7*4+1,2f	/* 4s bit of length */
21462306a36Sopenharmony_ci	lwz	r0,0(r4)
21562306a36Sopenharmony_ci	addi	r4,r4,4
21662306a36Sopenharmony_ci	stw	r0,0(r3)
21762306a36Sopenharmony_ci	addi	r3,r3,4
21862306a36Sopenharmony_ci2:	bf	cr7*4+2,3f	/* 2s bit of length */
21962306a36Sopenharmony_ci	lhz	r0,0(r4)
22062306a36Sopenharmony_ci	addi	r4,r4,2
22162306a36Sopenharmony_ci	sth	r0,0(r3)
22262306a36Sopenharmony_ci	addi	r3,r3,2
22362306a36Sopenharmony_ci3:	bf	cr7*4+3,4f	/* 1s bit of length */
22462306a36Sopenharmony_ci	lbz	r0,0(r4)
22562306a36Sopenharmony_ci	stb	r0,0(r3)
22662306a36Sopenharmony_ci4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
22762306a36Sopenharmony_ci	blr
22862306a36Sopenharmony_ci#endif
/*
 * Symbol exports for memcpy.
 * NOTE(review): EXPORT_SYMBOL_KASAN presumably exports the additional name
 * set up by _GLOBAL_TOC_KASAN at the entry point -- confirm in asm/kasan.h.
 */
22962306a36Sopenharmony_ciEXPORT_SYMBOL(memcpy)
23062306a36Sopenharmony_ciEXPORT_SYMBOL_KASAN(memcpy)
231