1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
4  */
5 #include <linux/export.h>
6 #include <asm/processor.h>
7 #include <asm/ppc_asm.h>
8 #include <asm/asm-compat.h>
9 #include <asm/feature-fixups.h>
10 #include <asm/kasan.h>
11 
12 #ifndef SELFTEST_CASE
13 /* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
14 #define SELFTEST_CASE	0
15 #endif
16 
/*
 * void *memcpy(void *dest, const void *src, size_t n)
 *
 * In:  r3 = dest, r4 = src, r5 = n (byte count)
 * Out: r3 = original dest pointer
 *
 * On CPUs with CPU_FTR_VMX_COPY (Book3S 64) the feature fixup patches
 * this entry to branch to memcpy_power7 instead.  Otherwise:
 *  - little-endian builds run the simple byte loop below (replaced at
 *    runtime, per its comment);
 *  - big-endian builds run the hand-scheduled doubleword copy, which
 *    first stashes dest in a scratch stack slot so r3 can serve as the
 *    running store cursor and be reloaded for the return value.
 *
 * Throughout the big-endian path, cr7 holds the low 4 bits of the
 * (remaining) length via PPC_MTOCRF(0x01,...): bit cr7*4+0 = 8-byte,
 * +1 = 4-byte, +2 = 2-byte, +3 = 1-byte tail chunk (see the bf/bt
 * tail sequences below).
 */
17 	.align	7
18 _GLOBAL_TOC_KASAN(memcpy)
19 BEGIN_FTR_SECTION
20 #ifdef __LITTLE_ENDIAN__
21 	cmpdi	cr7,r5,0	/* cr7 = (n == 0), consumed by beqlr below */
22 #else
23 	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
24 #endif
25 FTR_SECTION_ELSE
26 #ifdef CONFIG_PPC_BOOK3S_64
27 	b	memcpy_power7	/* VMX-capable CPU: use the POWER7 copy loop */
28 #endif
29 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
30 #ifdef __LITTLE_ENDIAN__
31 	/* dumb little-endian memcpy that will get replaced at runtime */
32 	addi r9,r3,-1	/* r9 = dest-1: store cursor for stbu; r3 itself stays intact */
33 	addi r4,r4,-1	/* src-1: load cursor for lbzu */
34 	beqlr cr7	/* n == 0: return (r3 still holds dest) */
35 	mtctr r5	/* CTR = byte count */
36 1:	lbzu r10,1(r4)
37 	stbu r10,1(r9)
38 	bdnz 1b
39 	blr
40 #else
41 	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of n (tail-chunk flags, see header) */
42 	cmpldi	cr1,r5,16	/* cr1: is n < 16? */
43 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
44 	andi.	r6,r6,7		/* cr0: dest already 8-byte aligned? */
45 	dcbt	0,r4		/* prefetch first source cache line */
46 	blt	cr1,.Lshort_copy
47 /* Below we want to nop out the bne if we're on a CPU that has the
48    CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
49    cleared.
50    At the time of writing the only CPU that has this combination of bits
51    set is Power6. */
52 test_feature = (SELFTEST_CASE == 1)
53 BEGIN_FTR_SECTION
54 	nop
55 FTR_SECTION_ELSE
56 	bne	.Ldst_unaligned
57 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
58                     CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main path: dest is 8-byte aligned (or this CPU handles unaligned
 * std well).  Software-pipelined loop moves 16 bytes/iteration with
 * alternating r8/r9 doublewords, then the sub-8-byte tail is mopped
 * up via the cr7 flags.
 */
59 .Ldst_aligned:
60 	addi	r3,r3,-16	/* bias dest so the stdu/std +8/+16 offsets line up */
61 test_feature = (SELFTEST_CASE == 0)
62 BEGIN_FTR_SECTION
63 	andi.	r0,r4,7		/* r0 = src & 7: is the source aligned too? */
64 	bne	.Lsrc_unaligned
65 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
66 	srdi	r7,r5,4		/* r7 = n / 16 = main loop trip count */
67 	ld	r9,0(r4)	/* prime the pipeline with the first doubleword */
68 	addi	r4,r4,-8	/* bias src to match the ldu +16 addressing */
69 	mtctr	r7
70 	andi.	r5,r5,7		/* cr0/r5 = tail bytes (n mod 8); tested at 82/85ff */
71 	bf	cr7*4+0,2f	/* no odd doubleword (n & 8 clear): enter loop at 2 */
72 	addi	r3,r3,8		/* odd doubleword: shift cursors one slot and */
73 	addi	r4,r4,8		/* start the pipeline half a stage later */
74 	mr	r8,r9
75 	blt	cr1,3f		/* fewer than 16 bytes left: skip the loop body */
76 1:	ld	r9,8(r4)
77 	std	r8,8(r3)
78 2:	ldu	r8,16(r4)
79 	stdu	r9,16(r3)
80 	bdnz	1b
81 3:	std	r8,8(r3)	/* drain the last in-flight doubleword */
82 	beq	3f		/* tail count (cr0 from andi. above) == 0: return */
83 	addi	r3,r3,16
84 .Ldo_tail:
/* Copy the final 0-7 bytes: 4, then 2, then 1, as flagged in cr7. */
85 	bf	cr7*4+1,1f
86 	lwz	r9,8(r4)
87 	addi	r4,r4,4
88 	stw	r9,0(r3)
89 	addi	r3,r3,4
90 1:	bf	cr7*4+2,2f
91 	lhz	r9,8(r4)
92 	addi	r4,r4,2
93 	sth	r9,0(r3)
94 	addi	r3,r3,2
95 2:	bf	cr7*4+3,3f
96 	lbz	r9,8(r4)
97 	stb	r9,0(r3)
98 3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
99 	blr
100 
/*
 * Source not 8-byte aligned (dest is).  Load aligned doublewords from
 * the rounded-down source address and build each output doubleword by
 * merging two neighbours with sld/srd, using shift counts
 * r10 = 8*(src & 7) and r11 = 64 - r10.  The cr7*4+0 (n & 8) bit picks
 * between the two pipeline prologues below (odd vs even doubleword
 * count); the register roles are documented in the inline comments.
 */
101 .Lsrc_unaligned:
102 	srdi	r6,r5,3		/* r6 = n / 8, compared against 3 in cr6 below */
103 	addi	r5,r5,-16
104 	subf	r4,r0,r4	/* round src down to an 8-byte boundary */
105 	srdi	r7,r5,4		/* r7 = main loop trip count */
106 	sldi	r10,r0,3	/* r10 = bit offset of src within a doubleword */
107 	cmpdi	cr6,r6,3
108 	andi.	r5,r5,7		/* cr0/r5 = sub-8-byte remainder; tested at 161 */
109 	mtctr	r7
110 	subfic	r11,r10,64	/* r11 = complementary shift count */
111 	add	r5,r5,r0
112 
113 	bt	cr7*4+0,0f	/* n & 8 set: take the 4+2n-load prologue */
114 
115 	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
116 	ld	r0,8(r4)
117 	sld	r6,r9,r10
118 	ldu	r9,16(r4)
119 	srd	r7,r0,r11
120 	sld	r8,r0,r10
121 	or	r7,r7,r6
122 	blt	cr6,4f
123 	ld	r0,8(r4)
124 	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
125 	b	2f
126 
127 0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
128 	ldu	r9,8(r4)
129 	sld	r8,r0,r10
130 	addi	r3,r3,-8
131 	blt	cr6,5f
132 	ld	r0,8(r4)
133 	srd	r12,r9,r11
134 	sld	r6,r9,r10
135 	ldu	r9,16(r4)
136 	or	r12,r8,r12
137 	srd	r7,r0,r11
138 	sld	r8,r0,r10
139 	addi	r3,r3,16
140 	beq	cr6,3f
141 
142 	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
143 1:	or	r7,r7,r6
144 	ld	r0,8(r4)
145 	std	r12,8(r3)
146 2:	srd	r12,r9,r11
147 	sld	r6,r9,r10
148 	ldu	r9,16(r4)
149 	or	r12,r8,r12
150 	stdu	r7,16(r3)
151 	srd	r7,r0,r11
152 	sld	r8,r0,r10
153 	bdnz	1b
154 
/* Drain the pipeline: flush the merged doublewords still in flight. */
155 3:	std	r12,8(r3)
156 	or	r7,r7,r6
157 4:	std	r7,16(r3)
158 5:	srd	r12,r9,r11
159 	or	r12,r8,r12
160 	std	r12,24(r3)
161 	beq	4f		/* no leftover bytes (cr0 from andi. at 108): return */
162 	cmpwi	cr1,r5,8	/* do the tail bytes fit in the doubleword in r9? */
163 	addi	r3,r3,32
164 	sld	r9,r9,r10	/* left-justify the remaining source bytes in r9 */
165 	ble	cr1,6f
166 	ld	r0,8(r4)	/* tail straddles another doubleword: merge it in */
167 	srd	r7,r0,r11
168 	or	r9,r7,r9
/*
 * Store the final 0-7 bytes from the top of r9, rotating the next
 * chunk into the low word/half/byte before each store (cr7 flags
 * again select 4/2/1-byte chunks).
 */
169 6:
170 	bf	cr7*4+1,1f
171 	rotldi	r9,r9,32
172 	stw	r9,0(r3)
173 	addi	r3,r3,4
174 1:	bf	cr7*4+2,2f
175 	rotldi	r9,r9,16
176 	sth	r9,0(r3)
177 	addi	r3,r3,2
178 2:	bf	cr7*4+3,3f
179 	rotldi	r9,r9,8
180 	stb	r9,0(r3)
181 3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
182 	blr
183 
/*
 * Dest not 8-byte aligned: copy 1, 2 and/or 4 bytes (per the bits of
 * r6 = bytes to the next 8-byte boundary) to align it, reload cr7
 * from the reduced length, then rejoin the aligned path.
 */
184 .Ldst_unaligned:
185 	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
186 	subf	r5,r6,r5	/* shrink remaining length by the alignment bytes */
187 	li	r7,0		/* r7 = running offset into src/dest */
188 	cmpldi	cr1,r5,16	/* recompute cr1 (n < 16?) for the aligned path */
189 	bf	cr7*4+3,1f
190 	lbz	r0,0(r4)
191 	stb	r0,0(r3)
192 	addi	r7,r7,1
193 1:	bf	cr7*4+2,2f
194 	lhzx	r0,r7,r4
195 	sthx	r0,r7,r3
196 	addi	r7,r7,2
197 2:	bf	cr7*4+1,3f
198 	lwzx	r0,r7,r4
199 	stwx	r0,r7,r3
200 3:	PPC_MTOCRF(0x01,r5)	/* refresh tail flags from the reduced length */
201 	add	r4,r6,r4	/* advance both pointers past the alignment bytes */
202 	add	r3,r6,r3
203 	b	.Ldst_aligned
204 
/* n < 16: straight-line copy of 8/4/2/1-byte chunks per the cr7 flags. */
205 .Lshort_copy:
206 	bf	cr7*4+0,1f
207 	lwz	r0,0(r4)	/* 8 bytes moved as two words */
208 	lwz	r9,4(r4)
209 	addi	r4,r4,8
210 	stw	r0,0(r3)
211 	stw	r9,4(r3)
212 	addi	r3,r3,8
213 1:	bf	cr7*4+1,2f
214 	lwz	r0,0(r4)
215 	addi	r4,r4,4
216 	stw	r0,0(r3)
217 	addi	r3,r3,4
218 2:	bf	cr7*4+2,3f
219 	lhz	r0,0(r4)
220 	addi	r4,r4,2
221 	sth	r0,0(r3)
222 	addi	r3,r3,2
223 3:	bf	cr7*4+3,4f
224 	lbz	r0,0(r4)
225 	stb	r0,0(r3)
226 4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
227 	blr
228 #endif
229 EXPORT_SYMBOL(memcpy)
230 EXPORT_SYMBOL_KASAN(memcpy)
231