1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) IBM Corporation, 2011
4  * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
5  * Author - Balbir Singh <bsingharora@gmail.com>
6  */
7 #include <linux/export.h>
8 #include <asm/ppc_asm.h>
9 #include <asm/errno.h>
10 
11 	.macro err1
12 100:
13 	EX_TABLE(100b,.Ldo_err1)
14 	.endm
15 
16 	.macro err2
17 200:
18 	EX_TABLE(200b,.Ldo_err2)
19 	.endm
20 
21 	.macro err3
22 300:	EX_TABLE(300b,.Ldone)
23 	.endm
24 
/*
 * Fixup entry for accesses tagged err2.  Those run while the temporary
 * stack frame holding r14-r22 is live, so reload the saved registers and
 * pop the frame, then fall through into the common fixup below.
 */
.Ldo_err2:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE
	/* fall through */

/*
 * Fixup entry for accesses tagged err1 (no stack frame to undo).
 *
 * Retry the copy one byte at a time so that the exact number of bytes left
 * is known if an access faults again.
 * In:  r3 = destination, r4 = source, r7 = bytes to copy (loaded into CTR)
 * Out: r3 = 0 if the byte loop runs to completion
 */
.Ldo_err1:
	mtctr	r7			/* CTR = bytes to copy */
1:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	1b			/* CTR drops only after a whole byte is copied */
	li	r3,0			/* every byte was copied */
	blr

/*
 * Reached when a byte access in the loop above faults (err3 tag): return
 * the current CTR value, i.e. the count of bytes not yet copied.
 */
.Ldone:
	mfctr	r3
	blr
51 
52 
/*
 * copy_mc_generic - memory copy in which every load and store carries an
 * exception-table fixup.
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 when the whole range was copied; if an access faulted, the
 *	fixup code (.Ldo_err1/.Ldo_err2/.Ldone) returns the number of bytes
 *	that were not copied.
 *
 * Register use:
 *	r7	bytes still to copy; lowered only after a chunk has been
 *		completely loaded and stored
 *	r5	length left after source alignment; its low bits select the
 *		tail steps through CR7
 *	r6	alignment byte count, later scratch/data
 *	r0, r6, r8-r12	data
 *	r14-r22	data for the 64B/128B steps; preserved in a temporary
 *		stack frame
 *
 * Invariant relied on by the fixup code: at every tagged access r3, r4 and
 * r7 all describe the start of the chunk in flight.  The source pointer is
 * therefore advanced after the chunk's stores (together with r3 and r7),
 * never between its loads and stores; otherwise a faulting store would make
 * the byte-wise retry read from the wrong source offset.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes remaining */
	cmpldi	r5,16
	blt	.Lshort_copy		/* fewer than 16B: tail code only */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* CR7 = low 4 bits of -source */
	clrldi	r6,r6,(64-3)		/* r6 = bytes (0-7) needed to align */

	bf	cr7*4+3,1f		/* 1 byte needed? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	addi	r4,r4,1
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2 bytes needed? */
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4 bytes needed? */
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = length left after alignment */
	cmpldi	r5,128			/* CR0 is consumed by the blt below */

	/*
	 * Open a stack frame and save r14-r22, which the 64B/128B steps use
	 * as data registers.  From here until the frame is popped at label 7,
	 * accesses are tagged err2 so the fixup restores these registers.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	/*
	 * LR goes 16 bytes above the previous stack pointer.  This routine
	 * never changes LR, so the value is not reloaded later.
	 */
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* less than 128B left: skip the loop */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B chunks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	/* chunk complete: only now move the pointers and the count */
	addi	r4,r4,128
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = length mod 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* CR7 = r5 bits for 128/64/32/16 */

6:	bf	cr7*4+1,7f		/* 64B step? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r4,r4,64
	addi	r3,r3,64
	subi	r7,r7,64

	/* Done with r14-r22: restore them and pop the frame */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go; no frame any more, so tags are err1 again */
	bf	cr7*4+2,8f		/* 32B step? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r4,r4,32
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16B step? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r4,r4,16
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = length mod 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* CR7 = r5 bits for 8/4/2/1 */
	bf	cr7*4+0,12f		/* 8B step? */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r4,r4,8
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4B step? */
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2B step? */
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* final byte? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	/* last access: pointers and r7 are not used again on this path */

15:	li	r3,0			/* success: nothing left uncopied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
243