1 /* SPDX-License-Identifier: GPL-2.0 */
2 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3 
4 #include <linux/linkage.h>
5 
/*
 * GET_FRONT_BITS rx, y
 *
 * Shift register \rx in place by the immediate \y: to the right when the
 * build is little-endian (__cskyLE__ defined), to the left otherwise.
 * memcpy applies it to the previously loaded source word so that the bytes
 * still owed to the output move to the front (lowest addresses) of the
 * word about to be stored.
 */
.macro	GET_FRONT_BITS rx y
#ifndef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm
13 
/*
 * GET_AFTER_BITS rx, y
 *
 * Counterpart of GET_FRONT_BITS: shift register \rx in place by the
 * immediate \y, to the left when the build is little-endian (__cskyLE__
 * defined), to the right otherwise. memcpy applies it to the newly loaded
 * source word so that its leading bytes land in the tail (highest
 * addresses) of the word about to be stored.
 */
.macro	GET_AFTER_BITS rx y
#ifndef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm
21 
/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register usage, as read from the code below:
 *   r2      dest on entry; never written, so it still holds dest at rts
 *   r3      source cursor
 *   r4      number of bytes still to copy
 *   r7      destination cursor (starts as a copy of r2)
 *   r1, r5, r6  scratch
 *   r8-r11  extra data registers for the 16-byte loops; spilled to the
 *           stack before use and reloaded afterwards
 * NOTE(review): r2/r3/r4 are presumably the three C arguments and r2 the
 * return register under the C-SKY abiv1 calling convention - confirm
 * against the ABI document.
 *
 * Strategy:
 *   - n < 4: plain byte loop.
 *   - dest is first brought to a 4-byte boundary with single-byte copies.
 *   - dest and src both aligned: word copies, 16 bytes per iteration
 *     while possible, then 4 bytes, then the byte loop for the tail.
 *   - dest aligned, src off by 1/2/3 bytes: aligned word loads from src,
 *     each output word spliced together from two neighbouring source
 *     words with GET_FRONT_BITS / GET_AFTER_BITS.
 *   - whenever src and dest are closer together than n, the routine falls
 *     back to the forward byte loop.
 */
ENTRY(memcpy)
	mov	r7, r2                   /* r7 = dest cursor, r2 stays intact */
	cmplti	r4, 4
	bt	.L_copy_by_byte          /* n < 4 */
	mov	r6, r2
	andi	r6, 3                    /* r6 = dest & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3                    /* r6 = src & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned

/* Both pointers are 4-byte aligned from here on. */
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)              /* r8 is used as a third data register */
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)             /* r1 is free again after the store */
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return                /* nothing left */

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:                                      /* one word per iteration */
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

/*
 * Forward byte loop. Expects r3/r7 to point at the next source/destination
 * byte and r4 to hold the number of bytes left (may be 0).
 */
.L_copy_by_byte:
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 * On entry r6 = dest & 3 (non-zero) and r4 >= 4.
 */
.L_dest_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5                   /* r5 = |dest - src| */
	cmplt	r5, r4
	bt	.L_copy_by_byte          /* buffers closer than n bytes */
	mov	r5, r7
	sub	r5, r3                   /* r5 = dest - src */
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6                   /* count r5 from dest & 3 up to 4 */
.L5:
	ldb	r1, (r3, 0)              /* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3                   /* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

/*
 * Judge the number of misaligned, 1, 2, 3?
 * On entry r6 = src & 3 (non-zero), dest is 4-byte aligned and r4 >= 4.
 */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5                   /* r5 = |dest - src| */
	cmplt	r5, r4
	bt	.L_copy_by_byte          /* r3 is still the true src here */
	bclri	r3, 0
	bclri	r3, 1                    /* round r3 down to a word boundary */
	ldw	r1, (r3, 0)              /* r1 = word holding the first src bytes */
	addi	r3, 4                    /* r3 = next aligned source word */
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

/*
 * src is 1 byte past a word boundary, so r3 = src + 3 at this point.
 * In the loops r1 always carries the previously loaded source word.
 */
.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3                   /* r5 = dest - (src + 3) */
	cmphs	r5, r4
	/*
	 * Falling back to the byte loop: r3 has already been moved to the
	 * next word boundary, so go through the stub that rewinds it by 3
	 * instead of entering .L_copy_by_byte with the wrong source address.
	 */
	bf	.L_src_off1_to_bytes
	cmplti	r4, 16
	bf	.L11
.L10:                                     /* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8              /* 3 bytes left over from the old word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6                   /* keep the new word for the next round */
	GET_AFTER_BITS r6 24             /* 1 byte from the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_src_off1_to_bytes:
	subi	r3, 3                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:                                     /* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8               /* little or big endian? */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9                   /* r1 = last word loaded, carried over */
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte

/* src is 2 bytes past a word boundary, so r3 = src + 2 at this point. */
.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:
	GET_FRONT_BITS r1 16             /* 2 bytes left over from the old word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6                   /* keep the new word for the next round */
	GET_AFTER_BITS r6 16             /* 2 bytes from the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte

.L21:	/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:                                     /* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9                   /* r1 = last word loaded, carried over */
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte


/* src is 3 bytes past a word boundary, so r3 = src + 1 at this point. */
.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:
	GET_FRONT_BITS r1 24             /* 1 byte left over from the old word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6                   /* keep the new word for the next round */
	GET_AFTER_BITS r6 8              /* 3 bytes from the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:                                     /* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9                   /* r1 = last word loaded, carried over */
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1                    /* back to the first uncopied src byte */
	br	.L_copy_by_byte
348