1 /*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Neil A. Carson and Mark Brinicombe
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "asm.h"
33 
34 // We do not need to check whether the addresses are in the
35 // kernel or virtual address spaces, since we only access them
36 // using user privileges.
37 
38 .syntax unified
39 .arm
40 
41 // size_t _arm_user_copy(void *dst, const void *src, size_t len)
/*
 * size_t _arm_user_copy(void *dst, const void *src, size_t len)
 *
 * Copy len bytes from src to dst where the buffers may live in user
 * memory.  Register roles: r0 = dst, r1 = src, r2 = bytes remaining;
 * r3, r12, lr are scratch; r4 (and r5) are borrowed around the wide
 * load/store-multiple loops and restored before the tail paths.
 *
 * Returns 0 in r0 on success.  Every numeric label (0: .. 36:) marks an
 * instruction that may fault on a user address; the __exc_table section
 * at the end of this file maps each one to a .Lfix_return* handler that
 * unwinds the stack and returns the number of bytes NOT copied.
 */
FUNCTION(_arm_user_copy)
    /* save leaf functions having to store this away */
    stmdb   sp!, {r0, r1, r2, lr}       /* keep {dst, src, len, lr} for the fault fixups */

    subs    r2, r2, #4
    blt     .Lmemcpy_l4         /* less than 4 bytes */
    ands    r12, r0, #3
    bne     .Lmemcpy_destul     /* oh unaligned destination addr */
    ands    r12, r1, #3
    bne     .Lmemcpy_srcul      /* oh unaligned source addr */

.Lmemcpy_t8:
    /* We have aligned source and destination */
    subs    r2, r2, #8
    blt     .Lmemcpy_l12        /* less than 12 bytes (4 from above) */
    subs    r2, r2, #0x14
    blt     .Lmemcpy_l32        /* less than 32 bytes (12 from above) */
    stmdb   sp!, {r4}           /* borrow r4 */

    /* blat 32 bytes at a time */
    /* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
0:  ldmia   r1!, {r3, r4, r12, lr}
1:  stmia   r0!, {r3, r4, r12, lr}
2:  ldmia   r1!, {r3, r4, r12, lr}
3:  stmia   r0!, {r3, r4, r12, lr}
    subs    r2, r2, #0x20
    bge     .Lmemcpy_loop32

    cmn     r2, #0x10           /* at least 16 bytes still to go? (r2 + 16 >= 0) */
4:  ldmiage r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
5:  stmiage r0!, {r3, r4, r12, lr}
    subge   r2, r2, #0x10
    ldmia   sp!, {r4}           /* return r4 */

.Lmemcpy_l32:
    adds    r2, r2, #0x14       /* undo the 0x14 bias; GE iff >= 12 bytes remain */

    /* blat 12 bytes at a time */
.Lmemcpy_loop12:
6:  ldmiage r1!, {r3, r12, lr}
7:  stmiage r0!, {r3, r12, lr}
    subsge  r2, r2, #0x0c
    bge     .Lmemcpy_loop12

.Lmemcpy_l12:
    adds    r2, r2, #8          /* undo the 8 bias from .Lmemcpy_t8 */
    blt     .Lmemcpy_l4

    subs    r2, r2, #4          /* LT: exactly one word left; GE: two words */
8:  ldrlt   r3, [r1], #4
9:  strlt   r3, [r0], #4
10: ldmiage r1!, {r3, r12}
11: stmiage r0!, {r3, r12}
    subge   r2, r2, #4

.Lmemcpy_l4:
    /* less than 4 bytes to go */
    adds    r2, r2, #4          /* undo the initial 4 bias; Z set iff nothing left */
    beq     .Lmemcpy_return
    /* copy the crud byte at a time */
    cmp     r2, #2              /* GE -> >= 2 bytes, GT -> 3 bytes */
12: ldrb    r3, [r1], #1
13: strb    r3, [r0], #1
14: ldrbge  r3, [r1], #1
15: strbge  r3, [r0], #1
16: ldrbgt  r3, [r1], #1
17: strbgt  r3, [r0], #1

.Lmemcpy_return:
    ldmia   sp!, {r0, r1, r2, lr}
    mov     r0, 0               /* success: 0 bytes left uncopied */
    bx      lr

    /* erg - unaligned destination */
.Lmemcpy_destul:
    rsb     r12, r12, #4        /* r12 = bytes needed to align dst (1..3) */
    cmp     r12, #2

    /* align destination with byte copies */
18: ldrb    r3, [r1], #1
19: strb    r3, [r0], #1
20: ldrbge  r3, [r1], #1
21: strbge  r3, [r0], #1
22: ldrbgt  r3, [r1], #1
23: strbgt  r3, [r0], #1
    subs    r2, r2, r12
    blt     .Lmemcpy_l4         /* less than 4 bytes left */

    ands    r12, r1, #3
    beq     .Lmemcpy_t8         /* we have an aligned source */

    /* erg - unaligned source */
    /* This is where it gets nasty ... */
    /*
     * Technique: round src down to a word boundary, read aligned words,
     * and reassemble the misaligned stream with lsr/lsl merges.  lr
     * always carries the partially-consumed source word between
     * iterations.  r12 (1, 2 or 3) is the source misalignment and
     * selects one of three shift-constant variants below.
     */
.Lmemcpy_srcul:
    bic     r1, r1, #3          /* word-align src downwards */
24: ldr     lr, [r1], #4        /* prime lr with the first aligned word */
    cmp     r12, #2
    bgt     .Lmemcpy_srcul3     /* misaligned by 3 */
    beq     .Lmemcpy_srcul2     /* misaligned by 2 */
    cmp     r2, #0x0c           /* misaligned by 1 */
    blt     .Lmemcpy_srcul1loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 for the 16-byte loop */

.Lmemcpy_srcul1loop16:
    mov     r3, lr, lsr #8
25: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #24
    mov     r4, r4, lsr #8
    orr     r4, r4, r5, lsl #24
    mov     r5, r5, lsr #8
    orr     r5, r5, r12, lsl #24
    mov     r12, r12, lsr #8
    orr     r12, r12, lr, lsl #24
26: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul1loop16
    ldmia   sp!, {r4, r5}       /* return r4/r5 */
    adds    r2, r2, #0x0c       /* undo the 12 bias */
    blt     .Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
    mov     r12, lr, lsr #8
27: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #24
28: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
    sub     r1, r1, #3          /* back up r1 to the true (unaligned) src position */
    b       .Lmemcpy_l4

.Lmemcpy_srcul2:
    cmp     r2, #0x0c
    blt     .Lmemcpy_srcul2loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 for the 16-byte loop */

.Lmemcpy_srcul2loop16:
    mov     r3, lr, lsr #16
29: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #16
    mov     r4, r4, lsr #16
    orr     r4, r4, r5, lsl #16
    mov     r5, r5, lsr #16
    orr     r5, r5, r12, lsl #16
    mov     r12, r12, lsr #16
    orr     r12, r12, lr, lsl #16
30: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul2loop16
    ldmia   sp!, {r4, r5}       /* return r4/r5 */
    adds    r2, r2, #0x0c       /* undo the 12 bias */
    blt     .Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
    mov     r12, lr, lsr #16
31: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #16
32: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
    sub     r1, r1, #2          /* back up r1 to the true (unaligned) src position */
    b       .Lmemcpy_l4

.Lmemcpy_srcul3:
    cmp     r2, #0x0c
    blt     .Lmemcpy_srcul3loop4
    sub     r2, r2, #0x0c
    stmdb   sp!, {r4, r5}       /* borrow r4/r5 for the 16-byte loop */

.Lmemcpy_srcul3loop16:
    mov     r3, lr, lsr #24
33: ldmia   r1!, {r4, r5, r12, lr}
    orr     r3, r3, r4, lsl #8
    mov     r4, r4, lsr #24
    orr     r4, r4, r5, lsl #8
    mov     r5, r5, lsr #24
    orr     r5, r5, r12, lsl #8
    mov     r12, r12, lsr #24
    orr     r12, r12, lr, lsl #8
34: stmia   r0!, {r3-r5, r12}
    subs    r2, r2, #0x10
    bge     .Lmemcpy_srcul3loop16
    ldmia   sp!, {r4, r5}       /* return r4/r5 */
    adds    r2, r2, #0x0c       /* undo the 12 bias */
    blt     .Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
    mov     r12, lr, lsr #24
35: ldr     lr, [r1], #4
    orr     r12, r12, lr, lsl #8
36: str     r12, [r0], #4
    subs    r2, r2, #4
    bge     .Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
    sub     r1, r1, #1          /* back up r1 to the true (unaligned) src position */
    b       .Lmemcpy_l4

    /*
     * Fault fixups.  Entered (via __exc_table) when one of the numbered
     * loads/stores above faults.  Each variant unwinds whatever extra
     * registers were pushed at that point, then computes the return
     * value: len - (bytes estimated copied) = bytes NOT copied.
     *
     * NOTE(review): the copied-bytes estimate below subtracts the lower
     * of the two saved base addresses (min(dst, src)) from r2, which
     * appears to assume the exception dispatcher delivers the faulting
     * address in r2 on entry — confirm against the __exc_table handler.
     */
.Lfix_return1:
    ldmia   sp!, {r4}           /* fault occurred with r4 pushed: restore it */
.Lfix_return2:
    ldmia   sp!, {r0, r1}       /* r0 = original dst, r1 = original src */
    cmp     r0, r1
    bhs     .Lfix_return
    sub     r0, r2, r0          /* dst < src: r0 = r2 - dst */
    b       .Lfix_return_done
.Lfix_return:
    sub     r0, r2, r1          /* dst >= src: r0 = r2 - src */
.Lfix_return_done:
    ldmia   sp!, {r2, lr}       /* r2 = original len */
    sub     r0, r2, r0          /* return bytes not copied */
    bx      lr
.Lfix_return3:
    ldmia   sp!, {r4, r5}       /* fault occurred with r4/r5 pushed: restore them */
    b       .Lfix_return2
263 
/*
 * Fault fixup table.  Each entry is a pair of words:
 *   { address of a faultable instruction (numeric label above),
 *     address of the fixup routine to run if it faults }.
 * The fixup chosen matches the stack depth at the faulting instruction:
 *   .Lfix_return1 - r4 also pushed          (32/16-byte aligned loop)
 *   .Lfix_return2 - only {r0,r1,r2,lr} pushed
 *   .Lfix_return3 - r4 and r5 also pushed   (unaligned 16-byte loops)
 */
.pushsection __exc_table, "a"
    .long   0b,  .Lfix_return1
    .long   1b,  .Lfix_return1
    .long   2b,  .Lfix_return1
    .long   3b,  .Lfix_return1
    .long   4b,  .Lfix_return1
    .long   5b,  .Lfix_return1
    .long   6b,  .Lfix_return2
    .long   7b,  .Lfix_return2
    .long   8b,  .Lfix_return2
    .long   9b,  .Lfix_return2
    .long   10b, .Lfix_return2
    .long   11b, .Lfix_return2
    .long   12b, .Lfix_return2
    .long   13b, .Lfix_return2
    .long   14b, .Lfix_return2
    .long   15b, .Lfix_return2
    .long   16b, .Lfix_return2
    .long   17b, .Lfix_return2
    .long   18b, .Lfix_return2
    .long   19b, .Lfix_return2
    .long   20b, .Lfix_return2
    .long   21b, .Lfix_return2
    .long   22b, .Lfix_return2
    .long   23b, .Lfix_return2
    .long   24b, .Lfix_return2
    .long   25b, .Lfix_return3
    .long   26b, .Lfix_return3
    .long   27b, .Lfix_return2
    .long   28b, .Lfix_return2
    .long   29b, .Lfix_return3
    .long   30b, .Lfix_return3
    .long   31b, .Lfix_return2
    .long   32b, .Lfix_return2
    .long   33b, .Lfix_return3
    .long   34b, .Lfix_return3
    .long   35b, .Lfix_return2
    .long   36b, .Lfix_return2
.popsection
303