1 /*
2  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3  *
4  * Author: Nicolas Pitre <nico@fluxnic.net>
5  *   - contributed to gcc-3.4 on Sep 30, 2003
6  *   - adapted for the Linux kernel on Oct 2, 2003
7  */
8 
9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10 
11 This file is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by the
13 Free Software Foundation; either version 2, or (at your option) any
14 later version.
15 
16 In addition to the permissions in the GNU General Public License, the
17 Free Software Foundation gives you unlimited permission to link the
18 compiled version of this file into combinations with other programs,
19 and to distribute those combinations without any restriction coming
20 from the use of this file.  (The General Public License restrictions
21 do apply in other respects; for example, they cover modification of
22 the file, and distribution when not linked into a combine
23 executable.)
24 
25 This file is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28 General Public License for more details.
29 
30 You should have received a copy of the GNU General Public License
31 along with this program; see the file COPYING.  If not, write to
32 the Free Software Foundation, 59 Temple Place - Suite 330,
33 Boston, MA 02111-1307, USA.  */
34 
35 
36 #include <linux/linkage.h>
37 #include <asm/assembler.h>
38 #include <asm/unwind.h>
39 
40 .macro ARM_DIV_BODY dividend, divisor, result, curbit
41 
42 #if __LINUX_ARM_ARCH__ >= 5
43 
44 	clz	\curbit, \divisor
45 	clz	\result, \dividend
46 	sub	\result, \curbit, \result
47 	mov	\curbit, #1
48 	mov	\divisor, \divisor, lsl \result
49 	mov	\curbit, \curbit, lsl \result
50 	mov	\result, #0
51 
52 #else
53 
54 	@ Initially shift the divisor left 3 bits if possible,
55 	@ set curbit accordingly.  This allows for curbit to be located
56 	@ at the left end of each 4 bit nibbles in the division loop
57 	@ to save one loop in most cases.
58 	tst	\divisor, #0xe0000000
59 	moveq	\divisor, \divisor, lsl #3
60 	moveq	\curbit, #8
61 	movne	\curbit, #1
62 
63 	@ Unless the divisor is very big, shift it up in multiples of
64 	@ four bits, since this is the amount of unwinding in the main
65 	@ division loop.  Continue shifting until the divisor is
66 	@ larger than the dividend.
67 1:	cmp	\divisor, #0x10000000
68 	cmplo	\divisor, \dividend
69 	movlo	\divisor, \divisor, lsl #4
70 	movlo	\curbit, \curbit, lsl #4
71 	blo	1b
72 
73 	@ For very big divisors, we must shift it a bit at a time, or
74 	@ we will be in danger of overflowing.
75 1:	cmp	\divisor, #0x80000000
76 	cmplo	\divisor, \dividend
77 	movlo	\divisor, \divisor, lsl #1
78 	movlo	\curbit, \curbit, lsl #1
79 	blo	1b
80 
81 	mov	\result, #0
82 
83 #endif
84 
85 	@ Division loop
86 1:	cmp	\dividend, \divisor
87 	subhs	\dividend, \dividend, \divisor
88 	orrhs	\result,   \result,   \curbit
89 	cmp	\dividend, \divisor,  lsr #1
90 	subhs	\dividend, \dividend, \divisor, lsr #1
91 	orrhs	\result,   \result,   \curbit,  lsr #1
92 	cmp	\dividend, \divisor,  lsr #2
93 	subhs	\dividend, \dividend, \divisor, lsr #2
94 	orrhs	\result,   \result,   \curbit,  lsr #2
95 	cmp	\dividend, \divisor,  lsr #3
96 	subhs	\dividend, \dividend, \divisor, lsr #3
97 	orrhs	\result,   \result,   \curbit,  lsr #3
98 	cmp	\dividend, #0			@ Early termination?
99 	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
100 	movne	\divisor,  \divisor, lsr #4
101 	bne	1b
102 
103 .endm
104 
105 
106 .macro ARM_DIV2_ORDER divisor, order
107 
108 #if __LINUX_ARM_ARCH__ >= 5
109 
110 	clz	\order, \divisor
111 	rsb	\order, \order, #31
112 
113 #else
114 
115 	cmp	\divisor, #(1 << 16)
116 	movhs	\divisor, \divisor, lsr #16
117 	movhs	\order, #16
118 	movlo	\order, #0
119 
120 	cmp	\divisor, #(1 << 8)
121 	movhs	\divisor, \divisor, lsr #8
122 	addhs	\order, \order, #8
123 
124 	cmp	\divisor, #(1 << 4)
125 	movhs	\divisor, \divisor, lsr #4
126 	addhs	\order, \order, #4
127 
128 	cmp	\divisor, #(1 << 2)
129 	addhi	\order, \order, #3
130 	addls	\order, \order, \divisor, lsr #1
131 
132 #endif
133 
134 .endm
135 
136 
137 .macro ARM_MOD_BODY dividend, divisor, order, spare
138 
139 #if __LINUX_ARM_ARCH__ >= 5
140 
141 	clz	\order, \divisor
142 	clz	\spare, \dividend
143 	sub	\order, \order, \spare
144 	mov	\divisor, \divisor, lsl \order
145 
146 #else
147 
148 	mov	\order, #0
149 
150 	@ Unless the divisor is very big, shift it up in multiples of
151 	@ four bits, since this is the amount of unwinding in the main
152 	@ division loop.  Continue shifting until the divisor is
153 	@ larger than the dividend.
154 1:	cmp	\divisor, #0x10000000
155 	cmplo	\divisor, \dividend
156 	movlo	\divisor, \divisor, lsl #4
157 	addlo	\order, \order, #4
158 	blo	1b
159 
160 	@ For very big divisors, we must shift it a bit at a time, or
161 	@ we will be in danger of overflowing.
162 1:	cmp	\divisor, #0x80000000
163 	cmplo	\divisor, \dividend
164 	movlo	\divisor, \divisor, lsl #1
165 	addlo	\order, \order, #1
166 	blo	1b
167 
168 #endif
169 
170 	@ Perform all needed subtractions to keep only the reminder.
171 	@ Do comparisons in batch of 4 first.
172 	subs	\order, \order, #3		@ yes, 3 is intended here
173 	blt	2f
174 
175 1:	cmp	\dividend, \divisor
176 	subhs	\dividend, \dividend, \divisor
177 	cmp	\dividend, \divisor,  lsr #1
178 	subhs	\dividend, \dividend, \divisor, lsr #1
179 	cmp	\dividend, \divisor,  lsr #2
180 	subhs	\dividend, \dividend, \divisor, lsr #2
181 	cmp	\dividend, \divisor,  lsr #3
182 	subhs	\dividend, \dividend, \divisor, lsr #3
183 	cmp	\dividend, #1
184 	mov	\divisor, \divisor, lsr #4
185 	subsge	\order, \order, #4
186 	bge	1b
187 
188 	tst	\order, #3
189 	teqne	\dividend, #0
190 	beq	5f
191 
192 	@ Either 1, 2 or 3 comparison/subtractions are left.
193 2:	cmn	\order, #2
194 	blt	4f
195 	beq	3f
196 	cmp	\dividend, \divisor
197 	subhs	\dividend, \dividend, \divisor
198 	mov	\divisor,  \divisor,  lsr #1
199 3:	cmp	\dividend, \divisor
200 	subhs	\dividend, \dividend, \divisor
201 	mov	\divisor,  \divisor,  lsr #1
202 4:	cmp	\dividend, \divisor
203 	subhs	\dividend, \dividend, \divisor
204 5:
205 .endm
206 
207 
208 #ifdef CONFIG_ARM_PATCH_IDIV
209 	.align	3
210 #endif
211 
212 ENTRY(__udivsi3)
213 ENTRY(__aeabi_uidiv)
214 UNWIND(.fnstart)
215 
216 	subs	r2, r1, #1
217 	reteq	lr
218 	bcc	Ldiv0
219 	cmp	r0, r1
220 	bls	11f
221 	tst	r1, r2
222 	beq	12f
223 
224 	ARM_DIV_BODY r0, r1, r2, r3
225 
226 	mov	r0, r2
227 	ret	lr
228 
229 11:	moveq	r0, #1
230 	movne	r0, #0
231 	ret	lr
232 
233 12:	ARM_DIV2_ORDER r1, r2
234 
235 	mov	r0, r0, lsr r2
236 	ret	lr
237 
238 UNWIND(.fnend)
239 ENDPROC(__udivsi3)
240 ENDPROC(__aeabi_uidiv)
241 
242 ENTRY(__umodsi3)
243 UNWIND(.fnstart)
244 
245 	subs	r2, r1, #1			@ compare divisor with 1
246 	bcc	Ldiv0
247 	cmpne	r0, r1				@ compare dividend with divisor
248 	moveq   r0, #0
249 	tsthi	r1, r2				@ see if divisor is power of 2
250 	andeq	r0, r0, r2
251 	retls	lr
252 
253 	ARM_MOD_BODY r0, r1, r2, r3
254 
255 	ret	lr
256 
257 UNWIND(.fnend)
258 ENDPROC(__umodsi3)
259 
260 #ifdef CONFIG_ARM_PATCH_IDIV
261 	.align 3
262 #endif
263 
264 ENTRY(__divsi3)
265 ENTRY(__aeabi_idiv)
266 UNWIND(.fnstart)
267 
268 	cmp	r1, #0
269 	eor	ip, r0, r1			@ save the sign of the result.
270 	beq	Ldiv0
271 	rsbmi	r1, r1, #0			@ loops below use unsigned.
272 	subs	r2, r1, #1			@ division by 1 or -1 ?
273 	beq	10f
274 	movs	r3, r0
275 	rsbmi	r3, r0, #0			@ positive dividend value
276 	cmp	r3, r1
277 	bls	11f
278 	tst	r1, r2				@ divisor is power of 2 ?
279 	beq	12f
280 
281 	ARM_DIV_BODY r3, r1, r0, r2
282 
283 	cmp	ip, #0
284 	rsbmi	r0, r0, #0
285 	ret	lr
286 
287 10:	teq	ip, r0				@ same sign ?
288 	rsbmi	r0, r0, #0
289 	ret	lr
290 
291 11:	movlo	r0, #0
292 	moveq	r0, ip, asr #31
293 	orreq	r0, r0, #1
294 	ret	lr
295 
296 12:	ARM_DIV2_ORDER r1, r2
297 
298 	cmp	ip, #0
299 	mov	r0, r3, lsr r2
300 	rsbmi	r0, r0, #0
301 	ret	lr
302 
303 UNWIND(.fnend)
304 ENDPROC(__divsi3)
305 ENDPROC(__aeabi_idiv)
306 
307 ENTRY(__modsi3)
308 UNWIND(.fnstart)
309 
310 	cmp	r1, #0
311 	beq	Ldiv0
312 	rsbmi	r1, r1, #0			@ loops below use unsigned.
313 	movs	ip, r0				@ preserve sign of dividend
314 	rsbmi	r0, r0, #0			@ if negative make positive
315 	subs	r2, r1, #1			@ compare divisor with 1
316 	cmpne	r0, r1				@ compare dividend with divisor
317 	moveq	r0, #0
318 	tsthi	r1, r2				@ see if divisor is power of 2
319 	andeq	r0, r0, r2
320 	bls	10f
321 
322 	ARM_MOD_BODY r0, r1, r2, r3
323 
324 10:	cmp	ip, #0
325 	rsbmi	r0, r0, #0
326 	ret	lr
327 
328 UNWIND(.fnend)
329 ENDPROC(__modsi3)
330 
331 #ifdef CONFIG_AEABI
332 
333 ENTRY(__aeabi_uidivmod)
334 UNWIND(.fnstart)
335 UNWIND(.save {r0, r1, ip, lr}	)
336 
337 	stmfd	sp!, {r0, r1, ip, lr}
338 	bl	__aeabi_uidiv
339 	ldmfd	sp!, {r1, r2, ip, lr}
340 	mul	r3, r0, r2
341 	sub	r1, r1, r3
342 	ret	lr
343 
344 UNWIND(.fnend)
345 ENDPROC(__aeabi_uidivmod)
346 
347 ENTRY(__aeabi_idivmod)
348 UNWIND(.fnstart)
349 UNWIND(.save {r0, r1, ip, lr}	)
350 	stmfd	sp!, {r0, r1, ip, lr}
351 	bl	__aeabi_idiv
352 	ldmfd	sp!, {r1, r2, ip, lr}
353 	mul	r3, r0, r2
354 	sub	r1, r1, r3
355 	ret	lr
356 
357 UNWIND(.fnend)
358 ENDPROC(__aeabi_idivmod)
359 
360 #endif
361 
362 Ldiv0:
363 UNWIND(.fnstart)
364 UNWIND(.pad #4)
365 UNWIND(.save {lr})
366 	str	lr, [sp, #-8]!
367 	bl	__div0
368 	mov	r0, #0			@ About as wrong as it could be.
369 	ldr	pc, [sp], #8
370 UNWIND(.fnend)
371 ENDPROC(Ldiv0)
372