/* xref: /kernel/linux/linux-6.6/arch/arm/lib/div64.S (revision 62306a36) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases for the two 64-bit values used by this file.
 * The "x" value lives in r0-r1 and the "y" value in r2-r3; the "h" alias
 * names the high word and the "l" alias the low word of each pair.
 * On big-endian builds (__ARMEB__) the high word is the lower-numbered
 * register of the pair, otherwise it is the higher-numbered one.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * A zero divisor is reported by calling __div0; the result and the
 * remainder are then both returned as zero.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	@ ip = divisor - 1.  LS (borrow or zero result) means the divisor
	@ is 0 or 1; the two cases are told apart at label 9 with EQ.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ d & (d - 1) is zero only for 2^n
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ If the upper dividend word is below the divisor, the upper
	@ result word is zero and the first loop can be skipped.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift count = difference of the leading zero counts.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	@ No clz available: shift left one bit at a time until the aligned
	@ divisor has its top bit set or is no longer below the dividend.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ The loop ends with Z set either because the subtraction left the
	@ upper dividend word at zero or because the bit in ip was shifted out.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1		@ no S suffix: flags are kept for bne
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ LO can only come from the second compare, that is when the upper
	@ word is zero and the lower word is below the divisor.  The lower
	@ word is then the remainder and the lower result word stays zero.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1		@ carry = bit shifted out of low word
	adcs	xh, xh, xh		@ upper = 2 * upper + carry
	beq	6f
	cmpcc	xh, r4			@ only compare if no 33rd bit carried out
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ The highest set bit of the low word is shifted out of it and the
	@ bit position in ip is advanced by the same number of steps.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = index of the single set bit

#else

	@ Binary search for the set bit: narrow by 16, 8 and 4 bits, then
	@ resolve the remaining value (1, 2, 4 or 8) directly.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ Result = dividend >> order.  The bits of the upper word that fall
	@ into the lower result word are merged in with a shift by
	@ (32 - order); the same count then isolates the low order bits of
	@ the dividend as the remainder.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ Otherwise the divisor is 0 and execution falls through to the
	@ division by zero handling below.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is saved with an 8 byte stack adjustment (4 bytes of padding
	@ plus lr, as described by the unwind annotations above).
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ return and release the 8 bytes

UNWIND(.fnend)
ENDPROC(__do_div64)