/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases for the two 64-bit quantities handled below.
 * x (dividend in, remainder out) lives in r0/r1 and y (quotient out)
 * lives in r2/r3; which register of each pair holds the high word
 * depends on the target endianness.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 * The condition flags are modified as well.
 *
 * Division by zero: __div0 is called (lr is saved on the stack around
 * the call) and, if it returns, result and remainder are both set to 0.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	@ ip = divisor - 1.  "ls" is true when the subtraction borrowed
	@ (divisor == 0) or produced zero (divisor == 1).
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ d & (d - 1) == 0 -> single bit set
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ If xh < divisor the upper quotient word is 0 and xh is already
	@ a valid partial remainder, so go straight to the lower word.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ xh >= r4 here, hence clz(xh) <= clz(r4) and the shift count
	@ computed in yl is not negative.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ yl = shift count
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = quotient bit for that shift
	mov	yl, r4, lsl yl		@ yl = divisor << shift

#else

	@ No clz available: shift the divisor (and the matching quotient
	@ bit) left one position at a time while its top bit is clear
	@ and it is still below xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ The "ne" condition seen by movsne comes either from the cmp
	@ (xh < yl, nothing subtracted) or from the subs (xh still
	@ non-zero).  movsne then sets Z once the bit mask in ip is
	@ exhausted; the plain mov of yl leaves the flags untouched so
	@ bne loops only while both xh and ip are non-zero.
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
	subscs	xh, xh, yl		@ ... and take the divisor out
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ When xh == 0 and xl < divisor, the lower quotient word is 0 and
	@ xl is the final remainder.  When xh != 0 the first cmp leaves
	@ carry set, so the "lo" instructions are skipped.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	@ ip holds the quotient bit currently being determined.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1		@ C = top bit of xl
	adcs	xh, xh, xh		@ xh = 2 * xh + C, C = bit shifted out
	beq	6f			@ xh (low 32 bits) became zero
	cmpcc	xh, r4			@ no carry out: compare for real
5:	orrcs	yl, yl, ip		@ carry out or xh >= divisor
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ Both variants below shift xl left until its highest set bit has
	@ just been shifted out, moving ip right by the same amount.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ ... use it as the shift count
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = log2(divisor)

#else

	@ Binary search for the position of the single set bit,
	@ accumulating it in ip while yl is narrowed down.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 0, 1, 2 or 3 respectively.
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ Quotient = dividend >> order.  Remainder = low 'order' bits of
	@ xl, extracted by shifting left then right by (32 - order).
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
	@ The THUMB variant shifts xh in place before merging it into yl;
	@ that is harmless since xh is recomputed from xl right after.
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ The flags are still those of the initial subs.  When the divisor
	@ is 0 instead, none of these execute and control falls through
	@ into Ldiv0_64 below.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ Save lr in an 8-byte stack slot (lr plus 4 bytes of padding,
	@ matching the .pad/.save annotations above) around the call.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ pop the saved lr into pc and return

UNWIND(.fnend)
ENDPROC(__do_div64)