/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases for the two 64-bit quantities (x = dividend/remainder,
 * y = result).  Which register of each pair carries the high word depends
 * on the endianness selected by __ARMEB__.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * Special cases dispatched before the generic shift-and-subtract loops:
 *	divisor == 0	-> __div0 is called, then result and remainder are
 *			   both returned as 0
 *	divisor == 1	-> result = dividend, remainder = 0
 *	divisor == 2^n	-> result and remainder are produced with shifts only
 *
 * The generic path is split in two loops: one producing the upper result
 * word (yh) by moving an aligned copy of the divisor rightwards, and one
 * producing the lower result word (yl) by moving the remainder leftwards.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1, flags kept for 9f
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor - 1) & divisor
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0			@ plain mov: flags from cmp are kept
	blo	3f			@ xh < divisor: upper result word is 0

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ shift count = clz(divisor) - clz(xh)
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = result bit for that position
	mov	yl, r4, lsl yl		@ yl = divisor moved to that position

#else

	@ No clz available: shift one position at a time while the divisor
	@ copy is below both 0x80000000 and xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ Z is set either by the subtraction (xh became 0) or by the shift
	@ of ip (no bit position left); both terminate the loop.
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: set this result bit ...
	subscs	xh, xh, yl		@ ... and take the divisor copy out
	movsne	ip, ip, lsr #1		@ next bit position unless xh is 0
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ If xh is 0 and xl is below the divisor, the lower result word is 0
	@ and xl already is the remainder.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ start with the top bit of yl
4:	movs	xl, xl, lsl #1		@ top bit of xl goes to carry ...
	adcs	xh, xh, xh		@ ... and enters xh as xh = 2 * xh + C
	beq	6f			@ xh reads as zero: sort it out at 6
	cmpcc	xh, r4			@ no carry-out: compare with divisor
5:	orrcs	yl, yl, ip		@ carry-out or xh >= divisor: set bit
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ The highest set bit of xl is shifted out and ip is moved down by
	@ the same number of positions.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ ... use it as the shift count
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1		@ loop until a set bit is shifted out
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1			@ plain mov: Z from the shift is kept
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = index of the single set bit

#else

	@ No clz available: narrow the set bit down by 16, 8 and 4 positions,
	@ accumulating the order in ip and leaving 1, 2, 4 or 8 in yl.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3		@ yl is 8: three more positions
	addls	ip, ip, yl, lsr #1	@ yl is 1, 2 or 4: add 0, 1 or 2

#endif

	@ With n = ip: result = dividend >> n, remainder = low n bits of xl.
	@ The Thumb variant performs the shift of xh as a separate
	@ instruction; xh is reloaded with the remainder right after.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - n
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip		@ drop the upper 32 - n bits of xl ...
	mov	xh, xh, lsr ip		@ ... and move the rest back down
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ Flags still come from the subs at function entry.  When they are
	@ not eq the divisor is 0 and execution falls through to Ldiv0_64.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is stored in an 8 byte stack slot (lr plus 4 bytes of padding,
	@ as described by the unwind annotations above) around the call.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ return and release the stack slot

UNWIND(.fnend)
ENDPROC(__do_div64)