/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases for the two 64-bit quantities: x is the dividend
 * (and later the remainder), y is the quotient.  Which register of each
 * pair holds the high word depends on the endianness.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * Special cases handled below:
 *	divisor == 1	  -> result = dividend, remainder = 0
 *	divisor == 2^n	  -> result and remainder obtained by shifting
 *	divisor == 0	  -> __div0 is called, then result = 0 and
 *			     remainder = 0 are returned
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1, flags kept for 9:
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor - 1) & divisor == 0 ?
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ When xh < divisor the upper result word is 0 and the upper
	@ division loop is skipped entirely.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift count = clz(divisor) - clz(xh).  This cannot be negative
	@ since xh >= divisor on this path.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	@ No clz available: shift the divisor copy (and the bit position)
	@ left one bit at a time until its top bit is set or it is no
	@ longer below xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ (When the subtraction leaves xh == 0 the Z flag stays set, the
	@ conditional shift of ip is skipped and the loop branch falls
	@ through.  Otherwise the loop ends when ip is shifted down to 0.)
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ If xh == 0 and xl < divisor, the lower result word is 0 and
	@ xl is the remainder.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ start with result bit 31
4:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
	adcs	xh, xh, xh		@ xh = 2 * xh + C, C = carry-out
	beq	6f			@ xh became 0: sort it out at 6:
	cmpcc	xh, r4			@ no carry-out: C = (xh >= divisor)
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	@ xh is used as scratch for the shift count: clz(xl) + 1 shifts
	@ push the leading set bit of xl out of the register.  The bit
	@ position in ip is advanced by the same amount.
	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

	@ Same as above one bit at a time: stop once a set bit has been
	@ shifted out of xl (carry set).
7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = order

#else

	@ Binary search for the position of the single set bit,
	@ accumulating the order in ip while narrowing yl down.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 0, 1, 2 or 3 respectively.
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ result = dividend >> order, remainder = low 'order' bits of xl.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - order
	@ Bring the bits shifted out of xh into the top of yl.  The THUMB
	@ variant does it in two steps and overwrites xh, which is fine
	@ as xh is recomputed right after.
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ (Flags are still those of the subs at entry.  When the divisor
	@ is 0 none of these execute and we fall through to Ldiv0_64.)
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

	@ Unwind annotations for the 8-byte frame set up below:
	@ 4 bytes of padding plus the saved lr.
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!		@ save lr, sp -= 8
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ return via saved lr, sp += 8

UNWIND(.fnend)
ENDPROC(__do_div64)