/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * (These are the names produced by the func()/ufunction/sfunction
 * macros below; which pair a given build emits depends on whether
 * DIV and INTSIZE are defined.)
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25, and return the result in $27. Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit
 *	$1 - shifted divisor
 *	$2 - modulus/quotient
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient/modulus
 *	$28 - compare status
 */

#include <asm/export.h>
/* NOTE(review): "halt" is not referenced anywhere in this file. */
#define halt .long 0

/*
 * Select function type and registers
 */
#define mask	$0
#define divisor	$1
#define compare	$28
#define tmp1	$3
#define tmp2	$4

/*
 * DIV selects the quotient routines, otherwise the remainder routines
 * are built.  Whichever of quotient/modulus is the wanted result is
 * mapped onto the return register $27; the other one lives in $2.
 *
 * DIV_ONLY()/MOD_ONLY() emit their argument only in the matching
 * build.  The variadic "y..." part lets an instruction containing
 * commas be passed as a single macro invocation.  MOD_ONLY is defined
 * for symmetry but is not used below.
 *
 * GETSIGN(x) leaves a value in x whose sign is the sign the result
 * must have: sign(dividend ^ divisor) for a quotient, sign(dividend)
 * for a remainder.
 *
 * STACK is the frame size.  The DIV build needs one more save slot
 * (tmp2 at 32($30)) than the remainder build, hence 48 versus 32.
 * NOTE(review): 48 rather than 40 is presumably to keep $30 16-byte
 * aligned - confirm against the Alpha calling standard.
 */
#ifdef DIV
#define DIV_ONLY(x,y...) x,##y
#define MOD_ONLY(x,y...)
#define func(x) __div##x
#define modulus $2
#define quotient $27
#define GETSIGN(x) xor $24,$25,x
#define STACK 48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...) x,##y
#define func(x) __rem##x
#define modulus $27
#define quotient $2
#define GETSIGN(x) bis $24,$24,x
#define STACK 32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit
 *
 * LONGIFY(x) keeps only the low four bytes of x (zero extension);
 * SLONGIFY(x) sign-extends the low 32 bits of x.  Both expand to
 * nothing in the 64-bit build.
 */
#ifdef INTSIZE
#define ufunction func(lu)
#define sfunction func(l)
#define LONGIFY(x) zapnot x,15,x
#define SLONGIFY(x) addl x,0,x
#else
#define ufunction func(qu)
#define sfunction func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * Unsigned divide / remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV build) or remainder (otherwise)
 * $0-$3 (and $4 in the DIV build) are saved on entry and restored on
 * exit; $28 is used as scratch and not preserved.
 *
 * A zero divisor branches straight to the exit path, so the DIV build
 * returns 0 and the remainder build returns the dividend
 * (zero-extended from 32 bits in the INTSIZE build).  No trap is
 * raised here.
 */
.set noat
.align	3
.globl	ufunction
.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/*
	 * Label 7 is also the target of the signed routine's fast path,
	 * which arrives here with an identically sized frame already
	 * allocated.  Register saves are interleaved with the setup of
	 * divisor/modulus/quotient/mask.
	 */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus
	stq	$0,16($30)
	bis	$31,$31,quotient
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * 64-bit case: shift divisor and mask left one bit at a time
	 * while the pre-shift divisor is below the dividend.  Bail out
	 * to the subtract loop as soon as the divisor's top bit is set,
	 * because another left shift would lose that bit.
	 */
1:	cmpult	divisor,modulus,compare
	blt	divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/*
	 * ok, start to go right again..
	 *
	 * Each pass: if divisor <= modulus, subtract it from modulus
	 * and (DIV build) add the current mask bit to quotient; then
	 * shift divisor and mask right by one.  The loop ends when the
	 * mask bit has been shifted out.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* Common exit: restore saved registers and release the frame. */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 *	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * Implementation: if neither operand is negative, branch directly
 * into the unsigned routine at label 7 (this frame has the same STACK
 * size and the unsigned exit path releases it).  Otherwise save the
 * original operands, the return address and tmp1, call the unsigned
 * routine on the absolute values, and negate its result when
 * GETSIGN() on the original operands is negative.
 */
.align 3
.globl	sfunction
.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	bis	$24,$25,$28		/* sign bit set if either is negative */
	SLONGIFY($28)
	bge	$28,7b			/* both non-negative: unsigned path */
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)		/* the bsr below overwrites $23 */
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction
	ldq	$24,0($30)		/* original operands, for the sign */
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1		/* tmp1 = -result */
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27		/* negate result if sign is negative */
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
EXPORT_SYMBOL(sfunction)