/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * This one source produces each of them: building with DIV defined
 * gives the __div* variant (otherwise __rem*), and building with
 * INTSIZE defined gives the 32-bit variant (otherwise 64-bit).
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25, and return the result in $27. Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit
 *	$1 - shifted divisor
 *	$2 - modulus/quotient
 *	$3 - tmp1 (candidate modulus; negated result in the signed wrapper)
 *	$4 - tmp2 (candidate quotient, division only)
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient/modulus
 *	$28 - compare status
 */

#include <linux/export.h>
#define halt .long 0

/*
 * Select function type and registers
 */
#define mask $0
#define divisor $1
#define compare $28
#define tmp1 $3
#define tmp2 $4

/*
 * The value being returned always lives in $27 and the other one in $2:
 * for division the quotient is the result, for remainder the modulus is.
 * DIV_ONLY()/MOD_ONLY() emit their argument only in the matching build.
 * GETSIGN(x) leaves in x a value whose sign is the sign the signed
 * result must have: sign(dividend) ^ sign(divisor) for division, the
 * sign of the dividend for remainder.
 */
#ifdef DIV
#define DIV_ONLY(x,y...) x,##y
#define MOD_ONLY(x,y...)
#define func(x) __div##x
#define modulus $2
#define quotient $27
#define GETSIGN(x) xor $24,$25,x
#define STACK 48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...) x,##y
#define func(x) __rem##x
#define modulus $27
#define quotient $2
#define GETSIGN(x) bis $24,$24,x
#define STACK 32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit:
 * LONGIFY zero-extends (zapnot keeps only the low four bytes),
 * SLONGIFY sign-extends (addl yields a sign-extended 32-bit sum).
 * Both expand to nothing in the 64-bit build.
 */
#ifdef INTSIZE
#define ufunction func(lu)
#define sfunction func(l)
#define LONGIFY(x) zapnot x,15,x
#define SLONGIFY(x) addl x,0,x
#else
#define ufunction func(qu)
#define sfunction func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * Unsigned divide/remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address.
 * Out:  $27 = quotient (DIV build) or remainder (otherwise).
 * $28 is clobbered; $0-$4 are saved and restored on the stack.
 *
 * Frame layout (STACK bytes at $30):
 *	 0: $1    8: $2    16: $0    24: tmp1    32: tmp2 (DIV build only)
 *
 * A zero divisor skips the loops entirely: the DIV build returns 0 and
 * the remainder build returns the (LONGIFY'd) dividend.
 */
.set noat
.align	3
.globl	ufunction
.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/*
	 * Label 7 is also entered from sfunction (which has already
	 * allocated an identical frame) when neither operand is negative.
	 * Register saves are interleaved with the setup of the working
	 * registers.
	 */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus
	stq	$0,16($30)
	bis	$31,$31,quotient
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * 64-bit: shift divisor and mask left one bit at a time while
	 * the divisor is still below the modulus, bailing out as soon
	 * as the divisor's top bit is set so it cannot be shifted out.
	 */
1:	cmpult	divisor,modulus,compare
	blt	divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/*
	 * ok, start to go right again..
	 *
	 * Each pass tests one quotient bit: if divisor <= modulus,
	 * subtract it and (DIV build) set the current mask bit in the
	 * quotient.  The candidates are computed unconditionally into
	 * tmp1/tmp2 and committed with cmovne, so the loop body has no
	 * branches.  tmp2 is formed before mask is shifted, so it uses
	 * the bit for this pass; the loop ends once mask shifts to zero.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* restore the saved registers, drop the frame and return */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 * 	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address.
 * Out:  $27 = result; $28 is clobbered.
 *
 * If neither operand is negative this simply continues in ufunction
 * at label 7, reusing the frame allocated here.  Otherwise it saves
 * the original operands, the return address and tmp1, calls ufunction
 * on the absolute values, and negates the result when GETSIGN() of the
 * original operands is negative.
 *
 * Frame layout on the slow path (STACK bytes at $30):
 *	 0: $24    8: $25    16: $23    24: tmp1
 */
.align	3
.globl	sfunction
.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	bis	$24,$25,$28		/* sign bit set if either operand is negative */
	SLONGIFY($28)
	bge	$28,7b			/* both non-negative: plain unsigned op */
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction
	ldq	$24,0($30)		/* reload the original signed operands */
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1		/* tmp1 = -result */
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27		/* negate the result if the sign says so */
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
EXPORT_SYMBOL(sfunction)