162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
262306a36Sopenharmony_ci/* multi_arith.h: multi-precision integer arithmetic functions, needed
362306a36Sopenharmony_ci   to do extended-precision floating point.
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci   (c) 1998 David Huggins-Daines.
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
862306a36Sopenharmony_ci   David Mosberger-Tang.
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci/* Note:
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci   These are not general multi-precision math routines.  Rather, they
1562306a36Sopenharmony_ci   implement the subset of integer arithmetic that we need in order to
1662306a36Sopenharmony_ci   multiply, divide, and normalize 128-bit unsigned mantissae.  */
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#ifndef MULTI_ARITH_H
1962306a36Sopenharmony_ci#define MULTI_ARITH_H
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_cistatic inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
2262306a36Sopenharmony_ci{
2362306a36Sopenharmony_ci	reg->exp += cnt;
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci	switch (cnt) {
2662306a36Sopenharmony_ci	case 0 ... 8:
2762306a36Sopenharmony_ci		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
2862306a36Sopenharmony_ci		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
2962306a36Sopenharmony_ci				   (reg->mant.m32[0] << (32 - cnt));
3062306a36Sopenharmony_ci		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
3162306a36Sopenharmony_ci		break;
3262306a36Sopenharmony_ci	case 9 ... 32:
3362306a36Sopenharmony_ci		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
3462306a36Sopenharmony_ci		if (reg->mant.m32[1] << (40 - cnt))
3562306a36Sopenharmony_ci			reg->lowmant |= 1;
3662306a36Sopenharmony_ci		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
3762306a36Sopenharmony_ci				   (reg->mant.m32[0] << (32 - cnt));
3862306a36Sopenharmony_ci		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
3962306a36Sopenharmony_ci		break;
4062306a36Sopenharmony_ci	case 33 ... 39:
4162306a36Sopenharmony_ci		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
4262306a36Sopenharmony_ci			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
4362306a36Sopenharmony_ci		if (reg->mant.m32[1] << (40 - cnt))
4462306a36Sopenharmony_ci			reg->lowmant |= 1;
4562306a36Sopenharmony_ci		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
4662306a36Sopenharmony_ci		reg->mant.m32[0] = 0;
4762306a36Sopenharmony_ci		break;
4862306a36Sopenharmony_ci	case 40 ... 71:
4962306a36Sopenharmony_ci		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
5062306a36Sopenharmony_ci		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
5162306a36Sopenharmony_ci			reg->lowmant |= 1;
5262306a36Sopenharmony_ci		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
5362306a36Sopenharmony_ci		reg->mant.m32[0] = 0;
5462306a36Sopenharmony_ci		break;
5562306a36Sopenharmony_ci	default:
5662306a36Sopenharmony_ci		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
5762306a36Sopenharmony_ci		reg->mant.m32[0] = 0;
5862306a36Sopenharmony_ci		reg->mant.m32[1] = 0;
5962306a36Sopenharmony_ci		break;
6062306a36Sopenharmony_ci	}
6162306a36Sopenharmony_ci}
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_cistatic inline int fp_overnormalize(struct fp_ext *reg)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	int shift;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (reg->mant.m32[0]) {
6862306a36Sopenharmony_ci		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
6962306a36Sopenharmony_ci		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
7062306a36Sopenharmony_ci		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
7162306a36Sopenharmony_ci	} else {
7262306a36Sopenharmony_ci		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
7362306a36Sopenharmony_ci		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
7462306a36Sopenharmony_ci		reg->mant.m32[1] = 0;
7562306a36Sopenharmony_ci		shift += 32;
7662306a36Sopenharmony_ci	}
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	return shift;
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	int carry;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	/* we assume here, gcc only insert move and a clr instr */
8662306a36Sopenharmony_ci	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
8762306a36Sopenharmony_ci		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
8862306a36Sopenharmony_ci	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
8962306a36Sopenharmony_ci		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
9062306a36Sopenharmony_ci	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
9162306a36Sopenharmony_ci		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
9262306a36Sopenharmony_ci	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	return carry;
9562306a36Sopenharmony_ci}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic inline int fp_addcarry(struct fp_ext *reg)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	if (++reg->exp == 0x7fff) {
10062306a36Sopenharmony_ci		if (reg->mant.m64)
10162306a36Sopenharmony_ci			fp_set_sr(FPSR_EXC_INEX2);
10262306a36Sopenharmony_ci		reg->mant.m64 = 0;
10362306a36Sopenharmony_ci		fp_set_sr(FPSR_EXC_OVFL);
10462306a36Sopenharmony_ci		return 0;
10562306a36Sopenharmony_ci	}
10662306a36Sopenharmony_ci	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
10762306a36Sopenharmony_ci	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
10862306a36Sopenharmony_ci			   (reg->mant.m32[0] << 31);
10962306a36Sopenharmony_ci	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	return 1;
11262306a36Sopenharmony_ci}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_cistatic inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
11562306a36Sopenharmony_ci			      struct fp_ext *src2)
11662306a36Sopenharmony_ci{
11762306a36Sopenharmony_ci	/* we assume here, gcc only insert move and a clr instr */
11862306a36Sopenharmony_ci	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
11962306a36Sopenharmony_ci		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
12062306a36Sopenharmony_ci	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
12162306a36Sopenharmony_ci		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
12262306a36Sopenharmony_ci	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
12362306a36Sopenharmony_ci		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci#define fp_mul64(desth, destl, src1, src2) ({				\
12762306a36Sopenharmony_ci	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)		\
12862306a36Sopenharmony_ci		: "dm" (src1), "0" (src2));				\
12962306a36Sopenharmony_ci})
13062306a36Sopenharmony_ci#define fp_div64(quot, rem, srch, srcl, div)				\
13162306a36Sopenharmony_ci	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)		\
13262306a36Sopenharmony_ci		: "dm" (div), "1" (srch), "0" (srcl))
13362306a36Sopenharmony_ci#define fp_add64(dest1, dest2, src1, src2) ({				\
13462306a36Sopenharmony_ci	asm ("add.l %1,%0" : "=d,dm" (dest2)				\
13562306a36Sopenharmony_ci		: "dm,d" (src2), "0,0" (dest2));			\
13662306a36Sopenharmony_ci	asm ("addx.l %1,%0" : "=d" (dest1)				\
13762306a36Sopenharmony_ci		: "d" (src1), "0" (dest1));				\
13862306a36Sopenharmony_ci})
13962306a36Sopenharmony_ci#define fp_addx96(dest, src) ({						\
14062306a36Sopenharmony_ci	/* we assume here, gcc only insert move and a clr instr */	\
14162306a36Sopenharmony_ci	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])		\
14262306a36Sopenharmony_ci		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));		\
14362306a36Sopenharmony_ci	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])		\
14462306a36Sopenharmony_ci		: "d" (temp.m32[0]), "0" (dest->m32[1]));		\
14562306a36Sopenharmony_ci	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])		\
14662306a36Sopenharmony_ci		: "d" (0), "0" (dest->m32[0]));				\
14762306a36Sopenharmony_ci})
14862306a36Sopenharmony_ci#define fp_sub64(dest, src) ({						\
14962306a36Sopenharmony_ci	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])			\
15062306a36Sopenharmony_ci		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));		\
15162306a36Sopenharmony_ci	asm ("subx.l %1,%0" : "=d" (dest.m32[0])			\
15262306a36Sopenharmony_ci		: "d" (src.m32[0]), "0" (dest.m32[0]));			\
15362306a36Sopenharmony_ci})
15462306a36Sopenharmony_ci#define fp_sub96c(dest, srch, srcm, srcl) ({				\
15562306a36Sopenharmony_ci	char carry;							\
15662306a36Sopenharmony_ci	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])			\
15762306a36Sopenharmony_ci		: "dm,d" (srcl), "0,0" (dest.m32[2]));			\
15862306a36Sopenharmony_ci	asm ("subx.l %1,%0" : "=d" (dest.m32[1])			\
15962306a36Sopenharmony_ci		: "d" (srcm), "0" (dest.m32[1]));			\
16062306a36Sopenharmony_ci	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])	\
16162306a36Sopenharmony_ci		: "d" (srch), "1" (dest.m32[0]));			\
16262306a36Sopenharmony_ci	carry;								\
16362306a36Sopenharmony_ci})
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_cistatic inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
16662306a36Sopenharmony_ci				   struct fp_ext *src2)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	union fp_mant64 temp;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
17162306a36Sopenharmony_ci	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
17462306a36Sopenharmony_ci	fp_addx96(dest, temp);
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
17762306a36Sopenharmony_ci	fp_addx96(dest, temp);
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
18162306a36Sopenharmony_ci				 struct fp_ext *div)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	union fp_mant128 tmp;
18462306a36Sopenharmony_ci	union fp_mant64 tmp64;
18562306a36Sopenharmony_ci	unsigned long *mantp = dest->m32;
18662306a36Sopenharmony_ci	unsigned long fix, rem, first, dummy;
18762306a36Sopenharmony_ci	int i;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	/* the algorithm below requires dest to be smaller than div,
19062306a36Sopenharmony_ci	   but both have the high bit set */
19162306a36Sopenharmony_ci	if (src->mant.m64 >= div->mant.m64) {
19262306a36Sopenharmony_ci		fp_sub64(src->mant, div->mant);
19362306a36Sopenharmony_ci		*mantp = 1;
19462306a36Sopenharmony_ci	} else
19562306a36Sopenharmony_ci		*mantp = 0;
19662306a36Sopenharmony_ci	mantp++;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	/* basic idea behind this algorithm: we can't divide two 64bit numbers
19962306a36Sopenharmony_ci	   (AB/CD) directly, but we can calculate AB/C0, but this means this
20062306a36Sopenharmony_ci	   quotient is off by C0/CD, so we have to multiply the first result
20162306a36Sopenharmony_ci	   to fix the result, after that we have nearly the correct result
20262306a36Sopenharmony_ci	   and only a few corrections are needed. */
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	/* C0/CD can be precalculated, but it's an 64bit division again, but
20562306a36Sopenharmony_ci	   we can make it a bit easier, by dividing first through C so we get
20662306a36Sopenharmony_ci	   10/1D and now only a single shift and the value fits into 32bit. */
20762306a36Sopenharmony_ci	fix = 0x80000000;
20862306a36Sopenharmony_ci	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
20962306a36Sopenharmony_ci	dummy = (dummy >> 1) | fix;
21062306a36Sopenharmony_ci	fp_div64(fix, dummy, fix, 0, dummy);
21162306a36Sopenharmony_ci	fix--;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	for (i = 0; i < 3; i++, mantp++) {
21462306a36Sopenharmony_ci		if (src->mant.m32[0] == div->mant.m32[0]) {
21562306a36Sopenharmony_ci			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci			fp_mul64(*mantp, dummy, first, fix);
21862306a36Sopenharmony_ci			*mantp += fix;
21962306a36Sopenharmony_ci		} else {
22062306a36Sopenharmony_ci			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci			fp_mul64(*mantp, dummy, first, fix);
22362306a36Sopenharmony_ci		}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
22662306a36Sopenharmony_ci		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
22762306a36Sopenharmony_ci		tmp.m32[2] = 0;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
23062306a36Sopenharmony_ci		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci		src->mant.m32[0] = tmp.m32[1];
23362306a36Sopenharmony_ci		src->mant.m32[1] = tmp.m32[2];
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
23662306a36Sopenharmony_ci			src->mant.m32[0] = tmp.m32[1];
23762306a36Sopenharmony_ci			src->mant.m32[1] = tmp.m32[2];
23862306a36Sopenharmony_ci			*mantp += 1;
23962306a36Sopenharmony_ci		}
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
24462306a36Sopenharmony_ci				 int shift)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	unsigned long tmp;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	switch (shift) {
24962306a36Sopenharmony_ci	case 0:
25062306a36Sopenharmony_ci		dest->mant.m64 = src->m64[0];
25162306a36Sopenharmony_ci		dest->lowmant = src->m32[2] >> 24;
25262306a36Sopenharmony_ci		if (src->m32[3] || (src->m32[2] << 8))
25362306a36Sopenharmony_ci			dest->lowmant |= 1;
25462306a36Sopenharmony_ci		break;
25562306a36Sopenharmony_ci	case 1:
25662306a36Sopenharmony_ci		asm volatile ("lsl.l #1,%0"
25762306a36Sopenharmony_ci			: "=d" (tmp) : "0" (src->m32[2]));
25862306a36Sopenharmony_ci		asm volatile ("roxl.l #1,%0"
25962306a36Sopenharmony_ci			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
26062306a36Sopenharmony_ci		asm volatile ("roxl.l #1,%0"
26162306a36Sopenharmony_ci			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
26262306a36Sopenharmony_ci		dest->lowmant = tmp >> 24;
26362306a36Sopenharmony_ci		if (src->m32[3] || (tmp << 8))
26462306a36Sopenharmony_ci			dest->lowmant |= 1;
26562306a36Sopenharmony_ci		break;
26662306a36Sopenharmony_ci	case 31:
26762306a36Sopenharmony_ci		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
26862306a36Sopenharmony_ci			: "=d" (dest->mant.m32[0])
26962306a36Sopenharmony_ci			: "d" (src->m32[0]), "0" (src->m32[1]));
27062306a36Sopenharmony_ci		asm volatile ("roxr.l #1,%0"
27162306a36Sopenharmony_ci			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
27262306a36Sopenharmony_ci		asm volatile ("roxr.l #1,%0"
27362306a36Sopenharmony_ci			: "=d" (tmp) : "0" (src->m32[3]));
27462306a36Sopenharmony_ci		dest->lowmant = tmp >> 24;
27562306a36Sopenharmony_ci		if (src->m32[3] << 7)
27662306a36Sopenharmony_ci			dest->lowmant |= 1;
27762306a36Sopenharmony_ci		break;
27862306a36Sopenharmony_ci	case 32:
27962306a36Sopenharmony_ci		dest->mant.m32[0] = src->m32[1];
28062306a36Sopenharmony_ci		dest->mant.m32[1] = src->m32[2];
28162306a36Sopenharmony_ci		dest->lowmant = src->m32[3] >> 24;
28262306a36Sopenharmony_ci		if (src->m32[3] << 8)
28362306a36Sopenharmony_ci			dest->lowmant |= 1;
28462306a36Sopenharmony_ci		break;
28562306a36Sopenharmony_ci	}
28662306a36Sopenharmony_ci}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci#endif	/* MULTI_ARITH_H */
289