162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * IEEE754 floating point arithmetic
462306a36Sopenharmony_ci * double precision: MADDF.f (Fused Multiply Add)
562306a36Sopenharmony_ci * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * MIPS floating point support
862306a36Sopenharmony_ci * Copyright (C) 2015 Imagination Technologies, Ltd.
962306a36Sopenharmony_ci * Author: Markos Chandras <markos.chandras@imgtec.com>
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include "ieee754dp.h"
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci/* 128 bits shift right logical with rounding. */
1662306a36Sopenharmony_cistatic void srl128(u64 *hptr, u64 *lptr, int count)
1762306a36Sopenharmony_ci{
1862306a36Sopenharmony_ci	u64 low;
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci	if (count >= 128) {
2162306a36Sopenharmony_ci		*lptr = *hptr != 0 || *lptr != 0;
2262306a36Sopenharmony_ci		*hptr = 0;
2362306a36Sopenharmony_ci	} else if (count >= 64) {
2462306a36Sopenharmony_ci		if (count == 64) {
2562306a36Sopenharmony_ci			*lptr = *hptr | (*lptr != 0);
2662306a36Sopenharmony_ci		} else {
2762306a36Sopenharmony_ci			low = *lptr;
2862306a36Sopenharmony_ci			*lptr = *hptr >> (count - 64);
2962306a36Sopenharmony_ci			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
3062306a36Sopenharmony_ci		}
3162306a36Sopenharmony_ci		*hptr = 0;
3262306a36Sopenharmony_ci	} else {
3362306a36Sopenharmony_ci		low = *lptr;
3462306a36Sopenharmony_ci		*lptr = low >> count | *hptr << (64 - count);
3562306a36Sopenharmony_ci		*lptr |= (low << (64 - count)) != 0;
3662306a36Sopenharmony_ci		*hptr = *hptr >> count;
3762306a36Sopenharmony_ci	}
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistatic union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
4162306a36Sopenharmony_ci				 union ieee754dp y, enum maddf_flags flags)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	int re;
4462306a36Sopenharmony_ci	int rs;
4562306a36Sopenharmony_ci	unsigned int lxm;
4662306a36Sopenharmony_ci	unsigned int hxm;
4762306a36Sopenharmony_ci	unsigned int lym;
4862306a36Sopenharmony_ci	unsigned int hym;
4962306a36Sopenharmony_ci	u64 lrm;
5062306a36Sopenharmony_ci	u64 hrm;
5162306a36Sopenharmony_ci	u64 lzm;
5262306a36Sopenharmony_ci	u64 hzm;
5362306a36Sopenharmony_ci	u64 t;
5462306a36Sopenharmony_ci	u64 at;
5562306a36Sopenharmony_ci	int s;
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	COMPXDP;
5862306a36Sopenharmony_ci	COMPYDP;
5962306a36Sopenharmony_ci	COMPZDP;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	EXPLODEXDP;
6262306a36Sopenharmony_ci	EXPLODEYDP;
6362306a36Sopenharmony_ci	EXPLODEZDP;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	FLUSHXDP;
6662306a36Sopenharmony_ci	FLUSHYDP;
6762306a36Sopenharmony_ci	FLUSHZDP;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	ieee754_clearcx();
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	rs = xs ^ ys;
7262306a36Sopenharmony_ci	if (flags & MADDF_NEGATE_PRODUCT)
7362306a36Sopenharmony_ci		rs ^= 1;
7462306a36Sopenharmony_ci	if (flags & MADDF_NEGATE_ADDITION)
7562306a36Sopenharmony_ci		zs ^= 1;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	/*
7862306a36Sopenharmony_ci	 * Handle the cases when at least one of x, y or z is a NaN.
7962306a36Sopenharmony_ci	 * Order of precedence is sNaN, qNaN and z, x, y.
8062306a36Sopenharmony_ci	 */
8162306a36Sopenharmony_ci	if (zc == IEEE754_CLASS_SNAN)
8262306a36Sopenharmony_ci		return ieee754dp_nanxcpt(z);
8362306a36Sopenharmony_ci	if (xc == IEEE754_CLASS_SNAN)
8462306a36Sopenharmony_ci		return ieee754dp_nanxcpt(x);
8562306a36Sopenharmony_ci	if (yc == IEEE754_CLASS_SNAN)
8662306a36Sopenharmony_ci		return ieee754dp_nanxcpt(y);
8762306a36Sopenharmony_ci	if (zc == IEEE754_CLASS_QNAN)
8862306a36Sopenharmony_ci		return z;
8962306a36Sopenharmony_ci	if (xc == IEEE754_CLASS_QNAN)
9062306a36Sopenharmony_ci		return x;
9162306a36Sopenharmony_ci	if (yc == IEEE754_CLASS_QNAN)
9262306a36Sopenharmony_ci		return y;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	if (zc == IEEE754_CLASS_DNORM)
9562306a36Sopenharmony_ci		DPDNORMZ;
9662306a36Sopenharmony_ci	/* ZERO z cases are handled separately below */
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	switch (CLPAIR(xc, yc)) {
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	/*
10162306a36Sopenharmony_ci	 * Infinity handling
10262306a36Sopenharmony_ci	 */
10362306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
10462306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
10562306a36Sopenharmony_ci		ieee754_setcx(IEEE754_INVALID_OPERATION);
10662306a36Sopenharmony_ci		return ieee754dp_indef();
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
10962306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
11062306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
11162306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
11262306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
11362306a36Sopenharmony_ci		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
11462306a36Sopenharmony_ci			/*
11562306a36Sopenharmony_ci			 * Cases of addition of infinities with opposite signs
11662306a36Sopenharmony_ci			 * or subtraction of infinities with same signs.
11762306a36Sopenharmony_ci			 */
11862306a36Sopenharmony_ci			ieee754_setcx(IEEE754_INVALID_OPERATION);
11962306a36Sopenharmony_ci			return ieee754dp_indef();
12062306a36Sopenharmony_ci		}
12162306a36Sopenharmony_ci		/*
12262306a36Sopenharmony_ci		 * z is here either not an infinity, or an infinity having the
12362306a36Sopenharmony_ci		 * same sign as product (x*y). The result must be an infinity,
12462306a36Sopenharmony_ci		 * and its sign is determined only by the sign of product (x*y).
12562306a36Sopenharmony_ci		 */
12662306a36Sopenharmony_ci		return ieee754dp_inf(rs);
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
12962306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
13062306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
13162306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
13262306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
13362306a36Sopenharmony_ci		if (zc == IEEE754_CLASS_INF)
13462306a36Sopenharmony_ci			return ieee754dp_inf(zs);
13562306a36Sopenharmony_ci		if (zc == IEEE754_CLASS_ZERO) {
13662306a36Sopenharmony_ci			/* Handle cases +0 + (-0) and similar ones. */
13762306a36Sopenharmony_ci			if (zs == rs)
13862306a36Sopenharmony_ci				/*
13962306a36Sopenharmony_ci				 * Cases of addition of zeros of equal signs
14062306a36Sopenharmony_ci				 * or subtraction of zeroes of opposite signs.
14162306a36Sopenharmony_ci				 * The sign of the resulting zero is in any
14262306a36Sopenharmony_ci				 * such case determined only by the sign of z.
14362306a36Sopenharmony_ci				 */
14462306a36Sopenharmony_ci				return z;
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
14762306a36Sopenharmony_ci		}
14862306a36Sopenharmony_ci		/* x*y is here 0, and z is not 0, so just return z */
14962306a36Sopenharmony_ci		return z;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
15262306a36Sopenharmony_ci		DPDNORMX;
15362306a36Sopenharmony_ci		fallthrough;
15462306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
15562306a36Sopenharmony_ci		if (zc == IEEE754_CLASS_INF)
15662306a36Sopenharmony_ci			return ieee754dp_inf(zs);
15762306a36Sopenharmony_ci		DPDNORMY;
15862306a36Sopenharmony_ci		break;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
16162306a36Sopenharmony_ci		if (zc == IEEE754_CLASS_INF)
16262306a36Sopenharmony_ci			return ieee754dp_inf(zs);
16362306a36Sopenharmony_ci		DPDNORMX;
16462306a36Sopenharmony_ci		break;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
16762306a36Sopenharmony_ci		if (zc == IEEE754_CLASS_INF)
16862306a36Sopenharmony_ci			return ieee754dp_inf(zs);
16962306a36Sopenharmony_ci		/* continue to real computations */
17062306a36Sopenharmony_ci	}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	/* Finally get to do some computation */
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	/*
17562306a36Sopenharmony_ci	 * Do the multiplication bit first
17662306a36Sopenharmony_ci	 *
17762306a36Sopenharmony_ci	 * rm = xm * ym, re = xe + ye basically
17862306a36Sopenharmony_ci	 *
17962306a36Sopenharmony_ci	 * At this point xm and ym should have been normalized.
18062306a36Sopenharmony_ci	 */
18162306a36Sopenharmony_ci	assert(xm & DP_HIDDEN_BIT);
18262306a36Sopenharmony_ci	assert(ym & DP_HIDDEN_BIT);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	re = xe + ye;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	/* shunt to top of word */
18762306a36Sopenharmony_ci	xm <<= 64 - (DP_FBITS + 1);
18862306a36Sopenharmony_ci	ym <<= 64 - (DP_FBITS + 1);
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	/*
19162306a36Sopenharmony_ci	 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
19262306a36Sopenharmony_ci	 */
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	lxm = xm;
19562306a36Sopenharmony_ci	hxm = xm >> 32;
19662306a36Sopenharmony_ci	lym = ym;
19762306a36Sopenharmony_ci	hym = ym >> 32;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	lrm = DPXMULT(lxm, lym);
20062306a36Sopenharmony_ci	hrm = DPXMULT(hxm, hym);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	t = DPXMULT(lxm, hym);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	at = lrm + (t << 32);
20562306a36Sopenharmony_ci	hrm += at < lrm;
20662306a36Sopenharmony_ci	lrm = at;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	hrm = hrm + (t >> 32);
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	t = DPXMULT(hxm, lym);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	at = lrm + (t << 32);
21362306a36Sopenharmony_ci	hrm += at < lrm;
21462306a36Sopenharmony_ci	lrm = at;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	hrm = hrm + (t >> 32);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	/* Put explicit bit at bit 126 if necessary */
21962306a36Sopenharmony_ci	if ((int64_t)hrm < 0) {
22062306a36Sopenharmony_ci		lrm = (hrm << 63) | (lrm >> 1);
22162306a36Sopenharmony_ci		hrm = hrm >> 1;
22262306a36Sopenharmony_ci		re++;
22362306a36Sopenharmony_ci	}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	assert(hrm & (1 << 62));
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	if (zc == IEEE754_CLASS_ZERO) {
22862306a36Sopenharmony_ci		/*
22962306a36Sopenharmony_ci		 * Move explicit bit from bit 126 to bit 55 since the
23062306a36Sopenharmony_ci		 * ieee754dp_format code expects the mantissa to be
23162306a36Sopenharmony_ci		 * 56 bits wide (53 + 3 rounding bits).
23262306a36Sopenharmony_ci		 */
23362306a36Sopenharmony_ci		srl128(&hrm, &lrm, (126 - 55));
23462306a36Sopenharmony_ci		return ieee754dp_format(rs, re, lrm);
23562306a36Sopenharmony_ci	}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	/* Move explicit bit from bit 52 to bit 126 */
23862306a36Sopenharmony_ci	lzm = 0;
23962306a36Sopenharmony_ci	hzm = zm << 10;
24062306a36Sopenharmony_ci	assert(hzm & (1 << 62));
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	/* Make the exponents the same */
24362306a36Sopenharmony_ci	if (ze > re) {
24462306a36Sopenharmony_ci		/*
24562306a36Sopenharmony_ci		 * Have to shift y fraction right to align.
24662306a36Sopenharmony_ci		 */
24762306a36Sopenharmony_ci		s = ze - re;
24862306a36Sopenharmony_ci		srl128(&hrm, &lrm, s);
24962306a36Sopenharmony_ci		re += s;
25062306a36Sopenharmony_ci	} else if (re > ze) {
25162306a36Sopenharmony_ci		/*
25262306a36Sopenharmony_ci		 * Have to shift x fraction right to align.
25362306a36Sopenharmony_ci		 */
25462306a36Sopenharmony_ci		s = re - ze;
25562306a36Sopenharmony_ci		srl128(&hzm, &lzm, s);
25662306a36Sopenharmony_ci		ze += s;
25762306a36Sopenharmony_ci	}
25862306a36Sopenharmony_ci	assert(ze == re);
25962306a36Sopenharmony_ci	assert(ze <= DP_EMAX);
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	/* Do the addition */
26262306a36Sopenharmony_ci	if (zs == rs) {
26362306a36Sopenharmony_ci		/*
26462306a36Sopenharmony_ci		 * Generate 128 bit result by adding two 127 bit numbers
26562306a36Sopenharmony_ci		 * leaving result in hzm:lzm, zs and ze.
26662306a36Sopenharmony_ci		 */
26762306a36Sopenharmony_ci		hzm = hzm + hrm + (lzm > (lzm + lrm));
26862306a36Sopenharmony_ci		lzm = lzm + lrm;
26962306a36Sopenharmony_ci		if ((int64_t)hzm < 0) {        /* carry out */
27062306a36Sopenharmony_ci			srl128(&hzm, &lzm, 1);
27162306a36Sopenharmony_ci			ze++;
27262306a36Sopenharmony_ci		}
27362306a36Sopenharmony_ci	} else {
27462306a36Sopenharmony_ci		if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
27562306a36Sopenharmony_ci			hzm = hzm - hrm - (lzm < lrm);
27662306a36Sopenharmony_ci			lzm = lzm - lrm;
27762306a36Sopenharmony_ci		} else {
27862306a36Sopenharmony_ci			hzm = hrm - hzm - (lrm < lzm);
27962306a36Sopenharmony_ci			lzm = lrm - lzm;
28062306a36Sopenharmony_ci			zs = rs;
28162306a36Sopenharmony_ci		}
28262306a36Sopenharmony_ci		if (lzm == 0 && hzm == 0)
28362306a36Sopenharmony_ci			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci		/*
28662306a36Sopenharmony_ci		 * Put explicit bit at bit 126 if necessary.
28762306a36Sopenharmony_ci		 */
28862306a36Sopenharmony_ci		if (hzm == 0) {
28962306a36Sopenharmony_ci			/* left shift by 63 or 64 bits */
29062306a36Sopenharmony_ci			if ((int64_t)lzm < 0) {
29162306a36Sopenharmony_ci				/* MSB of lzm is the explicit bit */
29262306a36Sopenharmony_ci				hzm = lzm >> 1;
29362306a36Sopenharmony_ci				lzm = lzm << 63;
29462306a36Sopenharmony_ci				ze -= 63;
29562306a36Sopenharmony_ci			} else {
29662306a36Sopenharmony_ci				hzm = lzm;
29762306a36Sopenharmony_ci				lzm = 0;
29862306a36Sopenharmony_ci				ze -= 64;
29962306a36Sopenharmony_ci			}
30062306a36Sopenharmony_ci		}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci		t = 0;
30362306a36Sopenharmony_ci		while ((hzm >> (62 - t)) == 0)
30462306a36Sopenharmony_ci			t++;
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci		assert(t <= 62);
30762306a36Sopenharmony_ci		if (t) {
30862306a36Sopenharmony_ci			hzm = hzm << t | lzm >> (64 - t);
30962306a36Sopenharmony_ci			lzm = lzm << t;
31062306a36Sopenharmony_ci			ze -= t;
31162306a36Sopenharmony_ci		}
31262306a36Sopenharmony_ci	}
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	/*
31562306a36Sopenharmony_ci	 * Move explicit bit from bit 126 to bit 55 since the
31662306a36Sopenharmony_ci	 * ieee754dp_format code expects the mantissa to be
31762306a36Sopenharmony_ci	 * 56 bits wide (53 + 3 rounding bits).
31862306a36Sopenharmony_ci	 */
31962306a36Sopenharmony_ci	srl128(&hzm, &lzm, (126 - 55));
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return ieee754dp_format(zs, ze, lzm);
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ciunion ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
32562306a36Sopenharmony_ci				union ieee754dp y)
32662306a36Sopenharmony_ci{
32762306a36Sopenharmony_ci	return _dp_maddf(z, x, y, 0);
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ciunion ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
33162306a36Sopenharmony_ci				union ieee754dp y)
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
33462306a36Sopenharmony_ci}
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ciunion ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
33762306a36Sopenharmony_ci				union ieee754dp y)
33862306a36Sopenharmony_ci{
33962306a36Sopenharmony_ci	return _dp_maddf(z, x, y, 0);
34062306a36Sopenharmony_ci}
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ciunion ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
34362306a36Sopenharmony_ci				union ieee754dp y)
34462306a36Sopenharmony_ci{
34562306a36Sopenharmony_ci	return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
34662306a36Sopenharmony_ci}
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ciunion ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
34962306a36Sopenharmony_ci				union ieee754dp y)
35062306a36Sopenharmony_ci{
35162306a36Sopenharmony_ci	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
35262306a36Sopenharmony_ci}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ciunion ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
35562306a36Sopenharmony_ci				union ieee754dp y)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
35862306a36Sopenharmony_ci}
359