162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * IEEE754 floating point arithmetic 462306a36Sopenharmony_ci * double precision: MADDF.f (Fused Multiply Add) 562306a36Sopenharmony_ci * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * MIPS floating point support 862306a36Sopenharmony_ci * Copyright (C) 2015 Imagination Technologies, Ltd. 962306a36Sopenharmony_ci * Author: Markos Chandras <markos.chandras@imgtec.com> 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include "ieee754dp.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci/* 128 bits shift right logical with rounding. */ 1662306a36Sopenharmony_cistatic void srl128(u64 *hptr, u64 *lptr, int count) 1762306a36Sopenharmony_ci{ 1862306a36Sopenharmony_ci u64 low; 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci if (count >= 128) { 2162306a36Sopenharmony_ci *lptr = *hptr != 0 || *lptr != 0; 2262306a36Sopenharmony_ci *hptr = 0; 2362306a36Sopenharmony_ci } else if (count >= 64) { 2462306a36Sopenharmony_ci if (count == 64) { 2562306a36Sopenharmony_ci *lptr = *hptr | (*lptr != 0); 2662306a36Sopenharmony_ci } else { 2762306a36Sopenharmony_ci low = *lptr; 2862306a36Sopenharmony_ci *lptr = *hptr >> (count - 64); 2962306a36Sopenharmony_ci *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 3062306a36Sopenharmony_ci } 3162306a36Sopenharmony_ci *hptr = 0; 3262306a36Sopenharmony_ci } else { 3362306a36Sopenharmony_ci low = *lptr; 3462306a36Sopenharmony_ci *lptr = low >> count | *hptr << (64 - count); 3562306a36Sopenharmony_ci *lptr |= (low << (64 - count)) != 0; 3662306a36Sopenharmony_ci *hptr = *hptr >> count; 3762306a36Sopenharmony_ci } 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistatic union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 4162306a36Sopenharmony_ci union ieee754dp y, enum maddf_flags flags) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci int re; 4462306a36Sopenharmony_ci int rs; 4562306a36Sopenharmony_ci unsigned int lxm; 4662306a36Sopenharmony_ci unsigned int hxm; 4762306a36Sopenharmony_ci unsigned int lym; 4862306a36Sopenharmony_ci unsigned int hym; 4962306a36Sopenharmony_ci u64 lrm; 5062306a36Sopenharmony_ci u64 hrm; 5162306a36Sopenharmony_ci u64 lzm; 5262306a36Sopenharmony_ci u64 hzm; 5362306a36Sopenharmony_ci u64 t; 5462306a36Sopenharmony_ci u64 at; 5562306a36Sopenharmony_ci int s; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci COMPXDP; 5862306a36Sopenharmony_ci COMPYDP; 5962306a36Sopenharmony_ci COMPZDP; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci EXPLODEXDP; 6262306a36Sopenharmony_ci EXPLODEYDP; 6362306a36Sopenharmony_ci EXPLODEZDP; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci FLUSHXDP; 6662306a36Sopenharmony_ci FLUSHYDP; 6762306a36Sopenharmony_ci FLUSHZDP; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci ieee754_clearcx(); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci rs = xs ^ ys; 7262306a36Sopenharmony_ci if (flags & MADDF_NEGATE_PRODUCT) 7362306a36Sopenharmony_ci rs ^= 1; 7462306a36Sopenharmony_ci if (flags & MADDF_NEGATE_ADDITION) 7562306a36Sopenharmony_ci zs ^= 1; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci /* 7862306a36Sopenharmony_ci * Handle the cases when at least one of x, y or z is a NaN. 7962306a36Sopenharmony_ci * Order of precedence is sNaN, qNaN and z, x, y. 8062306a36Sopenharmony_ci */ 8162306a36Sopenharmony_ci if (zc == IEEE754_CLASS_SNAN) 8262306a36Sopenharmony_ci return ieee754dp_nanxcpt(z); 8362306a36Sopenharmony_ci if (xc == IEEE754_CLASS_SNAN) 8462306a36Sopenharmony_ci return ieee754dp_nanxcpt(x); 8562306a36Sopenharmony_ci if (yc == IEEE754_CLASS_SNAN) 8662306a36Sopenharmony_ci return ieee754dp_nanxcpt(y); 8762306a36Sopenharmony_ci if (zc == IEEE754_CLASS_QNAN) 8862306a36Sopenharmony_ci return z; 8962306a36Sopenharmony_ci if (xc == IEEE754_CLASS_QNAN) 9062306a36Sopenharmony_ci return x; 9162306a36Sopenharmony_ci if (yc == IEEE754_CLASS_QNAN) 9262306a36Sopenharmony_ci return y; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci if (zc == IEEE754_CLASS_DNORM) 9562306a36Sopenharmony_ci DPDNORMZ; 9662306a36Sopenharmony_ci /* ZERO z cases are handled separately below */ 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci switch (CLPAIR(xc, yc)) { 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci /* 10162306a36Sopenharmony_ci * Infinity handling 10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 10462306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 10562306a36Sopenharmony_ci ieee754_setcx(IEEE754_INVALID_OPERATION); 10662306a36Sopenharmony_ci return ieee754dp_indef(); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 10962306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 11062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 11162306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 11262306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 11362306a36Sopenharmony_ci if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 11462306a36Sopenharmony_ci /* 11562306a36Sopenharmony_ci * Cases of addition of infinities with opposite signs 11662306a36Sopenharmony_ci * or subtraction of infinities with same signs. 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_ci ieee754_setcx(IEEE754_INVALID_OPERATION); 11962306a36Sopenharmony_ci return ieee754dp_indef(); 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci /* 12262306a36Sopenharmony_ci * z is here either not an infinity, or an infinity having the 12362306a36Sopenharmony_ci * same sign as product (x*y). The result must be an infinity, 12462306a36Sopenharmony_ci * and its sign is determined only by the sign of product (x*y). 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_ci return ieee754dp_inf(rs); 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 12962306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 13062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 13162306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 13262306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 13362306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 13462306a36Sopenharmony_ci return ieee754dp_inf(zs); 13562306a36Sopenharmony_ci if (zc == IEEE754_CLASS_ZERO) { 13662306a36Sopenharmony_ci /* Handle cases +0 + (-0) and similar ones. */ 13762306a36Sopenharmony_ci if (zs == rs) 13862306a36Sopenharmony_ci /* 13962306a36Sopenharmony_ci * Cases of addition of zeros of equal signs 14062306a36Sopenharmony_ci * or subtraction of zeroes of opposite signs. 14162306a36Sopenharmony_ci * The sign of the resulting zero is in any 14262306a36Sopenharmony_ci * such case determined only by the sign of z. 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_ci return z; 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci /* x*y is here 0, and z is not 0, so just return z */ 14962306a36Sopenharmony_ci return z; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 15262306a36Sopenharmony_ci DPDNORMX; 15362306a36Sopenharmony_ci fallthrough; 15462306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 15562306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 15662306a36Sopenharmony_ci return ieee754dp_inf(zs); 15762306a36Sopenharmony_ci DPDNORMY; 15862306a36Sopenharmony_ci break; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 16162306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 16262306a36Sopenharmony_ci return ieee754dp_inf(zs); 16362306a36Sopenharmony_ci DPDNORMX; 16462306a36Sopenharmony_ci break; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 16762306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 16862306a36Sopenharmony_ci return ieee754dp_inf(zs); 16962306a36Sopenharmony_ci /* continue to real computations */ 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* Finally get to do some computation */ 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci /* 17562306a36Sopenharmony_ci * Do the multiplication bit first 17662306a36Sopenharmony_ci * 17762306a36Sopenharmony_ci * rm = xm * ym, re = xe + ye basically 17862306a36Sopenharmony_ci * 17962306a36Sopenharmony_ci * At this point xm and ym should have been normalized. 18062306a36Sopenharmony_ci */ 18162306a36Sopenharmony_ci assert(xm & DP_HIDDEN_BIT); 18262306a36Sopenharmony_ci assert(ym & DP_HIDDEN_BIT); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci re = xe + ye; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci /* shunt to top of word */ 18762306a36Sopenharmony_ci xm <<= 64 - (DP_FBITS + 1); 18862306a36Sopenharmony_ci ym <<= 64 - (DP_FBITS + 1); 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci /* 19162306a36Sopenharmony_ci * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 19262306a36Sopenharmony_ci */ 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci lxm = xm; 19562306a36Sopenharmony_ci hxm = xm >> 32; 19662306a36Sopenharmony_ci lym = ym; 19762306a36Sopenharmony_ci hym = ym >> 32; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci lrm = DPXMULT(lxm, lym); 20062306a36Sopenharmony_ci hrm = DPXMULT(hxm, hym); 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci t = DPXMULT(lxm, hym); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci at = lrm + (t << 32); 20562306a36Sopenharmony_ci hrm += at < lrm; 20662306a36Sopenharmony_ci lrm = at; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci hrm = hrm + (t >> 32); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci t = DPXMULT(hxm, lym); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci at = lrm + (t << 32); 21362306a36Sopenharmony_ci hrm += at < lrm; 21462306a36Sopenharmony_ci lrm = at; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci hrm = hrm + (t >> 32); 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci /* Put explicit bit at bit 126 if necessary */ 21962306a36Sopenharmony_ci if ((int64_t)hrm < 0) { 22062306a36Sopenharmony_ci lrm = (hrm << 63) | (lrm >> 1); 22162306a36Sopenharmony_ci hrm = hrm >> 1; 22262306a36Sopenharmony_ci re++; 22362306a36Sopenharmony_ci } 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci assert(hrm & (1 << 62)); 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci if (zc == IEEE754_CLASS_ZERO) { 22862306a36Sopenharmony_ci /* 22962306a36Sopenharmony_ci * Move explicit bit from bit 126 to bit 55 since the 23062306a36Sopenharmony_ci * ieee754dp_format code expects the mantissa to be 23162306a36Sopenharmony_ci * 56 bits wide (53 + 3 rounding bits). 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_ci srl128(&hrm, &lrm, (126 - 55)); 23462306a36Sopenharmony_ci return ieee754dp_format(rs, re, lrm); 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci /* Move explicit bit from bit 52 to bit 126 */ 23862306a36Sopenharmony_ci lzm = 0; 23962306a36Sopenharmony_ci hzm = zm << 10; 24062306a36Sopenharmony_ci assert(hzm & (1 << 62)); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci /* Make the exponents the same */ 24362306a36Sopenharmony_ci if (ze > re) { 24462306a36Sopenharmony_ci /* 24562306a36Sopenharmony_ci * Have to shift y fraction right to align. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_ci s = ze - re; 24862306a36Sopenharmony_ci srl128(&hrm, &lrm, s); 24962306a36Sopenharmony_ci re += s; 25062306a36Sopenharmony_ci } else if (re > ze) { 25162306a36Sopenharmony_ci /* 25262306a36Sopenharmony_ci * Have to shift x fraction right to align. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_ci s = re - ze; 25562306a36Sopenharmony_ci srl128(&hzm, &lzm, s); 25662306a36Sopenharmony_ci ze += s; 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci assert(ze == re); 25962306a36Sopenharmony_ci assert(ze <= DP_EMAX); 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci /* Do the addition */ 26262306a36Sopenharmony_ci if (zs == rs) { 26362306a36Sopenharmony_ci /* 26462306a36Sopenharmony_ci * Generate 128 bit result by adding two 127 bit numbers 26562306a36Sopenharmony_ci * leaving result in hzm:lzm, zs and ze. 26662306a36Sopenharmony_ci */ 26762306a36Sopenharmony_ci hzm = hzm + hrm + (lzm > (lzm + lrm)); 26862306a36Sopenharmony_ci lzm = lzm + lrm; 26962306a36Sopenharmony_ci if ((int64_t)hzm < 0) { /* carry out */ 27062306a36Sopenharmony_ci srl128(&hzm, &lzm, 1); 27162306a36Sopenharmony_ci ze++; 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci } else { 27462306a36Sopenharmony_ci if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 27562306a36Sopenharmony_ci hzm = hzm - hrm - (lzm < lrm); 27662306a36Sopenharmony_ci lzm = lzm - lrm; 27762306a36Sopenharmony_ci } else { 27862306a36Sopenharmony_ci hzm = hrm - hzm - (lrm < lzm); 27962306a36Sopenharmony_ci lzm = lrm - lzm; 28062306a36Sopenharmony_ci zs = rs; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci if (lzm == 0 && hzm == 0) 28362306a36Sopenharmony_ci return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci /* 28662306a36Sopenharmony_ci * Put explicit bit at bit 126 if necessary. 28762306a36Sopenharmony_ci */ 28862306a36Sopenharmony_ci if (hzm == 0) { 28962306a36Sopenharmony_ci /* left shift by 63 or 64 bits */ 29062306a36Sopenharmony_ci if ((int64_t)lzm < 0) { 29162306a36Sopenharmony_ci /* MSB of lzm is the explicit bit */ 29262306a36Sopenharmony_ci hzm = lzm >> 1; 29362306a36Sopenharmony_ci lzm = lzm << 63; 29462306a36Sopenharmony_ci ze -= 63; 29562306a36Sopenharmony_ci } else { 29662306a36Sopenharmony_ci hzm = lzm; 29762306a36Sopenharmony_ci lzm = 0; 29862306a36Sopenharmony_ci ze -= 64; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci t = 0; 30362306a36Sopenharmony_ci while ((hzm >> (62 - t)) == 0) 30462306a36Sopenharmony_ci t++; 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci assert(t <= 62); 30762306a36Sopenharmony_ci if (t) { 30862306a36Sopenharmony_ci hzm = hzm << t | lzm >> (64 - t); 30962306a36Sopenharmony_ci lzm = lzm << t; 31062306a36Sopenharmony_ci ze -= t; 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* 31562306a36Sopenharmony_ci * Move explicit bit from bit 126 to bit 55 since the 31662306a36Sopenharmony_ci * ieee754dp_format code expects the mantissa to be 31762306a36Sopenharmony_ci * 56 bits wide (53 + 3 rounding bits). 31862306a36Sopenharmony_ci */ 31962306a36Sopenharmony_ci srl128(&hzm, &lzm, (126 - 55)); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci return ieee754dp_format(zs, ze, lzm); 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ciunion ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 32562306a36Sopenharmony_ci union ieee754dp y) 32662306a36Sopenharmony_ci{ 32762306a36Sopenharmony_ci return _dp_maddf(z, x, y, 0); 32862306a36Sopenharmony_ci} 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ciunion ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 33162306a36Sopenharmony_ci union ieee754dp y) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 33462306a36Sopenharmony_ci} 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ciunion ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, 33762306a36Sopenharmony_ci union ieee754dp y) 33862306a36Sopenharmony_ci{ 33962306a36Sopenharmony_ci return _dp_maddf(z, x, y, 0); 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ciunion ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, 34362306a36Sopenharmony_ci union ieee754dp y) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 34662306a36Sopenharmony_ci} 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ciunion ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, 34962306a36Sopenharmony_ci union ieee754dp y) 35062306a36Sopenharmony_ci{ 35162306a36Sopenharmony_ci return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ciunion ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, 35562306a36Sopenharmony_ci union ieee754dp y) 35662306a36Sopenharmony_ci{ 35762306a36Sopenharmony_ci return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 35862306a36Sopenharmony_ci} 359