162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * IEEE754 floating point arithmetic 462306a36Sopenharmony_ci * single precision: MADDF.f (Fused Multiply Add) 562306a36Sopenharmony_ci * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * MIPS floating point support 862306a36Sopenharmony_ci * Copyright (C) 2015 Imagination Technologies, Ltd. 962306a36Sopenharmony_ci * Author: Markos Chandras <markos.chandras@imgtec.com> 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include "ieee754sp.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_cistatic union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, 1662306a36Sopenharmony_ci union ieee754sp y, enum maddf_flags flags) 1762306a36Sopenharmony_ci{ 1862306a36Sopenharmony_ci int re; 1962306a36Sopenharmony_ci int rs; 2062306a36Sopenharmony_ci unsigned int rm; 2162306a36Sopenharmony_ci u64 rm64; 2262306a36Sopenharmony_ci u64 zm64; 2362306a36Sopenharmony_ci int s; 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci COMPXSP; 2662306a36Sopenharmony_ci COMPYSP; 2762306a36Sopenharmony_ci COMPZSP; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci EXPLODEXSP; 3062306a36Sopenharmony_ci EXPLODEYSP; 3162306a36Sopenharmony_ci EXPLODEZSP; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci FLUSHXSP; 3462306a36Sopenharmony_ci FLUSHYSP; 3562306a36Sopenharmony_ci FLUSHZSP; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci ieee754_clearcx(); 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci rs = xs ^ ys; 4062306a36Sopenharmony_ci if (flags & MADDF_NEGATE_PRODUCT) 4162306a36Sopenharmony_ci rs ^= 1; 4262306a36Sopenharmony_ci if (flags & MADDF_NEGATE_ADDITION) 4362306a36Sopenharmony_ci zs ^= 1; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci /* 4662306a36Sopenharmony_ci * Handle the cases when at least one of x, y or z is a NaN. 4762306a36Sopenharmony_ci * Order of precedence is sNaN, qNaN and z, x, y. 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_ci if (zc == IEEE754_CLASS_SNAN) 5062306a36Sopenharmony_ci return ieee754sp_nanxcpt(z); 5162306a36Sopenharmony_ci if (xc == IEEE754_CLASS_SNAN) 5262306a36Sopenharmony_ci return ieee754sp_nanxcpt(x); 5362306a36Sopenharmony_ci if (yc == IEEE754_CLASS_SNAN) 5462306a36Sopenharmony_ci return ieee754sp_nanxcpt(y); 5562306a36Sopenharmony_ci if (zc == IEEE754_CLASS_QNAN) 5662306a36Sopenharmony_ci return z; 5762306a36Sopenharmony_ci if (xc == IEEE754_CLASS_QNAN) 5862306a36Sopenharmony_ci return x; 5962306a36Sopenharmony_ci if (yc == IEEE754_CLASS_QNAN) 6062306a36Sopenharmony_ci return y; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (zc == IEEE754_CLASS_DNORM) 6362306a36Sopenharmony_ci SPDNORMZ; 6462306a36Sopenharmony_ci /* ZERO z cases are handled separately below */ 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci switch (CLPAIR(xc, yc)) { 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci /* 7062306a36Sopenharmony_ci * Infinity handling 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 7362306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 7462306a36Sopenharmony_ci ieee754_setcx(IEEE754_INVALID_OPERATION); 7562306a36Sopenharmony_ci return ieee754sp_indef(); 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 7862306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 7962306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 8062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 8162306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 8262306a36Sopenharmony_ci if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 8362306a36Sopenharmony_ci /* 8462306a36Sopenharmony_ci * Cases of addition of infinities with opposite signs 8562306a36Sopenharmony_ci * or subtraction of infinities with same signs. 8662306a36Sopenharmony_ci */ 8762306a36Sopenharmony_ci ieee754_setcx(IEEE754_INVALID_OPERATION); 8862306a36Sopenharmony_ci return ieee754sp_indef(); 8962306a36Sopenharmony_ci } 9062306a36Sopenharmony_ci /* 9162306a36Sopenharmony_ci * z is here either not an infinity, or an infinity having the 9262306a36Sopenharmony_ci * same sign as product (x*y). The result must be an infinity, 9362306a36Sopenharmony_ci * and its sign is determined only by the sign of product (x*y). 9462306a36Sopenharmony_ci */ 9562306a36Sopenharmony_ci return ieee754sp_inf(rs); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 9862306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 9962306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 10062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 10162306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 10262306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 10362306a36Sopenharmony_ci return ieee754sp_inf(zs); 10462306a36Sopenharmony_ci if (zc == IEEE754_CLASS_ZERO) { 10562306a36Sopenharmony_ci /* Handle cases +0 + (-0) and similar ones. */ 10662306a36Sopenharmony_ci if (zs == rs) 10762306a36Sopenharmony_ci /* 10862306a36Sopenharmony_ci * Cases of addition of zeros of equal signs 10962306a36Sopenharmony_ci * or subtraction of zeroes of opposite signs. 11062306a36Sopenharmony_ci * The sign of the resulting zero is in any 11162306a36Sopenharmony_ci * such case determined only by the sign of z. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_ci return z; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci /* x*y is here 0, and z is not 0, so just return z */ 11862306a36Sopenharmony_ci return z; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 12162306a36Sopenharmony_ci SPDNORMX; 12262306a36Sopenharmony_ci fallthrough; 12362306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 12462306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 12562306a36Sopenharmony_ci return ieee754sp_inf(zs); 12662306a36Sopenharmony_ci SPDNORMY; 12762306a36Sopenharmony_ci break; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 13062306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 13162306a36Sopenharmony_ci return ieee754sp_inf(zs); 13262306a36Sopenharmony_ci SPDNORMX; 13362306a36Sopenharmony_ci break; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 13662306a36Sopenharmony_ci if (zc == IEEE754_CLASS_INF) 13762306a36Sopenharmony_ci return ieee754sp_inf(zs); 13862306a36Sopenharmony_ci /* continue to real computations */ 13962306a36Sopenharmony_ci } 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci /* Finally get to do some computation */ 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci /* 14462306a36Sopenharmony_ci * Do the multiplication bit first 14562306a36Sopenharmony_ci * 14662306a36Sopenharmony_ci * rm = xm * ym, re = xe + ye basically 14762306a36Sopenharmony_ci * 14862306a36Sopenharmony_ci * At this point xm and ym should have been normalized. 14962306a36Sopenharmony_ci */ 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* rm = xm * ym, re = xe+ye basically */ 15262306a36Sopenharmony_ci assert(xm & SP_HIDDEN_BIT); 15362306a36Sopenharmony_ci assert(ym & SP_HIDDEN_BIT); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci re = xe + ye; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* Multiple 24 bit xm and ym to give 48 bit results */ 15862306a36Sopenharmony_ci rm64 = (uint64_t)xm * ym; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci /* Shunt to top of word */ 16162306a36Sopenharmony_ci rm64 = rm64 << 16; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci /* Put explicit bit at bit 62 if necessary */ 16462306a36Sopenharmony_ci if ((int64_t) rm64 < 0) { 16562306a36Sopenharmony_ci rm64 = rm64 >> 1; 16662306a36Sopenharmony_ci re++; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci assert(rm64 & (1 << 62)); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (zc == IEEE754_CLASS_ZERO) { 17262306a36Sopenharmony_ci /* 17362306a36Sopenharmony_ci * Move explicit bit from bit 62 to bit 26 since the 17462306a36Sopenharmony_ci * ieee754sp_format code expects the mantissa to be 17562306a36Sopenharmony_ci * 27 bits wide (24 + 3 rounding bits). 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci rm = XSPSRS64(rm64, (62 - 26)); 17862306a36Sopenharmony_ci return ieee754sp_format(rs, re, rm); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci /* Move explicit bit from bit 23 to bit 62 */ 18262306a36Sopenharmony_ci zm64 = (uint64_t)zm << (62 - 23); 18362306a36Sopenharmony_ci assert(zm64 & (1 << 62)); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* Make the exponents the same */ 18662306a36Sopenharmony_ci if (ze > re) { 18762306a36Sopenharmony_ci /* 18862306a36Sopenharmony_ci * Have to shift r fraction right to align. 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci s = ze - re; 19162306a36Sopenharmony_ci rm64 = XSPSRS64(rm64, s); 19262306a36Sopenharmony_ci re += s; 19362306a36Sopenharmony_ci } else if (re > ze) { 19462306a36Sopenharmony_ci /* 19562306a36Sopenharmony_ci * Have to shift z fraction right to align. 19662306a36Sopenharmony_ci */ 19762306a36Sopenharmony_ci s = re - ze; 19862306a36Sopenharmony_ci zm64 = XSPSRS64(zm64, s); 19962306a36Sopenharmony_ci ze += s; 20062306a36Sopenharmony_ci } 20162306a36Sopenharmony_ci assert(ze == re); 20262306a36Sopenharmony_ci assert(ze <= SP_EMAX); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* Do the addition */ 20562306a36Sopenharmony_ci if (zs == rs) { 20662306a36Sopenharmony_ci /* 20762306a36Sopenharmony_ci * Generate 64 bit result by adding two 63 bit numbers 20862306a36Sopenharmony_ci * leaving result in zm64, zs and ze. 20962306a36Sopenharmony_ci */ 21062306a36Sopenharmony_ci zm64 = zm64 + rm64; 21162306a36Sopenharmony_ci if ((int64_t)zm64 < 0) { /* carry out */ 21262306a36Sopenharmony_ci zm64 = XSPSRS1(zm64); 21362306a36Sopenharmony_ci ze++; 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci } else { 21662306a36Sopenharmony_ci if (zm64 >= rm64) { 21762306a36Sopenharmony_ci zm64 = zm64 - rm64; 21862306a36Sopenharmony_ci } else { 21962306a36Sopenharmony_ci zm64 = rm64 - zm64; 22062306a36Sopenharmony_ci zs = rs; 22162306a36Sopenharmony_ci } 22262306a36Sopenharmony_ci if (zm64 == 0) 22362306a36Sopenharmony_ci return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci /* 22662306a36Sopenharmony_ci * Put explicit bit at bit 62 if necessary. 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ci while ((zm64 >> 62) == 0) { 22962306a36Sopenharmony_ci zm64 <<= 1; 23062306a36Sopenharmony_ci ze--; 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci /* 23562306a36Sopenharmony_ci * Move explicit bit from bit 62 to bit 26 since the 23662306a36Sopenharmony_ci * ieee754sp_format code expects the mantissa to be 23762306a36Sopenharmony_ci * 27 bits wide (24 + 3 rounding bits). 23862306a36Sopenharmony_ci */ 23962306a36Sopenharmony_ci zm = XSPSRS64(zm64, (62 - 26)); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci return ieee754sp_format(zs, ze, zm); 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ciunion ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 24562306a36Sopenharmony_ci union ieee754sp y) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci return _sp_maddf(z, x, y, 0); 24862306a36Sopenharmony_ci} 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ciunion ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 25162306a36Sopenharmony_ci union ieee754sp y) 25262306a36Sopenharmony_ci{ 25362306a36Sopenharmony_ci return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 25462306a36Sopenharmony_ci} 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ciunion ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, 25762306a36Sopenharmony_ci union ieee754sp y) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci return _sp_maddf(z, x, y, 0); 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ciunion ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, 26362306a36Sopenharmony_ci union ieee754sp y) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 26662306a36Sopenharmony_ci} 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ciunion ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, 26962306a36Sopenharmony_ci union ieee754sp y) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 27262306a36Sopenharmony_ci} 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ciunion ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, 27562306a36Sopenharmony_ci union ieee754sp y) 27662306a36Sopenharmony_ci{ 27762306a36Sopenharmony_ci return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 27862306a36Sopenharmony_ci} 279