1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * License for Berkeley SoftFloat Release 3e 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * John R. Hauser 5bf215546Sopenharmony_ci * 2018 January 20 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * The following applies to the whole of SoftFloat Release 3e as well as to 8bf215546Sopenharmony_ci * each source file individually. 9bf215546Sopenharmony_ci * 10bf215546Sopenharmony_ci * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the 11bf215546Sopenharmony_ci * University of California. All rights reserved. 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 14bf215546Sopenharmony_ci * modification, are permitted provided that the following conditions are met: 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright notice, 17bf215546Sopenharmony_ci * this list of conditions, and the following disclaimer. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 20bf215546Sopenharmony_ci * notice, this list of conditions, and the following disclaimer in the 21bf215546Sopenharmony_ci * documentation and/or other materials provided with the distribution. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * 3. Neither the name of the University nor the names of its contributors 24bf215546Sopenharmony_ci * may be used to endorse or promote products derived from this software 25bf215546Sopenharmony_ci * without specific prior written permission. 
26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 28bf215546Sopenharmony_ci * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29bf215546Sopenharmony_ci * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 30bf215546Sopenharmony_ci * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 31bf215546Sopenharmony_ci * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 32bf215546Sopenharmony_ci * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 33bf215546Sopenharmony_ci * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 34bf215546Sopenharmony_ci * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35bf215546Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 36bf215546Sopenharmony_ci * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37bf215546Sopenharmony_ci * 38bf215546Sopenharmony_ci * 39bf215546Sopenharmony_ci * The functions listed in this file are modified versions of the ones 40bf215546Sopenharmony_ci * from the Berkeley SoftFloat 3e Library. 41bf215546Sopenharmony_ci * 42bf215546Sopenharmony_ci * Their implementation correctness has been checked with the Berkeley 43bf215546Sopenharmony_ci * TestFloat Release 3e tool for x86_64. 
44bf215546Sopenharmony_ci */ 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#include "rounding.h" 47bf215546Sopenharmony_ci#include "bitscan.h" 48bf215546Sopenharmony_ci#include "softfloat.h" 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci#if defined(BIG_ENDIAN) 51bf215546Sopenharmony_ci#define word_incr -1 52bf215546Sopenharmony_ci#define index_word(total, n) ((total) - 1 - (n)) 53bf215546Sopenharmony_ci#define index_word_hi(total) 0 54bf215546Sopenharmony_ci#define index_word_lo(total) ((total) - 1) 55bf215546Sopenharmony_ci#define index_multiword_hi(total, n) 0 56bf215546Sopenharmony_ci#define index_multiword_lo(total, n) ((total) - (n)) 57bf215546Sopenharmony_ci#define index_multiword_hi_but(total, n) 0 58bf215546Sopenharmony_ci#define index_multiword_lo_but(total, n) (n) 59bf215546Sopenharmony_ci#else 60bf215546Sopenharmony_ci#define word_incr 1 61bf215546Sopenharmony_ci#define index_word(total, n) (n) 62bf215546Sopenharmony_ci#define index_word_hi(total) ((total) - 1) 63bf215546Sopenharmony_ci#define index_word_lo(total) 0 64bf215546Sopenharmony_ci#define index_multiword_hi(total, n) ((total) - (n)) 65bf215546Sopenharmony_ci#define index_multiword_lo(total, n) 0 66bf215546Sopenharmony_ci#define index_multiword_hi_but(total, n) (n) 67bf215546Sopenharmony_ci#define index_multiword_lo_but(total, n) 0 68bf215546Sopenharmony_ci#endif 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_citypedef union { double f; int64_t i; uint64_t u; } di_type; 71bf215546Sopenharmony_citypedef union { float f; int32_t i; uint32_t u; } fi_type; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ciconst uint8_t count_leading_zeros8[256] = { 74bf215546Sopenharmony_ci 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 75bf215546Sopenharmony_ci 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 76bf215546Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 77bf215546Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 78bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 79bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 80bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 81bf215546Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 82bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 90bf215546Sopenharmony_ci}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci/** 93bf215546Sopenharmony_ci * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in 94bf215546Sopenharmony_ci * the range 1 to 63. If any nonzero bits are shifted off, they are "jammed" 95bf215546Sopenharmony_ci * into the least-significant bit of the shifted value by setting the 96bf215546Sopenharmony_ci * least-significant bit to 1. This shifted-and-jammed value is returned. 97bf215546Sopenharmony_ci * 98bf215546Sopenharmony_ci * From softfloat_shortShiftRightJam64() 99bf215546Sopenharmony_ci */ 100bf215546Sopenharmony_cistatic inline 101bf215546Sopenharmony_ciuint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0); 104bf215546Sopenharmony_ci} 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci/** 107bf215546Sopenharmony_ci * \brief Shifts 'a' right by the number of bits given in 'dist', which must not 108bf215546Sopenharmony_ci * be zero. 
If any nonzero bits are shifted off, they are "jammed" into the 109bf215546Sopenharmony_ci * least-significant bit of the shifted value by setting the least-significant 110bf215546Sopenharmony_ci * bit to 1. This shifted-and-jammed value is returned. 111bf215546Sopenharmony_ci * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 112bf215546Sopenharmony_ci * greater than 64, the result will be either 0 or 1, depending on whether 'a' 113bf215546Sopenharmony_ci * is zero or nonzero. 114bf215546Sopenharmony_ci * 115bf215546Sopenharmony_ci * From softfloat_shiftRightJam64() 116bf215546Sopenharmony_ci */ 117bf215546Sopenharmony_cistatic inline 118bf215546Sopenharmony_ciuint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist) 119bf215546Sopenharmony_ci{ 120bf215546Sopenharmony_ci return 121bf215546Sopenharmony_ci (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0); 122bf215546Sopenharmony_ci} 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci/** 125bf215546Sopenharmony_ci * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be 126bf215546Sopenharmony_ci * zero. If any nonzero bits are shifted off, they are "jammed" into the 127bf215546Sopenharmony_ci * least-significant bit of the shifted value by setting the least-significant 128bf215546Sopenharmony_ci * bit to 1. This shifted-and-jammed value is returned. 129bf215546Sopenharmony_ci * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 130bf215546Sopenharmony_ci * greater than 32, the result will be either 0 or 1, depending on whether 'a' 131bf215546Sopenharmony_ci * is zero or nonzero. 
132bf215546Sopenharmony_ci * 133bf215546Sopenharmony_ci * From softfloat_shiftRightJam32() 134bf215546Sopenharmony_ci */ 135bf215546Sopenharmony_cistatic inline 136bf215546Sopenharmony_ciuint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci return 139bf215546Sopenharmony_ci (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0); 140bf215546Sopenharmony_ci} 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci/** 143bf215546Sopenharmony_ci * \brief Extracted from softfloat_roundPackToF64() 144bf215546Sopenharmony_ci */ 145bf215546Sopenharmony_cistatic inline 146bf215546Sopenharmony_cidouble _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m) 147bf215546Sopenharmony_ci{ 148bf215546Sopenharmony_ci di_type result; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if ((uint64_t) e >= 0x7fd) { 151bf215546Sopenharmony_ci if (e < 0) { 152bf215546Sopenharmony_ci m = _mesa_shift_right_jam64(m, -e); 153bf215546Sopenharmony_ci e = 0; 154bf215546Sopenharmony_ci } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) { 155bf215546Sopenharmony_ci e = 0x7ff; 156bf215546Sopenharmony_ci m = 0; 157bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + m; 158bf215546Sopenharmony_ci result.u -= 1; 159bf215546Sopenharmony_ci return result.f; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci m >>= 10; 164bf215546Sopenharmony_ci if (m == 0) 165bf215546Sopenharmony_ci e = 0; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + m; 168bf215546Sopenharmony_ci return result.f; 169bf215546Sopenharmony_ci} 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci/** 172bf215546Sopenharmony_ci * \brief Extracted from softfloat_roundPackToF32() 173bf215546Sopenharmony_ci */ 174bf215546Sopenharmony_cistatic inline 175bf215546Sopenharmony_cifloat _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz) 
176bf215546Sopenharmony_ci{ 177bf215546Sopenharmony_ci fi_type result; 178bf215546Sopenharmony_ci uint8_t round_increment = rtz ? 0 : 0x40; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci if ((uint32_t) e >= 0xfd) { 181bf215546Sopenharmony_ci if (e < 0) { 182bf215546Sopenharmony_ci m = _mesa_shift_right_jam32(m, -e); 183bf215546Sopenharmony_ci e = 0; 184bf215546Sopenharmony_ci } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) { 185bf215546Sopenharmony_ci e = 0xff; 186bf215546Sopenharmony_ci m = 0; 187bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 188bf215546Sopenharmony_ci result.u -= !round_increment; 189bf215546Sopenharmony_ci return result.f; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci uint8_t round_bits; 194bf215546Sopenharmony_ci round_bits = m & 0x7f; 195bf215546Sopenharmony_ci m = ((uint32_t) m + round_increment) >> 7; 196bf215546Sopenharmony_ci m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz); 197bf215546Sopenharmony_ci if (m == 0) 198bf215546Sopenharmony_ci e = 0; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 201bf215546Sopenharmony_ci return result.f; 202bf215546Sopenharmony_ci} 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci/** 205bf215546Sopenharmony_ci * \brief Extracted from softfloat_roundPackToF16() 206bf215546Sopenharmony_ci */ 207bf215546Sopenharmony_cistatic inline 208bf215546Sopenharmony_ciuint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci if ((uint16_t) e >= 0x1d) { 211bf215546Sopenharmony_ci if (e < 0) { 212bf215546Sopenharmony_ci m = _mesa_shift_right_jam32(m, -e); 213bf215546Sopenharmony_ci e = 0; 214bf215546Sopenharmony_ci } else if (e > 0x1d) { 215bf215546Sopenharmony_ci e = 0x1f; 216bf215546Sopenharmony_ci m = 0; 217bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m - 1; 218bf215546Sopenharmony_ci } 
219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci m >>= 4; 222bf215546Sopenharmony_ci if (m == 0) 223bf215546Sopenharmony_ci e = 0; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m; 226bf215546Sopenharmony_ci} 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci/** 229bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 230bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 231bf215546Sopenharmony_ci * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 232bf215546Sopenharmony_ci * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 233bf215546Sopenharmony_ci * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 234bf215546Sopenharmony_ci * that concatenate in the platform's normal endian order to form an N-bit 235bf215546Sopenharmony_ci * integer. 236bf215546Sopenharmony_ci * 237bf215546Sopenharmony_ci * From softfloat_shortShiftLeftM() 238bf215546Sopenharmony_ci */ 239bf215546Sopenharmony_cistatic inline void 240bf215546Sopenharmony_ci_mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 241bf215546Sopenharmony_ci{ 242bf215546Sopenharmony_ci uint8_t neg_dist; 243bf215546Sopenharmony_ci unsigned index, last_index; 244bf215546Sopenharmony_ci uint32_t part_word, a_word; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci neg_dist = -dist; 247bf215546Sopenharmony_ci index = index_word_hi(size_words); 248bf215546Sopenharmony_ci last_index = index_word_lo(size_words); 249bf215546Sopenharmony_ci part_word = a[index] << dist; 250bf215546Sopenharmony_ci while (index != last_index) { 251bf215546Sopenharmony_ci a_word = a[index - word_incr]; 252bf215546Sopenharmony_ci m_out[index] = part_word | a_word >> (neg_dist & 31); 253bf215546Sopenharmony_ci index -= word_incr; 254bf215546Sopenharmony_ci 
part_word = a_word << dist; 255bf215546Sopenharmony_ci } 256bf215546Sopenharmony_ci m_out[index] = part_word; 257bf215546Sopenharmony_ci} 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci/** 260bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 261bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 262bf215546Sopenharmony_ci * must not be zero. Any nonzero bits shifted off are lost. The shifted 263bf215546Sopenharmony_ci * N-bit result is stored at the location pointed to by 'm_out'. Each of 'a' 264bf215546Sopenharmony_ci * and 'm_out' points to a 'size_words'-long array of 32-bit elements that 265bf215546Sopenharmony_ci * concatenate in the platform's normal endian order to form an N-bit 266bf215546Sopenharmony_ci * integer. The value of 'dist' can be arbitrarily large. In particular, if 267bf215546Sopenharmony_ci * 'dist' is greater than N, the stored result will be 0. 268bf215546Sopenharmony_ci * 269bf215546Sopenharmony_ci * From softfloat_shiftLeftM() 270bf215546Sopenharmony_ci */ 271bf215546Sopenharmony_cistatic inline void 272bf215546Sopenharmony_ci_mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 273bf215546Sopenharmony_ci{ 274bf215546Sopenharmony_ci uint32_t word_dist; 275bf215546Sopenharmony_ci uint8_t inner_dist; 276bf215546Sopenharmony_ci uint8_t i; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci word_dist = dist >> 5; 279bf215546Sopenharmony_ci if (word_dist < size_words) { 280bf215546Sopenharmony_ci a += index_multiword_lo_but(size_words, word_dist); 281bf215546Sopenharmony_ci inner_dist = dist & 31; 282bf215546Sopenharmony_ci if (inner_dist) { 283bf215546Sopenharmony_ci _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist, 284bf215546Sopenharmony_ci m_out + index_multiword_hi_but(size_words, word_dist)); 285bf215546Sopenharmony_ci if (!word_dist) 286bf215546Sopenharmony_ci return; 
287bf215546Sopenharmony_ci } else { 288bf215546Sopenharmony_ci uint32_t *dest = m_out + index_word_hi(size_words); 289bf215546Sopenharmony_ci a += index_word_hi(size_words - word_dist); 290bf215546Sopenharmony_ci for (i = size_words - word_dist; i; --i) { 291bf215546Sopenharmony_ci *dest = *a; 292bf215546Sopenharmony_ci a -= word_incr; 293bf215546Sopenharmony_ci dest -= word_incr; 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci m_out += index_multiword_lo(size_words, word_dist); 297bf215546Sopenharmony_ci } else { 298bf215546Sopenharmony_ci word_dist = size_words; 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci do { 301bf215546Sopenharmony_ci *m_out++ = 0; 302bf215546Sopenharmony_ci --word_dist; 303bf215546Sopenharmony_ci } while (word_dist); 304bf215546Sopenharmony_ci} 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci/** 307bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 308bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 309bf215546Sopenharmony_ci * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 310bf215546Sopenharmony_ci * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 311bf215546Sopenharmony_ci * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 312bf215546Sopenharmony_ci * that concatenate in the platform's normal endian order to form an N-bit 313bf215546Sopenharmony_ci * integer. 
314bf215546Sopenharmony_ci * 315bf215546Sopenharmony_ci * From softfloat_shortShiftRightM() 316bf215546Sopenharmony_ci */ 317bf215546Sopenharmony_cistatic inline void 318bf215546Sopenharmony_ci_mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 319bf215546Sopenharmony_ci{ 320bf215546Sopenharmony_ci uint8_t neg_dist; 321bf215546Sopenharmony_ci unsigned index, last_index; 322bf215546Sopenharmony_ci uint32_t part_word, a_word; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci neg_dist = -dist; 325bf215546Sopenharmony_ci index = index_word_lo(size_words); 326bf215546Sopenharmony_ci last_index = index_word_hi(size_words); 327bf215546Sopenharmony_ci part_word = a[index] >> dist; 328bf215546Sopenharmony_ci while (index != last_index) { 329bf215546Sopenharmony_ci a_word = a[index + word_incr]; 330bf215546Sopenharmony_ci m_out[index] = a_word << (neg_dist & 31) | part_word; 331bf215546Sopenharmony_ci index += word_incr; 332bf215546Sopenharmony_ci part_word = a_word >> dist; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci m_out[index] = part_word; 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci/** 338bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 339bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 340bf215546Sopenharmony_ci * must be in the range 1 to 31. If any nonzero bits are shifted off, they 341bf215546Sopenharmony_ci * are "jammed" into the least-significant bit of the shifted value by setting 342bf215546Sopenharmony_ci * the least-significant bit to 1. This shifted-and-jammed N-bit result is 343bf215546Sopenharmony_ci * stored at the location pointed to by 'm_out'. 
Each of 'a' and 'm_out' 344bf215546Sopenharmony_ci * points to a 'size_words'-long array of 32-bit elements that concatenate in 345bf215546Sopenharmony_ci * the platform's normal endian order to form an N-bit integer. 346bf215546Sopenharmony_ci * 347bf215546Sopenharmony_ci * 348bf215546Sopenharmony_ci * From softfloat_shortShiftRightJamM() 349bf215546Sopenharmony_ci */ 350bf215546Sopenharmony_cistatic inline void 351bf215546Sopenharmony_ci_mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 352bf215546Sopenharmony_ci{ 353bf215546Sopenharmony_ci uint8_t neg_dist; 354bf215546Sopenharmony_ci unsigned index, last_index; 355bf215546Sopenharmony_ci uint64_t part_word, a_word; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci neg_dist = -dist; 358bf215546Sopenharmony_ci index = index_word_lo(size_words); 359bf215546Sopenharmony_ci last_index = index_word_hi(size_words); 360bf215546Sopenharmony_ci a_word = a[index]; 361bf215546Sopenharmony_ci part_word = a_word >> dist; 362bf215546Sopenharmony_ci if (part_word << dist != a_word ) 363bf215546Sopenharmony_ci part_word |= 1; 364bf215546Sopenharmony_ci while (index != last_index) { 365bf215546Sopenharmony_ci a_word = a[index + word_incr]; 366bf215546Sopenharmony_ci m_out[index] = a_word << (neg_dist & 31) | part_word; 367bf215546Sopenharmony_ci index += word_incr; 368bf215546Sopenharmony_ci part_word = a_word >> dist; 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci m_out[index] = part_word; 371bf215546Sopenharmony_ci} 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci/** 374bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 375bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 376bf215546Sopenharmony_ci * must not be zero. 
If any nonzero bits are shifted off, they are "jammed" 377bf215546Sopenharmony_ci * into the least-significant bit of the shifted value by setting the 378bf215546Sopenharmony_ci * least-significant bit to 1. This shifted-and-jammed N-bit result is stored 379bf215546Sopenharmony_ci * at the location pointed to by 'm_out'. Each of 'a' and 'm_out' points to a 380bf215546Sopenharmony_ci * 'size_words'-long array of 32-bit elements that concatenate in the 381bf215546Sopenharmony_ci * platform's normal endian order to form an N-bit integer. The value of 382bf215546Sopenharmony_ci * 'dist' can be arbitrarily large. In particular, if 'dist' is greater than 383bf215546Sopenharmony_ci * N, the stored result will be either 0 or 1, depending on whether the 384bf215546Sopenharmony_ci * original N bits are all zeros. 385bf215546Sopenharmony_ci * 386bf215546Sopenharmony_ci * From softfloat_shiftRightJamM() 387bf215546Sopenharmony_ci */ 388bf215546Sopenharmony_cistatic inline void 389bf215546Sopenharmony_ci_mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 390bf215546Sopenharmony_ci{ 391bf215546Sopenharmony_ci uint32_t word_jam, word_dist, *tmp; 392bf215546Sopenharmony_ci uint8_t i, inner_dist; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci word_jam = 0; 395bf215546Sopenharmony_ci word_dist = dist >> 5; 396bf215546Sopenharmony_ci tmp = NULL; 397bf215546Sopenharmony_ci if (word_dist) { 398bf215546Sopenharmony_ci if (size_words < word_dist) 399bf215546Sopenharmony_ci word_dist = size_words; 400bf215546Sopenharmony_ci tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist)); 401bf215546Sopenharmony_ci i = word_dist; 402bf215546Sopenharmony_ci do { 403bf215546Sopenharmony_ci word_jam = *tmp++; 404bf215546Sopenharmony_ci if (word_jam) 405bf215546Sopenharmony_ci break; 406bf215546Sopenharmony_ci --i; 407bf215546Sopenharmony_ci } while (i); 408bf215546Sopenharmony_ci tmp = m_out; 409bf215546Sopenharmony_ci } 
410bf215546Sopenharmony_ci if (word_dist < size_words) { 411bf215546Sopenharmony_ci a += index_multiword_hi_but(size_words, word_dist); 412bf215546Sopenharmony_ci inner_dist = dist & 31; 413bf215546Sopenharmony_ci if (inner_dist) { 414bf215546Sopenharmony_ci _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist, 415bf215546Sopenharmony_ci m_out + index_multiword_lo_but(size_words, word_dist)); 416bf215546Sopenharmony_ci if (!word_dist) { 417bf215546Sopenharmony_ci if (word_jam) 418bf215546Sopenharmony_ci m_out[index_word_lo(size_words)] |= 1; 419bf215546Sopenharmony_ci return; 420bf215546Sopenharmony_ci } 421bf215546Sopenharmony_ci } else { 422bf215546Sopenharmony_ci a += index_word_lo(size_words - word_dist); 423bf215546Sopenharmony_ci tmp = m_out + index_word_lo(size_words); 424bf215546Sopenharmony_ci for (i = size_words - word_dist; i; --i) { 425bf215546Sopenharmony_ci *tmp = *a; 426bf215546Sopenharmony_ci a += word_incr; 427bf215546Sopenharmony_ci tmp += word_incr; 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci } 430bf215546Sopenharmony_ci tmp = m_out + index_multiword_hi(size_words, word_dist); 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci if (tmp) { 433bf215546Sopenharmony_ci do { 434bf215546Sopenharmony_ci *tmp++ = 0; 435bf215546Sopenharmony_ci --word_dist; 436bf215546Sopenharmony_ci } while (word_dist); 437bf215546Sopenharmony_ci } 438bf215546Sopenharmony_ci if (word_jam) 439bf215546Sopenharmony_ci m_out[index_word_lo(size_words)] |= 1; 440bf215546Sopenharmony_ci} 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci/** 443bf215546Sopenharmony_ci * \brief Calculate a + b but rounding to zero. 444bf215546Sopenharmony_ci * 445bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e 446bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the 447bf215546Sopenharmony_ci * signalling flags. 
Any NaN is good for us and the sign of the Zero is not 448bf215546Sopenharmony_ci * important. 449bf215546Sopenharmony_ci * 450bf215546Sopenharmony_ci * From f64_add() 451bf215546Sopenharmony_ci */ 452bf215546Sopenharmony_cidouble 453bf215546Sopenharmony_ci_mesa_double_add_rtz(double a, double b) 454bf215546Sopenharmony_ci{ 455bf215546Sopenharmony_ci const di_type a_di = {a}; 456bf215546Sopenharmony_ci uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 457bf215546Sopenharmony_ci uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 458bf215546Sopenharmony_ci uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 459bf215546Sopenharmony_ci const di_type b_di = {b}; 460bf215546Sopenharmony_ci uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 461bf215546Sopenharmony_ci uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 462bf215546Sopenharmony_ci uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 463bf215546Sopenharmony_ci int64_t s, e, m = 0; 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci s = a_flt_s; 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci const int64_t exp_diff = a_flt_e - b_flt_e; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci /* Handle special cases */ 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci if (a_flt_s != b_flt_s) { 472bf215546Sopenharmony_ci return _mesa_double_sub_rtz(a, -b); 473bf215546Sopenharmony_ci } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 474bf215546Sopenharmony_ci /* 'a' is zero, return 'b' */ 475bf215546Sopenharmony_ci return b; 476bf215546Sopenharmony_ci } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 477bf215546Sopenharmony_ci /* 'b' is zero, return 'a' */ 478bf215546Sopenharmony_ci return a; 479bf215546Sopenharmony_ci } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 480bf215546Sopenharmony_ci /* 'a' is a NaN, return NaN */ 481bf215546Sopenharmony_ci return a; 482bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 483bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 484bf215546Sopenharmony_ci return b; 485bf215546Sopenharmony_ci } else 
if (a_flt_e == 0x7ff && a_flt_m == 0) { 486bf215546Sopenharmony_ci /* Inf + x = Inf */ 487bf215546Sopenharmony_ci return a; 488bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 489bf215546Sopenharmony_ci /* x + Inf = Inf */ 490bf215546Sopenharmony_ci return b; 491bf215546Sopenharmony_ci } else if (exp_diff == 0 && a_flt_e == 0) { 492bf215546Sopenharmony_ci di_type result_di; 493bf215546Sopenharmony_ci result_di.u = a_di.u + b_flt_m; 494bf215546Sopenharmony_ci return result_di.f; 495bf215546Sopenharmony_ci } else if (exp_diff == 0) { 496bf215546Sopenharmony_ci e = a_flt_e; 497bf215546Sopenharmony_ci m = 0x0020000000000000 + a_flt_m + b_flt_m; 498bf215546Sopenharmony_ci m <<= 9; 499bf215546Sopenharmony_ci } else if (exp_diff < 0) { 500bf215546Sopenharmony_ci a_flt_m <<= 9; 501bf215546Sopenharmony_ci b_flt_m <<= 9; 502bf215546Sopenharmony_ci e = b_flt_e; 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci if (a_flt_e != 0) 505bf215546Sopenharmony_ci a_flt_m += 0x2000000000000000; 506bf215546Sopenharmony_ci else 507bf215546Sopenharmony_ci a_flt_m <<= 1; 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_ci a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 510bf215546Sopenharmony_ci m = 0x2000000000000000 + a_flt_m + b_flt_m; 511bf215546Sopenharmony_ci if (m < 0x4000000000000000) { 512bf215546Sopenharmony_ci --e; 513bf215546Sopenharmony_ci m <<= 1; 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci } else { 516bf215546Sopenharmony_ci a_flt_m <<= 9; 517bf215546Sopenharmony_ci b_flt_m <<= 9; 518bf215546Sopenharmony_ci e = a_flt_e; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci if (b_flt_e != 0) 521bf215546Sopenharmony_ci b_flt_m += 0x2000000000000000; 522bf215546Sopenharmony_ci else 523bf215546Sopenharmony_ci b_flt_m <<= 1; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 526bf215546Sopenharmony_ci m = 0x2000000000000000 + a_flt_m + b_flt_m; 527bf215546Sopenharmony_ci if (m 
< 0x4000000000000000) { 528bf215546Sopenharmony_ci --e; 529bf215546Sopenharmony_ci m <<= 1; 530bf215546Sopenharmony_ci } 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e, m); 534bf215546Sopenharmony_ci} 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci/** 537bf215546Sopenharmony_ci * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 538bf215546Sopenharmony_ci * 'a'. If 'a' is zero, 64 is returned. 539bf215546Sopenharmony_ci */ 540bf215546Sopenharmony_cistatic inline unsigned 541bf215546Sopenharmony_ci_mesa_count_leading_zeros64(uint64_t a) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci return 64 - util_last_bit64(a); 544bf215546Sopenharmony_ci} 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci/** 547bf215546Sopenharmony_ci * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 548bf215546Sopenharmony_ci * 'a'. If 'a' is zero, 32 is returned. 549bf215546Sopenharmony_ci */ 550bf215546Sopenharmony_cistatic inline unsigned 551bf215546Sopenharmony_ci_mesa_count_leading_zeros32(uint32_t a) 552bf215546Sopenharmony_ci{ 553bf215546Sopenharmony_ci return 32 - util_last_bit(a); 554bf215546Sopenharmony_ci} 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_cistatic inline double 557bf215546Sopenharmony_ci_mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m) 558bf215546Sopenharmony_ci{ 559bf215546Sopenharmony_ci int8_t shift_dist; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci shift_dist = _mesa_count_leading_zeros64(m) - 1; 562bf215546Sopenharmony_ci e -= shift_dist; 563bf215546Sopenharmony_ci if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) { 564bf215546Sopenharmony_ci di_type result; 565bf215546Sopenharmony_ci result.u = (s << 63) + ((m ? 
e : 0) << 52) + (m << (shift_dist - 10)); 566bf215546Sopenharmony_ci return result.f; 567bf215546Sopenharmony_ci } else { 568bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e, m << shift_dist); 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci} 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci/** 573bf215546Sopenharmony_ci * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the 574bf215546Sopenharmony_ci * 2s-complement of itself, where N = 'size_words' * 32. Argument 'm_out' 575bf215546Sopenharmony_ci * points to a 'size_words'-long array of 32-bit elements that concatenate in 576bf215546Sopenharmony_ci * the platform's normal endian order to form an N-bit integer. 577bf215546Sopenharmony_ci * 578bf215546Sopenharmony_ci * From softfloat_negXM() 579bf215546Sopenharmony_ci */ 580bf215546Sopenharmony_cistatic inline void 581bf215546Sopenharmony_ci_mesa_neg_x_m(uint8_t size_words, uint32_t *m_out) 582bf215546Sopenharmony_ci{ 583bf215546Sopenharmony_ci unsigned index, last_index; 584bf215546Sopenharmony_ci uint8_t carry; 585bf215546Sopenharmony_ci uint32_t word; 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci index = index_word_lo(size_words); 588bf215546Sopenharmony_ci last_index = index_word_hi(size_words); 589bf215546Sopenharmony_ci carry = 1; 590bf215546Sopenharmony_ci for (;;) { 591bf215546Sopenharmony_ci word = ~m_out[index] + carry; 592bf215546Sopenharmony_ci m_out[index] = word; 593bf215546Sopenharmony_ci if (index == last_index) 594bf215546Sopenharmony_ci break; 595bf215546Sopenharmony_ci index += word_incr; 596bf215546Sopenharmony_ci if (word) 597bf215546Sopenharmony_ci carry = 0; 598bf215546Sopenharmony_ci } 599bf215546Sopenharmony_ci} 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci/** 602bf215546Sopenharmony_ci * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N = 603bf215546Sopenharmony_ci * 'size_words' * 32. 
The addition is modulo 2^N, so any carry out is 604bf215546Sopenharmony_ci * lost. The N-bit sum is stored at the location pointed to by 'm_out'. Each 605bf215546Sopenharmony_ci * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit 606bf215546Sopenharmony_ci * elements that concatenate in the platform's normal endian order to form an 607bf215546Sopenharmony_ci * N-bit integer. 608bf215546Sopenharmony_ci * 609bf215546Sopenharmony_ci * From softfloat_addM() 610bf215546Sopenharmony_ci */ 611bf215546Sopenharmony_cistatic inline void 612bf215546Sopenharmony_ci_mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 613bf215546Sopenharmony_ci{ 614bf215546Sopenharmony_ci unsigned index, last_index; 615bf215546Sopenharmony_ci uint8_t carry; 616bf215546Sopenharmony_ci uint32_t a_word, word; 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci index = index_word_lo(size_words); 619bf215546Sopenharmony_ci last_index = index_word_hi(size_words); 620bf215546Sopenharmony_ci carry = 0; 621bf215546Sopenharmony_ci for (;;) { 622bf215546Sopenharmony_ci a_word = a[index]; 623bf215546Sopenharmony_ci word = a_word + b[index] + carry; 624bf215546Sopenharmony_ci m_out[index] = word; 625bf215546Sopenharmony_ci if (index == last_index) 626bf215546Sopenharmony_ci break; 627bf215546Sopenharmony_ci if (word != a_word) 628bf215546Sopenharmony_ci carry = (word < a_word); 629bf215546Sopenharmony_ci index += word_incr; 630bf215546Sopenharmony_ci } 631bf215546Sopenharmony_ci} 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci/** 634bf215546Sopenharmony_ci * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N = 635bf215546Sopenharmony_ci * 'size_words' * 32. The subtraction is modulo 2^N, so any borrow out (carry 636bf215546Sopenharmony_ci * out) is lost. The N-bit difference is stored at the location pointed to by 637bf215546Sopenharmony_ci * 'm_out'. 
Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array 638bf215546Sopenharmony_ci * of 32-bit elements that concatenate in the platform's normal endian order 639bf215546Sopenharmony_ci * to form an N-bit integer. 640bf215546Sopenharmony_ci * 641bf215546Sopenharmony_ci * From softfloat_subM() 642bf215546Sopenharmony_ci */ 643bf215546Sopenharmony_cistatic inline void 644bf215546Sopenharmony_ci_mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 645bf215546Sopenharmony_ci{ 646bf215546Sopenharmony_ci unsigned index, last_index; 647bf215546Sopenharmony_ci uint8_t borrow; 648bf215546Sopenharmony_ci uint32_t a_word, b_word; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci index = index_word_lo(size_words); 651bf215546Sopenharmony_ci last_index = index_word_hi(size_words); 652bf215546Sopenharmony_ci borrow = 0; 653bf215546Sopenharmony_ci for (;;) { 654bf215546Sopenharmony_ci a_word = a[index]; 655bf215546Sopenharmony_ci b_word = b[index]; 656bf215546Sopenharmony_ci m_out[index] = a_word - b_word - borrow; 657bf215546Sopenharmony_ci if (index == last_index) 658bf215546Sopenharmony_ci break; 659bf215546Sopenharmony_ci borrow = borrow ? (a_word <= b_word) : (a_word < b_word); 660bf215546Sopenharmony_ci index += word_incr; 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci} 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci/* Calculate a - b but rounding to zero. 665bf215546Sopenharmony_ci * 666bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e 667bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the 668bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not 669bf215546Sopenharmony_ci * important. 
670bf215546Sopenharmony_ci * 671bf215546Sopenharmony_ci * From f64_sub() 672bf215546Sopenharmony_ci */ 673bf215546Sopenharmony_cidouble 674bf215546Sopenharmony_ci_mesa_double_sub_rtz(double a, double b) 675bf215546Sopenharmony_ci{ 676bf215546Sopenharmony_ci const di_type a_di = {a}; 677bf215546Sopenharmony_ci uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 678bf215546Sopenharmony_ci uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 679bf215546Sopenharmony_ci uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 680bf215546Sopenharmony_ci const di_type b_di = {b}; 681bf215546Sopenharmony_ci uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 682bf215546Sopenharmony_ci uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 683bf215546Sopenharmony_ci uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 684bf215546Sopenharmony_ci int64_t s, e, m = 0; 685bf215546Sopenharmony_ci int64_t m_diff = 0; 686bf215546Sopenharmony_ci unsigned shift_dist = 0; 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci s = a_flt_s; 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci const int64_t exp_diff = a_flt_e - b_flt_e; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci /* Handle special cases */ 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci if (a_flt_s != b_flt_s) { 695bf215546Sopenharmony_ci return _mesa_double_add_rtz(a, -b); 696bf215546Sopenharmony_ci } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 697bf215546Sopenharmony_ci /* 'a' is zero, return '-b' */ 698bf215546Sopenharmony_ci return -b; 699bf215546Sopenharmony_ci } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 700bf215546Sopenharmony_ci /* 'b' is zero, return 'a' */ 701bf215546Sopenharmony_ci return a; 702bf215546Sopenharmony_ci } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 703bf215546Sopenharmony_ci /* 'a' is a NaN, return NaN */ 704bf215546Sopenharmony_ci return a; 705bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 706bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 707bf215546Sopenharmony_ci return b; 708bf215546Sopenharmony_ci } 
else if (a_flt_e == 0x7ff && a_flt_m == 0) { 709bf215546Sopenharmony_ci if (b_flt_e == 0x7ff && b_flt_m == 0) { 710bf215546Sopenharmony_ci /* Inf - Inf = NaN */ 711bf215546Sopenharmony_ci di_type result; 712bf215546Sopenharmony_ci e = 0x7ff; 713bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 714bf215546Sopenharmony_ci return result.f; 715bf215546Sopenharmony_ci } 716bf215546Sopenharmony_ci /* Inf - x = Inf */ 717bf215546Sopenharmony_ci return a; 718bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 719bf215546Sopenharmony_ci /* x - Inf = -Inf */ 720bf215546Sopenharmony_ci return -b; 721bf215546Sopenharmony_ci } else if (exp_diff == 0) { 722bf215546Sopenharmony_ci m_diff = a_flt_m - b_flt_m; 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci if (m_diff == 0) 725bf215546Sopenharmony_ci return 0; 726bf215546Sopenharmony_ci if (a_flt_e) 727bf215546Sopenharmony_ci --a_flt_e; 728bf215546Sopenharmony_ci if (m_diff < 0) { 729bf215546Sopenharmony_ci s = !s; 730bf215546Sopenharmony_ci m_diff = -m_diff; 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci shift_dist = _mesa_count_leading_zeros64(m_diff) - 11; 734bf215546Sopenharmony_ci e = a_flt_e - shift_dist; 735bf215546Sopenharmony_ci if (e < 0) { 736bf215546Sopenharmony_ci shift_dist = a_flt_e; 737bf215546Sopenharmony_ci e = 0; 738bf215546Sopenharmony_ci } 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci di_type result; 741bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + (m_diff << shift_dist); 742bf215546Sopenharmony_ci return result.f; 743bf215546Sopenharmony_ci } else if (exp_diff < 0) { 744bf215546Sopenharmony_ci a_flt_m <<= 10; 745bf215546Sopenharmony_ci b_flt_m <<= 10; 746bf215546Sopenharmony_ci s = !s; 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci a_flt_m += (a_flt_e) ? 
0x4000000000000000 : a_flt_m; 749bf215546Sopenharmony_ci a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 750bf215546Sopenharmony_ci b_flt_m |= 0x4000000000000000; 751bf215546Sopenharmony_ci e = b_flt_e; 752bf215546Sopenharmony_ci m = b_flt_m - a_flt_m; 753bf215546Sopenharmony_ci } else { 754bf215546Sopenharmony_ci a_flt_m <<= 10; 755bf215546Sopenharmony_ci b_flt_m <<= 10; 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m; 758bf215546Sopenharmony_ci b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 759bf215546Sopenharmony_ci a_flt_m |= 0x4000000000000000; 760bf215546Sopenharmony_ci e = a_flt_e; 761bf215546Sopenharmony_ci m = a_flt_m - b_flt_m; 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci return _mesa_norm_round_pack_f64(s, e - 1, m); 765bf215546Sopenharmony_ci} 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_cistatic inline void 768bf215546Sopenharmony_ci_mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out) 769bf215546Sopenharmony_ci{ 770bf215546Sopenharmony_ci int shift_dist; 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci shift_dist = _mesa_count_leading_zeros64(m) - 11; 773bf215546Sopenharmony_ci *exp = 1 - shift_dist; 774bf215546Sopenharmony_ci *m_out = m << shift_dist; 775bf215546Sopenharmony_ci} 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_cistatic inline void 778bf215546Sopenharmony_ci_mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out) 779bf215546Sopenharmony_ci{ 780bf215546Sopenharmony_ci int shift_dist; 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci shift_dist = _mesa_count_leading_zeros32(m) - 8; 783bf215546Sopenharmony_ci *exp = 1 - shift_dist; 784bf215546Sopenharmony_ci *m_out = m << shift_dist; 785bf215546Sopenharmony_ci} 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci/** 788bf215546Sopenharmony_ci * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the 
/**
 * \brief Multiplies 'a' by 'b' and stores the full 128-bit product through
 * 'm_out'.
 *
 * 'm_out' points to an array of four 32-bit elements that concatenate in the
 * platform's normal endian order to form a 128-bit integer.  The product is
 * built from the four 32x32-bit partial products of the operand halves.
 *
 * From softfloat_mul64To128M()
 */
static inline void
_mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out)
{
    const uint32_t a_hi = a >> 32;
    const uint32_t a_lo = a;
    const uint32_t b_hi = b >> 32;
    const uint32_t b_lo = b;

    uint64_t lo = (uint64_t) a_lo * b_lo;
    const uint64_t cross_first = (uint64_t) a_hi * b_lo;
    uint64_t cross = cross_first + (uint64_t) a_lo * b_hi;
    uint64_t hi = (uint64_t) a_hi * b_hi;

    /* The upper half of the cross terms, plus the carry out of their sum
     * (worth 2^32 at this position), goes into the high 64 bits.
     */
    hi += ((uint64_t) (cross < cross_first) << 32) | (cross >> 32);

    /* The lower half of the cross terms goes into the low 64 bits; a carry
     * out of that addition is propagated into the high 64 bits.
     */
    cross <<= 32;
    lo += cross;
    hi += (lo < cross);

    m_out[index_word(4, 0)] = lo;
    m_out[index_word(4, 1)] = lo >> 32;
    m_out[index_word(4, 2)] = hi;
    m_out[index_word(4, 3)] = hi >> 32;
}
825bf215546Sopenharmony_ci * 826bf215546Sopenharmony_ci * From f64_mul() 827bf215546Sopenharmony_ci */ 828bf215546Sopenharmony_cidouble 829bf215546Sopenharmony_ci_mesa_double_mul_rtz(double a, double b) 830bf215546Sopenharmony_ci{ 831bf215546Sopenharmony_ci const di_type a_di = {a}; 832bf215546Sopenharmony_ci uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 833bf215546Sopenharmony_ci uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 834bf215546Sopenharmony_ci uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 835bf215546Sopenharmony_ci const di_type b_di = {b}; 836bf215546Sopenharmony_ci uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 837bf215546Sopenharmony_ci uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 838bf215546Sopenharmony_ci uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 839bf215546Sopenharmony_ci int64_t s, e, m = 0; 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci s = a_flt_s ^ b_flt_s; 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci if (a_flt_e == 0x7ff) { 844bf215546Sopenharmony_ci if (a_flt_m != 0) { 845bf215546Sopenharmony_ci /* 'a' is a NaN, return NaN */ 846bf215546Sopenharmony_ci return a; 847bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 848bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 849bf215546Sopenharmony_ci return b; 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci if (!(b_flt_e | b_flt_m)) { 853bf215546Sopenharmony_ci /* Inf * 0 = NaN */ 854bf215546Sopenharmony_ci di_type result; 855bf215546Sopenharmony_ci e = 0x7ff; 856bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 857bf215546Sopenharmony_ci return result.f; 858bf215546Sopenharmony_ci } 859bf215546Sopenharmony_ci /* Inf * x = Inf */ 860bf215546Sopenharmony_ci di_type result; 861bf215546Sopenharmony_ci e = 0x7ff; 862bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0; 863bf215546Sopenharmony_ci return result.f; 864bf215546Sopenharmony_ci } 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci if (b_flt_e == 0x7ff) { 
867bf215546Sopenharmony_ci if (b_flt_m != 0) { 868bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 869bf215546Sopenharmony_ci return b; 870bf215546Sopenharmony_ci } 871bf215546Sopenharmony_ci if (!(a_flt_e | a_flt_m)) { 872bf215546Sopenharmony_ci /* 0 * Inf = NaN */ 873bf215546Sopenharmony_ci di_type result; 874bf215546Sopenharmony_ci e = 0x7ff; 875bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 876bf215546Sopenharmony_ci return result.f; 877bf215546Sopenharmony_ci } 878bf215546Sopenharmony_ci /* x * Inf = Inf */ 879bf215546Sopenharmony_ci di_type result; 880bf215546Sopenharmony_ci e = 0x7ff; 881bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0; 882bf215546Sopenharmony_ci return result.f; 883bf215546Sopenharmony_ci } 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci if (a_flt_e == 0) { 886bf215546Sopenharmony_ci if (a_flt_m == 0) { 887bf215546Sopenharmony_ci /* 'a' is zero. Return zero */ 888bf215546Sopenharmony_ci di_type result; 889bf215546Sopenharmony_ci result.u = (s << 63) + 0; 890bf215546Sopenharmony_ci return result.f; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 893bf215546Sopenharmony_ci } 894bf215546Sopenharmony_ci if (b_flt_e == 0) { 895bf215546Sopenharmony_ci if (b_flt_m == 0) { 896bf215546Sopenharmony_ci /* 'b' is zero. 
Return zero */ 897bf215546Sopenharmony_ci di_type result; 898bf215546Sopenharmony_ci result.u = (s << 63) + 0; 899bf215546Sopenharmony_ci return result.f; 900bf215546Sopenharmony_ci } 901bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 902bf215546Sopenharmony_ci } 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci e = a_flt_e + b_flt_e - 0x3ff; 905bf215546Sopenharmony_ci a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 906bf215546Sopenharmony_ci b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci uint32_t m_128[4]; 909bf215546Sopenharmony_ci _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; 912bf215546Sopenharmony_ci if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 913bf215546Sopenharmony_ci m |= 1; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci if (m < 0x4000000000000000) { 916bf215546Sopenharmony_ci --e; 917bf215546Sopenharmony_ci m <<= 1; 918bf215546Sopenharmony_ci } 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e, m); 921bf215546Sopenharmony_ci} 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci/** 925bf215546Sopenharmony_ci * \brief Calculate a * b + c but rounding to zero. 926bf215546Sopenharmony_ci * 927bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e 928bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the 929bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not 930bf215546Sopenharmony_ci * important. 
931bf215546Sopenharmony_ci * 932bf215546Sopenharmony_ci * From f64_mulAdd() 933bf215546Sopenharmony_ci */ 934bf215546Sopenharmony_cidouble 935bf215546Sopenharmony_ci_mesa_double_fma_rtz(double a, double b, double c) 936bf215546Sopenharmony_ci{ 937bf215546Sopenharmony_ci const di_type a_di = {a}; 938bf215546Sopenharmony_ci uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 939bf215546Sopenharmony_ci uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 940bf215546Sopenharmony_ci uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 941bf215546Sopenharmony_ci const di_type b_di = {b}; 942bf215546Sopenharmony_ci uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 943bf215546Sopenharmony_ci uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 944bf215546Sopenharmony_ci uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 945bf215546Sopenharmony_ci const di_type c_di = {c}; 946bf215546Sopenharmony_ci uint64_t c_flt_m = c_di.u & 0x0fffffffffffff; 947bf215546Sopenharmony_ci uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff; 948bf215546Sopenharmony_ci uint64_t c_flt_s = (c_di.u >> 63) & 0x1; 949bf215546Sopenharmony_ci int64_t s, e, m = 0; 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci c_flt_s ^= 0; 952bf215546Sopenharmony_ci s = a_flt_s ^ b_flt_s ^ 0; 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci if (a_flt_e == 0x7ff) { 955bf215546Sopenharmony_ci if (a_flt_m != 0) { 956bf215546Sopenharmony_ci /* 'a' is a NaN, return NaN */ 957bf215546Sopenharmony_ci return a; 958bf215546Sopenharmony_ci } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 959bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 960bf215546Sopenharmony_ci return b; 961bf215546Sopenharmony_ci } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 962bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 963bf215546Sopenharmony_ci return c; 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci if (!(b_flt_e | b_flt_m)) { 967bf215546Sopenharmony_ci /* Inf * 0 + y = NaN */ 968bf215546Sopenharmony_ci di_type result; 969bf215546Sopenharmony_ci e = 
0x7ff; 970bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 971bf215546Sopenharmony_ci return result.f; 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 975bf215546Sopenharmony_ci /* Inf * x - Inf = NaN */ 976bf215546Sopenharmony_ci di_type result; 977bf215546Sopenharmony_ci e = 0x7ff; 978bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 979bf215546Sopenharmony_ci return result.f; 980bf215546Sopenharmony_ci } 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci /* Inf * x + y = Inf */ 983bf215546Sopenharmony_ci di_type result; 984bf215546Sopenharmony_ci e = 0x7ff; 985bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0; 986bf215546Sopenharmony_ci return result.f; 987bf215546Sopenharmony_ci } 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci if (b_flt_e == 0x7ff) { 990bf215546Sopenharmony_ci if (b_flt_m != 0) { 991bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 992bf215546Sopenharmony_ci return b; 993bf215546Sopenharmony_ci } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 994bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 995bf215546Sopenharmony_ci return c; 996bf215546Sopenharmony_ci } 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci if (!(a_flt_e | a_flt_m)) { 999bf215546Sopenharmony_ci /* 0 * Inf + y = NaN */ 1000bf215546Sopenharmony_ci di_type result; 1001bf215546Sopenharmony_ci e = 0x7ff; 1002bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 1003bf215546Sopenharmony_ci return result.f; 1004bf215546Sopenharmony_ci } 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 1007bf215546Sopenharmony_ci /* x * Inf - Inf = NaN */ 1008bf215546Sopenharmony_ci di_type result; 1009bf215546Sopenharmony_ci e = 0x7ff; 1010bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0x1; 1011bf215546Sopenharmony_ci return result.f; 
1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci /* x * Inf + y = Inf */ 1015bf215546Sopenharmony_ci di_type result; 1016bf215546Sopenharmony_ci e = 0x7ff; 1017bf215546Sopenharmony_ci result.u = (s << 63) + (e << 52) + 0; 1018bf215546Sopenharmony_ci return result.f; 1019bf215546Sopenharmony_ci } 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_ci if (c_flt_e == 0x7ff) { 1022bf215546Sopenharmony_ci if (c_flt_m != 0) { 1023bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 1024bf215546Sopenharmony_ci return c; 1025bf215546Sopenharmony_ci } 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci /* x * y + Inf = Inf */ 1028bf215546Sopenharmony_ci return c; 1029bf215546Sopenharmony_ci } 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci if (a_flt_e == 0) { 1032bf215546Sopenharmony_ci if (a_flt_m == 0) { 1033bf215546Sopenharmony_ci /* 'a' is zero, return 'c' */ 1034bf215546Sopenharmony_ci return c; 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 1037bf215546Sopenharmony_ci } 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci if (b_flt_e == 0) { 1040bf215546Sopenharmony_ci if (b_flt_m == 0) { 1041bf215546Sopenharmony_ci /* 'b' is zero, return 'c' */ 1042bf215546Sopenharmony_ci return c; 1043bf215546Sopenharmony_ci } 1044bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 1045bf215546Sopenharmony_ci } 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci e = a_flt_e + b_flt_e - 0x3fe; 1048bf215546Sopenharmony_ci a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 1049bf215546Sopenharmony_ci b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci uint32_t m_128[4]; 1052bf215546Sopenharmony_ci _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 | 
m_128[index_word(4, 2)]; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci int64_t shift_dist = 0; 1057bf215546Sopenharmony_ci if (!(m & 0x4000000000000000)) { 1058bf215546Sopenharmony_ci --e; 1059bf215546Sopenharmony_ci shift_dist = -1; 1060bf215546Sopenharmony_ci } 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci if (c_flt_e == 0) { 1063bf215546Sopenharmony_ci if (c_flt_m == 0) { 1064bf215546Sopenharmony_ci /* 'c' is zero, return 'a * b' */ 1065bf215546Sopenharmony_ci if (shift_dist) 1066bf215546Sopenharmony_ci m <<= 1; 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1069bf215546Sopenharmony_ci m |= 1; 1070bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e - 1, m); 1071bf215546Sopenharmony_ci } 1072bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m); 1073bf215546Sopenharmony_ci } 1074bf215546Sopenharmony_ci c_flt_m = (c_flt_m | 0x0010000000000000) << 10; 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci uint32_t c_flt_m_128[4]; 1077bf215546Sopenharmony_ci int64_t exp_diff = e - c_flt_e; 1078bf215546Sopenharmony_ci if (exp_diff < 0) { 1079bf215546Sopenharmony_ci e = c_flt_e; 1080bf215546Sopenharmony_ci if ((s == c_flt_s) || (exp_diff < -1)) { 1081bf215546Sopenharmony_ci shift_dist -= exp_diff; 1082bf215546Sopenharmony_ci if (shift_dist) { 1083bf215546Sopenharmony_ci m = _mesa_shift_right_jam64(m, shift_dist); 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci } else { 1086bf215546Sopenharmony_ci if (!shift_dist) { 1087bf215546Sopenharmony_ci _mesa_short_shift_right_m(4, m_128, 1, m_128); 1088bf215546Sopenharmony_ci } 1089bf215546Sopenharmony_ci } 1090bf215546Sopenharmony_ci } else { 1091bf215546Sopenharmony_ci if (shift_dist) 1092bf215546Sopenharmony_ci _mesa_add_m(4, m_128, m_128, m_128); 1093bf215546Sopenharmony_ci if (!exp_diff) { 1094bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 1095bf215546Sopenharmony_ci 
| m_128[index_word(4, 2)]; 1096bf215546Sopenharmony_ci } else { 1097bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 1098bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 2)] = c_flt_m; 1099bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 1)] = 0; 1100bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 0)] = 0; 1101bf215546Sopenharmony_ci _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128); 1102bf215546Sopenharmony_ci } 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci if (s == c_flt_s) { 1106bf215546Sopenharmony_ci if (exp_diff <= 0) { 1107bf215546Sopenharmony_ci m += c_flt_m; 1108bf215546Sopenharmony_ci } else { 1109bf215546Sopenharmony_ci _mesa_add_m(4, m_128, c_flt_m_128, m_128); 1110bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 1111bf215546Sopenharmony_ci | m_128[index_word(4, 2)]; 1112bf215546Sopenharmony_ci } 1113bf215546Sopenharmony_ci if (m & 0x8000000000000000) { 1114bf215546Sopenharmony_ci e++; 1115bf215546Sopenharmony_ci m = _mesa_short_shift_right_jam64(m, 1); 1116bf215546Sopenharmony_ci } 1117bf215546Sopenharmony_ci } else { 1118bf215546Sopenharmony_ci if (exp_diff < 0) { 1119bf215546Sopenharmony_ci s = c_flt_s; 1120bf215546Sopenharmony_ci if (exp_diff < -1) { 1121bf215546Sopenharmony_ci m = c_flt_m - m; 1122bf215546Sopenharmony_ci if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) { 1123bf215546Sopenharmony_ci m = (m - 1) | 1; 1124bf215546Sopenharmony_ci } 1125bf215546Sopenharmony_ci if (!(m & 0x4000000000000000)) { 1126bf215546Sopenharmony_ci --e; 1127bf215546Sopenharmony_ci m <<= 1; 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e - 1, m); 1130bf215546Sopenharmony_ci } else { 1131bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 1132bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 2)] = c_flt_m; 1133bf215546Sopenharmony_ci c_flt_m_128[index_word(4, 1)] = 0; 1134bf215546Sopenharmony_ci 
c_flt_m_128[index_word(4, 0)] = 0; 1135bf215546Sopenharmony_ci _mesa_sub_m(4, c_flt_m_128, m_128, m_128); 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci } else if (!exp_diff) { 1138bf215546Sopenharmony_ci m -= c_flt_m; 1139bf215546Sopenharmony_ci if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) { 1140bf215546Sopenharmony_ci /* Return zero */ 1141bf215546Sopenharmony_ci di_type result; 1142bf215546Sopenharmony_ci result.u = (s << 63) + 0; 1143bf215546Sopenharmony_ci return result.f; 1144bf215546Sopenharmony_ci } 1145bf215546Sopenharmony_ci m_128[index_word(4, 3)] = m >> 32; 1146bf215546Sopenharmony_ci m_128[index_word(4, 2)] = m; 1147bf215546Sopenharmony_ci if (m & 0x8000000000000000) { 1148bf215546Sopenharmony_ci s = !s; 1149bf215546Sopenharmony_ci _mesa_neg_x_m(4, m_128); 1150bf215546Sopenharmony_ci } 1151bf215546Sopenharmony_ci } else { 1152bf215546Sopenharmony_ci _mesa_sub_m(4, m_128, c_flt_m_128, m_128); 1153bf215546Sopenharmony_ci if (1 < exp_diff) { 1154bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 1155bf215546Sopenharmony_ci | m_128[index_word(4, 2)]; 1156bf215546Sopenharmony_ci if (!(m & 0x4000000000000000)) { 1157bf215546Sopenharmony_ci --e; 1158bf215546Sopenharmony_ci m <<= 1; 1159bf215546Sopenharmony_ci } 1160bf215546Sopenharmony_ci if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1161bf215546Sopenharmony_ci m |= 1; 1162bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e - 1, m); 1163bf215546Sopenharmony_ci } 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci shift_dist = 0; 1167bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 1168bf215546Sopenharmony_ci | m_128[index_word(4, 2)]; 1169bf215546Sopenharmony_ci if (!m) { 1170bf215546Sopenharmony_ci shift_dist = 64; 1171bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 1)] << 32 1172bf215546Sopenharmony_ci | m_128[index_word(4, 0)]; 1173bf215546Sopenharmony_ci } 
1174bf215546Sopenharmony_ci shift_dist += _mesa_count_leading_zeros64(m) - 1; 1175bf215546Sopenharmony_ci if (shift_dist) { 1176bf215546Sopenharmony_ci e -= shift_dist; 1177bf215546Sopenharmony_ci _mesa_shift_left_m(4, m_128, shift_dist, m_128); 1178bf215546Sopenharmony_ci m = (uint64_t) m_128[index_word(4, 3)] << 32 1179bf215546Sopenharmony_ci | m_128[index_word(4, 2)]; 1180bf215546Sopenharmony_ci } 1181bf215546Sopenharmony_ci } 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 1184bf215546Sopenharmony_ci m |= 1; 1185bf215546Sopenharmony_ci return _mesa_roundtozero_f64(s, e - 1, m); 1186bf215546Sopenharmony_ci} 1187bf215546Sopenharmony_ci 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci/** 1190bf215546Sopenharmony_ci * \brief Calculate a * b + c but rounding to zero. 1191bf215546Sopenharmony_ci * 1192bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e 1193bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the 1194bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not 1195bf215546Sopenharmony_ci * important. 
1196bf215546Sopenharmony_ci * 1197bf215546Sopenharmony_ci * From f32_mulAdd() 1198bf215546Sopenharmony_ci */ 1199bf215546Sopenharmony_cifloat 1200bf215546Sopenharmony_ci_mesa_float_fma_rtz(float a, float b, float c) 1201bf215546Sopenharmony_ci{ 1202bf215546Sopenharmony_ci const fi_type a_fi = {a}; 1203bf215546Sopenharmony_ci uint32_t a_flt_m = a_fi.u & 0x07fffff; 1204bf215546Sopenharmony_ci uint32_t a_flt_e = (a_fi.u >> 23) & 0xff; 1205bf215546Sopenharmony_ci uint32_t a_flt_s = (a_fi.u >> 31) & 0x1; 1206bf215546Sopenharmony_ci const fi_type b_fi = {b}; 1207bf215546Sopenharmony_ci uint32_t b_flt_m = b_fi.u & 0x07fffff; 1208bf215546Sopenharmony_ci uint32_t b_flt_e = (b_fi.u >> 23) & 0xff; 1209bf215546Sopenharmony_ci uint32_t b_flt_s = (b_fi.u >> 31) & 0x1; 1210bf215546Sopenharmony_ci const fi_type c_fi = {c}; 1211bf215546Sopenharmony_ci uint32_t c_flt_m = c_fi.u & 0x07fffff; 1212bf215546Sopenharmony_ci uint32_t c_flt_e = (c_fi.u >> 23) & 0xff; 1213bf215546Sopenharmony_ci uint32_t c_flt_s = (c_fi.u >> 31) & 0x1; 1214bf215546Sopenharmony_ci int32_t s, e, m = 0; 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci c_flt_s ^= 0; 1217bf215546Sopenharmony_ci s = a_flt_s ^ b_flt_s ^ 0; 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci if (a_flt_e == 0xff) { 1220bf215546Sopenharmony_ci if (a_flt_m != 0) { 1221bf215546Sopenharmony_ci /* 'a' is a NaN, return NaN */ 1222bf215546Sopenharmony_ci return a; 1223bf215546Sopenharmony_ci } else if (b_flt_e == 0xff && b_flt_m != 0) { 1224bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 1225bf215546Sopenharmony_ci return b; 1226bf215546Sopenharmony_ci } else if (c_flt_e == 0xff && c_flt_m != 0) { 1227bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 1228bf215546Sopenharmony_ci return c; 1229bf215546Sopenharmony_ci } 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci if (!(b_flt_e | b_flt_m)) { 1232bf215546Sopenharmony_ci /* Inf * 0 + y = NaN */ 1233bf215546Sopenharmony_ci fi_type result; 
1234bf215546Sopenharmony_ci e = 0xff; 1235bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0x1; 1236bf215546Sopenharmony_ci return result.f; 1237bf215546Sopenharmony_ci } 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 1240bf215546Sopenharmony_ci /* Inf * x - Inf = NaN */ 1241bf215546Sopenharmony_ci fi_type result; 1242bf215546Sopenharmony_ci e = 0xff; 1243bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0x1; 1244bf215546Sopenharmony_ci return result.f; 1245bf215546Sopenharmony_ci } 1246bf215546Sopenharmony_ci 1247bf215546Sopenharmony_ci /* Inf * x + y = Inf */ 1248bf215546Sopenharmony_ci fi_type result; 1249bf215546Sopenharmony_ci e = 0xff; 1250bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0; 1251bf215546Sopenharmony_ci return result.f; 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci if (b_flt_e == 0xff) { 1255bf215546Sopenharmony_ci if (b_flt_m != 0) { 1256bf215546Sopenharmony_ci /* 'b' is a NaN, return NaN */ 1257bf215546Sopenharmony_ci return b; 1258bf215546Sopenharmony_ci } else if (c_flt_e == 0xff && c_flt_m != 0) { 1259bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 1260bf215546Sopenharmony_ci return c; 1261bf215546Sopenharmony_ci } 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci if (!(a_flt_e | a_flt_m)) { 1264bf215546Sopenharmony_ci /* 0 * Inf + y = NaN */ 1265bf215546Sopenharmony_ci fi_type result; 1266bf215546Sopenharmony_ci e = 0xff; 1267bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0x1; 1268bf215546Sopenharmony_ci return result.f; 1269bf215546Sopenharmony_ci } 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 1272bf215546Sopenharmony_ci /* x * Inf - Inf = NaN */ 1273bf215546Sopenharmony_ci fi_type result; 1274bf215546Sopenharmony_ci e = 0xff; 1275bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0x1; 
1276bf215546Sopenharmony_ci return result.f; 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci /* x * Inf + y = Inf */ 1280bf215546Sopenharmony_ci fi_type result; 1281bf215546Sopenharmony_ci e = 0xff; 1282bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + 0; 1283bf215546Sopenharmony_ci return result.f; 1284bf215546Sopenharmony_ci } 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci if (c_flt_e == 0xff) { 1287bf215546Sopenharmony_ci if (c_flt_m != 0) { 1288bf215546Sopenharmony_ci /* 'c' is a NaN, return NaN */ 1289bf215546Sopenharmony_ci return c; 1290bf215546Sopenharmony_ci } 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_ci /* x * y + Inf = Inf */ 1293bf215546Sopenharmony_ci return c; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci if (a_flt_e == 0) { 1297bf215546Sopenharmony_ci if (a_flt_m == 0) { 1298bf215546Sopenharmony_ci /* 'a' is zero, return 'c' */ 1299bf215546Sopenharmony_ci return c; 1300bf215546Sopenharmony_ci } 1301bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m); 1302bf215546Sopenharmony_ci } 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci if (b_flt_e == 0) { 1305bf215546Sopenharmony_ci if (b_flt_m == 0) { 1306bf215546Sopenharmony_ci /* 'b' is zero, return 'c' */ 1307bf215546Sopenharmony_ci return c; 1308bf215546Sopenharmony_ci } 1309bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m); 1310bf215546Sopenharmony_ci } 1311bf215546Sopenharmony_ci 1312bf215546Sopenharmony_ci e = a_flt_e + b_flt_e - 0x7e; 1313bf215546Sopenharmony_ci a_flt_m = (a_flt_m | 0x00800000) << 7; 1314bf215546Sopenharmony_ci b_flt_m = (b_flt_m | 0x00800000) << 7; 1315bf215546Sopenharmony_ci 1316bf215546Sopenharmony_ci uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m; 1317bf215546Sopenharmony_ci if (m_64 < 0x2000000000000000) { 1318bf215546Sopenharmony_ci --e; 1319bf215546Sopenharmony_ci m_64 <<= 1; 
1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci if (c_flt_e == 0) { 1323bf215546Sopenharmony_ci if (c_flt_m == 0) { 1324bf215546Sopenharmony_ci /* 'c' is zero, return 'a * b' */ 1325bf215546Sopenharmony_ci m = _mesa_short_shift_right_jam64(m_64, 31); 1326bf215546Sopenharmony_ci return _mesa_round_f32(s, e - 1, m, true); 1327bf215546Sopenharmony_ci } 1328bf215546Sopenharmony_ci _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m); 1329bf215546Sopenharmony_ci } 1330bf215546Sopenharmony_ci c_flt_m = (c_flt_m | 0x00800000) << 6; 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci int16_t exp_diff = e - c_flt_e; 1333bf215546Sopenharmony_ci if (s == c_flt_s) { 1334bf215546Sopenharmony_ci if (exp_diff <= 0) { 1335bf215546Sopenharmony_ci e = c_flt_e; 1336bf215546Sopenharmony_ci m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff); 1337bf215546Sopenharmony_ci } else { 1338bf215546Sopenharmony_ci m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff); 1339bf215546Sopenharmony_ci m = _mesa_short_shift_right_jam64(m_64, 32); 1340bf215546Sopenharmony_ci } 1341bf215546Sopenharmony_ci if (m < 0x40000000) { 1342bf215546Sopenharmony_ci --e; 1343bf215546Sopenharmony_ci m <<= 1; 1344bf215546Sopenharmony_ci } 1345bf215546Sopenharmony_ci } else { 1346bf215546Sopenharmony_ci uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32; 1347bf215546Sopenharmony_ci if (exp_diff < 0) { 1348bf215546Sopenharmony_ci s = c_flt_s; 1349bf215546Sopenharmony_ci e = c_flt_e; 1350bf215546Sopenharmony_ci m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff); 1351bf215546Sopenharmony_ci } else if (!exp_diff) { 1352bf215546Sopenharmony_ci m_64 -= c_flt_m_64; 1353bf215546Sopenharmony_ci if (!m_64) { 1354bf215546Sopenharmony_ci /* Return zero */ 1355bf215546Sopenharmony_ci fi_type result; 1356bf215546Sopenharmony_ci result.u = (s << 31) + 0; 1357bf215546Sopenharmony_ci return result.f; 1358bf215546Sopenharmony_ci } 
1359bf215546Sopenharmony_ci if (m_64 & 0x8000000000000000) { 1360bf215546Sopenharmony_ci s = !s; 1361bf215546Sopenharmony_ci m_64 = -m_64; 1362bf215546Sopenharmony_ci } 1363bf215546Sopenharmony_ci } else { 1364bf215546Sopenharmony_ci m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff); 1365bf215546Sopenharmony_ci } 1366bf215546Sopenharmony_ci int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1; 1367bf215546Sopenharmony_ci e -= shift_dist; 1368bf215546Sopenharmony_ci shift_dist -= 32; 1369bf215546Sopenharmony_ci if (shift_dist < 0) { 1370bf215546Sopenharmony_ci m = _mesa_short_shift_right_jam64(m_64, -shift_dist); 1371bf215546Sopenharmony_ci } else { 1372bf215546Sopenharmony_ci m = (uint32_t) m_64 << shift_dist; 1373bf215546Sopenharmony_ci } 1374bf215546Sopenharmony_ci } 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci return _mesa_round_f32(s, e, m, true); 1377bf215546Sopenharmony_ci} 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci 1380bf215546Sopenharmony_ci/** 1381bf215546Sopenharmony_ci * \brief Converts from 64bits to 32bits float and rounds according to 1382bf215546Sopenharmony_ci * instructed. 
1383bf215546Sopenharmony_ci * 1384bf215546Sopenharmony_ci * From f64_to_f32() 1385bf215546Sopenharmony_ci */ 1386bf215546Sopenharmony_cifloat 1387bf215546Sopenharmony_ci_mesa_double_to_f32(double val, bool rtz) 1388bf215546Sopenharmony_ci{ 1389bf215546Sopenharmony_ci const di_type di = {val}; 1390bf215546Sopenharmony_ci uint64_t flt_m = di.u & 0x0fffffffffffff; 1391bf215546Sopenharmony_ci uint64_t flt_e = (di.u >> 52) & 0x7ff; 1392bf215546Sopenharmony_ci uint64_t flt_s = (di.u >> 63) & 0x1; 1393bf215546Sopenharmony_ci int32_t s, e, m = 0; 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci s = flt_s; 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci if (flt_e == 0x7ff) { 1398bf215546Sopenharmony_ci if (flt_m != 0) { 1399bf215546Sopenharmony_ci /* 'val' is a NaN, return NaN */ 1400bf215546Sopenharmony_ci fi_type result; 1401bf215546Sopenharmony_ci e = 0xff; 1402bf215546Sopenharmony_ci m = 0x1; 1403bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 1404bf215546Sopenharmony_ci return result.f; 1405bf215546Sopenharmony_ci } 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci /* 'val' is Inf, return Inf */ 1408bf215546Sopenharmony_ci fi_type result; 1409bf215546Sopenharmony_ci e = 0xff; 1410bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 1411bf215546Sopenharmony_ci return result.f; 1412bf215546Sopenharmony_ci } 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci if (!(flt_e | flt_m)) { 1415bf215546Sopenharmony_ci /* 'val' is zero, return zero */ 1416bf215546Sopenharmony_ci fi_type result; 1417bf215546Sopenharmony_ci e = 0; 1418bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 1419bf215546Sopenharmony_ci return result.f; 1420bf215546Sopenharmony_ci } 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci m = _mesa_short_shift_right_jam64(flt_m, 22); 1423bf215546Sopenharmony_ci if ( ! 
(flt_e | m) ) { 1424bf215546Sopenharmony_ci /* 'val' is denorm, return zero */ 1425bf215546Sopenharmony_ci fi_type result; 1426bf215546Sopenharmony_ci e = 0; 1427bf215546Sopenharmony_ci result.u = (s << 31) + (e << 23) + m; 1428bf215546Sopenharmony_ci return result.f; 1429bf215546Sopenharmony_ci } 1430bf215546Sopenharmony_ci 1431bf215546Sopenharmony_ci return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz); 1432bf215546Sopenharmony_ci} 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci/** 1436bf215546Sopenharmony_ci * \brief Converts from 32bits to 16bits float and rounds the result to zero. 1437bf215546Sopenharmony_ci * 1438bf215546Sopenharmony_ci * From f32_to_f16() 1439bf215546Sopenharmony_ci */ 1440bf215546Sopenharmony_ciuint16_t 1441bf215546Sopenharmony_ci_mesa_float_to_half_rtz_slow(float val) 1442bf215546Sopenharmony_ci{ 1443bf215546Sopenharmony_ci const fi_type fi = {val}; 1444bf215546Sopenharmony_ci const uint32_t flt_m = fi.u & 0x7fffff; 1445bf215546Sopenharmony_ci const uint32_t flt_e = (fi.u >> 23) & 0xff; 1446bf215546Sopenharmony_ci const uint32_t flt_s = (fi.u >> 31) & 0x1; 1447bf215546Sopenharmony_ci int16_t s, e, m = 0; 1448bf215546Sopenharmony_ci 1449bf215546Sopenharmony_ci s = flt_s; 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci if (flt_e == 0xff) { 1452bf215546Sopenharmony_ci if (flt_m != 0) { 1453bf215546Sopenharmony_ci /* 'val' is a NaN, return NaN */ 1454bf215546Sopenharmony_ci e = 0x1f; 1455bf215546Sopenharmony_ci /* Retain the top bits of a NaN to make sure that the quiet/signaling 1456bf215546Sopenharmony_ci * status stays the same. 
1457bf215546Sopenharmony_ci */ 1458bf215546Sopenharmony_ci m = flt_m >> 13; 1459bf215546Sopenharmony_ci if (!m) 1460bf215546Sopenharmony_ci m = 1; 1461bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m; 1462bf215546Sopenharmony_ci } 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci /* 'val' is Inf, return Inf */ 1465bf215546Sopenharmony_ci e = 0x1f; 1466bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m; 1467bf215546Sopenharmony_ci } 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci if (!(flt_e | flt_m)) { 1470bf215546Sopenharmony_ci /* 'val' is zero, return zero */ 1471bf215546Sopenharmony_ci e = 0; 1472bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m; 1473bf215546Sopenharmony_ci } 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci m = flt_m >> 9 | ((flt_m & 0x1ff) != 0); 1476bf215546Sopenharmony_ci if ( ! (flt_e | m) ) { 1477bf215546Sopenharmony_ci /* 'val' is denorm, return zero */ 1478bf215546Sopenharmony_ci e = 0; 1479bf215546Sopenharmony_ci return (s << 15) + (e << 10) + m; 1480bf215546Sopenharmony_ci } 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000); 1483bf215546Sopenharmony_ci} 1484