1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * License for Berkeley SoftFloat Release 3e
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * John R. Hauser
5bf215546Sopenharmony_ci * 2018 January 20
6bf215546Sopenharmony_ci *
7bf215546Sopenharmony_ci * The following applies to the whole of SoftFloat Release 3e as well as to
8bf215546Sopenharmony_ci * each source file individually.
9bf215546Sopenharmony_ci *
10bf215546Sopenharmony_ci * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
11bf215546Sopenharmony_ci * University of California.  All rights reserved.
12bf215546Sopenharmony_ci *
13bf215546Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
14bf215546Sopenharmony_ci * modification, are permitted provided that the following conditions are met:
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci *  1. Redistributions of source code must retain the above copyright notice,
17bf215546Sopenharmony_ci *     this list of conditions, and the following disclaimer.
18bf215546Sopenharmony_ci *
19bf215546Sopenharmony_ci *  2. Redistributions in binary form must reproduce the above copyright
20bf215546Sopenharmony_ci *     notice, this list of conditions, and the following disclaimer in the
21bf215546Sopenharmony_ci *     documentation and/or other materials provided with the distribution.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci *  3. Neither the name of the University nor the names of its contributors
24bf215546Sopenharmony_ci *     may be used to endorse or promote products derived from this software
25bf215546Sopenharmony_ci *     without specific prior written permission.
26bf215546Sopenharmony_ci *
27bf215546Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
28bf215546Sopenharmony_ci * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29bf215546Sopenharmony_ci * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
30bf215546Sopenharmony_ci * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
31bf215546Sopenharmony_ci * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32bf215546Sopenharmony_ci * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33bf215546Sopenharmony_ci * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
34bf215546Sopenharmony_ci * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35bf215546Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
36bf215546Sopenharmony_ci * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37bf215546Sopenharmony_ci *
38bf215546Sopenharmony_ci *
39bf215546Sopenharmony_ci * The functions listed in this file are modified versions of the ones
40bf215546Sopenharmony_ci * from the Berkeley SoftFloat 3e Library.
41bf215546Sopenharmony_ci *
42bf215546Sopenharmony_ci * Their implementation correctness has been checked with the Berkeley
43bf215546Sopenharmony_ci * TestFloat Release 3e tool for x86_64.
44bf215546Sopenharmony_ci */
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci#include "rounding.h"
47bf215546Sopenharmony_ci#include "bitscan.h"
48bf215546Sopenharmony_ci#include "softfloat.h"
49bf215546Sopenharmony_ci
/*
 * Index helpers for multi-word integers stored as arrays of 32-bit words.
 * 'total' is the number of words in the array and 'n' a word count or word
 * position; every macro yields an array index (or a start offset).
 *
 *   word_incr                         step that moves an index or pointer from
 *                                     one word to the next more-significant one
 *   index_word(total, n)              index of the word with significance n
 *                                     (n == 0 is the least-significant word)
 *   index_word_hi(total)              index of the most-significant word
 *   index_word_lo(total)              index of the least-significant word
 *   index_multiword_hi(total, n)      start offset of the n most-significant
 *                                     words
 *   index_multiword_lo(total, n)      start offset of the n least-significant
 *                                     words
 *   index_multiword_hi_but(total, n)  start offset of all words except the n
 *                                     least-significant ones
 *   index_multiword_lo_but(total, n)  start offset of all words except the n
 *                                     most-significant ones
 *
 * NOTE(review): some C libraries define BIG_ENDIAN unconditionally in
 * <endian.h> as a byte-order constant; confirm that this test only fires on
 * big-endian builds.  The helpers below only use these macros to lay out
 * their own word arrays, so either choice is self-consistent.
 */
#if defined(BIG_ENDIAN)
/* Most-significant word first: significance grows toward lower indices. */
#define word_incr (-1)
#define index_word(total, n) ((total) - 1 - (n))
#define index_word_hi(total) 0
#define index_word_lo(total) ((total) - 1)
#define index_multiword_hi(total, n) 0
#define index_multiword_lo(total, n) ((total) - (n))
#define index_multiword_hi_but(total, n) 0
#define index_multiword_lo_but(total, n) (n)
#else
/* Least-significant word first: significance grows toward higher indices. */
#define word_incr 1
#define index_word(total, n) (n)
#define index_word_hi(total) ((total) - 1)
#define index_word_lo(total) 0
#define index_multiword_hi(total, n) ((total) - (n))
#define index_multiword_lo(total, n) 0
#define index_multiword_hi_but(total, n) (n)
#define index_multiword_lo_but(total, n) 0
#endif
69bf215546Sopenharmony_ci
/* Overlays a double with signed and unsigned 64-bit views of its storage. */
typedef union {
    double f;
    int64_t i;
    uint64_t u;
} di_type;

/* Overlays a float with signed and unsigned 32-bit views of its storage. */
typedef union {
    float f;
    int32_t i;
    uint32_t u;
} fi_type;
72bf215546Sopenharmony_ci
/*
 * Lookup table: entry 'v' is the number of leading zero bits in the 8-bit
 * value 'v'.  Entry 0 is 8.
 */
const uint8_t count_leading_zeros8[256] = {
    /* 0x00 - 0x0f */
    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x10 - 0x1f: three leading zeros */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 - 0x3f: two leading zeros */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x40 - 0x7f: one leading zero */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 - 0xff: top bit set, no leading zeros */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
91bf215546Sopenharmony_ci
/**
 * \brief Right-shifts 'a' by 'dist' bits with a sticky least-significant bit.
 *
 * 'dist' must be in the range 1 to 63.  If any of the 'dist' bits shifted out
 * is nonzero, bit 0 of the result is forced to 1 ("jammed"), so the result
 * still records that precision was lost.
 *
 * From softfloat_shortShiftRightJam64()
 */
static inline
uint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist)
{
    /* Mask selecting exactly the bits that the shift discards. */
    const uint64_t dropped_mask = ((uint64_t) 1 << dist) - 1;
    const uint64_t sticky = (a & dropped_mask) != 0;

    return (a >> dist) | sticky;
}
105bf215546Sopenharmony_ci
/**
 * \brief Right-shifts 'a' by 'dist' bits with a sticky least-significant bit,
 * for any nonzero 'dist'.
 *
 * If any bit shifted out is nonzero, bit 0 of the result is set to 1.
 * 'dist' must not be zero but may be arbitrarily large: from 63 upward the
 * result is simply 1 when 'a' is nonzero and 0 otherwise.
 *
 * From softfloat_shiftRightJam64()
 */
static inline
uint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist)
{
    if (dist >= 63)
        return a != 0;

    /* Shifting left by (64 - dist) isolates the bits the right shift drops. */
    const uint64_t kept = a >> dist;
    const uint64_t dropped = a << (-dist & 63);

    return kept | (dropped != 0);
}
123bf215546Sopenharmony_ci
/**
 * \brief Right-shifts the 32-bit value 'a' by 'dist' bits with a sticky
 * least-significant bit, for any nonzero 'dist'.
 *
 * If any bit shifted out is nonzero, bit 0 of the result is set to 1.
 * 'dist' must not be zero but may be arbitrarily large: from 31 upward the
 * result is simply 1 when 'a' is nonzero and 0 otherwise.
 *
 * From softfloat_shiftRightJam32()
 */
static inline
uint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist)
{
    if (dist >= 31)
        return a != 0;

    /* Shifting left by (32 - dist) isolates the bits the right shift drops. */
    const uint32_t kept = a >> dist;
    const uint32_t dropped = a << (-dist & 31);

    return kept | (dropped != 0);
}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci/**
143bf215546Sopenharmony_ci * \brief Extracted from softfloat_roundPackToF64()
144bf215546Sopenharmony_ci */
145bf215546Sopenharmony_cistatic inline
146bf215546Sopenharmony_cidouble _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m)
147bf215546Sopenharmony_ci{
148bf215546Sopenharmony_ci    di_type result;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci    if ((uint64_t) e >= 0x7fd) {
151bf215546Sopenharmony_ci        if (e < 0) {
152bf215546Sopenharmony_ci            m = _mesa_shift_right_jam64(m, -e);
153bf215546Sopenharmony_ci            e = 0;
154bf215546Sopenharmony_ci        } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) {
155bf215546Sopenharmony_ci            e = 0x7ff;
156bf215546Sopenharmony_ci            m = 0;
157bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + m;
158bf215546Sopenharmony_ci            result.u -= 1;
159bf215546Sopenharmony_ci            return result.f;
160bf215546Sopenharmony_ci        }
161bf215546Sopenharmony_ci    }
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci    m >>= 10;
164bf215546Sopenharmony_ci    if (m == 0)
165bf215546Sopenharmony_ci        e = 0;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci    result.u = (s << 63) + (e << 52) + m;
168bf215546Sopenharmony_ci    return result.f;
169bf215546Sopenharmony_ci}
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci/**
172bf215546Sopenharmony_ci * \brief Extracted from softfloat_roundPackToF32()
173bf215546Sopenharmony_ci */
174bf215546Sopenharmony_cistatic inline
175bf215546Sopenharmony_cifloat _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz)
176bf215546Sopenharmony_ci{
177bf215546Sopenharmony_ci    fi_type result;
178bf215546Sopenharmony_ci    uint8_t round_increment = rtz ? 0 : 0x40;
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci    if ((uint32_t) e >= 0xfd) {
181bf215546Sopenharmony_ci        if (e < 0) {
182bf215546Sopenharmony_ci            m = _mesa_shift_right_jam32(m, -e);
183bf215546Sopenharmony_ci            e = 0;
184bf215546Sopenharmony_ci        } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) {
185bf215546Sopenharmony_ci            e = 0xff;
186bf215546Sopenharmony_ci            m = 0;
187bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + m;
188bf215546Sopenharmony_ci            result.u -= !round_increment;
189bf215546Sopenharmony_ci            return result.f;
190bf215546Sopenharmony_ci        }
191bf215546Sopenharmony_ci    }
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci    uint8_t round_bits;
194bf215546Sopenharmony_ci    round_bits = m & 0x7f;
195bf215546Sopenharmony_ci    m = ((uint32_t) m + round_increment) >> 7;
196bf215546Sopenharmony_ci    m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz);
197bf215546Sopenharmony_ci    if (m == 0)
198bf215546Sopenharmony_ci        e = 0;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci    result.u = (s << 31) + (e << 23) + m;
201bf215546Sopenharmony_ci    return result.f;
202bf215546Sopenharmony_ci}
203bf215546Sopenharmony_ci
/**
 * \brief Packs a sign, exponent and significand into half-float bits,
 * rounding toward zero.
 *
 * \param s  sign (0 or 1); placed in bit 15 of the result
 * \param e  exponent; added into the exponent field starting at bit 10
 * \param m  significand with 4 extra low-order bits, which are truncated
 *
 * A negative 'e' shifts 'm' right by '-e' bits (sticky) and packs it with a
 * zero exponent field.  An exponent above 0x1d yields the largest finite
 * magnitude (the infinity pattern minus one).
 *
 * Extracted from softfloat_roundPackToF16()
 */
static inline
uint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m)
{
    if (e < 0) {
        m = _mesa_shift_right_jam32(m, -e);
        e = 0;
    } else if (e > 0x1d) {
        /* Overflow: one below the infinity bit pattern. */
        return (s << 15) + (0x1f << 10) - 1;
    }

    /* Dropping the 4 extra bits is the round-toward-zero step. */
    const int16_t frac = m >> 4;
    const int exp_field = frac ? e : 0;

    return (s << 15) + (exp_field << 10) + frac;
}
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci/**
229bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
230bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
231bf215546Sopenharmony_ci * must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
232bf215546Sopenharmony_ci * shifted N-bit result is stored at the location pointed to by 'm_out'.  Each
233bf215546Sopenharmony_ci * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
234bf215546Sopenharmony_ci * that concatenate in the platform's normal endian order to form an N-bit
235bf215546Sopenharmony_ci * integer.
236bf215546Sopenharmony_ci *
237bf215546Sopenharmony_ci * From softfloat_shortShiftLeftM()
238bf215546Sopenharmony_ci */
239bf215546Sopenharmony_cistatic inline void
240bf215546Sopenharmony_ci_mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
241bf215546Sopenharmony_ci{
242bf215546Sopenharmony_ci    uint8_t neg_dist;
243bf215546Sopenharmony_ci    unsigned index, last_index;
244bf215546Sopenharmony_ci    uint32_t part_word, a_word;
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci    neg_dist = -dist;
247bf215546Sopenharmony_ci    index = index_word_hi(size_words);
248bf215546Sopenharmony_ci    last_index = index_word_lo(size_words);
249bf215546Sopenharmony_ci    part_word = a[index] << dist;
250bf215546Sopenharmony_ci    while (index != last_index) {
251bf215546Sopenharmony_ci        a_word = a[index - word_incr];
252bf215546Sopenharmony_ci        m_out[index] = part_word | a_word >> (neg_dist & 31);
253bf215546Sopenharmony_ci        index -= word_incr;
254bf215546Sopenharmony_ci        part_word = a_word << dist;
255bf215546Sopenharmony_ci    }
256bf215546Sopenharmony_ci    m_out[index] = part_word;
257bf215546Sopenharmony_ci}
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci/**
260bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
261bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
262bf215546Sopenharmony_ci * must not be zero.  Any nonzero bits shifted off are lost.  The shifted
263bf215546Sopenharmony_ci * N-bit result is stored at the location pointed to by 'm_out'.  Each of 'a'
264bf215546Sopenharmony_ci * and 'm_out' points to a 'size_words'-long array of 32-bit elements that
265bf215546Sopenharmony_ci * concatenate in the platform's normal endian order to form an N-bit
266bf215546Sopenharmony_ci * integer. The value of 'dist' can be arbitrarily large.  In particular, if
267bf215546Sopenharmony_ci * 'dist' is greater than N, the stored result will be 0.
268bf215546Sopenharmony_ci *
269bf215546Sopenharmony_ci * From softfloat_shiftLeftM()
270bf215546Sopenharmony_ci */
271bf215546Sopenharmony_cistatic inline void
272bf215546Sopenharmony_ci_mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out)
273bf215546Sopenharmony_ci{
274bf215546Sopenharmony_ci    uint32_t word_dist;
275bf215546Sopenharmony_ci    uint8_t inner_dist;
276bf215546Sopenharmony_ci    uint8_t i;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci    word_dist = dist >> 5;
279bf215546Sopenharmony_ci    if (word_dist < size_words) {
280bf215546Sopenharmony_ci        a += index_multiword_lo_but(size_words, word_dist);
281bf215546Sopenharmony_ci        inner_dist = dist & 31;
282bf215546Sopenharmony_ci        if (inner_dist) {
283bf215546Sopenharmony_ci            _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist,
284bf215546Sopenharmony_ci                                     m_out + index_multiword_hi_but(size_words, word_dist));
285bf215546Sopenharmony_ci            if (!word_dist)
286bf215546Sopenharmony_ci                return;
287bf215546Sopenharmony_ci        } else {
288bf215546Sopenharmony_ci            uint32_t *dest = m_out + index_word_hi(size_words);
289bf215546Sopenharmony_ci            a += index_word_hi(size_words - word_dist);
290bf215546Sopenharmony_ci            for (i = size_words - word_dist; i; --i) {
291bf215546Sopenharmony_ci                *dest = *a;
292bf215546Sopenharmony_ci                a -= word_incr;
293bf215546Sopenharmony_ci                dest -= word_incr;
294bf215546Sopenharmony_ci            }
295bf215546Sopenharmony_ci        }
296bf215546Sopenharmony_ci        m_out += index_multiword_lo(size_words, word_dist);
297bf215546Sopenharmony_ci    } else {
298bf215546Sopenharmony_ci        word_dist = size_words;
299bf215546Sopenharmony_ci    }
300bf215546Sopenharmony_ci    do {
301bf215546Sopenharmony_ci        *m_out++ = 0;
302bf215546Sopenharmony_ci        --word_dist;
303bf215546Sopenharmony_ci    } while (word_dist);
304bf215546Sopenharmony_ci}
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci/**
307bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
308bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
309bf215546Sopenharmony_ci * must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
310bf215546Sopenharmony_ci * shifted N-bit result is stored at the location pointed to by 'm_out'.  Each
311bf215546Sopenharmony_ci * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
312bf215546Sopenharmony_ci * that concatenate in the platform's normal endian order to form an N-bit
313bf215546Sopenharmony_ci * integer.
314bf215546Sopenharmony_ci *
315bf215546Sopenharmony_ci * From softfloat_shortShiftRightM()
316bf215546Sopenharmony_ci */
317bf215546Sopenharmony_cistatic inline void
318bf215546Sopenharmony_ci_mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
319bf215546Sopenharmony_ci{
320bf215546Sopenharmony_ci    uint8_t neg_dist;
321bf215546Sopenharmony_ci    unsigned index, last_index;
322bf215546Sopenharmony_ci    uint32_t part_word, a_word;
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci    neg_dist = -dist;
325bf215546Sopenharmony_ci    index = index_word_lo(size_words);
326bf215546Sopenharmony_ci    last_index = index_word_hi(size_words);
327bf215546Sopenharmony_ci    part_word = a[index] >> dist;
328bf215546Sopenharmony_ci    while (index != last_index) {
329bf215546Sopenharmony_ci        a_word = a[index + word_incr];
330bf215546Sopenharmony_ci        m_out[index] = a_word << (neg_dist & 31) | part_word;
331bf215546Sopenharmony_ci        index += word_incr;
332bf215546Sopenharmony_ci        part_word = a_word >> dist;
333bf215546Sopenharmony_ci    }
334bf215546Sopenharmony_ci    m_out[index] = part_word;
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci/**
338bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
339bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
340bf215546Sopenharmony_ci * must be in the range 1 to 31.  If any nonzero bits are shifted off, they
341bf215546Sopenharmony_ci * are "jammed" into the least-significant bit of the shifted value by setting
342bf215546Sopenharmony_ci * the least-significant bit to 1.  This shifted-and-jammed N-bit result is
343bf215546Sopenharmony_ci * stored at the location pointed to by 'm_out'.  Each of 'a' and 'm_out'
344bf215546Sopenharmony_ci * points to a 'size_words'-long array of 32-bit elements that concatenate in
345bf215546Sopenharmony_ci * the platform's normal endian order to form an N-bit integer.
346bf215546Sopenharmony_ci *
347bf215546Sopenharmony_ci *
348bf215546Sopenharmony_ci * From softfloat_shortShiftRightJamM()
349bf215546Sopenharmony_ci */
350bf215546Sopenharmony_cistatic inline void
351bf215546Sopenharmony_ci_mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
352bf215546Sopenharmony_ci{
353bf215546Sopenharmony_ci    uint8_t neg_dist;
354bf215546Sopenharmony_ci    unsigned index, last_index;
355bf215546Sopenharmony_ci    uint64_t part_word, a_word;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci    neg_dist = -dist;
358bf215546Sopenharmony_ci    index = index_word_lo(size_words);
359bf215546Sopenharmony_ci    last_index = index_word_hi(size_words);
360bf215546Sopenharmony_ci    a_word = a[index];
361bf215546Sopenharmony_ci    part_word = a_word >> dist;
362bf215546Sopenharmony_ci    if (part_word << dist != a_word )
363bf215546Sopenharmony_ci        part_word |= 1;
364bf215546Sopenharmony_ci    while (index != last_index) {
365bf215546Sopenharmony_ci        a_word = a[index + word_incr];
366bf215546Sopenharmony_ci        m_out[index] = a_word << (neg_dist & 31) | part_word;
367bf215546Sopenharmony_ci        index += word_incr;
368bf215546Sopenharmony_ci        part_word = a_word >> dist;
369bf215546Sopenharmony_ci    }
370bf215546Sopenharmony_ci    m_out[index] = part_word;
371bf215546Sopenharmony_ci}
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci/**
374bf215546Sopenharmony_ci * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
375bf215546Sopenharmony_ci * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
376bf215546Sopenharmony_ci * must not be zero.  If any nonzero bits are shifted off, they are "jammed"
377bf215546Sopenharmony_ci * into the least-significant bit of the shifted value by setting the
378bf215546Sopenharmony_ci * least-significant bit to 1.  This shifted-and-jammed N-bit result is stored
379bf215546Sopenharmony_ci * at the location pointed to by 'm_out'.  Each of 'a' and 'm_out' points to a
380bf215546Sopenharmony_ci * 'size_words'-long array of 32-bit elements that concatenate in the
381bf215546Sopenharmony_ci * platform's normal endian order to form an N-bit integer.  The value of
382bf215546Sopenharmony_ci * 'dist' can be arbitrarily large.  In particular, if 'dist' is greater than
383bf215546Sopenharmony_ci * N, the stored result will be either 0 or 1, depending on whether the
384bf215546Sopenharmony_ci * original N bits are all zeros.
385bf215546Sopenharmony_ci *
386bf215546Sopenharmony_ci * From softfloat_shiftRightJamM()
387bf215546Sopenharmony_ci */
388bf215546Sopenharmony_cistatic inline void
389bf215546Sopenharmony_ci_mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out)
390bf215546Sopenharmony_ci{
391bf215546Sopenharmony_ci    uint32_t word_jam, word_dist, *tmp;
392bf215546Sopenharmony_ci    uint8_t i, inner_dist;
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci    word_jam = 0;
395bf215546Sopenharmony_ci    word_dist = dist >> 5;
396bf215546Sopenharmony_ci    tmp = NULL;
397bf215546Sopenharmony_ci    if (word_dist) {
398bf215546Sopenharmony_ci        if (size_words < word_dist)
399bf215546Sopenharmony_ci            word_dist = size_words;
400bf215546Sopenharmony_ci        tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist));
401bf215546Sopenharmony_ci        i = word_dist;
402bf215546Sopenharmony_ci        do {
403bf215546Sopenharmony_ci            word_jam = *tmp++;
404bf215546Sopenharmony_ci            if (word_jam)
405bf215546Sopenharmony_ci                break;
406bf215546Sopenharmony_ci            --i;
407bf215546Sopenharmony_ci        } while (i);
408bf215546Sopenharmony_ci        tmp = m_out;
409bf215546Sopenharmony_ci    }
410bf215546Sopenharmony_ci    if (word_dist < size_words) {
411bf215546Sopenharmony_ci        a += index_multiword_hi_but(size_words, word_dist);
412bf215546Sopenharmony_ci        inner_dist = dist & 31;
413bf215546Sopenharmony_ci        if (inner_dist) {
414bf215546Sopenharmony_ci            _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist,
415bf215546Sopenharmony_ci                                          m_out + index_multiword_lo_but(size_words, word_dist));
416bf215546Sopenharmony_ci            if (!word_dist) {
417bf215546Sopenharmony_ci                if (word_jam)
418bf215546Sopenharmony_ci                    m_out[index_word_lo(size_words)] |= 1;
419bf215546Sopenharmony_ci                return;
420bf215546Sopenharmony_ci            }
421bf215546Sopenharmony_ci        } else {
422bf215546Sopenharmony_ci            a += index_word_lo(size_words - word_dist);
423bf215546Sopenharmony_ci            tmp = m_out + index_word_lo(size_words);
424bf215546Sopenharmony_ci            for (i = size_words - word_dist; i; --i) {
425bf215546Sopenharmony_ci                *tmp = *a;
426bf215546Sopenharmony_ci                a += word_incr;
427bf215546Sopenharmony_ci                tmp += word_incr;
428bf215546Sopenharmony_ci            }
429bf215546Sopenharmony_ci        }
430bf215546Sopenharmony_ci        tmp = m_out + index_multiword_hi(size_words, word_dist);
431bf215546Sopenharmony_ci    }
432bf215546Sopenharmony_ci    if (tmp) {
433bf215546Sopenharmony_ci       do {
434bf215546Sopenharmony_ci           *tmp++ = 0;
435bf215546Sopenharmony_ci           --word_dist;
436bf215546Sopenharmony_ci       } while (word_dist);
437bf215546Sopenharmony_ci    }
438bf215546Sopenharmony_ci    if (word_jam)
439bf215546Sopenharmony_ci        m_out[index_word_lo(size_words)] |= 1;
440bf215546Sopenharmony_ci}
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci/**
443bf215546Sopenharmony_ci * \brief Calculate a + b but rounding to zero.
444bf215546Sopenharmony_ci *
445bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e
446bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the
447bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not
448bf215546Sopenharmony_ci * important.
449bf215546Sopenharmony_ci *
450bf215546Sopenharmony_ci * From f64_add()
451bf215546Sopenharmony_ci */
452bf215546Sopenharmony_cidouble
453bf215546Sopenharmony_ci_mesa_double_add_rtz(double a, double b)
454bf215546Sopenharmony_ci{
455bf215546Sopenharmony_ci    const di_type a_di = {a};
456bf215546Sopenharmony_ci    uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
457bf215546Sopenharmony_ci    uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
458bf215546Sopenharmony_ci    uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
459bf215546Sopenharmony_ci    const di_type b_di = {b};
460bf215546Sopenharmony_ci    uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
461bf215546Sopenharmony_ci    uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
462bf215546Sopenharmony_ci    uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
463bf215546Sopenharmony_ci    int64_t s, e, m = 0;
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci    s = a_flt_s;
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci    const int64_t exp_diff = a_flt_e - b_flt_e;
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci    /* Handle special cases */
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci    if (a_flt_s != b_flt_s) {
472bf215546Sopenharmony_ci        return _mesa_double_sub_rtz(a, -b);
473bf215546Sopenharmony_ci    } else if ((a_flt_e == 0) && (a_flt_m == 0)) {
474bf215546Sopenharmony_ci        /* 'a' is zero, return 'b' */
475bf215546Sopenharmony_ci        return b;
476bf215546Sopenharmony_ci    } else if ((b_flt_e == 0) && (b_flt_m == 0)) {
477bf215546Sopenharmony_ci        /* 'b' is zero, return 'a' */
478bf215546Sopenharmony_ci        return a;
479bf215546Sopenharmony_ci    } else if (a_flt_e == 0x7ff && a_flt_m != 0) {
480bf215546Sopenharmony_ci        /* 'a' is a NaN, return NaN */
481bf215546Sopenharmony_ci        return a;
482bf215546Sopenharmony_ci    } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
483bf215546Sopenharmony_ci        /* 'b' is a NaN, return NaN */
484bf215546Sopenharmony_ci        return b;
485bf215546Sopenharmony_ci    } else if (a_flt_e == 0x7ff && a_flt_m == 0) {
486bf215546Sopenharmony_ci        /* Inf + x = Inf */
487bf215546Sopenharmony_ci        return a;
488bf215546Sopenharmony_ci    } else if (b_flt_e == 0x7ff && b_flt_m == 0) {
489bf215546Sopenharmony_ci        /* x + Inf = Inf */
490bf215546Sopenharmony_ci        return b;
491bf215546Sopenharmony_ci    } else if (exp_diff == 0 && a_flt_e == 0) {
492bf215546Sopenharmony_ci        di_type result_di;
493bf215546Sopenharmony_ci        result_di.u = a_di.u + b_flt_m;
494bf215546Sopenharmony_ci        return result_di.f;
495bf215546Sopenharmony_ci    } else if (exp_diff == 0) {
496bf215546Sopenharmony_ci        e = a_flt_e;
497bf215546Sopenharmony_ci        m = 0x0020000000000000 + a_flt_m + b_flt_m;
498bf215546Sopenharmony_ci        m <<= 9;
499bf215546Sopenharmony_ci    } else if (exp_diff < 0) {
500bf215546Sopenharmony_ci        a_flt_m <<= 9;
501bf215546Sopenharmony_ci        b_flt_m <<= 9;
502bf215546Sopenharmony_ci        e = b_flt_e;
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci        if (a_flt_e != 0)
505bf215546Sopenharmony_ci            a_flt_m += 0x2000000000000000;
506bf215546Sopenharmony_ci        else
507bf215546Sopenharmony_ci            a_flt_m <<= 1;
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci        a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff);
510bf215546Sopenharmony_ci        m = 0x2000000000000000 + a_flt_m + b_flt_m;
511bf215546Sopenharmony_ci        if (m < 0x4000000000000000) {
512bf215546Sopenharmony_ci            --e;
513bf215546Sopenharmony_ci            m <<= 1;
514bf215546Sopenharmony_ci        }
515bf215546Sopenharmony_ci    } else {
516bf215546Sopenharmony_ci        a_flt_m <<= 9;
517bf215546Sopenharmony_ci        b_flt_m <<= 9;
518bf215546Sopenharmony_ci        e = a_flt_e;
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci        if (b_flt_e != 0)
521bf215546Sopenharmony_ci            b_flt_m += 0x2000000000000000;
522bf215546Sopenharmony_ci        else
523bf215546Sopenharmony_ci            b_flt_m <<= 1;
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci        b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff);
526bf215546Sopenharmony_ci        m = 0x2000000000000000 + a_flt_m + b_flt_m;
527bf215546Sopenharmony_ci        if (m < 0x4000000000000000) {
528bf215546Sopenharmony_ci            --e;
529bf215546Sopenharmony_ci            m <<= 1;
530bf215546Sopenharmony_ci        }
531bf215546Sopenharmony_ci    }
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci    return _mesa_roundtozero_f64(s, e, m);
534bf215546Sopenharmony_ci}
535bf215546Sopenharmony_ci
/**
 * \brief Counts the zero bits above the most-significant set bit of the
 * 64-bit value 'a'.  A zero input yields 64.
 */
static inline unsigned
_mesa_count_leading_zeros64(uint64_t a)
{
    /* Bits from bit 0 up to and including the highest set bit. */
    const unsigned used_bits = util_last_bit64(a);

    return 64 - used_bits;
}
545bf215546Sopenharmony_ci
/**
 * \brief Counts the zero bits that sit above the most-significant 1 bit of
 * the 32-bit value 'a'.
 *
 * The count is derived as 32 minus util_last_bit(a).  For a == 0 the
 * documented result is 32.
 */
static inline unsigned
_mesa_count_leading_zeros32(uint32_t a)
{
    const unsigned significant_bits = util_last_bit(a);

    return 32 - significant_bits;
}
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_cistatic inline double
557bf215546Sopenharmony_ci_mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m)
558bf215546Sopenharmony_ci{
559bf215546Sopenharmony_ci    int8_t shift_dist;
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci    shift_dist = _mesa_count_leading_zeros64(m) - 1;
562bf215546Sopenharmony_ci    e -= shift_dist;
563bf215546Sopenharmony_ci    if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) {
564bf215546Sopenharmony_ci        di_type result;
565bf215546Sopenharmony_ci        result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10));
566bf215546Sopenharmony_ci        return result.f;
567bf215546Sopenharmony_ci    } else {
568bf215546Sopenharmony_ci        return _mesa_roundtozero_f64(s, e, m << shift_dist);
569bf215546Sopenharmony_ci    }
570bf215546Sopenharmony_ci}
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci/**
573bf215546Sopenharmony_ci * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the
574bf215546Sopenharmony_ci * 2s-complement of itself, where N = 'size_words' * 32.  Argument 'm_out'
575bf215546Sopenharmony_ci * points to a 'size_words'-long array of 32-bit elements that concatenate in
576bf215546Sopenharmony_ci * the platform's normal endian order to form an N-bit integer.
577bf215546Sopenharmony_ci *
578bf215546Sopenharmony_ci * From softfloat_negXM()
579bf215546Sopenharmony_ci */
580bf215546Sopenharmony_cistatic inline void
581bf215546Sopenharmony_ci_mesa_neg_x_m(uint8_t size_words, uint32_t *m_out)
582bf215546Sopenharmony_ci{
583bf215546Sopenharmony_ci    unsigned index, last_index;
584bf215546Sopenharmony_ci    uint8_t carry;
585bf215546Sopenharmony_ci    uint32_t word;
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci    index = index_word_lo(size_words);
588bf215546Sopenharmony_ci    last_index = index_word_hi(size_words);
589bf215546Sopenharmony_ci    carry = 1;
590bf215546Sopenharmony_ci    for (;;) {
591bf215546Sopenharmony_ci        word = ~m_out[index] + carry;
592bf215546Sopenharmony_ci        m_out[index] = word;
593bf215546Sopenharmony_ci        if (index == last_index)
594bf215546Sopenharmony_ci            break;
595bf215546Sopenharmony_ci        index += word_incr;
596bf215546Sopenharmony_ci        if (word)
597bf215546Sopenharmony_ci            carry = 0;
598bf215546Sopenharmony_ci    }
599bf215546Sopenharmony_ci}
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci/**
602bf215546Sopenharmony_ci * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N =
603bf215546Sopenharmony_ci * 'size_words' * 32.  The addition is modulo 2^N, so any carry out is
604bf215546Sopenharmony_ci * lost. The N-bit sum is stored at the location pointed to by 'm_out'.  Each
605bf215546Sopenharmony_ci * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit
606bf215546Sopenharmony_ci * elements that concatenate in the platform's normal endian order to form an
607bf215546Sopenharmony_ci * N-bit integer.
608bf215546Sopenharmony_ci *
609bf215546Sopenharmony_ci * From softfloat_addM()
610bf215546Sopenharmony_ci */
611bf215546Sopenharmony_cistatic inline void
612bf215546Sopenharmony_ci_mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out)
613bf215546Sopenharmony_ci{
614bf215546Sopenharmony_ci    unsigned index, last_index;
615bf215546Sopenharmony_ci    uint8_t carry;
616bf215546Sopenharmony_ci    uint32_t a_word, word;
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci    index = index_word_lo(size_words);
619bf215546Sopenharmony_ci    last_index = index_word_hi(size_words);
620bf215546Sopenharmony_ci    carry = 0;
621bf215546Sopenharmony_ci    for (;;) {
622bf215546Sopenharmony_ci        a_word = a[index];
623bf215546Sopenharmony_ci        word = a_word + b[index] + carry;
624bf215546Sopenharmony_ci        m_out[index] = word;
625bf215546Sopenharmony_ci        if (index == last_index)
626bf215546Sopenharmony_ci            break;
627bf215546Sopenharmony_ci        if (word != a_word)
628bf215546Sopenharmony_ci            carry = (word < a_word);
629bf215546Sopenharmony_ci        index += word_incr;
630bf215546Sopenharmony_ci    }
631bf215546Sopenharmony_ci}
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci/**
634bf215546Sopenharmony_ci * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N =
635bf215546Sopenharmony_ci * 'size_words' * 32.  The subtraction is modulo 2^N, so any borrow out (carry
636bf215546Sopenharmony_ci * out) is lost.  The N-bit difference is stored at the location pointed to by
637bf215546Sopenharmony_ci * 'm_out'.  Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array
638bf215546Sopenharmony_ci * of 32-bit elements that concatenate in the platform's normal endian order
639bf215546Sopenharmony_ci * to form an N-bit integer.
640bf215546Sopenharmony_ci *
641bf215546Sopenharmony_ci * From softfloat_subM()
642bf215546Sopenharmony_ci */
643bf215546Sopenharmony_cistatic inline void
644bf215546Sopenharmony_ci_mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out)
645bf215546Sopenharmony_ci{
646bf215546Sopenharmony_ci    unsigned index, last_index;
647bf215546Sopenharmony_ci    uint8_t borrow;
648bf215546Sopenharmony_ci    uint32_t a_word, b_word;
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci    index = index_word_lo(size_words);
651bf215546Sopenharmony_ci    last_index = index_word_hi(size_words);
652bf215546Sopenharmony_ci    borrow = 0;
653bf215546Sopenharmony_ci    for (;;) {
654bf215546Sopenharmony_ci        a_word = a[index];
655bf215546Sopenharmony_ci        b_word = b[index];
656bf215546Sopenharmony_ci        m_out[index] = a_word - b_word - borrow;
657bf215546Sopenharmony_ci        if (index == last_index)
658bf215546Sopenharmony_ci            break;
659bf215546Sopenharmony_ci        borrow = borrow ? (a_word <= b_word) : (a_word < b_word);
660bf215546Sopenharmony_ci        index += word_incr;
661bf215546Sopenharmony_ci    }
662bf215546Sopenharmony_ci}
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci/* Calculate a - b but rounding to zero.
665bf215546Sopenharmony_ci *
666bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e
667bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the
668bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not
669bf215546Sopenharmony_ci * important.
670bf215546Sopenharmony_ci *
671bf215546Sopenharmony_ci * From f64_sub()
672bf215546Sopenharmony_ci */
673bf215546Sopenharmony_cidouble
674bf215546Sopenharmony_ci_mesa_double_sub_rtz(double a, double b)
675bf215546Sopenharmony_ci{
676bf215546Sopenharmony_ci    const di_type a_di = {a};
677bf215546Sopenharmony_ci    uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
678bf215546Sopenharmony_ci    uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
679bf215546Sopenharmony_ci    uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
680bf215546Sopenharmony_ci    const di_type b_di = {b};
681bf215546Sopenharmony_ci    uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
682bf215546Sopenharmony_ci    uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
683bf215546Sopenharmony_ci    uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
684bf215546Sopenharmony_ci    int64_t s, e, m = 0;
685bf215546Sopenharmony_ci    int64_t m_diff = 0;
686bf215546Sopenharmony_ci    unsigned shift_dist = 0;
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci    s = a_flt_s;
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci    const int64_t exp_diff = a_flt_e - b_flt_e;
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci    /* Handle special cases */
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci    if (a_flt_s != b_flt_s) {
695bf215546Sopenharmony_ci        return _mesa_double_add_rtz(a, -b);
696bf215546Sopenharmony_ci    } else if ((a_flt_e == 0) && (a_flt_m == 0)) {
697bf215546Sopenharmony_ci        /* 'a' is zero, return '-b' */
698bf215546Sopenharmony_ci        return -b;
699bf215546Sopenharmony_ci    } else if ((b_flt_e == 0) && (b_flt_m == 0)) {
700bf215546Sopenharmony_ci        /* 'b' is zero, return 'a' */
701bf215546Sopenharmony_ci        return a;
702bf215546Sopenharmony_ci    } else if (a_flt_e == 0x7ff && a_flt_m != 0) {
703bf215546Sopenharmony_ci        /* 'a' is a NaN, return NaN */
704bf215546Sopenharmony_ci        return a;
705bf215546Sopenharmony_ci    } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
706bf215546Sopenharmony_ci        /* 'b' is a NaN, return NaN */
707bf215546Sopenharmony_ci        return b;
708bf215546Sopenharmony_ci    } else if (a_flt_e == 0x7ff && a_flt_m == 0) {
709bf215546Sopenharmony_ci        if (b_flt_e == 0x7ff && b_flt_m == 0) {
710bf215546Sopenharmony_ci            /* Inf - Inf =  NaN */
711bf215546Sopenharmony_ci            di_type result;
712bf215546Sopenharmony_ci            e = 0x7ff;
713bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
714bf215546Sopenharmony_ci            return result.f;
715bf215546Sopenharmony_ci        }
716bf215546Sopenharmony_ci        /* Inf - x = Inf */
717bf215546Sopenharmony_ci        return a;
718bf215546Sopenharmony_ci    } else if (b_flt_e == 0x7ff && b_flt_m == 0) {
719bf215546Sopenharmony_ci        /* x - Inf = -Inf */
720bf215546Sopenharmony_ci        return -b;
721bf215546Sopenharmony_ci    } else if (exp_diff == 0) {
722bf215546Sopenharmony_ci        m_diff = a_flt_m - b_flt_m;
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_ci        if (m_diff == 0)
725bf215546Sopenharmony_ci            return 0;
726bf215546Sopenharmony_ci        if (a_flt_e)
727bf215546Sopenharmony_ci            --a_flt_e;
728bf215546Sopenharmony_ci        if (m_diff < 0) {
729bf215546Sopenharmony_ci            s = !s;
730bf215546Sopenharmony_ci            m_diff = -m_diff;
731bf215546Sopenharmony_ci        }
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci        shift_dist = _mesa_count_leading_zeros64(m_diff) - 11;
734bf215546Sopenharmony_ci        e = a_flt_e - shift_dist;
735bf215546Sopenharmony_ci        if (e < 0) {
736bf215546Sopenharmony_ci            shift_dist = a_flt_e;
737bf215546Sopenharmony_ci            e = 0;
738bf215546Sopenharmony_ci        }
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci        di_type result;
741bf215546Sopenharmony_ci        result.u = (s << 63) + (e << 52) + (m_diff << shift_dist);
742bf215546Sopenharmony_ci        return result.f;
743bf215546Sopenharmony_ci    } else if (exp_diff < 0) {
744bf215546Sopenharmony_ci        a_flt_m <<= 10;
745bf215546Sopenharmony_ci        b_flt_m <<= 10;
746bf215546Sopenharmony_ci        s = !s;
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci        a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m;
749bf215546Sopenharmony_ci        a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff);
750bf215546Sopenharmony_ci        b_flt_m |= 0x4000000000000000;
751bf215546Sopenharmony_ci        e = b_flt_e;
752bf215546Sopenharmony_ci        m = b_flt_m - a_flt_m;
753bf215546Sopenharmony_ci    } else {
754bf215546Sopenharmony_ci        a_flt_m <<= 10;
755bf215546Sopenharmony_ci        b_flt_m <<= 10;
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci        b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m;
758bf215546Sopenharmony_ci        b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff);
759bf215546Sopenharmony_ci        a_flt_m |= 0x4000000000000000;
760bf215546Sopenharmony_ci        e = a_flt_e;
761bf215546Sopenharmony_ci        m = a_flt_m - b_flt_m;
762bf215546Sopenharmony_ci    }
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci    return _mesa_norm_round_pack_f64(s, e - 1, m);
765bf215546Sopenharmony_ci}
766bf215546Sopenharmony_ci
/**
 * \brief Normalizes the mantissa 'm' of a subnormal double.
 *
 * 'm' is shifted left until its most-significant 1 bit reaches bit 52; the
 * shifted mantissa is stored in '*m_out' and the matching exponent,
 * 1 minus the shift distance, in '*exp'.  That exponent may be negative and
 * is then stored modulo 2^64.
 */
static inline void
_mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out)
{
    const unsigned leading_zeros = _mesa_count_leading_zeros64(m);
    const int normalize_by = leading_zeros - 11;

    *exp = 1 - normalize_by;
    *m_out = m << normalize_by;
}
776bf215546Sopenharmony_ci
/**
 * \brief Normalizes the mantissa 'm' of a subnormal float.
 *
 * 'm' is shifted left until its most-significant 1 bit reaches bit 23; the
 * shifted mantissa is stored in '*m_out' and the matching exponent,
 * 1 minus the shift distance, in '*exp'.  That exponent may be negative and
 * is then stored modulo 2^32.
 */
static inline void
_mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out)
{
    const unsigned leading_zeros = _mesa_count_leading_zeros32(m);
    const int normalize_by = leading_zeros - 8;

    *exp = 1 - normalize_by;
    *m_out = m << normalize_by;
}
786bf215546Sopenharmony_ci
/**
 * \brief Multiplies the 64-bit values 'a' and 'b' and stores the full 128-bit
 * product at the location pointed to by 'm_out'.
 *
 * 'm_out' points to an array of four 32-bit elements that concatenate in the
 * platform's normal endian order to form a 128-bit integer.  The product is
 * built from the four 32x32-bit partial products of the operand halves.
 *
 * From softfloat_mul64To128M()
 */
static inline void
_mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out)
{
    const uint32_t a_hi = a >> 32;
    const uint32_t a_lo = a;
    const uint32_t b_hi = b >> 32;
    const uint32_t b_lo = b;

    uint64_t low = (uint64_t) a_lo * b_lo;
    const uint64_t cross_first = (uint64_t) a_hi * b_lo;
    uint64_t cross = cross_first + (uint64_t) a_lo * b_hi;
    uint64_t high = (uint64_t) a_hi * b_hi;

    /* Fold the upper half of the cross terms, plus the carry produced while
     * summing them, into the high 64 bits.
     */
    high += ((uint64_t) (cross < cross_first) << 32) | (cross >> 32);

    /* The lower half of the cross terms goes into the low 64 bits; a
     * wrap-around there carries into the high 64 bits.
     */
    cross <<= 32;
    low += cross;
    high += (low < cross);

    m_out[index_word(4, 0)] = low;
    m_out[index_word(4, 1)] = low >> 32;
    m_out[index_word(4, 2)] = high;
    m_out[index_word(4, 3)] = high >> 32;
}
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci/* Calculate a * b but rounding to zero.
820bf215546Sopenharmony_ci *
821bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e
822bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the
823bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not
824bf215546Sopenharmony_ci * important.
825bf215546Sopenharmony_ci *
826bf215546Sopenharmony_ci * From f64_mul()
827bf215546Sopenharmony_ci */
828bf215546Sopenharmony_cidouble
829bf215546Sopenharmony_ci_mesa_double_mul_rtz(double a, double b)
830bf215546Sopenharmony_ci{
831bf215546Sopenharmony_ci    const di_type a_di = {a};
832bf215546Sopenharmony_ci    uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
833bf215546Sopenharmony_ci    uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
834bf215546Sopenharmony_ci    uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
835bf215546Sopenharmony_ci    const di_type b_di = {b};
836bf215546Sopenharmony_ci    uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
837bf215546Sopenharmony_ci    uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
838bf215546Sopenharmony_ci    uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
839bf215546Sopenharmony_ci    int64_t s, e, m = 0;
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci    s = a_flt_s ^ b_flt_s;
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci    if (a_flt_e == 0x7ff) {
844bf215546Sopenharmony_ci        if (a_flt_m != 0) {
845bf215546Sopenharmony_ci            /* 'a' is a NaN, return NaN */
846bf215546Sopenharmony_ci            return a;
847bf215546Sopenharmony_ci        } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
848bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
849bf215546Sopenharmony_ci            return b;
850bf215546Sopenharmony_ci        }
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci        if (!(b_flt_e | b_flt_m)) {
853bf215546Sopenharmony_ci            /* Inf * 0 = NaN */
854bf215546Sopenharmony_ci            di_type result;
855bf215546Sopenharmony_ci            e = 0x7ff;
856bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
857bf215546Sopenharmony_ci            return result.f;
858bf215546Sopenharmony_ci        }
859bf215546Sopenharmony_ci        /* Inf * x = Inf */
860bf215546Sopenharmony_ci        di_type result;
861bf215546Sopenharmony_ci        e = 0x7ff;
862bf215546Sopenharmony_ci        result.u = (s << 63) + (e << 52) + 0;
863bf215546Sopenharmony_ci        return result.f;
864bf215546Sopenharmony_ci    }
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci    if (b_flt_e == 0x7ff) {
867bf215546Sopenharmony_ci        if (b_flt_m != 0) {
868bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
869bf215546Sopenharmony_ci            return b;
870bf215546Sopenharmony_ci        }
871bf215546Sopenharmony_ci        if (!(a_flt_e | a_flt_m)) {
872bf215546Sopenharmony_ci            /* 0 * Inf = NaN */
873bf215546Sopenharmony_ci            di_type result;
874bf215546Sopenharmony_ci            e = 0x7ff;
875bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
876bf215546Sopenharmony_ci            return result.f;
877bf215546Sopenharmony_ci        }
878bf215546Sopenharmony_ci        /* x * Inf = Inf */
879bf215546Sopenharmony_ci        di_type result;
880bf215546Sopenharmony_ci        e = 0x7ff;
881bf215546Sopenharmony_ci        result.u = (s << 63) + (e << 52) + 0;
882bf215546Sopenharmony_ci        return result.f;
883bf215546Sopenharmony_ci    }
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci    if (a_flt_e == 0) {
886bf215546Sopenharmony_ci        if (a_flt_m == 0) {
887bf215546Sopenharmony_ci            /* 'a' is zero. Return zero */
888bf215546Sopenharmony_ci            di_type result;
889bf215546Sopenharmony_ci            result.u = (s << 63) + 0;
890bf215546Sopenharmony_ci            return result.f;
891bf215546Sopenharmony_ci        }
892bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m);
893bf215546Sopenharmony_ci    }
894bf215546Sopenharmony_ci    if (b_flt_e == 0) {
895bf215546Sopenharmony_ci        if (b_flt_m == 0) {
896bf215546Sopenharmony_ci            /* 'b' is zero. Return zero */
897bf215546Sopenharmony_ci            di_type result;
898bf215546Sopenharmony_ci            result.u = (s << 63) + 0;
899bf215546Sopenharmony_ci            return result.f;
900bf215546Sopenharmony_ci        }
901bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m);
902bf215546Sopenharmony_ci    }
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci    e = a_flt_e + b_flt_e - 0x3ff;
905bf215546Sopenharmony_ci    a_flt_m = (a_flt_m | 0x0010000000000000) << 10;
906bf215546Sopenharmony_ci    b_flt_m = (b_flt_m | 0x0010000000000000) << 11;
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci    uint32_t m_128[4];
909bf215546Sopenharmony_ci    _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128);
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci    m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)];
912bf215546Sopenharmony_ci    if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
913bf215546Sopenharmony_ci        m |= 1;
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_ci    if (m < 0x4000000000000000) {
916bf215546Sopenharmony_ci        --e;
917bf215546Sopenharmony_ci        m <<= 1;
918bf215546Sopenharmony_ci    }
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci    return _mesa_roundtozero_f64(s, e, m);
921bf215546Sopenharmony_ci}
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci/**
925bf215546Sopenharmony_ci * \brief Calculate a * b + c but rounding to zero.
926bf215546Sopenharmony_ci *
927bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e
928bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the
929bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not
930bf215546Sopenharmony_ci * important.
931bf215546Sopenharmony_ci *
932bf215546Sopenharmony_ci * From f64_mulAdd()
933bf215546Sopenharmony_ci */
934bf215546Sopenharmony_cidouble
935bf215546Sopenharmony_ci_mesa_double_fma_rtz(double a, double b, double c)
936bf215546Sopenharmony_ci{
937bf215546Sopenharmony_ci    const di_type a_di = {a};
938bf215546Sopenharmony_ci    uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
939bf215546Sopenharmony_ci    uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
940bf215546Sopenharmony_ci    uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
941bf215546Sopenharmony_ci    const di_type b_di = {b};
942bf215546Sopenharmony_ci    uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
943bf215546Sopenharmony_ci    uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
944bf215546Sopenharmony_ci    uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
945bf215546Sopenharmony_ci    const di_type c_di = {c};
946bf215546Sopenharmony_ci    uint64_t c_flt_m = c_di.u & 0x0fffffffffffff;
947bf215546Sopenharmony_ci    uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff;
948bf215546Sopenharmony_ci    uint64_t c_flt_s = (c_di.u >> 63) & 0x1;
949bf215546Sopenharmony_ci    int64_t s, e, m = 0;
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_ci    c_flt_s ^= 0;
952bf215546Sopenharmony_ci    s = a_flt_s ^ b_flt_s ^ 0;
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_ci    if (a_flt_e == 0x7ff) {
955bf215546Sopenharmony_ci        if (a_flt_m != 0) {
956bf215546Sopenharmony_ci            /* 'a' is a NaN, return NaN */
957bf215546Sopenharmony_ci            return a;
958bf215546Sopenharmony_ci        } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
959bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
960bf215546Sopenharmony_ci            return b;
961bf215546Sopenharmony_ci        } else if (c_flt_e == 0x7ff && c_flt_m != 0) {
962bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
963bf215546Sopenharmony_ci            return c;
964bf215546Sopenharmony_ci        }
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci        if (!(b_flt_e | b_flt_m)) {
967bf215546Sopenharmony_ci            /* Inf * 0 + y = NaN */
968bf215546Sopenharmony_ci            di_type result;
969bf215546Sopenharmony_ci            e = 0x7ff;
970bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
971bf215546Sopenharmony_ci            return result.f;
972bf215546Sopenharmony_ci        }
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci        if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) {
975bf215546Sopenharmony_ci            /* Inf * x - Inf = NaN */
976bf215546Sopenharmony_ci            di_type result;
977bf215546Sopenharmony_ci            e = 0x7ff;
978bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
979bf215546Sopenharmony_ci            return result.f;
980bf215546Sopenharmony_ci        }
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci        /* Inf * x + y = Inf */
983bf215546Sopenharmony_ci        di_type result;
984bf215546Sopenharmony_ci        e = 0x7ff;
985bf215546Sopenharmony_ci        result.u = (s << 63) + (e << 52) + 0;
986bf215546Sopenharmony_ci        return result.f;
987bf215546Sopenharmony_ci    }
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci    if (b_flt_e == 0x7ff) {
990bf215546Sopenharmony_ci        if (b_flt_m != 0) {
991bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
992bf215546Sopenharmony_ci            return b;
993bf215546Sopenharmony_ci        } else if (c_flt_e == 0x7ff && c_flt_m != 0) {
994bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
995bf215546Sopenharmony_ci            return c;
996bf215546Sopenharmony_ci        }
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci        if (!(a_flt_e | a_flt_m)) {
999bf215546Sopenharmony_ci            /* 0 * Inf + y = NaN */
1000bf215546Sopenharmony_ci            di_type result;
1001bf215546Sopenharmony_ci            e = 0x7ff;
1002bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
1003bf215546Sopenharmony_ci            return result.f;
1004bf215546Sopenharmony_ci        }
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci        if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) {
1007bf215546Sopenharmony_ci            /* x * Inf - Inf = NaN */
1008bf215546Sopenharmony_ci            di_type result;
1009bf215546Sopenharmony_ci            e = 0x7ff;
1010bf215546Sopenharmony_ci            result.u = (s << 63) + (e << 52) + 0x1;
1011bf215546Sopenharmony_ci            return result.f;
1012bf215546Sopenharmony_ci        }
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci        /* x * Inf + y = Inf */
1015bf215546Sopenharmony_ci        di_type result;
1016bf215546Sopenharmony_ci        e = 0x7ff;
1017bf215546Sopenharmony_ci        result.u = (s << 63) + (e << 52) + 0;
1018bf215546Sopenharmony_ci        return result.f;
1019bf215546Sopenharmony_ci    }
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_ci    if (c_flt_e == 0x7ff) {
1022bf215546Sopenharmony_ci        if (c_flt_m != 0) {
1023bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
1024bf215546Sopenharmony_ci            return c;
1025bf215546Sopenharmony_ci        }
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci        /* x * y + Inf = Inf */
1028bf215546Sopenharmony_ci        return c;
1029bf215546Sopenharmony_ci    }
1030bf215546Sopenharmony_ci
1031bf215546Sopenharmony_ci    if (a_flt_e == 0) {
1032bf215546Sopenharmony_ci        if (a_flt_m == 0) {
1033bf215546Sopenharmony_ci            /* 'a' is zero, return 'c' */
1034bf215546Sopenharmony_ci            return c;
1035bf215546Sopenharmony_ci        }
1036bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m);
1037bf215546Sopenharmony_ci    }
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci    if (b_flt_e == 0) {
1040bf215546Sopenharmony_ci        if (b_flt_m == 0) {
1041bf215546Sopenharmony_ci            /* 'b' is zero, return 'c' */
1042bf215546Sopenharmony_ci            return c;
1043bf215546Sopenharmony_ci        }
1044bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m);
1045bf215546Sopenharmony_ci    }
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci    e = a_flt_e + b_flt_e - 0x3fe;
1048bf215546Sopenharmony_ci    a_flt_m = (a_flt_m | 0x0010000000000000) << 10;
1049bf215546Sopenharmony_ci    b_flt_m = (b_flt_m | 0x0010000000000000) << 11;
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci    uint32_t m_128[4];
1052bf215546Sopenharmony_ci    _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128);
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci    m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)];
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_ci    int64_t shift_dist = 0;
1057bf215546Sopenharmony_ci    if (!(m & 0x4000000000000000)) {
1058bf215546Sopenharmony_ci        --e;
1059bf215546Sopenharmony_ci        shift_dist = -1;
1060bf215546Sopenharmony_ci    }
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci    if (c_flt_e == 0) {
1063bf215546Sopenharmony_ci        if (c_flt_m == 0) {
1064bf215546Sopenharmony_ci            /* 'c' is zero, return 'a * b' */
1065bf215546Sopenharmony_ci            if (shift_dist)
1066bf215546Sopenharmony_ci                m <<= 1;
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci            if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1069bf215546Sopenharmony_ci                m |= 1;
1070bf215546Sopenharmony_ci            return _mesa_roundtozero_f64(s, e - 1, m);
1071bf215546Sopenharmony_ci        }
1072bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m);
1073bf215546Sopenharmony_ci    }
1074bf215546Sopenharmony_ci    c_flt_m = (c_flt_m | 0x0010000000000000) << 10;
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci    uint32_t c_flt_m_128[4];
1077bf215546Sopenharmony_ci    int64_t exp_diff = e - c_flt_e;
1078bf215546Sopenharmony_ci    if (exp_diff < 0) {
1079bf215546Sopenharmony_ci        e = c_flt_e;
1080bf215546Sopenharmony_ci        if ((s == c_flt_s) || (exp_diff < -1)) {
1081bf215546Sopenharmony_ci            shift_dist -= exp_diff;
1082bf215546Sopenharmony_ci            if (shift_dist) {
1083bf215546Sopenharmony_ci                m = _mesa_shift_right_jam64(m, shift_dist);
1084bf215546Sopenharmony_ci            }
1085bf215546Sopenharmony_ci        } else {
1086bf215546Sopenharmony_ci            if (!shift_dist) {
1087bf215546Sopenharmony_ci                _mesa_short_shift_right_m(4, m_128, 1, m_128);
1088bf215546Sopenharmony_ci            }
1089bf215546Sopenharmony_ci        }
1090bf215546Sopenharmony_ci    } else {
1091bf215546Sopenharmony_ci        if (shift_dist)
1092bf215546Sopenharmony_ci            _mesa_add_m(4, m_128, m_128, m_128);
1093bf215546Sopenharmony_ci        if (!exp_diff) {
1094bf215546Sopenharmony_ci            m = (uint64_t) m_128[index_word(4, 3)] << 32
1095bf215546Sopenharmony_ci                | m_128[index_word(4, 2)];
1096bf215546Sopenharmony_ci        } else {
1097bf215546Sopenharmony_ci            c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32;
1098bf215546Sopenharmony_ci            c_flt_m_128[index_word(4, 2)] = c_flt_m;
1099bf215546Sopenharmony_ci            c_flt_m_128[index_word(4, 1)] = 0;
1100bf215546Sopenharmony_ci            c_flt_m_128[index_word(4, 0)] = 0;
1101bf215546Sopenharmony_ci            _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128);
1102bf215546Sopenharmony_ci        }
1103bf215546Sopenharmony_ci    }
1104bf215546Sopenharmony_ci
1105bf215546Sopenharmony_ci    if (s == c_flt_s) {
1106bf215546Sopenharmony_ci        if (exp_diff <= 0) {
1107bf215546Sopenharmony_ci            m += c_flt_m;
1108bf215546Sopenharmony_ci        } else {
1109bf215546Sopenharmony_ci            _mesa_add_m(4, m_128, c_flt_m_128, m_128);
1110bf215546Sopenharmony_ci            m = (uint64_t) m_128[index_word(4, 3)] << 32
1111bf215546Sopenharmony_ci                | m_128[index_word(4, 2)];
1112bf215546Sopenharmony_ci        }
1113bf215546Sopenharmony_ci        if (m & 0x8000000000000000) {
1114bf215546Sopenharmony_ci            e++;
1115bf215546Sopenharmony_ci            m = _mesa_short_shift_right_jam64(m, 1);
1116bf215546Sopenharmony_ci        }
1117bf215546Sopenharmony_ci    } else {
1118bf215546Sopenharmony_ci        if (exp_diff < 0) {
1119bf215546Sopenharmony_ci            s = c_flt_s;
1120bf215546Sopenharmony_ci            if (exp_diff < -1) {
1121bf215546Sopenharmony_ci                m = c_flt_m - m;
1122bf215546Sopenharmony_ci                if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) {
1123bf215546Sopenharmony_ci                    m = (m - 1) | 1;
1124bf215546Sopenharmony_ci                }
1125bf215546Sopenharmony_ci                if (!(m & 0x4000000000000000)) {
1126bf215546Sopenharmony_ci                    --e;
1127bf215546Sopenharmony_ci                    m <<= 1;
1128bf215546Sopenharmony_ci                }
1129bf215546Sopenharmony_ci                return _mesa_roundtozero_f64(s, e - 1, m);
1130bf215546Sopenharmony_ci            } else {
1131bf215546Sopenharmony_ci                c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32;
1132bf215546Sopenharmony_ci                c_flt_m_128[index_word(4, 2)] = c_flt_m;
1133bf215546Sopenharmony_ci                c_flt_m_128[index_word(4, 1)] = 0;
1134bf215546Sopenharmony_ci                c_flt_m_128[index_word(4, 0)] = 0;
1135bf215546Sopenharmony_ci                _mesa_sub_m(4, c_flt_m_128, m_128, m_128);
1136bf215546Sopenharmony_ci            }
1137bf215546Sopenharmony_ci        } else if (!exp_diff) {
1138bf215546Sopenharmony_ci            m -= c_flt_m;
1139bf215546Sopenharmony_ci            if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) {
1140bf215546Sopenharmony_ci                /* Return zero */
1141bf215546Sopenharmony_ci                di_type result;
1142bf215546Sopenharmony_ci                result.u = (s << 63) + 0;
1143bf215546Sopenharmony_ci                return result.f;
1144bf215546Sopenharmony_ci            }
1145bf215546Sopenharmony_ci            m_128[index_word(4, 3)] = m >> 32;
1146bf215546Sopenharmony_ci            m_128[index_word(4, 2)] = m;
1147bf215546Sopenharmony_ci            if (m & 0x8000000000000000) {
1148bf215546Sopenharmony_ci                s = !s;
1149bf215546Sopenharmony_ci                _mesa_neg_x_m(4, m_128);
1150bf215546Sopenharmony_ci            }
1151bf215546Sopenharmony_ci        } else {
1152bf215546Sopenharmony_ci            _mesa_sub_m(4, m_128, c_flt_m_128, m_128);
1153bf215546Sopenharmony_ci            if (1 < exp_diff) {
1154bf215546Sopenharmony_ci                m = (uint64_t) m_128[index_word(4, 3)] << 32
1155bf215546Sopenharmony_ci                    | m_128[index_word(4, 2)];
1156bf215546Sopenharmony_ci                if (!(m & 0x4000000000000000)) {
1157bf215546Sopenharmony_ci                    --e;
1158bf215546Sopenharmony_ci                    m <<= 1;
1159bf215546Sopenharmony_ci                }
1160bf215546Sopenharmony_ci                if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1161bf215546Sopenharmony_ci                    m |= 1;
1162bf215546Sopenharmony_ci                return _mesa_roundtozero_f64(s, e - 1, m);
1163bf215546Sopenharmony_ci            }
1164bf215546Sopenharmony_ci        }
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci        shift_dist = 0;
1167bf215546Sopenharmony_ci        m = (uint64_t) m_128[index_word(4, 3)] << 32
1168bf215546Sopenharmony_ci            | m_128[index_word(4, 2)];
1169bf215546Sopenharmony_ci        if (!m) {
1170bf215546Sopenharmony_ci            shift_dist = 64;
1171bf215546Sopenharmony_ci            m = (uint64_t) m_128[index_word(4, 1)] << 32
1172bf215546Sopenharmony_ci                | m_128[index_word(4, 0)];
1173bf215546Sopenharmony_ci        }
1174bf215546Sopenharmony_ci        shift_dist += _mesa_count_leading_zeros64(m) - 1;
1175bf215546Sopenharmony_ci        if (shift_dist) {
1176bf215546Sopenharmony_ci            e -= shift_dist;
1177bf215546Sopenharmony_ci            _mesa_shift_left_m(4, m_128, shift_dist, m_128);
1178bf215546Sopenharmony_ci            m = (uint64_t) m_128[index_word(4, 3)] << 32
1179bf215546Sopenharmony_ci                | m_128[index_word(4, 2)];
1180bf215546Sopenharmony_ci        }
1181bf215546Sopenharmony_ci    }
1182bf215546Sopenharmony_ci
1183bf215546Sopenharmony_ci    if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1184bf215546Sopenharmony_ci        m |= 1;
1185bf215546Sopenharmony_ci    return _mesa_roundtozero_f64(s, e - 1, m);
1186bf215546Sopenharmony_ci}
1187bf215546Sopenharmony_ci
1188bf215546Sopenharmony_ci
1189bf215546Sopenharmony_ci/**
1190bf215546Sopenharmony_ci * \brief Calculate a * b + c but rounding to zero.
1191bf215546Sopenharmony_ci *
1192bf215546Sopenharmony_ci * Notice that this mainly differs from the original Berkeley SoftFloat 3e
1193bf215546Sopenharmony_ci * implementation in that we don't really treat NaNs, Zeroes nor the
1194bf215546Sopenharmony_ci * signalling flags. Any NaN is good for us and the sign of the Zero is not
1195bf215546Sopenharmony_ci * important.
1196bf215546Sopenharmony_ci *
1197bf215546Sopenharmony_ci * From f32_mulAdd()
1198bf215546Sopenharmony_ci */
1199bf215546Sopenharmony_cifloat
1200bf215546Sopenharmony_ci_mesa_float_fma_rtz(float a, float b, float c)
1201bf215546Sopenharmony_ci{
1202bf215546Sopenharmony_ci    const fi_type a_fi = {a};
1203bf215546Sopenharmony_ci    uint32_t a_flt_m = a_fi.u & 0x07fffff;
1204bf215546Sopenharmony_ci    uint32_t a_flt_e = (a_fi.u >> 23) & 0xff;
1205bf215546Sopenharmony_ci    uint32_t a_flt_s = (a_fi.u >> 31) & 0x1;
1206bf215546Sopenharmony_ci    const fi_type b_fi = {b};
1207bf215546Sopenharmony_ci    uint32_t b_flt_m = b_fi.u & 0x07fffff;
1208bf215546Sopenharmony_ci    uint32_t b_flt_e = (b_fi.u >> 23) & 0xff;
1209bf215546Sopenharmony_ci    uint32_t b_flt_s = (b_fi.u >> 31) & 0x1;
1210bf215546Sopenharmony_ci    const fi_type c_fi = {c};
1211bf215546Sopenharmony_ci    uint32_t c_flt_m = c_fi.u & 0x07fffff;
1212bf215546Sopenharmony_ci    uint32_t c_flt_e = (c_fi.u >> 23) & 0xff;
1213bf215546Sopenharmony_ci    uint32_t c_flt_s = (c_fi.u >> 31) & 0x1;
1214bf215546Sopenharmony_ci    int32_t s, e, m = 0;
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_ci    c_flt_s ^= 0;
1217bf215546Sopenharmony_ci    s = a_flt_s ^ b_flt_s ^ 0;
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci    if (a_flt_e == 0xff) {
1220bf215546Sopenharmony_ci        if (a_flt_m != 0) {
1221bf215546Sopenharmony_ci            /* 'a' is a NaN, return NaN */
1222bf215546Sopenharmony_ci            return a;
1223bf215546Sopenharmony_ci        } else if (b_flt_e == 0xff && b_flt_m != 0) {
1224bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
1225bf215546Sopenharmony_ci            return b;
1226bf215546Sopenharmony_ci        } else if (c_flt_e == 0xff && c_flt_m != 0) {
1227bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
1228bf215546Sopenharmony_ci            return c;
1229bf215546Sopenharmony_ci        }
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_ci        if (!(b_flt_e | b_flt_m)) {
1232bf215546Sopenharmony_ci            /* Inf * 0 + y = NaN */
1233bf215546Sopenharmony_ci            fi_type result;
1234bf215546Sopenharmony_ci            e = 0xff;
1235bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + 0x1;
1236bf215546Sopenharmony_ci            return result.f;
1237bf215546Sopenharmony_ci        }
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_ci        if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) {
1240bf215546Sopenharmony_ci            /* Inf * x - Inf = NaN */
1241bf215546Sopenharmony_ci            fi_type result;
1242bf215546Sopenharmony_ci            e = 0xff;
1243bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + 0x1;
1244bf215546Sopenharmony_ci            return result.f;
1245bf215546Sopenharmony_ci        }
1246bf215546Sopenharmony_ci
1247bf215546Sopenharmony_ci        /* Inf * x + y = Inf */
1248bf215546Sopenharmony_ci        fi_type result;
1249bf215546Sopenharmony_ci        e = 0xff;
1250bf215546Sopenharmony_ci        result.u = (s << 31) + (e << 23) + 0;
1251bf215546Sopenharmony_ci        return result.f;
1252bf215546Sopenharmony_ci    }
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci    if (b_flt_e == 0xff) {
1255bf215546Sopenharmony_ci        if (b_flt_m != 0) {
1256bf215546Sopenharmony_ci            /* 'b' is a NaN, return NaN */
1257bf215546Sopenharmony_ci            return b;
1258bf215546Sopenharmony_ci        } else if (c_flt_e == 0xff && c_flt_m != 0) {
1259bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
1260bf215546Sopenharmony_ci            return c;
1261bf215546Sopenharmony_ci        }
1262bf215546Sopenharmony_ci
1263bf215546Sopenharmony_ci        if (!(a_flt_e | a_flt_m)) {
1264bf215546Sopenharmony_ci            /* 0 * Inf + y = NaN */
1265bf215546Sopenharmony_ci            fi_type result;
1266bf215546Sopenharmony_ci            e = 0xff;
1267bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + 0x1;
1268bf215546Sopenharmony_ci            return result.f;
1269bf215546Sopenharmony_ci        }
1270bf215546Sopenharmony_ci
1271bf215546Sopenharmony_ci        if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) {
1272bf215546Sopenharmony_ci            /* x * Inf - Inf = NaN */
1273bf215546Sopenharmony_ci            fi_type result;
1274bf215546Sopenharmony_ci            e = 0xff;
1275bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + 0x1;
1276bf215546Sopenharmony_ci            return result.f;
1277bf215546Sopenharmony_ci        }
1278bf215546Sopenharmony_ci
1279bf215546Sopenharmony_ci        /* x * Inf + y = Inf */
1280bf215546Sopenharmony_ci        fi_type result;
1281bf215546Sopenharmony_ci        e = 0xff;
1282bf215546Sopenharmony_ci        result.u = (s << 31) + (e << 23) + 0;
1283bf215546Sopenharmony_ci        return result.f;
1284bf215546Sopenharmony_ci    }
1285bf215546Sopenharmony_ci
1286bf215546Sopenharmony_ci    if (c_flt_e == 0xff) {
1287bf215546Sopenharmony_ci        if (c_flt_m != 0) {
1288bf215546Sopenharmony_ci            /* 'c' is a NaN, return NaN */
1289bf215546Sopenharmony_ci            return c;
1290bf215546Sopenharmony_ci        }
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_ci        /* x * y + Inf = Inf */
1293bf215546Sopenharmony_ci        return c;
1294bf215546Sopenharmony_ci    }
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci    if (a_flt_e == 0) {
1297bf215546Sopenharmony_ci        if (a_flt_m == 0) {
1298bf215546Sopenharmony_ci            /* 'a' is zero, return 'c' */
1299bf215546Sopenharmony_ci            return c;
1300bf215546Sopenharmony_ci        }
1301bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m);
1302bf215546Sopenharmony_ci    }
1303bf215546Sopenharmony_ci
1304bf215546Sopenharmony_ci    if (b_flt_e == 0) {
1305bf215546Sopenharmony_ci        if (b_flt_m == 0) {
1306bf215546Sopenharmony_ci            /* 'b' is zero, return 'c' */
1307bf215546Sopenharmony_ci            return c;
1308bf215546Sopenharmony_ci        }
1309bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m);
1310bf215546Sopenharmony_ci    }
1311bf215546Sopenharmony_ci
1312bf215546Sopenharmony_ci    e = a_flt_e + b_flt_e - 0x7e;
1313bf215546Sopenharmony_ci    a_flt_m = (a_flt_m | 0x00800000) << 7;
1314bf215546Sopenharmony_ci    b_flt_m = (b_flt_m | 0x00800000) << 7;
1315bf215546Sopenharmony_ci
1316bf215546Sopenharmony_ci    uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m;
1317bf215546Sopenharmony_ci    if (m_64 < 0x2000000000000000) {
1318bf215546Sopenharmony_ci        --e;
1319bf215546Sopenharmony_ci        m_64 <<= 1;
1320bf215546Sopenharmony_ci    }
1321bf215546Sopenharmony_ci
1322bf215546Sopenharmony_ci    if (c_flt_e == 0) {
1323bf215546Sopenharmony_ci        if (c_flt_m == 0) {
1324bf215546Sopenharmony_ci            /* 'c' is zero, return 'a * b' */
1325bf215546Sopenharmony_ci            m = _mesa_short_shift_right_jam64(m_64, 31);
1326bf215546Sopenharmony_ci            return _mesa_round_f32(s, e - 1, m, true);
1327bf215546Sopenharmony_ci        }
1328bf215546Sopenharmony_ci        _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m);
1329bf215546Sopenharmony_ci    }
1330bf215546Sopenharmony_ci    c_flt_m = (c_flt_m | 0x00800000) << 6;
1331bf215546Sopenharmony_ci
1332bf215546Sopenharmony_ci    int16_t exp_diff = e - c_flt_e;
1333bf215546Sopenharmony_ci    if (s == c_flt_s) {
1334bf215546Sopenharmony_ci        if (exp_diff <= 0) {
1335bf215546Sopenharmony_ci            e = c_flt_e;
1336bf215546Sopenharmony_ci            m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff);
1337bf215546Sopenharmony_ci        } else {
1338bf215546Sopenharmony_ci            m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff);
1339bf215546Sopenharmony_ci            m = _mesa_short_shift_right_jam64(m_64, 32);
1340bf215546Sopenharmony_ci        }
1341bf215546Sopenharmony_ci        if (m < 0x40000000) {
1342bf215546Sopenharmony_ci            --e;
1343bf215546Sopenharmony_ci            m <<= 1;
1344bf215546Sopenharmony_ci        }
1345bf215546Sopenharmony_ci    } else {
1346bf215546Sopenharmony_ci        uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32;
1347bf215546Sopenharmony_ci        if (exp_diff < 0) {
1348bf215546Sopenharmony_ci            s = c_flt_s;
1349bf215546Sopenharmony_ci            e = c_flt_e;
1350bf215546Sopenharmony_ci            m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff);
1351bf215546Sopenharmony_ci        } else if (!exp_diff) {
1352bf215546Sopenharmony_ci            m_64 -= c_flt_m_64;
1353bf215546Sopenharmony_ci            if (!m_64) {
1354bf215546Sopenharmony_ci                /* Return zero */
1355bf215546Sopenharmony_ci                fi_type result;
1356bf215546Sopenharmony_ci                result.u = (s << 31) + 0;
1357bf215546Sopenharmony_ci                return result.f;
1358bf215546Sopenharmony_ci            }
1359bf215546Sopenharmony_ci            if (m_64 & 0x8000000000000000) {
1360bf215546Sopenharmony_ci                s = !s;
1361bf215546Sopenharmony_ci                m_64 = -m_64;
1362bf215546Sopenharmony_ci            }
1363bf215546Sopenharmony_ci        } else {
1364bf215546Sopenharmony_ci            m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff);
1365bf215546Sopenharmony_ci        }
1366bf215546Sopenharmony_ci        int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1;
1367bf215546Sopenharmony_ci        e -= shift_dist;
1368bf215546Sopenharmony_ci        shift_dist -= 32;
1369bf215546Sopenharmony_ci        if (shift_dist < 0) {
1370bf215546Sopenharmony_ci            m = _mesa_short_shift_right_jam64(m_64, -shift_dist);
1371bf215546Sopenharmony_ci        } else {
1372bf215546Sopenharmony_ci            m = (uint32_t) m_64 << shift_dist;
1373bf215546Sopenharmony_ci        }
1374bf215546Sopenharmony_ci    }
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci    return _mesa_round_f32(s, e, m, true);
1377bf215546Sopenharmony_ci}
1378bf215546Sopenharmony_ci
1379bf215546Sopenharmony_ci
1380bf215546Sopenharmony_ci/**
1381bf215546Sopenharmony_ci * \brief Converts from 64bits to 32bits float and rounds according to
1382bf215546Sopenharmony_ci * instructed.
1383bf215546Sopenharmony_ci *
1384bf215546Sopenharmony_ci * From f64_to_f32()
1385bf215546Sopenharmony_ci */
1386bf215546Sopenharmony_cifloat
1387bf215546Sopenharmony_ci_mesa_double_to_f32(double val, bool rtz)
1388bf215546Sopenharmony_ci{
1389bf215546Sopenharmony_ci    const di_type di = {val};
1390bf215546Sopenharmony_ci    uint64_t flt_m = di.u & 0x0fffffffffffff;
1391bf215546Sopenharmony_ci    uint64_t flt_e = (di.u >> 52) & 0x7ff;
1392bf215546Sopenharmony_ci    uint64_t flt_s = (di.u >> 63) & 0x1;
1393bf215546Sopenharmony_ci    int32_t s, e, m = 0;
1394bf215546Sopenharmony_ci
1395bf215546Sopenharmony_ci    s = flt_s;
1396bf215546Sopenharmony_ci
1397bf215546Sopenharmony_ci    if (flt_e == 0x7ff) {
1398bf215546Sopenharmony_ci        if (flt_m != 0) {
1399bf215546Sopenharmony_ci            /* 'val' is a NaN, return NaN */
1400bf215546Sopenharmony_ci            fi_type result;
1401bf215546Sopenharmony_ci            e = 0xff;
1402bf215546Sopenharmony_ci            m = 0x1;
1403bf215546Sopenharmony_ci            result.u = (s << 31) + (e << 23) + m;
1404bf215546Sopenharmony_ci            return result.f;
1405bf215546Sopenharmony_ci        }
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci        /* 'val' is Inf, return Inf */
1408bf215546Sopenharmony_ci        fi_type result;
1409bf215546Sopenharmony_ci        e = 0xff;
1410bf215546Sopenharmony_ci        result.u = (s << 31) + (e << 23) + m;
1411bf215546Sopenharmony_ci        return result.f;
1412bf215546Sopenharmony_ci    }
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_ci    if (!(flt_e | flt_m)) {
1415bf215546Sopenharmony_ci        /* 'val' is zero, return zero */
1416bf215546Sopenharmony_ci        fi_type result;
1417bf215546Sopenharmony_ci        e = 0;
1418bf215546Sopenharmony_ci        result.u = (s << 31) + (e << 23) + m;
1419bf215546Sopenharmony_ci        return result.f;
1420bf215546Sopenharmony_ci    }
1421bf215546Sopenharmony_ci
1422bf215546Sopenharmony_ci    m = _mesa_short_shift_right_jam64(flt_m, 22);
1423bf215546Sopenharmony_ci    if ( ! (flt_e | m) ) {
1424bf215546Sopenharmony_ci        /* 'val' is denorm, return zero */
1425bf215546Sopenharmony_ci        fi_type result;
1426bf215546Sopenharmony_ci        e = 0;
1427bf215546Sopenharmony_ci        result.u = (s << 31) + (e << 23) + m;
1428bf215546Sopenharmony_ci        return result.f;
1429bf215546Sopenharmony_ci    }
1430bf215546Sopenharmony_ci
1431bf215546Sopenharmony_ci    return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz);
1432bf215546Sopenharmony_ci}
1433bf215546Sopenharmony_ci
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci/**
1436bf215546Sopenharmony_ci * \brief Converts from 32bits to 16bits float and rounds the result to zero.
1437bf215546Sopenharmony_ci *
1438bf215546Sopenharmony_ci * From f32_to_f16()
1439bf215546Sopenharmony_ci */
1440bf215546Sopenharmony_ciuint16_t
1441bf215546Sopenharmony_ci_mesa_float_to_half_rtz_slow(float val)
1442bf215546Sopenharmony_ci{
1443bf215546Sopenharmony_ci    const fi_type fi = {val};
1444bf215546Sopenharmony_ci    const uint32_t flt_m = fi.u & 0x7fffff;
1445bf215546Sopenharmony_ci    const uint32_t flt_e = (fi.u >> 23) & 0xff;
1446bf215546Sopenharmony_ci    const uint32_t flt_s = (fi.u >> 31) & 0x1;
1447bf215546Sopenharmony_ci    int16_t s, e, m = 0;
1448bf215546Sopenharmony_ci
1449bf215546Sopenharmony_ci    s = flt_s;
1450bf215546Sopenharmony_ci
1451bf215546Sopenharmony_ci    if (flt_e == 0xff) {
1452bf215546Sopenharmony_ci        if (flt_m != 0) {
1453bf215546Sopenharmony_ci            /* 'val' is a NaN, return NaN */
1454bf215546Sopenharmony_ci            e = 0x1f;
1455bf215546Sopenharmony_ci            /* Retain the top bits of a NaN to make sure that the quiet/signaling
1456bf215546Sopenharmony_ci            * status stays the same.
1457bf215546Sopenharmony_ci            */
1458bf215546Sopenharmony_ci            m = flt_m >> 13;
1459bf215546Sopenharmony_ci            if (!m)
1460bf215546Sopenharmony_ci               m = 1;
1461bf215546Sopenharmony_ci            return (s << 15) + (e << 10) + m;
1462bf215546Sopenharmony_ci        }
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci        /* 'val' is Inf, return Inf */
1465bf215546Sopenharmony_ci        e = 0x1f;
1466bf215546Sopenharmony_ci        return (s << 15) + (e << 10) + m;
1467bf215546Sopenharmony_ci    }
1468bf215546Sopenharmony_ci
1469bf215546Sopenharmony_ci    if (!(flt_e | flt_m)) {
1470bf215546Sopenharmony_ci        /* 'val' is zero, return zero */
1471bf215546Sopenharmony_ci        e = 0;
1472bf215546Sopenharmony_ci        return (s << 15) + (e << 10) + m;
1473bf215546Sopenharmony_ci    }
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci    m = flt_m >> 9 | ((flt_m & 0x1ff) != 0);
1476bf215546Sopenharmony_ci    if ( ! (flt_e | m) ) {
1477bf215546Sopenharmony_ci        /* 'val' is denorm, return zero */
1478bf215546Sopenharmony_ci        e = 0;
1479bf215546Sopenharmony_ci        return (s << 15) + (e << 10) + m;
1480bf215546Sopenharmony_ci    }
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci    return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000);
1483bf215546Sopenharmony_ci}
1484