/*-------------------------------------------------------------------------
 * drawElements Base Portability Library
 * -------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief 16-bit floating-point math.
 *//*--------------------------------------------------------------------*/
23e5c31af7Sopenharmony_ci
24e5c31af7Sopenharmony_ci#include "deFloat16.h"
25e5c31af7Sopenharmony_ci
26e5c31af7Sopenharmony_ciDE_BEGIN_EXTERN_C
27e5c31af7Sopenharmony_ci
28e5c31af7Sopenharmony_cideFloat16 deFloat32To16 (float val32)
29e5c31af7Sopenharmony_ci{
30e5c31af7Sopenharmony_ci	deUint32	sign;
31e5c31af7Sopenharmony_ci	int			expotent;
32e5c31af7Sopenharmony_ci	deUint32	mantissa;
33e5c31af7Sopenharmony_ci	union
34e5c31af7Sopenharmony_ci	{
35e5c31af7Sopenharmony_ci		float		f;
36e5c31af7Sopenharmony_ci		deUint32	u;
37e5c31af7Sopenharmony_ci	} x;
38e5c31af7Sopenharmony_ci
39e5c31af7Sopenharmony_ci	x.f			= val32;
40e5c31af7Sopenharmony_ci	sign		= (x.u >> 16u) & 0x00008000u;
41e5c31af7Sopenharmony_ci	expotent	= (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15);
42e5c31af7Sopenharmony_ci	mantissa	= x.u & 0x007fffffu;
43e5c31af7Sopenharmony_ci
44e5c31af7Sopenharmony_ci	if (expotent <= 0)
45e5c31af7Sopenharmony_ci	{
46e5c31af7Sopenharmony_ci		if (expotent < -10)
47e5c31af7Sopenharmony_ci		{
48e5c31af7Sopenharmony_ci			/* Rounds to zero. */
49e5c31af7Sopenharmony_ci			return (deFloat16) sign;
50e5c31af7Sopenharmony_ci		}
51e5c31af7Sopenharmony_ci
52e5c31af7Sopenharmony_ci		/* Converted to denormalized half, add leading 1 to significand. */
53e5c31af7Sopenharmony_ci		mantissa = mantissa | 0x00800000u;
54e5c31af7Sopenharmony_ci
55e5c31af7Sopenharmony_ci		/* Round mantissa to nearest (10+e) */
56e5c31af7Sopenharmony_ci		{
57e5c31af7Sopenharmony_ci			deUint32 t = 14u - expotent;
58e5c31af7Sopenharmony_ci			deUint32 a = (1u << (t - 1u)) - 1u;
59e5c31af7Sopenharmony_ci			deUint32 b = (mantissa >> t) & 1u;
60e5c31af7Sopenharmony_ci
61e5c31af7Sopenharmony_ci			mantissa = (mantissa + a + b) >> t;
62e5c31af7Sopenharmony_ci		}
63e5c31af7Sopenharmony_ci
64e5c31af7Sopenharmony_ci		return (deFloat16) (sign | mantissa);
65e5c31af7Sopenharmony_ci	}
66e5c31af7Sopenharmony_ci	else if (expotent == 0xff - (127 - 15))
67e5c31af7Sopenharmony_ci	{
68e5c31af7Sopenharmony_ci		if (mantissa == 0u)
69e5c31af7Sopenharmony_ci		{
70e5c31af7Sopenharmony_ci			/* InF */
71e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
72e5c31af7Sopenharmony_ci		}
73e5c31af7Sopenharmony_ci		else
74e5c31af7Sopenharmony_ci		{
75e5c31af7Sopenharmony_ci			/* NaN */
76e5c31af7Sopenharmony_ci			mantissa >>= 13u;
77e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
78e5c31af7Sopenharmony_ci		}
79e5c31af7Sopenharmony_ci	}
80e5c31af7Sopenharmony_ci	else
81e5c31af7Sopenharmony_ci	{
82e5c31af7Sopenharmony_ci		/* Normalized float. */
83e5c31af7Sopenharmony_ci		mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u);
84e5c31af7Sopenharmony_ci
85e5c31af7Sopenharmony_ci		if (mantissa & 0x00800000u)
86e5c31af7Sopenharmony_ci		{
87e5c31af7Sopenharmony_ci			/* Overflow in mantissa. */
88e5c31af7Sopenharmony_ci			mantissa  = 0u;
89e5c31af7Sopenharmony_ci			expotent += 1;
90e5c31af7Sopenharmony_ci		}
91e5c31af7Sopenharmony_ci
92e5c31af7Sopenharmony_ci		if (expotent > 30)
93e5c31af7Sopenharmony_ci		{
94e5c31af7Sopenharmony_ci			/* \todo [pyry] Cause hw fp overflow */
95e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
96e5c31af7Sopenharmony_ci		}
97e5c31af7Sopenharmony_ci
98e5c31af7Sopenharmony_ci		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u));
99e5c31af7Sopenharmony_ci	}
100e5c31af7Sopenharmony_ci}
101e5c31af7Sopenharmony_ci
102e5c31af7Sopenharmony_cideFloat16 deFloat64To16 (double val64)
103e5c31af7Sopenharmony_ci{
104e5c31af7Sopenharmony_ci	deUint64	sign;
105e5c31af7Sopenharmony_ci	long		expotent;
106e5c31af7Sopenharmony_ci	deUint64	mantissa;
107e5c31af7Sopenharmony_ci	union
108e5c31af7Sopenharmony_ci	{
109e5c31af7Sopenharmony_ci		double		f;
110e5c31af7Sopenharmony_ci		deUint64	u;
111e5c31af7Sopenharmony_ci	} x;
112e5c31af7Sopenharmony_ci
113e5c31af7Sopenharmony_ci	x.f			= val64;
114e5c31af7Sopenharmony_ci	sign		= (x.u >> 48u) & 0x00008000u;
115e5c31af7Sopenharmony_ci	expotent	= (long int)((x.u >> 52u) & 0x000007ffu) - (1023 - 15);
116e5c31af7Sopenharmony_ci	mantissa	= x.u & 0x00fffffffffffffu;
117e5c31af7Sopenharmony_ci
118e5c31af7Sopenharmony_ci	if (expotent <= 0)
119e5c31af7Sopenharmony_ci	{
120e5c31af7Sopenharmony_ci		if (expotent < -10)
121e5c31af7Sopenharmony_ci		{
122e5c31af7Sopenharmony_ci			/* Rounds to zero. */
123e5c31af7Sopenharmony_ci			return (deFloat16) sign;
124e5c31af7Sopenharmony_ci		}
125e5c31af7Sopenharmony_ci
126e5c31af7Sopenharmony_ci		/* Converted to denormalized half, add leading 1 to significand. */
127e5c31af7Sopenharmony_ci		mantissa = mantissa | 0x0010000000000000u;
128e5c31af7Sopenharmony_ci
129e5c31af7Sopenharmony_ci		/* Round mantissa to nearest (10+e) */
130e5c31af7Sopenharmony_ci		{
131e5c31af7Sopenharmony_ci			deUint64 t = 43u - expotent;
132e5c31af7Sopenharmony_ci			deUint64 a = (1u << (t - 1u)) - 1u;
133e5c31af7Sopenharmony_ci			deUint64 b = (mantissa >> t) & 1u;
134e5c31af7Sopenharmony_ci
135e5c31af7Sopenharmony_ci			mantissa = (mantissa + a + b) >> t;
136e5c31af7Sopenharmony_ci		}
137e5c31af7Sopenharmony_ci
138e5c31af7Sopenharmony_ci		return (deFloat16) (sign | mantissa);
139e5c31af7Sopenharmony_ci	}
140e5c31af7Sopenharmony_ci	else if (expotent == 0x7ff - (1023 - 15))
141e5c31af7Sopenharmony_ci	{
142e5c31af7Sopenharmony_ci		if (mantissa == 0u)
143e5c31af7Sopenharmony_ci		{
144e5c31af7Sopenharmony_ci			/* InF */
145e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
146e5c31af7Sopenharmony_ci		}
147e5c31af7Sopenharmony_ci		else
148e5c31af7Sopenharmony_ci		{
149e5c31af7Sopenharmony_ci			/* NaN */
150e5c31af7Sopenharmony_ci			mantissa >>= 42u;
151e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
152e5c31af7Sopenharmony_ci		}
153e5c31af7Sopenharmony_ci	}
154e5c31af7Sopenharmony_ci	else
155e5c31af7Sopenharmony_ci	{
156e5c31af7Sopenharmony_ci		/* Normalized float. */
157e5c31af7Sopenharmony_ci		mantissa = mantissa + 0x000001ffffffffffu + ((mantissa >> 42u) & 1u);
158e5c31af7Sopenharmony_ci
159e5c31af7Sopenharmony_ci		if (mantissa & 0x010000000000000u)
160e5c31af7Sopenharmony_ci		{
161e5c31af7Sopenharmony_ci			/* Overflow in mantissa. */
162e5c31af7Sopenharmony_ci			mantissa  = 0u;
163e5c31af7Sopenharmony_ci			expotent += 1;
164e5c31af7Sopenharmony_ci		}
165e5c31af7Sopenharmony_ci
166e5c31af7Sopenharmony_ci		if (expotent > 30)
167e5c31af7Sopenharmony_ci		{
168e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
169e5c31af7Sopenharmony_ci		}
170e5c31af7Sopenharmony_ci
171e5c31af7Sopenharmony_ci		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 42u));
172e5c31af7Sopenharmony_ci	}
173e5c31af7Sopenharmony_ci}
174e5c31af7Sopenharmony_ci
175e5c31af7Sopenharmony_ci/*--------------------------------------------------------------------*//*!
176e5c31af7Sopenharmony_ci * \brief Round the given number `val` to nearest even by discarding
177e5c31af7Sopenharmony_ci *        the last `numBitsToDiscard` bits.
178e5c31af7Sopenharmony_ci * \param val value to round
179e5c31af7Sopenharmony_ci * \param numBitsToDiscard number of (least significant) bits to discard
180e5c31af7Sopenharmony_ci * \return The rounded value with the last `numBitsToDiscard` removed
181e5c31af7Sopenharmony_ci *//*--------------------------------------------------------------------*/
182e5c31af7Sopenharmony_cistatic deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
183e5c31af7Sopenharmony_ci{
184e5c31af7Sopenharmony_ci	const deUint32	lastBits	= val & ((1 << numBitsToDiscard) - 1);
185e5c31af7Sopenharmony_ci	const deUint32	headBit		= val & (1 << (numBitsToDiscard - 1));
186e5c31af7Sopenharmony_ci
187e5c31af7Sopenharmony_ci	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32);	/* Make sure no overflow. */
188e5c31af7Sopenharmony_ci	val >>= numBitsToDiscard;
189e5c31af7Sopenharmony_ci
190e5c31af7Sopenharmony_ci	if (headBit == 0)
191e5c31af7Sopenharmony_ci	{
192e5c31af7Sopenharmony_ci		return val;
193e5c31af7Sopenharmony_ci	}
194e5c31af7Sopenharmony_ci	else if (headBit == lastBits)
195e5c31af7Sopenharmony_ci	{
196e5c31af7Sopenharmony_ci		if ((val & 0x1) == 0x1)
197e5c31af7Sopenharmony_ci		{
198e5c31af7Sopenharmony_ci			return val + 1;
199e5c31af7Sopenharmony_ci		}
200e5c31af7Sopenharmony_ci		else
201e5c31af7Sopenharmony_ci		{
202e5c31af7Sopenharmony_ci			return val;
203e5c31af7Sopenharmony_ci		}
204e5c31af7Sopenharmony_ci	}
205e5c31af7Sopenharmony_ci	else
206e5c31af7Sopenharmony_ci	{
207e5c31af7Sopenharmony_ci		return val + 1;
208e5c31af7Sopenharmony_ci	}
209e5c31af7Sopenharmony_ci}
210e5c31af7Sopenharmony_ci
211e5c31af7Sopenharmony_cideFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
212e5c31af7Sopenharmony_ci{
213e5c31af7Sopenharmony_ci	union
214e5c31af7Sopenharmony_ci	{
215e5c31af7Sopenharmony_ci		float		f;		/* Interpret as 32-bit float */
216e5c31af7Sopenharmony_ci		deUint32	u;		/* Interpret as 32-bit unsigned integer */
217e5c31af7Sopenharmony_ci	} x;
218e5c31af7Sopenharmony_ci	deUint32	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
219e5c31af7Sopenharmony_ci	deUint32	exp32;		/* exp32: biased exponent for 32-bit floats */
220e5c31af7Sopenharmony_ci	int			exp16;		/* exp16: biased exponent for 16-bit floats */
221e5c31af7Sopenharmony_ci	deUint32	mantissa;
222e5c31af7Sopenharmony_ci
223e5c31af7Sopenharmony_ci	/* We only support these two rounding modes for now */
224e5c31af7Sopenharmony_ci	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
225e5c31af7Sopenharmony_ci
226e5c31af7Sopenharmony_ci	x.f			= val32;
227e5c31af7Sopenharmony_ci	sign		= (x.u >> 16u) & 0x00008000u;
228e5c31af7Sopenharmony_ci	exp32		= (x.u >> 23u) & 0x000000ffu;
229e5c31af7Sopenharmony_ci	exp16		= (int) (exp32) - 127 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
230e5c31af7Sopenharmony_ci	mantissa	= x.u & 0x007fffffu;
231e5c31af7Sopenharmony_ci
232e5c31af7Sopenharmony_ci	/* Case: zero and denormalized floats */
233e5c31af7Sopenharmony_ci	if (exp32 == 0)
234e5c31af7Sopenharmony_ci	{
235e5c31af7Sopenharmony_ci		/* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */
236e5c31af7Sopenharmony_ci		return (deFloat16) sign;
237e5c31af7Sopenharmony_ci	}
238e5c31af7Sopenharmony_ci	/* Case: Inf and NaN */
239e5c31af7Sopenharmony_ci	else if (exp32 == 0x000000ffu)
240e5c31af7Sopenharmony_ci	{
241e5c31af7Sopenharmony_ci		if (mantissa == 0u)
242e5c31af7Sopenharmony_ci		{
243e5c31af7Sopenharmony_ci			/* Inf */
244e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
245e5c31af7Sopenharmony_ci		}
246e5c31af7Sopenharmony_ci		else
247e5c31af7Sopenharmony_ci		{
248e5c31af7Sopenharmony_ci			/* NaN */
249e5c31af7Sopenharmony_ci			mantissa >>= 13u;	/* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */
250e5c31af7Sopenharmony_ci			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
251e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
252e5c31af7Sopenharmony_ci		}
253e5c31af7Sopenharmony_ci	}
254e5c31af7Sopenharmony_ci	/* The following are cases for normalized floats.
255e5c31af7Sopenharmony_ci	 *
256e5c31af7Sopenharmony_ci	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
257e5c31af7Sopenharmony_ci	 *   we can only shift the mantissa further right.
258e5c31af7Sopenharmony_ci	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
259e5c31af7Sopenharmony_ci	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
260e5c31af7Sopenharmony_ci	 *   So, we just need to right shift the mantissa -exp16 bits.
261e5c31af7Sopenharmony_ci	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
262e5c31af7Sopenharmony_ci	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
263e5c31af7Sopenharmony_ci	 */
264e5c31af7Sopenharmony_ci	/* Case: normalized floats -> zero */
265e5c31af7Sopenharmony_ci	else if (exp16 < -10)
266e5c31af7Sopenharmony_ci	{
267e5c31af7Sopenharmony_ci		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
268e5c31af7Sopenharmony_ci		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
269e5c31af7Sopenharmony_ci		return (deFloat16) sign;
270e5c31af7Sopenharmony_ci	}
271e5c31af7Sopenharmony_ci	/* Case: normalized floats -> zero and denormalized halfs */
272e5c31af7Sopenharmony_ci	else if (exp16 <= 0)
273e5c31af7Sopenharmony_ci	{
274e5c31af7Sopenharmony_ci		/* Add the implicit leading 1 in mormalized float to mantissa. */
275e5c31af7Sopenharmony_ci		mantissa |= 0x00800000u;
276e5c31af7Sopenharmony_ci		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
277e5c31af7Sopenharmony_ci		 * Need to discard the last 14-bits considering rounding mode.
278e5c31af7Sopenharmony_ci		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
279e5c31af7Sopenharmony_ci		 */
280e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
281e5c31af7Sopenharmony_ci		{
282e5c31af7Sopenharmony_ci			mantissa >>= (14 - exp16);
283e5c31af7Sopenharmony_ci		}
284e5c31af7Sopenharmony_ci		else
285e5c31af7Sopenharmony_ci		{
286e5c31af7Sopenharmony_ci			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
287e5c31af7Sopenharmony_ci			 * The overflowed bit is automatically carried to exponent then.
288e5c31af7Sopenharmony_ci			 */
289e5c31af7Sopenharmony_ci			mantissa = roundToNearestEven(mantissa, 14 - exp16);
290e5c31af7Sopenharmony_ci		}
291e5c31af7Sopenharmony_ci		return (deFloat16) (sign | mantissa);
292e5c31af7Sopenharmony_ci	}
293e5c31af7Sopenharmony_ci	/* Case: normalized floats -> normalized floats */
294e5c31af7Sopenharmony_ci	else if (exp16 <= 30)
295e5c31af7Sopenharmony_ci	{
296e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
297e5c31af7Sopenharmony_ci		{
298e5c31af7Sopenharmony_ci			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u));
299e5c31af7Sopenharmony_ci		}
300e5c31af7Sopenharmony_ci		else
301e5c31af7Sopenharmony_ci		{
302e5c31af7Sopenharmony_ci			mantissa	= roundToNearestEven(mantissa, 13);
303e5c31af7Sopenharmony_ci			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
304e5c31af7Sopenharmony_ci			exp16		= (exp16 << 10u) + (mantissa & (1 << 10));
305e5c31af7Sopenharmony_ci			mantissa	&= (1u << 10) - 1;
306e5c31af7Sopenharmony_ci			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
307e5c31af7Sopenharmony_ci		}
308e5c31af7Sopenharmony_ci	}
309e5c31af7Sopenharmony_ci	/* Case: normalized floats (too large to be representable as 16-bit floats) */
310e5c31af7Sopenharmony_ci	else
311e5c31af7Sopenharmony_ci	{
312e5c31af7Sopenharmony_ci		/* According to IEEE Std 754-2008 Section 7.4,
313e5c31af7Sopenharmony_ci		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
314e5c31af7Sopenharmony_ci		 *   of the intermediate  result.
315e5c31af7Sopenharmony_ci		 * * roundTowardZero carries all overflows to the format's largest finite number
316e5c31af7Sopenharmony_ci		 *   with the sign of the intermediate result.
317e5c31af7Sopenharmony_ci		 */
318e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
319e5c31af7Sopenharmony_ci		{
320e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
321e5c31af7Sopenharmony_ci		}
322e5c31af7Sopenharmony_ci		else
323e5c31af7Sopenharmony_ci		{
324e5c31af7Sopenharmony_ci			return (deFloat16) (sign | (0x1f << 10));
325e5c31af7Sopenharmony_ci		}
326e5c31af7Sopenharmony_ci	}
327e5c31af7Sopenharmony_ci
328e5c31af7Sopenharmony_ci	/* Make compiler happy */
329e5c31af7Sopenharmony_ci	return (deFloat16) 0;
330e5c31af7Sopenharmony_ci}
331e5c31af7Sopenharmony_ci
332e5c31af7Sopenharmony_ci/*--------------------------------------------------------------------*//*!
333e5c31af7Sopenharmony_ci * \brief Round the given number `val` to nearest even by discarding
334e5c31af7Sopenharmony_ci *        the last `numBitsToDiscard` bits.
335e5c31af7Sopenharmony_ci * \param val value to round
336e5c31af7Sopenharmony_ci * \param numBitsToDiscard number of (least significant) bits to discard
337e5c31af7Sopenharmony_ci * \return The rounded value with the last `numBitsToDiscard` removed
338e5c31af7Sopenharmony_ci *//*--------------------------------------------------------------------*/
339e5c31af7Sopenharmony_cistatic deUint64 roundToNearestEven64 (deUint64 val, const deUint64 numBitsToDiscard)
340e5c31af7Sopenharmony_ci{
341e5c31af7Sopenharmony_ci	const deUint64	lastBits	= val & (((deUint64)1 << numBitsToDiscard) - 1);
342e5c31af7Sopenharmony_ci	const deUint64	headBit		= val & ((deUint64)1 << (numBitsToDiscard - 1));
343e5c31af7Sopenharmony_ci
344e5c31af7Sopenharmony_ci	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 64);	/* Make sure no overflow. */
345e5c31af7Sopenharmony_ci	val >>= numBitsToDiscard;
346e5c31af7Sopenharmony_ci
347e5c31af7Sopenharmony_ci	if (headBit == 0)
348e5c31af7Sopenharmony_ci	{
349e5c31af7Sopenharmony_ci		return val;
350e5c31af7Sopenharmony_ci	}
351e5c31af7Sopenharmony_ci	else if (headBit == lastBits)
352e5c31af7Sopenharmony_ci	{
353e5c31af7Sopenharmony_ci		if ((val & 0x1) == 0x1)
354e5c31af7Sopenharmony_ci		{
355e5c31af7Sopenharmony_ci			return val + 1;
356e5c31af7Sopenharmony_ci		}
357e5c31af7Sopenharmony_ci		else
358e5c31af7Sopenharmony_ci		{
359e5c31af7Sopenharmony_ci			return val;
360e5c31af7Sopenharmony_ci		}
361e5c31af7Sopenharmony_ci	}
362e5c31af7Sopenharmony_ci	else
363e5c31af7Sopenharmony_ci	{
364e5c31af7Sopenharmony_ci		return val + 1;
365e5c31af7Sopenharmony_ci	}
366e5c31af7Sopenharmony_ci}
367e5c31af7Sopenharmony_ci
368e5c31af7Sopenharmony_cideFloat16 deFloat64To16Round (double val64, deRoundingMode mode)
369e5c31af7Sopenharmony_ci{
370e5c31af7Sopenharmony_ci	union
371e5c31af7Sopenharmony_ci	{
372e5c31af7Sopenharmony_ci		double		f;		/* Interpret as 64-bit float */
373e5c31af7Sopenharmony_ci		deUint64	u;		/* Interpret as 64-bit unsigned integer */
374e5c31af7Sopenharmony_ci	} x;
375e5c31af7Sopenharmony_ci	deUint64	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
376e5c31af7Sopenharmony_ci	deUint64	exp64;		/* exp32: biased exponent for 64-bit floats */
377e5c31af7Sopenharmony_ci	int			exp16;		/* exp16: biased exponent for 16-bit floats */
378e5c31af7Sopenharmony_ci	deUint64	mantissa;
379e5c31af7Sopenharmony_ci
380e5c31af7Sopenharmony_ci	/* We only support these two rounding modes for now */
381e5c31af7Sopenharmony_ci	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
382e5c31af7Sopenharmony_ci
383e5c31af7Sopenharmony_ci	x.f			= val64;
384e5c31af7Sopenharmony_ci	sign		= (x.u >> 48u) & 0x00008000u;
385e5c31af7Sopenharmony_ci	exp64		= (x.u >> 52u) & 0x000007ffu;
386e5c31af7Sopenharmony_ci	exp16		= (int) (exp64) - 1023 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
387e5c31af7Sopenharmony_ci	mantissa	= x.u & 0x00fffffffffffffu;
388e5c31af7Sopenharmony_ci
389e5c31af7Sopenharmony_ci	/* Case: zero and denormalized floats */
390e5c31af7Sopenharmony_ci	if (exp64 == 0)
391e5c31af7Sopenharmony_ci	{
392e5c31af7Sopenharmony_ci		/* Denormalized floats are < 2^(1-1023), not representable in 16-bit floats, rounding to zero. */
393e5c31af7Sopenharmony_ci		return (deFloat16) sign;
394e5c31af7Sopenharmony_ci	}
395e5c31af7Sopenharmony_ci	/* Case: Inf and NaN */
396e5c31af7Sopenharmony_ci	else if (exp64 == 0x000007ffu)
397e5c31af7Sopenharmony_ci	{
398e5c31af7Sopenharmony_ci		if (mantissa == 0u)
399e5c31af7Sopenharmony_ci		{
400e5c31af7Sopenharmony_ci			/* Inf */
401e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u);
402e5c31af7Sopenharmony_ci		}
403e5c31af7Sopenharmony_ci		else
404e5c31af7Sopenharmony_ci		{
405e5c31af7Sopenharmony_ci			/* NaN */
406e5c31af7Sopenharmony_ci			mantissa >>= 42u;	/* 16-bit floats has 10-bit for mantissa, 42-bit less than 64-bit floats. */
407e5c31af7Sopenharmony_ci			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
408e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
409e5c31af7Sopenharmony_ci		}
410e5c31af7Sopenharmony_ci	}
411e5c31af7Sopenharmony_ci	/* The following are cases for normalized floats.
412e5c31af7Sopenharmony_ci	 *
413e5c31af7Sopenharmony_ci	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
414e5c31af7Sopenharmony_ci	 *   we can only shift the mantissa further right.
415e5c31af7Sopenharmony_ci	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
416e5c31af7Sopenharmony_ci	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
417e5c31af7Sopenharmony_ci	 *   So, we just need to right shift the mantissa -exp16 bits.
418e5c31af7Sopenharmony_ci	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
419e5c31af7Sopenharmony_ci	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
420e5c31af7Sopenharmony_ci	 */
421e5c31af7Sopenharmony_ci	/* Case: normalized floats -> zero */
422e5c31af7Sopenharmony_ci	else if (exp16 < -10)
423e5c31af7Sopenharmony_ci	{
424e5c31af7Sopenharmony_ci		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
425e5c31af7Sopenharmony_ci		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
426e5c31af7Sopenharmony_ci		return (deFloat16) sign;
427e5c31af7Sopenharmony_ci	}
428e5c31af7Sopenharmony_ci	/* Case: normalized floats -> zero and denormalized halfs */
429e5c31af7Sopenharmony_ci	else if (exp16 <= 0)
430e5c31af7Sopenharmony_ci	{
431e5c31af7Sopenharmony_ci		/* Add the implicit leading 1 in mormalized float to mantissa. */
432e5c31af7Sopenharmony_ci		mantissa |= 0x0010000000000000u;
433e5c31af7Sopenharmony_ci		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
434e5c31af7Sopenharmony_ci		 * Need to discard the last 14-bits considering rounding mode.
435e5c31af7Sopenharmony_ci		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
436e5c31af7Sopenharmony_ci		 */
437e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
438e5c31af7Sopenharmony_ci		{
439e5c31af7Sopenharmony_ci			mantissa >>= (43 - exp16);
440e5c31af7Sopenharmony_ci		}
441e5c31af7Sopenharmony_ci		else
442e5c31af7Sopenharmony_ci		{
443e5c31af7Sopenharmony_ci			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
444e5c31af7Sopenharmony_ci			 * The overflowed bit is automatically carried to exponent then.
445e5c31af7Sopenharmony_ci			 */
446e5c31af7Sopenharmony_ci			mantissa = roundToNearestEven64(mantissa, 43 - exp16);
447e5c31af7Sopenharmony_ci		}
448e5c31af7Sopenharmony_ci		return (deFloat16) (sign | mantissa);
449e5c31af7Sopenharmony_ci	}
450e5c31af7Sopenharmony_ci	/* Case: normalized floats -> normalized floats */
451e5c31af7Sopenharmony_ci	else if (exp16 <= 30)
452e5c31af7Sopenharmony_ci	{
453e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
454e5c31af7Sopenharmony_ci		{
455e5c31af7Sopenharmony_ci			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 42u));
456e5c31af7Sopenharmony_ci		}
457e5c31af7Sopenharmony_ci		else
458e5c31af7Sopenharmony_ci		{
459e5c31af7Sopenharmony_ci			mantissa	= roundToNearestEven64(mantissa, 42);
460e5c31af7Sopenharmony_ci			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
461e5c31af7Sopenharmony_ci			exp16		= (exp16 << 10u) + (deFloat16)(mantissa & (1 << 10));
462e5c31af7Sopenharmony_ci			mantissa	&= (1u << 10) - 1;
463e5c31af7Sopenharmony_ci			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
464e5c31af7Sopenharmony_ci		}
465e5c31af7Sopenharmony_ci	}
466e5c31af7Sopenharmony_ci	/* Case: normalized floats (too large to be representable as 16-bit floats) */
467e5c31af7Sopenharmony_ci	else
468e5c31af7Sopenharmony_ci	{
469e5c31af7Sopenharmony_ci		/* According to IEEE Std 754-2008 Section 7.4,
470e5c31af7Sopenharmony_ci		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
471e5c31af7Sopenharmony_ci		 *   of the intermediate  result.
472e5c31af7Sopenharmony_ci		 * * roundTowardZero carries all overflows to the format's largest finite number
473e5c31af7Sopenharmony_ci		 *   with the sign of the intermediate result.
474e5c31af7Sopenharmony_ci		 */
475e5c31af7Sopenharmony_ci		if (mode == DE_ROUNDINGMODE_TO_ZERO)
476e5c31af7Sopenharmony_ci		{
477e5c31af7Sopenharmony_ci			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
478e5c31af7Sopenharmony_ci		}
479e5c31af7Sopenharmony_ci		else
480e5c31af7Sopenharmony_ci		{
481e5c31af7Sopenharmony_ci			return (deFloat16) (sign | (0x1f << 10));
482e5c31af7Sopenharmony_ci		}
483e5c31af7Sopenharmony_ci	}
484e5c31af7Sopenharmony_ci
485e5c31af7Sopenharmony_ci	/* Make compiler happy */
486e5c31af7Sopenharmony_ci	return (deFloat16) 0;
487e5c31af7Sopenharmony_ci}
488e5c31af7Sopenharmony_ci
489e5c31af7Sopenharmony_cifloat deFloat16To32 (deFloat16 val16)
490e5c31af7Sopenharmony_ci{
491e5c31af7Sopenharmony_ci	deUint32 sign;
492e5c31af7Sopenharmony_ci	deUint32 expotent;
493e5c31af7Sopenharmony_ci	deUint32 mantissa;
494e5c31af7Sopenharmony_ci	union
495e5c31af7Sopenharmony_ci	{
496e5c31af7Sopenharmony_ci		float		f;
497e5c31af7Sopenharmony_ci		deUint32	u;
498e5c31af7Sopenharmony_ci	} x;
499e5c31af7Sopenharmony_ci
500e5c31af7Sopenharmony_ci	x.u			= 0u;
501e5c31af7Sopenharmony_ci
502e5c31af7Sopenharmony_ci	sign		= ((deUint32)val16 >> 15u) & 0x00000001u;
503e5c31af7Sopenharmony_ci	expotent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
504e5c31af7Sopenharmony_ci	mantissa	= (deUint32)val16 & 0x000003ffu;
505e5c31af7Sopenharmony_ci
506e5c31af7Sopenharmony_ci	if (expotent == 0u)
507e5c31af7Sopenharmony_ci	{
508e5c31af7Sopenharmony_ci		if (mantissa == 0u)
509e5c31af7Sopenharmony_ci		{
510e5c31af7Sopenharmony_ci			/* +/- 0 */
511e5c31af7Sopenharmony_ci			x.u = sign << 31u;
512e5c31af7Sopenharmony_ci			return x.f;
513e5c31af7Sopenharmony_ci		}
514e5c31af7Sopenharmony_ci		else
515e5c31af7Sopenharmony_ci		{
516e5c31af7Sopenharmony_ci			/* Denormalized, normalize it. */
517e5c31af7Sopenharmony_ci
518e5c31af7Sopenharmony_ci			while (!(mantissa & 0x00000400u))
519e5c31af7Sopenharmony_ci			{
520e5c31af7Sopenharmony_ci				mantissa <<= 1u;
521e5c31af7Sopenharmony_ci				expotent -=  1u;
522e5c31af7Sopenharmony_ci			}
523e5c31af7Sopenharmony_ci
524e5c31af7Sopenharmony_ci			expotent += 1u;
525e5c31af7Sopenharmony_ci			mantissa &= ~0x00000400u;
526e5c31af7Sopenharmony_ci		}
527e5c31af7Sopenharmony_ci	}
528e5c31af7Sopenharmony_ci	else if (expotent == 31u)
529e5c31af7Sopenharmony_ci	{
530e5c31af7Sopenharmony_ci		if (mantissa == 0u)
531e5c31af7Sopenharmony_ci		{
532e5c31af7Sopenharmony_ci			/* +/- InF */
533e5c31af7Sopenharmony_ci			x.u = (sign << 31u) | 0x7f800000u;
534e5c31af7Sopenharmony_ci			return x.f;
535e5c31af7Sopenharmony_ci		}
536e5c31af7Sopenharmony_ci		else
537e5c31af7Sopenharmony_ci		{
538e5c31af7Sopenharmony_ci			/* +/- NaN */
539e5c31af7Sopenharmony_ci			x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u);
540e5c31af7Sopenharmony_ci			return x.f;
541e5c31af7Sopenharmony_ci		}
542e5c31af7Sopenharmony_ci	}
543e5c31af7Sopenharmony_ci
544e5c31af7Sopenharmony_ci	expotent = expotent + (127u - 15u);
545e5c31af7Sopenharmony_ci	mantissa = mantissa << 13u;
546e5c31af7Sopenharmony_ci
547e5c31af7Sopenharmony_ci	x.u = (sign << 31u) | (expotent << 23u) | mantissa;
548e5c31af7Sopenharmony_ci	return x.f;
549e5c31af7Sopenharmony_ci}
550e5c31af7Sopenharmony_ci
551e5c31af7Sopenharmony_cidouble deFloat16To64 (deFloat16 val16)
552e5c31af7Sopenharmony_ci{
553e5c31af7Sopenharmony_ci	deUint64 sign;
554e5c31af7Sopenharmony_ci	deUint64 expotent;
555e5c31af7Sopenharmony_ci	deUint64 mantissa;
556e5c31af7Sopenharmony_ci	union
557e5c31af7Sopenharmony_ci	{
558e5c31af7Sopenharmony_ci		double		f;
559e5c31af7Sopenharmony_ci		deUint64	u;
560e5c31af7Sopenharmony_ci	} x;
561e5c31af7Sopenharmony_ci
562e5c31af7Sopenharmony_ci	x.u			= 0u;
563e5c31af7Sopenharmony_ci
564e5c31af7Sopenharmony_ci	sign		= ((deUint32)val16 >> 15u) & 0x00000001u;
565e5c31af7Sopenharmony_ci	expotent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
566e5c31af7Sopenharmony_ci	mantissa	= (deUint32)val16 & 0x000003ffu;
567e5c31af7Sopenharmony_ci
568e5c31af7Sopenharmony_ci	if (expotent == 0u)
569e5c31af7Sopenharmony_ci	{
570e5c31af7Sopenharmony_ci		if (mantissa == 0u)
571e5c31af7Sopenharmony_ci		{
572e5c31af7Sopenharmony_ci			/* +/- 0 */
573e5c31af7Sopenharmony_ci			x.u = sign << 63u;
574e5c31af7Sopenharmony_ci			return x.f;
575e5c31af7Sopenharmony_ci		}
576e5c31af7Sopenharmony_ci		else
577e5c31af7Sopenharmony_ci		{
578e5c31af7Sopenharmony_ci			/* Denormalized, normalize it. */
579e5c31af7Sopenharmony_ci
580e5c31af7Sopenharmony_ci			while (!(mantissa & 0x00000400u))
581e5c31af7Sopenharmony_ci			{
582e5c31af7Sopenharmony_ci				mantissa <<= 1u;
583e5c31af7Sopenharmony_ci				expotent -=  1u;
584e5c31af7Sopenharmony_ci			}
585e5c31af7Sopenharmony_ci
586e5c31af7Sopenharmony_ci			expotent += 1u;
587e5c31af7Sopenharmony_ci			mantissa &= ~0x00000400u;
588e5c31af7Sopenharmony_ci		}
589e5c31af7Sopenharmony_ci	}
590e5c31af7Sopenharmony_ci	else if (expotent == 31u)
591e5c31af7Sopenharmony_ci	{
592e5c31af7Sopenharmony_ci		if (mantissa == 0u)
593e5c31af7Sopenharmony_ci		{
594e5c31af7Sopenharmony_ci			/* +/- InF */
595e5c31af7Sopenharmony_ci			x.u = (sign << 63u) | 0x7ff0000000000000u;
596e5c31af7Sopenharmony_ci			return x.f;
597e5c31af7Sopenharmony_ci		}
598e5c31af7Sopenharmony_ci		else
599e5c31af7Sopenharmony_ci		{
600e5c31af7Sopenharmony_ci			/* +/- NaN */
601e5c31af7Sopenharmony_ci			x.u = (sign << 63u) | 0x7ff0000000000000u | (mantissa << 42u);
602e5c31af7Sopenharmony_ci			return x.f;
603e5c31af7Sopenharmony_ci		}
604e5c31af7Sopenharmony_ci	}
605e5c31af7Sopenharmony_ci
606e5c31af7Sopenharmony_ci	expotent = expotent + (1023u - 15u);
607e5c31af7Sopenharmony_ci	mantissa = mantissa << 42u;
608e5c31af7Sopenharmony_ci
609e5c31af7Sopenharmony_ci	x.u = (sign << 63u) | (expotent << 52u) | mantissa;
610e5c31af7Sopenharmony_ci	return x.f;
611e5c31af7Sopenharmony_ci}
612e5c31af7Sopenharmony_ci
613e5c31af7Sopenharmony_ciDE_END_EXTERN_C
614