1/*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 16-bit floating-point math.
22 *//*--------------------------------------------------------------------*/
23
24#include "deFloat16.h"
25
26DE_BEGIN_EXTERN_C
27
28deFloat16 deFloat32To16 (float val32)
29{
30	deUint32	sign;
31	int			expotent;
32	deUint32	mantissa;
33	union
34	{
35		float		f;
36		deUint32	u;
37	} x;
38
39	x.f			= val32;
40	sign		= (x.u >> 16u) & 0x00008000u;
41	expotent	= (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15);
42	mantissa	= x.u & 0x007fffffu;
43
44	if (expotent <= 0)
45	{
46		if (expotent < -10)
47		{
48			/* Rounds to zero. */
49			return (deFloat16) sign;
50		}
51
52		/* Converted to denormalized half, add leading 1 to significand. */
53		mantissa = mantissa | 0x00800000u;
54
55		/* Round mantissa to nearest (10+e) */
56		{
57			deUint32 t = 14u - expotent;
58			deUint32 a = (1u << (t - 1u)) - 1u;
59			deUint32 b = (mantissa >> t) & 1u;
60
61			mantissa = (mantissa + a + b) >> t;
62		}
63
64		return (deFloat16) (sign | mantissa);
65	}
66	else if (expotent == 0xff - (127 - 15))
67	{
68		if (mantissa == 0u)
69		{
70			/* InF */
71			return (deFloat16) (sign | 0x7c00u);
72		}
73		else
74		{
75			/* NaN */
76			mantissa >>= 13u;
77			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
78		}
79	}
80	else
81	{
82		/* Normalized float. */
83		mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u);
84
85		if (mantissa & 0x00800000u)
86		{
87			/* Overflow in mantissa. */
88			mantissa  = 0u;
89			expotent += 1;
90		}
91
92		if (expotent > 30)
93		{
94			/* \todo [pyry] Cause hw fp overflow */
95			return (deFloat16) (sign | 0x7c00u);
96		}
97
98		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u));
99	}
100}
101
102deFloat16 deFloat64To16 (double val64)
103{
104	deUint64	sign;
105	long		expotent;
106	deUint64	mantissa;
107	union
108	{
109		double		f;
110		deUint64	u;
111	} x;
112
113	x.f			= val64;
114	sign		= (x.u >> 48u) & 0x00008000u;
115	expotent	= (long int)((x.u >> 52u) & 0x000007ffu) - (1023 - 15);
116	mantissa	= x.u & 0x00fffffffffffffu;
117
118	if (expotent <= 0)
119	{
120		if (expotent < -10)
121		{
122			/* Rounds to zero. */
123			return (deFloat16) sign;
124		}
125
126		/* Converted to denormalized half, add leading 1 to significand. */
127		mantissa = mantissa | 0x0010000000000000u;
128
129		/* Round mantissa to nearest (10+e) */
130		{
131			deUint64 t = 43u - expotent;
132			deUint64 a = (1u << (t - 1u)) - 1u;
133			deUint64 b = (mantissa >> t) & 1u;
134
135			mantissa = (mantissa + a + b) >> t;
136		}
137
138		return (deFloat16) (sign | mantissa);
139	}
140	else if (expotent == 0x7ff - (1023 - 15))
141	{
142		if (mantissa == 0u)
143		{
144			/* InF */
145			return (deFloat16) (sign | 0x7c00u);
146		}
147		else
148		{
149			/* NaN */
150			mantissa >>= 42u;
151			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
152		}
153	}
154	else
155	{
156		/* Normalized float. */
157		mantissa = mantissa + 0x000001ffffffffffu + ((mantissa >> 42u) & 1u);
158
159		if (mantissa & 0x010000000000000u)
160		{
161			/* Overflow in mantissa. */
162			mantissa  = 0u;
163			expotent += 1;
164		}
165
166		if (expotent > 30)
167		{
168			return (deFloat16) (sign | 0x7c00u);
169		}
170
171		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 42u));
172	}
173}
174
175/*--------------------------------------------------------------------*//*!
176 * \brief Round the given number `val` to nearest even by discarding
177 *        the last `numBitsToDiscard` bits.
178 * \param val value to round
179 * \param numBitsToDiscard number of (least significant) bits to discard
180 * \return The rounded value with the last `numBitsToDiscard` removed
181 *//*--------------------------------------------------------------------*/
182static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
183{
184	const deUint32	lastBits	= val & ((1 << numBitsToDiscard) - 1);
185	const deUint32	headBit		= val & (1 << (numBitsToDiscard - 1));
186
187	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32);	/* Make sure no overflow. */
188	val >>= numBitsToDiscard;
189
190	if (headBit == 0)
191	{
192		return val;
193	}
194	else if (headBit == lastBits)
195	{
196		if ((val & 0x1) == 0x1)
197		{
198			return val + 1;
199		}
200		else
201		{
202			return val;
203		}
204	}
205	else
206	{
207		return val + 1;
208	}
209}
210
211deFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
212{
213	union
214	{
215		float		f;		/* Interpret as 32-bit float */
216		deUint32	u;		/* Interpret as 32-bit unsigned integer */
217	} x;
218	deUint32	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
219	deUint32	exp32;		/* exp32: biased exponent for 32-bit floats */
220	int			exp16;		/* exp16: biased exponent for 16-bit floats */
221	deUint32	mantissa;
222
223	/* We only support these two rounding modes for now */
224	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
225
226	x.f			= val32;
227	sign		= (x.u >> 16u) & 0x00008000u;
228	exp32		= (x.u >> 23u) & 0x000000ffu;
229	exp16		= (int) (exp32) - 127 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
230	mantissa	= x.u & 0x007fffffu;
231
232	/* Case: zero and denormalized floats */
233	if (exp32 == 0)
234	{
235		/* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */
236		return (deFloat16) sign;
237	}
238	/* Case: Inf and NaN */
239	else if (exp32 == 0x000000ffu)
240	{
241		if (mantissa == 0u)
242		{
243			/* Inf */
244			return (deFloat16) (sign | 0x7c00u);
245		}
246		else
247		{
248			/* NaN */
249			mantissa >>= 13u;	/* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */
250			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
251			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
252		}
253	}
254	/* The following are cases for normalized floats.
255	 *
256	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
257	 *   we can only shift the mantissa further right.
258	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
259	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
260	 *   So, we just need to right shift the mantissa -exp16 bits.
261	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
262	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
263	 */
264	/* Case: normalized floats -> zero */
265	else if (exp16 < -10)
266	{
267		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
268		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
269		return (deFloat16) sign;
270	}
271	/* Case: normalized floats -> zero and denormalized halfs */
272	else if (exp16 <= 0)
273	{
274		/* Add the implicit leading 1 in mormalized float to mantissa. */
275		mantissa |= 0x00800000u;
276		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
277		 * Need to discard the last 14-bits considering rounding mode.
278		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
279		 */
280		if (mode == DE_ROUNDINGMODE_TO_ZERO)
281		{
282			mantissa >>= (14 - exp16);
283		}
284		else
285		{
286			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
287			 * The overflowed bit is automatically carried to exponent then.
288			 */
289			mantissa = roundToNearestEven(mantissa, 14 - exp16);
290		}
291		return (deFloat16) (sign | mantissa);
292	}
293	/* Case: normalized floats -> normalized floats */
294	else if (exp16 <= 30)
295	{
296		if (mode == DE_ROUNDINGMODE_TO_ZERO)
297		{
298			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u));
299		}
300		else
301		{
302			mantissa	= roundToNearestEven(mantissa, 13);
303			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
304			exp16		= (exp16 << 10u) + (mantissa & (1 << 10));
305			mantissa	&= (1u << 10) - 1;
306			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
307		}
308	}
309	/* Case: normalized floats (too large to be representable as 16-bit floats) */
310	else
311	{
312		/* According to IEEE Std 754-2008 Section 7.4,
313		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
314		 *   of the intermediate  result.
315		 * * roundTowardZero carries all overflows to the format's largest finite number
316		 *   with the sign of the intermediate result.
317		 */
318		if (mode == DE_ROUNDINGMODE_TO_ZERO)
319		{
320			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
321		}
322		else
323		{
324			return (deFloat16) (sign | (0x1f << 10));
325		}
326	}
327
328	/* Make compiler happy */
329	return (deFloat16) 0;
330}
331
332/*--------------------------------------------------------------------*//*!
333 * \brief Round the given number `val` to nearest even by discarding
334 *        the last `numBitsToDiscard` bits.
335 * \param val value to round
336 * \param numBitsToDiscard number of (least significant) bits to discard
337 * \return The rounded value with the last `numBitsToDiscard` removed
338 *//*--------------------------------------------------------------------*/
339static deUint64 roundToNearestEven64 (deUint64 val, const deUint64 numBitsToDiscard)
340{
341	const deUint64	lastBits	= val & (((deUint64)1 << numBitsToDiscard) - 1);
342	const deUint64	headBit		= val & ((deUint64)1 << (numBitsToDiscard - 1));
343
344	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 64);	/* Make sure no overflow. */
345	val >>= numBitsToDiscard;
346
347	if (headBit == 0)
348	{
349		return val;
350	}
351	else if (headBit == lastBits)
352	{
353		if ((val & 0x1) == 0x1)
354		{
355			return val + 1;
356		}
357		else
358		{
359			return val;
360		}
361	}
362	else
363	{
364		return val + 1;
365	}
366}
367
368deFloat16 deFloat64To16Round (double val64, deRoundingMode mode)
369{
370	union
371	{
372		double		f;		/* Interpret as 64-bit float */
373		deUint64	u;		/* Interpret as 64-bit unsigned integer */
374	} x;
375	deUint64	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
376	deUint64	exp64;		/* exp32: biased exponent for 64-bit floats */
377	int			exp16;		/* exp16: biased exponent for 16-bit floats */
378	deUint64	mantissa;
379
380	/* We only support these two rounding modes for now */
381	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
382
383	x.f			= val64;
384	sign		= (x.u >> 48u) & 0x00008000u;
385	exp64		= (x.u >> 52u) & 0x000007ffu;
386	exp16		= (int) (exp64) - 1023 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
387	mantissa	= x.u & 0x00fffffffffffffu;
388
389	/* Case: zero and denormalized floats */
390	if (exp64 == 0)
391	{
392		/* Denormalized floats are < 2^(1-1023), not representable in 16-bit floats, rounding to zero. */
393		return (deFloat16) sign;
394	}
395	/* Case: Inf and NaN */
396	else if (exp64 == 0x000007ffu)
397	{
398		if (mantissa == 0u)
399		{
400			/* Inf */
401			return (deFloat16) (sign | 0x7c00u);
402		}
403		else
404		{
405			/* NaN */
406			mantissa >>= 42u;	/* 16-bit floats has 10-bit for mantissa, 42-bit less than 64-bit floats. */
407			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
408			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
409		}
410	}
411	/* The following are cases for normalized floats.
412	 *
413	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
414	 *   we can only shift the mantissa further right.
415	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
416	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
417	 *   So, we just need to right shift the mantissa -exp16 bits.
418	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
419	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
420	 */
421	/* Case: normalized floats -> zero */
422	else if (exp16 < -10)
423	{
424		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
425		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
426		return (deFloat16) sign;
427	}
428	/* Case: normalized floats -> zero and denormalized halfs */
429	else if (exp16 <= 0)
430	{
431		/* Add the implicit leading 1 in mormalized float to mantissa. */
432		mantissa |= 0x0010000000000000u;
433		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
434		 * Need to discard the last 14-bits considering rounding mode.
435		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
436		 */
437		if (mode == DE_ROUNDINGMODE_TO_ZERO)
438		{
439			mantissa >>= (43 - exp16);
440		}
441		else
442		{
443			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
444			 * The overflowed bit is automatically carried to exponent then.
445			 */
446			mantissa = roundToNearestEven64(mantissa, 43 - exp16);
447		}
448		return (deFloat16) (sign | mantissa);
449	}
450	/* Case: normalized floats -> normalized floats */
451	else if (exp16 <= 30)
452	{
453		if (mode == DE_ROUNDINGMODE_TO_ZERO)
454		{
455			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 42u));
456		}
457		else
458		{
459			mantissa	= roundToNearestEven64(mantissa, 42);
460			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
461			exp16		= (exp16 << 10u) + (deFloat16)(mantissa & (1 << 10));
462			mantissa	&= (1u << 10) - 1;
463			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
464		}
465	}
466	/* Case: normalized floats (too large to be representable as 16-bit floats) */
467	else
468	{
469		/* According to IEEE Std 754-2008 Section 7.4,
470		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
471		 *   of the intermediate  result.
472		 * * roundTowardZero carries all overflows to the format's largest finite number
473		 *   with the sign of the intermediate result.
474		 */
475		if (mode == DE_ROUNDINGMODE_TO_ZERO)
476		{
477			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
478		}
479		else
480		{
481			return (deFloat16) (sign | (0x1f << 10));
482		}
483	}
484
485	/* Make compiler happy */
486	return (deFloat16) 0;
487}
488
489float deFloat16To32 (deFloat16 val16)
490{
491	deUint32 sign;
492	deUint32 expotent;
493	deUint32 mantissa;
494	union
495	{
496		float		f;
497		deUint32	u;
498	} x;
499
500	x.u			= 0u;
501
502	sign		= ((deUint32)val16 >> 15u) & 0x00000001u;
503	expotent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
504	mantissa	= (deUint32)val16 & 0x000003ffu;
505
506	if (expotent == 0u)
507	{
508		if (mantissa == 0u)
509		{
510			/* +/- 0 */
511			x.u = sign << 31u;
512			return x.f;
513		}
514		else
515		{
516			/* Denormalized, normalize it. */
517
518			while (!(mantissa & 0x00000400u))
519			{
520				mantissa <<= 1u;
521				expotent -=  1u;
522			}
523
524			expotent += 1u;
525			mantissa &= ~0x00000400u;
526		}
527	}
528	else if (expotent == 31u)
529	{
530		if (mantissa == 0u)
531		{
532			/* +/- InF */
533			x.u = (sign << 31u) | 0x7f800000u;
534			return x.f;
535		}
536		else
537		{
538			/* +/- NaN */
539			x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u);
540			return x.f;
541		}
542	}
543
544	expotent = expotent + (127u - 15u);
545	mantissa = mantissa << 13u;
546
547	x.u = (sign << 31u) | (expotent << 23u) | mantissa;
548	return x.f;
549}
550
551double deFloat16To64 (deFloat16 val16)
552{
553	deUint64 sign;
554	deUint64 expotent;
555	deUint64 mantissa;
556	union
557	{
558		double		f;
559		deUint64	u;
560	} x;
561
562	x.u			= 0u;
563
564	sign		= ((deUint32)val16 >> 15u) & 0x00000001u;
565	expotent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
566	mantissa	= (deUint32)val16 & 0x000003ffu;
567
568	if (expotent == 0u)
569	{
570		if (mantissa == 0u)
571		{
572			/* +/- 0 */
573			x.u = sign << 63u;
574			return x.f;
575		}
576		else
577		{
578			/* Denormalized, normalize it. */
579
580			while (!(mantissa & 0x00000400u))
581			{
582				mantissa <<= 1u;
583				expotent -=  1u;
584			}
585
586			expotent += 1u;
587			mantissa &= ~0x00000400u;
588		}
589	}
590	else if (expotent == 31u)
591	{
592		if (mantissa == 0u)
593		{
594			/* +/- InF */
595			x.u = (sign << 63u) | 0x7ff0000000000000u;
596			return x.f;
597		}
598		else
599		{
600			/* +/- NaN */
601			x.u = (sign << 63u) | 0x7ff0000000000000u | (mantissa << 42u);
602			return x.f;
603		}
604	}
605
606	expotent = expotent + (1023u - 15u);
607	mantissa = mantissa << 42u;
608
609	x.u = (sign << 63u) | (expotent << 52u) | mantissa;
610	return x.f;
611}
612
613DE_END_EXTERN_C
614