180d59932Sopenharmony_ci/******************************************************************************* 280d59932Sopenharmony_ci * Copyright (c) 2019-2020 The Khronos Group Inc. 380d59932Sopenharmony_ci * 480d59932Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 580d59932Sopenharmony_ci * you may not use this file except in compliance with the License. 680d59932Sopenharmony_ci * You may obtain a copy of the License at 780d59932Sopenharmony_ci * 880d59932Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 980d59932Sopenharmony_ci * 1080d59932Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 1180d59932Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 1280d59932Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1380d59932Sopenharmony_ci * See the License for the specific language governing permissions and 1480d59932Sopenharmony_ci * limitations under the License. 1580d59932Sopenharmony_ci ******************************************************************************/ 1680d59932Sopenharmony_ci 1780d59932Sopenharmony_ci/** 1880d59932Sopenharmony_ci * This is a header-only utility library that provides OpenCL host code with 1980d59932Sopenharmony_ci * routines for converting to/from cl_half values. 2080d59932Sopenharmony_ci * 2180d59932Sopenharmony_ci * Example usage: 2280d59932Sopenharmony_ci * 2380d59932Sopenharmony_ci * #include <CL/cl_half.h> 2480d59932Sopenharmony_ci * ... 2580d59932Sopenharmony_ci * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); 2680d59932Sopenharmony_ci * cl_float f = cl_half_to_float(h); 2780d59932Sopenharmony_ci */ 2880d59932Sopenharmony_ci 2980d59932Sopenharmony_ci#ifndef OPENCL_CL_HALF_H 3080d59932Sopenharmony_ci#define OPENCL_CL_HALF_H 3180d59932Sopenharmony_ci 3280d59932Sopenharmony_ci#include <CL/cl_platform.h> 3380d59932Sopenharmony_ci 3480d59932Sopenharmony_ci#include <stdint.h> 3580d59932Sopenharmony_ci 3680d59932Sopenharmony_ci#ifdef __cplusplus 3780d59932Sopenharmony_ciextern "C" { 3880d59932Sopenharmony_ci#endif 3980d59932Sopenharmony_ci 4080d59932Sopenharmony_ci 4180d59932Sopenharmony_ci/** 4280d59932Sopenharmony_ci * Rounding mode used when converting to cl_half. 4380d59932Sopenharmony_ci */ 4480d59932Sopenharmony_citypedef enum 4580d59932Sopenharmony_ci{ 4680d59932Sopenharmony_ci CL_HALF_RTE, // round to nearest even 4780d59932Sopenharmony_ci CL_HALF_RTZ, // round towards zero 4880d59932Sopenharmony_ci CL_HALF_RTP, // round towards positive infinity 4980d59932Sopenharmony_ci CL_HALF_RTN, // round towards negative infinity 5080d59932Sopenharmony_ci} cl_half_rounding_mode; 5180d59932Sopenharmony_ci 5280d59932Sopenharmony_ci 5380d59932Sopenharmony_ci/* Private utility macros. */ 5480d59932Sopenharmony_ci#define CL_HALF_EXP_MASK 0x7C00 5580d59932Sopenharmony_ci#define CL_HALF_MAX_FINITE_MAG 0x7BFF 5680d59932Sopenharmony_ci 5780d59932Sopenharmony_ci 5880d59932Sopenharmony_ci/* 5980d59932Sopenharmony_ci * Utility to deal with values that overflow when converting to half precision. 6080d59932Sopenharmony_ci */ 6180d59932Sopenharmony_cistatic inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, 6280d59932Sopenharmony_ci uint16_t sign) 6380d59932Sopenharmony_ci{ 6480d59932Sopenharmony_ci if (rounding_mode == CL_HALF_RTZ) 6580d59932Sopenharmony_ci { 6680d59932Sopenharmony_ci // Round overflow towards zero -> largest finite number (preserving sign) 6780d59932Sopenharmony_ci return (sign << 15) | CL_HALF_MAX_FINITE_MAG; 6880d59932Sopenharmony_ci } 6980d59932Sopenharmony_ci else if (rounding_mode == CL_HALF_RTP && sign) 7080d59932Sopenharmony_ci { 7180d59932Sopenharmony_ci // Round negative overflow towards positive infinity -> most negative finite number 7280d59932Sopenharmony_ci return (1 << 15) | CL_HALF_MAX_FINITE_MAG; 7380d59932Sopenharmony_ci } 7480d59932Sopenharmony_ci else if (rounding_mode == CL_HALF_RTN && !sign) 7580d59932Sopenharmony_ci { 7680d59932Sopenharmony_ci // Round positive overflow towards negative infinity -> largest finite number 7780d59932Sopenharmony_ci return CL_HALF_MAX_FINITE_MAG; 7880d59932Sopenharmony_ci } 7980d59932Sopenharmony_ci 8080d59932Sopenharmony_ci // Overflow to infinity 8180d59932Sopenharmony_ci return (sign << 15) | CL_HALF_EXP_MASK; 8280d59932Sopenharmony_ci} 8380d59932Sopenharmony_ci 8480d59932Sopenharmony_ci/* 8580d59932Sopenharmony_ci * Utility to deal with values that underflow when converting to half precision. 8680d59932Sopenharmony_ci */ 8780d59932Sopenharmony_cistatic inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, 8880d59932Sopenharmony_ci uint16_t sign) 8980d59932Sopenharmony_ci{ 9080d59932Sopenharmony_ci if (rounding_mode == CL_HALF_RTP && !sign) 9180d59932Sopenharmony_ci { 9280d59932Sopenharmony_ci // Round underflow towards positive infinity -> smallest positive value 9380d59932Sopenharmony_ci return (sign << 15) | 1; 9480d59932Sopenharmony_ci } 9580d59932Sopenharmony_ci else if (rounding_mode == CL_HALF_RTN && sign) 9680d59932Sopenharmony_ci { 9780d59932Sopenharmony_ci // Round underflow towards negative infinity -> largest negative value 9880d59932Sopenharmony_ci return (sign << 15) | 1; 9980d59932Sopenharmony_ci } 10080d59932Sopenharmony_ci 10180d59932Sopenharmony_ci // Flush to zero 10280d59932Sopenharmony_ci return (sign << 15); 10380d59932Sopenharmony_ci} 10480d59932Sopenharmony_ci 10580d59932Sopenharmony_ci 10680d59932Sopenharmony_ci/** 10780d59932Sopenharmony_ci * Convert a cl_float to a cl_half. 10880d59932Sopenharmony_ci */ 10980d59932Sopenharmony_cistatic inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode) 11080d59932Sopenharmony_ci{ 11180d59932Sopenharmony_ci // Type-punning to get direct access to underlying bits 11280d59932Sopenharmony_ci union 11380d59932Sopenharmony_ci { 11480d59932Sopenharmony_ci cl_float f; 11580d59932Sopenharmony_ci uint32_t i; 11680d59932Sopenharmony_ci } f32; 11780d59932Sopenharmony_ci f32.f = f; 11880d59932Sopenharmony_ci 11980d59932Sopenharmony_ci // Extract sign bit 12080d59932Sopenharmony_ci uint16_t sign = f32.i >> 31; 12180d59932Sopenharmony_ci 12280d59932Sopenharmony_ci // Extract FP32 exponent and mantissa 12380d59932Sopenharmony_ci uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; 12480d59932Sopenharmony_ci uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); 12580d59932Sopenharmony_ci 12680d59932Sopenharmony_ci // Remove FP32 exponent bias 12780d59932Sopenharmony_ci int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; 12880d59932Sopenharmony_ci 12980d59932Sopenharmony_ci // Add FP16 exponent bias 13080d59932Sopenharmony_ci uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); 13180d59932Sopenharmony_ci 13280d59932Sopenharmony_ci // Position of the bit that will become the FP16 mantissa LSB 13380d59932Sopenharmony_ci uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; 13480d59932Sopenharmony_ci 13580d59932Sopenharmony_ci // Check for NaN / infinity 13680d59932Sopenharmony_ci if (f_exp == 0xFF) 13780d59932Sopenharmony_ci { 13880d59932Sopenharmony_ci if (f_mant) 13980d59932Sopenharmony_ci { 14080d59932Sopenharmony_ci // NaN -> propagate mantissa and silence it 14180d59932Sopenharmony_ci uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); 14280d59932Sopenharmony_ci h_mant |= 0x200; 14380d59932Sopenharmony_ci return (sign << 15) | CL_HALF_EXP_MASK | h_mant; 14480d59932Sopenharmony_ci } 14580d59932Sopenharmony_ci else 14680d59932Sopenharmony_ci { 14780d59932Sopenharmony_ci // Infinity -> zero mantissa 14880d59932Sopenharmony_ci return (sign << 15) | CL_HALF_EXP_MASK; 14980d59932Sopenharmony_ci } 15080d59932Sopenharmony_ci } 15180d59932Sopenharmony_ci 15280d59932Sopenharmony_ci // Check for zero 15380d59932Sopenharmony_ci if (!f_exp && !f_mant) 15480d59932Sopenharmony_ci { 15580d59932Sopenharmony_ci return (sign << 15); 15680d59932Sopenharmony_ci } 15780d59932Sopenharmony_ci 15880d59932Sopenharmony_ci // Check for overflow 15980d59932Sopenharmony_ci if (exp >= CL_HALF_MAX_EXP) 16080d59932Sopenharmony_ci { 16180d59932Sopenharmony_ci return cl_half_handle_overflow(rounding_mode, sign); 16280d59932Sopenharmony_ci } 16380d59932Sopenharmony_ci 16480d59932Sopenharmony_ci // Check for underflow 16580d59932Sopenharmony_ci if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) 16680d59932Sopenharmony_ci { 16780d59932Sopenharmony_ci return cl_half_handle_underflow(rounding_mode, sign); 16880d59932Sopenharmony_ci } 16980d59932Sopenharmony_ci 17080d59932Sopenharmony_ci // Check for value that will become denormal 17180d59932Sopenharmony_ci if (exp < -14) 17280d59932Sopenharmony_ci { 17380d59932Sopenharmony_ci // Denormal -> include the implicit 1 from the FP32 mantissa 17480d59932Sopenharmony_ci h_exp = 0; 17580d59932Sopenharmony_ci f_mant |= 1 << (CL_FLT_MANT_DIG - 1); 17680d59932Sopenharmony_ci 17780d59932Sopenharmony_ci // Mantissa shift amount depends on exponent 17880d59932Sopenharmony_ci lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); 17980d59932Sopenharmony_ci } 18080d59932Sopenharmony_ci 18180d59932Sopenharmony_ci // Generate FP16 mantissa by shifting FP32 mantissa 18280d59932Sopenharmony_ci uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); 18380d59932Sopenharmony_ci 18480d59932Sopenharmony_ci // Check whether we need to round 18580d59932Sopenharmony_ci uint32_t halfway = 1 << (lsb_pos - 1); 18680d59932Sopenharmony_ci uint32_t mask = (halfway << 1) - 1; 18780d59932Sopenharmony_ci switch (rounding_mode) 18880d59932Sopenharmony_ci { 18980d59932Sopenharmony_ci case CL_HALF_RTE: 19080d59932Sopenharmony_ci if ((f_mant & mask) > halfway) 19180d59932Sopenharmony_ci { 19280d59932Sopenharmony_ci // More than halfway -> round up 19380d59932Sopenharmony_ci h_mant += 1; 19480d59932Sopenharmony_ci } 19580d59932Sopenharmony_ci else if ((f_mant & mask) == halfway) 19680d59932Sopenharmony_ci { 19780d59932Sopenharmony_ci // Exactly halfway -> round to nearest even 19880d59932Sopenharmony_ci if (h_mant & 0x1) 19980d59932Sopenharmony_ci h_mant += 1; 20080d59932Sopenharmony_ci } 20180d59932Sopenharmony_ci break; 20280d59932Sopenharmony_ci case CL_HALF_RTZ: 20380d59932Sopenharmony_ci // Mantissa has already been truncated -> do nothing 20480d59932Sopenharmony_ci break; 20580d59932Sopenharmony_ci case CL_HALF_RTP: 20680d59932Sopenharmony_ci if ((f_mant & mask) && !sign) 20780d59932Sopenharmony_ci { 20880d59932Sopenharmony_ci // Round positive numbers up 20980d59932Sopenharmony_ci h_mant += 1; 21080d59932Sopenharmony_ci } 21180d59932Sopenharmony_ci break; 21280d59932Sopenharmony_ci case CL_HALF_RTN: 21380d59932Sopenharmony_ci if ((f_mant & mask) && sign) 21480d59932Sopenharmony_ci { 21580d59932Sopenharmony_ci // Round negative numbers down 21680d59932Sopenharmony_ci h_mant += 1; 21780d59932Sopenharmony_ci } 21880d59932Sopenharmony_ci break; 21980d59932Sopenharmony_ci } 22080d59932Sopenharmony_ci 22180d59932Sopenharmony_ci // Check for mantissa overflow 22280d59932Sopenharmony_ci if (h_mant & 0x400) 22380d59932Sopenharmony_ci { 22480d59932Sopenharmony_ci h_exp += 1; 22580d59932Sopenharmony_ci h_mant = 0; 22680d59932Sopenharmony_ci } 22780d59932Sopenharmony_ci 22880d59932Sopenharmony_ci return (sign << 15) | (h_exp << 10) | h_mant; 22980d59932Sopenharmony_ci} 23080d59932Sopenharmony_ci 23180d59932Sopenharmony_ci 23280d59932Sopenharmony_ci/** 23380d59932Sopenharmony_ci * Convert a cl_double to a cl_half. 23480d59932Sopenharmony_ci */ 23580d59932Sopenharmony_cistatic inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) 23680d59932Sopenharmony_ci{ 23780d59932Sopenharmony_ci // Type-punning to get direct access to underlying bits 23880d59932Sopenharmony_ci union 23980d59932Sopenharmony_ci { 24080d59932Sopenharmony_ci cl_double d; 24180d59932Sopenharmony_ci uint64_t i; 24280d59932Sopenharmony_ci } f64; 24380d59932Sopenharmony_ci f64.d = d; 24480d59932Sopenharmony_ci 24580d59932Sopenharmony_ci // Extract sign bit 24680d59932Sopenharmony_ci uint16_t sign = f64.i >> 63; 24780d59932Sopenharmony_ci 24880d59932Sopenharmony_ci // Extract FP64 exponent and mantissa 24980d59932Sopenharmony_ci uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; 25080d59932Sopenharmony_ci uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); 25180d59932Sopenharmony_ci 25280d59932Sopenharmony_ci // Remove FP64 exponent bias 25380d59932Sopenharmony_ci int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; 25480d59932Sopenharmony_ci 25580d59932Sopenharmony_ci // Add FP16 exponent bias 25680d59932Sopenharmony_ci uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); 25780d59932Sopenharmony_ci 25880d59932Sopenharmony_ci // Position of the bit that will become the FP16 mantissa LSB 25980d59932Sopenharmony_ci uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; 26080d59932Sopenharmony_ci 26180d59932Sopenharmony_ci // Check for NaN / infinity 26280d59932Sopenharmony_ci if (d_exp == 0x7FF) 26380d59932Sopenharmony_ci { 26480d59932Sopenharmony_ci if (d_mant) 26580d59932Sopenharmony_ci { 26680d59932Sopenharmony_ci // NaN -> propagate mantissa and silence it 26780d59932Sopenharmony_ci uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); 26880d59932Sopenharmony_ci h_mant |= 0x200; 26980d59932Sopenharmony_ci return (sign << 15) | CL_HALF_EXP_MASK | h_mant; 27080d59932Sopenharmony_ci } 27180d59932Sopenharmony_ci else 27280d59932Sopenharmony_ci { 27380d59932Sopenharmony_ci // Infinity -> zero mantissa 27480d59932Sopenharmony_ci return (sign << 15) | CL_HALF_EXP_MASK; 27580d59932Sopenharmony_ci } 27680d59932Sopenharmony_ci } 27780d59932Sopenharmony_ci 27880d59932Sopenharmony_ci // Check for zero 27980d59932Sopenharmony_ci if (!d_exp && !d_mant) 28080d59932Sopenharmony_ci { 28180d59932Sopenharmony_ci return (sign << 15); 28280d59932Sopenharmony_ci } 28380d59932Sopenharmony_ci 28480d59932Sopenharmony_ci // Check for overflow 28580d59932Sopenharmony_ci if (exp >= CL_HALF_MAX_EXP) 28680d59932Sopenharmony_ci { 28780d59932Sopenharmony_ci return cl_half_handle_overflow(rounding_mode, sign); 28880d59932Sopenharmony_ci } 28980d59932Sopenharmony_ci 29080d59932Sopenharmony_ci // Check for underflow 29180d59932Sopenharmony_ci if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) 29280d59932Sopenharmony_ci { 29380d59932Sopenharmony_ci return cl_half_handle_underflow(rounding_mode, sign); 29480d59932Sopenharmony_ci } 29580d59932Sopenharmony_ci 29680d59932Sopenharmony_ci // Check for value that will become denormal 29780d59932Sopenharmony_ci if (exp < -14) 29880d59932Sopenharmony_ci { 29980d59932Sopenharmony_ci // Include the implicit 1 from the FP64 mantissa 30080d59932Sopenharmony_ci h_exp = 0; 30180d59932Sopenharmony_ci d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); 30280d59932Sopenharmony_ci 30380d59932Sopenharmony_ci // Mantissa shift amount depends on exponent 30480d59932Sopenharmony_ci lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); 30580d59932Sopenharmony_ci } 30680d59932Sopenharmony_ci 30780d59932Sopenharmony_ci // Generate FP16 mantissa by shifting FP64 mantissa 30880d59932Sopenharmony_ci uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); 30980d59932Sopenharmony_ci 31080d59932Sopenharmony_ci // Check whether we need to round 31180d59932Sopenharmony_ci uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); 31280d59932Sopenharmony_ci uint64_t mask = (halfway << 1) - 1; 31380d59932Sopenharmony_ci switch (rounding_mode) 31480d59932Sopenharmony_ci { 31580d59932Sopenharmony_ci case CL_HALF_RTE: 31680d59932Sopenharmony_ci if ((d_mant & mask) > halfway) 31780d59932Sopenharmony_ci { 31880d59932Sopenharmony_ci // More than halfway -> round up 31980d59932Sopenharmony_ci h_mant += 1; 32080d59932Sopenharmony_ci } 32180d59932Sopenharmony_ci else if ((d_mant & mask) == halfway) 32280d59932Sopenharmony_ci { 32380d59932Sopenharmony_ci // Exactly halfway -> round to nearest even 32480d59932Sopenharmony_ci if (h_mant & 0x1) 32580d59932Sopenharmony_ci h_mant += 1; 32680d59932Sopenharmony_ci } 32780d59932Sopenharmony_ci break; 32880d59932Sopenharmony_ci case CL_HALF_RTZ: 32980d59932Sopenharmony_ci // Mantissa has already been truncated -> do nothing 33080d59932Sopenharmony_ci break; 33180d59932Sopenharmony_ci case CL_HALF_RTP: 33280d59932Sopenharmony_ci if ((d_mant & mask) && !sign) 33380d59932Sopenharmony_ci { 33480d59932Sopenharmony_ci // Round positive numbers up 33580d59932Sopenharmony_ci h_mant += 1; 33680d59932Sopenharmony_ci } 33780d59932Sopenharmony_ci break; 33880d59932Sopenharmony_ci case CL_HALF_RTN: 33980d59932Sopenharmony_ci if ((d_mant & mask) && sign) 34080d59932Sopenharmony_ci { 34180d59932Sopenharmony_ci // Round negative numbers down 34280d59932Sopenharmony_ci h_mant += 1; 34380d59932Sopenharmony_ci } 34480d59932Sopenharmony_ci break; 34580d59932Sopenharmony_ci } 34680d59932Sopenharmony_ci 34780d59932Sopenharmony_ci // Check for mantissa overflow 34880d59932Sopenharmony_ci if (h_mant & 0x400) 34980d59932Sopenharmony_ci { 35080d59932Sopenharmony_ci h_exp += 1; 35180d59932Sopenharmony_ci h_mant = 0; 35280d59932Sopenharmony_ci } 35380d59932Sopenharmony_ci 35480d59932Sopenharmony_ci return (sign << 15) | (h_exp << 10) | h_mant; 35580d59932Sopenharmony_ci} 35680d59932Sopenharmony_ci 35780d59932Sopenharmony_ci 35880d59932Sopenharmony_ci/** 35980d59932Sopenharmony_ci * Convert a cl_half to a cl_float. 36080d59932Sopenharmony_ci */ 36180d59932Sopenharmony_cistatic inline cl_float cl_half_to_float(cl_half h) 36280d59932Sopenharmony_ci{ 36380d59932Sopenharmony_ci // Type-punning to get direct access to underlying bits 36480d59932Sopenharmony_ci union 36580d59932Sopenharmony_ci { 36680d59932Sopenharmony_ci cl_float f; 36780d59932Sopenharmony_ci uint32_t i; 36880d59932Sopenharmony_ci } f32; 36980d59932Sopenharmony_ci 37080d59932Sopenharmony_ci // Extract sign bit 37180d59932Sopenharmony_ci uint16_t sign = h >> 15; 37280d59932Sopenharmony_ci 37380d59932Sopenharmony_ci // Extract FP16 exponent and mantissa 37480d59932Sopenharmony_ci uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; 37580d59932Sopenharmony_ci uint16_t h_mant = h & 0x3FF; 37680d59932Sopenharmony_ci 37780d59932Sopenharmony_ci // Remove FP16 exponent bias 37880d59932Sopenharmony_ci int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; 37980d59932Sopenharmony_ci 38080d59932Sopenharmony_ci // Add FP32 exponent bias 38180d59932Sopenharmony_ci uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; 38280d59932Sopenharmony_ci 38380d59932Sopenharmony_ci // Check for NaN / infinity 38480d59932Sopenharmony_ci if (h_exp == 0x1F) 38580d59932Sopenharmony_ci { 38680d59932Sopenharmony_ci if (h_mant) 38780d59932Sopenharmony_ci { 38880d59932Sopenharmony_ci // NaN -> propagate mantissa and silence it 38980d59932Sopenharmony_ci uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); 39080d59932Sopenharmony_ci f_mant |= 0x400000; 39180d59932Sopenharmony_ci f32.i = (sign << 31) | 0x7F800000 | f_mant; 39280d59932Sopenharmony_ci return f32.f; 39380d59932Sopenharmony_ci } 39480d59932Sopenharmony_ci else 39580d59932Sopenharmony_ci { 39680d59932Sopenharmony_ci // Infinity -> zero mantissa 39780d59932Sopenharmony_ci f32.i = (sign << 31) | 0x7F800000; 39880d59932Sopenharmony_ci return f32.f; 39980d59932Sopenharmony_ci } 40080d59932Sopenharmony_ci } 40180d59932Sopenharmony_ci 40280d59932Sopenharmony_ci // Check for zero / denormal 40380d59932Sopenharmony_ci if (h_exp == 0) 40480d59932Sopenharmony_ci { 40580d59932Sopenharmony_ci if (h_mant == 0) 40680d59932Sopenharmony_ci { 40780d59932Sopenharmony_ci // Zero -> zero exponent 40880d59932Sopenharmony_ci f_exp = 0; 40980d59932Sopenharmony_ci } 41080d59932Sopenharmony_ci else 41180d59932Sopenharmony_ci { 41280d59932Sopenharmony_ci // Denormal -> normalize it 41380d59932Sopenharmony_ci // - Shift mantissa to make most-significant 1 implicit 41480d59932Sopenharmony_ci // - Adjust exponent accordingly 41580d59932Sopenharmony_ci uint32_t shift = 0; 41680d59932Sopenharmony_ci while ((h_mant & 0x400) == 0) 41780d59932Sopenharmony_ci { 41880d59932Sopenharmony_ci h_mant <<= 1; 41980d59932Sopenharmony_ci shift++; 42080d59932Sopenharmony_ci } 42180d59932Sopenharmony_ci h_mant &= 0x3FF; 42280d59932Sopenharmony_ci f_exp -= shift - 1; 42380d59932Sopenharmony_ci } 42480d59932Sopenharmony_ci } 42580d59932Sopenharmony_ci 42680d59932Sopenharmony_ci f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); 42780d59932Sopenharmony_ci return f32.f; 42880d59932Sopenharmony_ci} 42980d59932Sopenharmony_ci 43080d59932Sopenharmony_ci 43180d59932Sopenharmony_ci#undef CL_HALF_EXP_MASK 43280d59932Sopenharmony_ci#undef CL_HALF_MAX_FINITE_MAG 43380d59932Sopenharmony_ci 43480d59932Sopenharmony_ci 43580d59932Sopenharmony_ci#ifdef __cplusplus 43680d59932Sopenharmony_ci} 43780d59932Sopenharmony_ci#endif 43880d59932Sopenharmony_ci 43980d59932Sopenharmony_ci 44080d59932Sopenharmony_ci#endif /* OPENCL_CL_HALF_H */ 441