/*******************************************************************************
 * Copyright (c) 2019-2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
1680d59932Sopenharmony_ci
/**
 * This is a header-only utility library that provides OpenCL host code with
 * routines for converting to/from cl_half values.
 *
 * Example usage:
 *
 *    #include <CL/cl_half.h>
 *    ...
 *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
 *    cl_float f = cl_half_to_float(h);
 */
2880d59932Sopenharmony_ci
2980d59932Sopenharmony_ci#ifndef OPENCL_CL_HALF_H
3080d59932Sopenharmony_ci#define OPENCL_CL_HALF_H
3180d59932Sopenharmony_ci
3280d59932Sopenharmony_ci#include <CL/cl_platform.h>
3380d59932Sopenharmony_ci
3480d59932Sopenharmony_ci#include <stdint.h>
3580d59932Sopenharmony_ci
3680d59932Sopenharmony_ci#ifdef __cplusplus
3780d59932Sopenharmony_ciextern "C" {
3880d59932Sopenharmony_ci#endif
3980d59932Sopenharmony_ci
4080d59932Sopenharmony_ci
/**
 * Rounding direction applied when a value is converted to cl_half and
 * cannot be represented exactly.
 */
typedef enum
{
  CL_HALF_RTE = 0, // round to nearest, ties to even
  CL_HALF_RTZ = 1, // round towards zero
  CL_HALF_RTP = 2, // round towards positive infinity
  CL_HALF_RTN = 3  // round towards negative infinity
} cl_half_rounding_mode;
5180d59932Sopenharmony_ci
5280d59932Sopenharmony_ci
/*
 * Private utility macros (removed again with #undef at the end of this header).
 *
 * CL_HALF_EXP_MASK        bits 10..14 set: the exponent field of a half value.
 * CL_HALF_MAX_FINITE_MAG  bits 0..9 and 11..14 set: used as the magnitude of
 *                         the largest finite half value.
 */
#define CL_HALF_EXP_MASK 0x7C00
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
5680d59932Sopenharmony_ci
5780d59932Sopenharmony_ci
5880d59932Sopenharmony_ci/*
5980d59932Sopenharmony_ci * Utility to deal with values that overflow when converting to half precision.
6080d59932Sopenharmony_ci */
6180d59932Sopenharmony_cistatic inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
6280d59932Sopenharmony_ci                                              uint16_t sign)
6380d59932Sopenharmony_ci{
6480d59932Sopenharmony_ci  if (rounding_mode == CL_HALF_RTZ)
6580d59932Sopenharmony_ci  {
6680d59932Sopenharmony_ci    // Round overflow towards zero -> largest finite number (preserving sign)
6780d59932Sopenharmony_ci    return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
6880d59932Sopenharmony_ci  }
6980d59932Sopenharmony_ci  else if (rounding_mode == CL_HALF_RTP && sign)
7080d59932Sopenharmony_ci  {
7180d59932Sopenharmony_ci    // Round negative overflow towards positive infinity -> most negative finite number
7280d59932Sopenharmony_ci    return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
7380d59932Sopenharmony_ci  }
7480d59932Sopenharmony_ci  else if (rounding_mode == CL_HALF_RTN && !sign)
7580d59932Sopenharmony_ci  {
7680d59932Sopenharmony_ci    // Round positive overflow towards negative infinity -> largest finite number
7780d59932Sopenharmony_ci    return CL_HALF_MAX_FINITE_MAG;
7880d59932Sopenharmony_ci  }
7980d59932Sopenharmony_ci
8080d59932Sopenharmony_ci  // Overflow to infinity
8180d59932Sopenharmony_ci  return (sign << 15) | CL_HALF_EXP_MASK;
8280d59932Sopenharmony_ci}
8380d59932Sopenharmony_ci
8480d59932Sopenharmony_ci/*
8580d59932Sopenharmony_ci * Utility to deal with values that underflow when converting to half precision.
8680d59932Sopenharmony_ci */
8780d59932Sopenharmony_cistatic inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
8880d59932Sopenharmony_ci                                               uint16_t sign)
8980d59932Sopenharmony_ci{
9080d59932Sopenharmony_ci  if (rounding_mode == CL_HALF_RTP && !sign)
9180d59932Sopenharmony_ci  {
9280d59932Sopenharmony_ci    // Round underflow towards positive infinity -> smallest positive value
9380d59932Sopenharmony_ci    return (sign << 15) | 1;
9480d59932Sopenharmony_ci  }
9580d59932Sopenharmony_ci  else if (rounding_mode == CL_HALF_RTN && sign)
9680d59932Sopenharmony_ci  {
9780d59932Sopenharmony_ci    // Round underflow towards negative infinity -> largest negative value
9880d59932Sopenharmony_ci    return (sign << 15) | 1;
9980d59932Sopenharmony_ci  }
10080d59932Sopenharmony_ci
10180d59932Sopenharmony_ci  // Flush to zero
10280d59932Sopenharmony_ci  return (sign << 15);
10380d59932Sopenharmony_ci}
10480d59932Sopenharmony_ci
10580d59932Sopenharmony_ci
10680d59932Sopenharmony_ci/**
10780d59932Sopenharmony_ci * Convert a cl_float to a cl_half.
10880d59932Sopenharmony_ci */
10980d59932Sopenharmony_cistatic inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
11080d59932Sopenharmony_ci{
11180d59932Sopenharmony_ci  // Type-punning to get direct access to underlying bits
11280d59932Sopenharmony_ci  union
11380d59932Sopenharmony_ci  {
11480d59932Sopenharmony_ci    cl_float f;
11580d59932Sopenharmony_ci    uint32_t i;
11680d59932Sopenharmony_ci  } f32;
11780d59932Sopenharmony_ci  f32.f = f;
11880d59932Sopenharmony_ci
11980d59932Sopenharmony_ci  // Extract sign bit
12080d59932Sopenharmony_ci  uint16_t sign = f32.i >> 31;
12180d59932Sopenharmony_ci
12280d59932Sopenharmony_ci  // Extract FP32 exponent and mantissa
12380d59932Sopenharmony_ci  uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
12480d59932Sopenharmony_ci  uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
12580d59932Sopenharmony_ci
12680d59932Sopenharmony_ci  // Remove FP32 exponent bias
12780d59932Sopenharmony_ci  int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
12880d59932Sopenharmony_ci
12980d59932Sopenharmony_ci  // Add FP16 exponent bias
13080d59932Sopenharmony_ci  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
13180d59932Sopenharmony_ci
13280d59932Sopenharmony_ci  // Position of the bit that will become the FP16 mantissa LSB
13380d59932Sopenharmony_ci  uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
13480d59932Sopenharmony_ci
13580d59932Sopenharmony_ci  // Check for NaN / infinity
13680d59932Sopenharmony_ci  if (f_exp == 0xFF)
13780d59932Sopenharmony_ci  {
13880d59932Sopenharmony_ci    if (f_mant)
13980d59932Sopenharmony_ci    {
14080d59932Sopenharmony_ci      // NaN -> propagate mantissa and silence it
14180d59932Sopenharmony_ci      uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
14280d59932Sopenharmony_ci      h_mant |= 0x200;
14380d59932Sopenharmony_ci      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
14480d59932Sopenharmony_ci    }
14580d59932Sopenharmony_ci    else
14680d59932Sopenharmony_ci    {
14780d59932Sopenharmony_ci      // Infinity -> zero mantissa
14880d59932Sopenharmony_ci      return (sign << 15) | CL_HALF_EXP_MASK;
14980d59932Sopenharmony_ci    }
15080d59932Sopenharmony_ci  }
15180d59932Sopenharmony_ci
15280d59932Sopenharmony_ci  // Check for zero
15380d59932Sopenharmony_ci  if (!f_exp && !f_mant)
15480d59932Sopenharmony_ci  {
15580d59932Sopenharmony_ci    return (sign << 15);
15680d59932Sopenharmony_ci  }
15780d59932Sopenharmony_ci
15880d59932Sopenharmony_ci  // Check for overflow
15980d59932Sopenharmony_ci  if (exp >= CL_HALF_MAX_EXP)
16080d59932Sopenharmony_ci  {
16180d59932Sopenharmony_ci    return cl_half_handle_overflow(rounding_mode, sign);
16280d59932Sopenharmony_ci  }
16380d59932Sopenharmony_ci
16480d59932Sopenharmony_ci  // Check for underflow
16580d59932Sopenharmony_ci  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
16680d59932Sopenharmony_ci  {
16780d59932Sopenharmony_ci    return cl_half_handle_underflow(rounding_mode, sign);
16880d59932Sopenharmony_ci  }
16980d59932Sopenharmony_ci
17080d59932Sopenharmony_ci  // Check for value that will become denormal
17180d59932Sopenharmony_ci  if (exp < -14)
17280d59932Sopenharmony_ci  {
17380d59932Sopenharmony_ci    // Denormal -> include the implicit 1 from the FP32 mantissa
17480d59932Sopenharmony_ci    h_exp = 0;
17580d59932Sopenharmony_ci    f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
17680d59932Sopenharmony_ci
17780d59932Sopenharmony_ci    // Mantissa shift amount depends on exponent
17880d59932Sopenharmony_ci    lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
17980d59932Sopenharmony_ci  }
18080d59932Sopenharmony_ci
18180d59932Sopenharmony_ci  // Generate FP16 mantissa by shifting FP32 mantissa
18280d59932Sopenharmony_ci  uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
18380d59932Sopenharmony_ci
18480d59932Sopenharmony_ci  // Check whether we need to round
18580d59932Sopenharmony_ci  uint32_t halfway = 1 << (lsb_pos - 1);
18680d59932Sopenharmony_ci  uint32_t mask = (halfway << 1) - 1;
18780d59932Sopenharmony_ci  switch (rounding_mode)
18880d59932Sopenharmony_ci  {
18980d59932Sopenharmony_ci    case CL_HALF_RTE:
19080d59932Sopenharmony_ci      if ((f_mant & mask) > halfway)
19180d59932Sopenharmony_ci      {
19280d59932Sopenharmony_ci        // More than halfway -> round up
19380d59932Sopenharmony_ci        h_mant += 1;
19480d59932Sopenharmony_ci      }
19580d59932Sopenharmony_ci      else if ((f_mant & mask) == halfway)
19680d59932Sopenharmony_ci      {
19780d59932Sopenharmony_ci        // Exactly halfway -> round to nearest even
19880d59932Sopenharmony_ci        if (h_mant & 0x1)
19980d59932Sopenharmony_ci          h_mant += 1;
20080d59932Sopenharmony_ci      }
20180d59932Sopenharmony_ci      break;
20280d59932Sopenharmony_ci    case CL_HALF_RTZ:
20380d59932Sopenharmony_ci      // Mantissa has already been truncated -> do nothing
20480d59932Sopenharmony_ci      break;
20580d59932Sopenharmony_ci    case CL_HALF_RTP:
20680d59932Sopenharmony_ci      if ((f_mant & mask) && !sign)
20780d59932Sopenharmony_ci      {
20880d59932Sopenharmony_ci        // Round positive numbers up
20980d59932Sopenharmony_ci        h_mant += 1;
21080d59932Sopenharmony_ci      }
21180d59932Sopenharmony_ci      break;
21280d59932Sopenharmony_ci    case CL_HALF_RTN:
21380d59932Sopenharmony_ci      if ((f_mant & mask) && sign)
21480d59932Sopenharmony_ci      {
21580d59932Sopenharmony_ci        // Round negative numbers down
21680d59932Sopenharmony_ci        h_mant += 1;
21780d59932Sopenharmony_ci      }
21880d59932Sopenharmony_ci      break;
21980d59932Sopenharmony_ci  }
22080d59932Sopenharmony_ci
22180d59932Sopenharmony_ci  // Check for mantissa overflow
22280d59932Sopenharmony_ci  if (h_mant & 0x400)
22380d59932Sopenharmony_ci  {
22480d59932Sopenharmony_ci    h_exp += 1;
22580d59932Sopenharmony_ci    h_mant = 0;
22680d59932Sopenharmony_ci  }
22780d59932Sopenharmony_ci
22880d59932Sopenharmony_ci  return (sign << 15) | (h_exp << 10) | h_mant;
22980d59932Sopenharmony_ci}
23080d59932Sopenharmony_ci
23180d59932Sopenharmony_ci
23280d59932Sopenharmony_ci/**
23380d59932Sopenharmony_ci * Convert a cl_double to a cl_half.
23480d59932Sopenharmony_ci */
23580d59932Sopenharmony_cistatic inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
23680d59932Sopenharmony_ci{
23780d59932Sopenharmony_ci  // Type-punning to get direct access to underlying bits
23880d59932Sopenharmony_ci  union
23980d59932Sopenharmony_ci  {
24080d59932Sopenharmony_ci    cl_double d;
24180d59932Sopenharmony_ci    uint64_t i;
24280d59932Sopenharmony_ci  } f64;
24380d59932Sopenharmony_ci  f64.d = d;
24480d59932Sopenharmony_ci
24580d59932Sopenharmony_ci  // Extract sign bit
24680d59932Sopenharmony_ci  uint16_t sign = f64.i >> 63;
24780d59932Sopenharmony_ci
24880d59932Sopenharmony_ci  // Extract FP64 exponent and mantissa
24980d59932Sopenharmony_ci  uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
25080d59932Sopenharmony_ci  uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
25180d59932Sopenharmony_ci
25280d59932Sopenharmony_ci  // Remove FP64 exponent bias
25380d59932Sopenharmony_ci  int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
25480d59932Sopenharmony_ci
25580d59932Sopenharmony_ci  // Add FP16 exponent bias
25680d59932Sopenharmony_ci  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
25780d59932Sopenharmony_ci
25880d59932Sopenharmony_ci  // Position of the bit that will become the FP16 mantissa LSB
25980d59932Sopenharmony_ci  uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
26080d59932Sopenharmony_ci
26180d59932Sopenharmony_ci  // Check for NaN / infinity
26280d59932Sopenharmony_ci  if (d_exp == 0x7FF)
26380d59932Sopenharmony_ci  {
26480d59932Sopenharmony_ci    if (d_mant)
26580d59932Sopenharmony_ci    {
26680d59932Sopenharmony_ci      // NaN -> propagate mantissa and silence it
26780d59932Sopenharmony_ci      uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
26880d59932Sopenharmony_ci      h_mant |= 0x200;
26980d59932Sopenharmony_ci      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
27080d59932Sopenharmony_ci    }
27180d59932Sopenharmony_ci    else
27280d59932Sopenharmony_ci    {
27380d59932Sopenharmony_ci      // Infinity -> zero mantissa
27480d59932Sopenharmony_ci      return (sign << 15) | CL_HALF_EXP_MASK;
27580d59932Sopenharmony_ci    }
27680d59932Sopenharmony_ci  }
27780d59932Sopenharmony_ci
27880d59932Sopenharmony_ci  // Check for zero
27980d59932Sopenharmony_ci  if (!d_exp && !d_mant)
28080d59932Sopenharmony_ci  {
28180d59932Sopenharmony_ci    return (sign << 15);
28280d59932Sopenharmony_ci  }
28380d59932Sopenharmony_ci
28480d59932Sopenharmony_ci  // Check for overflow
28580d59932Sopenharmony_ci  if (exp >= CL_HALF_MAX_EXP)
28680d59932Sopenharmony_ci  {
28780d59932Sopenharmony_ci    return cl_half_handle_overflow(rounding_mode, sign);
28880d59932Sopenharmony_ci  }
28980d59932Sopenharmony_ci
29080d59932Sopenharmony_ci  // Check for underflow
29180d59932Sopenharmony_ci  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
29280d59932Sopenharmony_ci  {
29380d59932Sopenharmony_ci    return cl_half_handle_underflow(rounding_mode, sign);
29480d59932Sopenharmony_ci  }
29580d59932Sopenharmony_ci
29680d59932Sopenharmony_ci  // Check for value that will become denormal
29780d59932Sopenharmony_ci  if (exp < -14)
29880d59932Sopenharmony_ci  {
29980d59932Sopenharmony_ci    // Include the implicit 1 from the FP64 mantissa
30080d59932Sopenharmony_ci    h_exp = 0;
30180d59932Sopenharmony_ci    d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
30280d59932Sopenharmony_ci
30380d59932Sopenharmony_ci    // Mantissa shift amount depends on exponent
30480d59932Sopenharmony_ci    lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
30580d59932Sopenharmony_ci  }
30680d59932Sopenharmony_ci
30780d59932Sopenharmony_ci  // Generate FP16 mantissa by shifting FP64 mantissa
30880d59932Sopenharmony_ci  uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
30980d59932Sopenharmony_ci
31080d59932Sopenharmony_ci  // Check whether we need to round
31180d59932Sopenharmony_ci  uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
31280d59932Sopenharmony_ci  uint64_t mask = (halfway << 1) - 1;
31380d59932Sopenharmony_ci  switch (rounding_mode)
31480d59932Sopenharmony_ci  {
31580d59932Sopenharmony_ci    case CL_HALF_RTE:
31680d59932Sopenharmony_ci      if ((d_mant & mask) > halfway)
31780d59932Sopenharmony_ci      {
31880d59932Sopenharmony_ci        // More than halfway -> round up
31980d59932Sopenharmony_ci        h_mant += 1;
32080d59932Sopenharmony_ci      }
32180d59932Sopenharmony_ci      else if ((d_mant & mask) == halfway)
32280d59932Sopenharmony_ci      {
32380d59932Sopenharmony_ci        // Exactly halfway -> round to nearest even
32480d59932Sopenharmony_ci        if (h_mant & 0x1)
32580d59932Sopenharmony_ci          h_mant += 1;
32680d59932Sopenharmony_ci      }
32780d59932Sopenharmony_ci      break;
32880d59932Sopenharmony_ci    case CL_HALF_RTZ:
32980d59932Sopenharmony_ci      // Mantissa has already been truncated -> do nothing
33080d59932Sopenharmony_ci      break;
33180d59932Sopenharmony_ci    case CL_HALF_RTP:
33280d59932Sopenharmony_ci      if ((d_mant & mask) && !sign)
33380d59932Sopenharmony_ci      {
33480d59932Sopenharmony_ci        // Round positive numbers up
33580d59932Sopenharmony_ci        h_mant += 1;
33680d59932Sopenharmony_ci      }
33780d59932Sopenharmony_ci      break;
33880d59932Sopenharmony_ci    case CL_HALF_RTN:
33980d59932Sopenharmony_ci      if ((d_mant & mask) && sign)
34080d59932Sopenharmony_ci      {
34180d59932Sopenharmony_ci        // Round negative numbers down
34280d59932Sopenharmony_ci        h_mant += 1;
34380d59932Sopenharmony_ci      }
34480d59932Sopenharmony_ci      break;
34580d59932Sopenharmony_ci  }
34680d59932Sopenharmony_ci
34780d59932Sopenharmony_ci  // Check for mantissa overflow
34880d59932Sopenharmony_ci  if (h_mant & 0x400)
34980d59932Sopenharmony_ci  {
35080d59932Sopenharmony_ci    h_exp += 1;
35180d59932Sopenharmony_ci    h_mant = 0;
35280d59932Sopenharmony_ci  }
35380d59932Sopenharmony_ci
35480d59932Sopenharmony_ci  return (sign << 15) | (h_exp << 10) | h_mant;
35580d59932Sopenharmony_ci}
35680d59932Sopenharmony_ci
35780d59932Sopenharmony_ci
35880d59932Sopenharmony_ci/**
35980d59932Sopenharmony_ci * Convert a cl_half to a cl_float.
36080d59932Sopenharmony_ci */
36180d59932Sopenharmony_cistatic inline cl_float cl_half_to_float(cl_half h)
36280d59932Sopenharmony_ci{
36380d59932Sopenharmony_ci  // Type-punning to get direct access to underlying bits
36480d59932Sopenharmony_ci  union
36580d59932Sopenharmony_ci  {
36680d59932Sopenharmony_ci    cl_float f;
36780d59932Sopenharmony_ci    uint32_t i;
36880d59932Sopenharmony_ci  } f32;
36980d59932Sopenharmony_ci
37080d59932Sopenharmony_ci  // Extract sign bit
37180d59932Sopenharmony_ci  uint16_t sign = h >> 15;
37280d59932Sopenharmony_ci
37380d59932Sopenharmony_ci  // Extract FP16 exponent and mantissa
37480d59932Sopenharmony_ci  uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
37580d59932Sopenharmony_ci  uint16_t h_mant = h & 0x3FF;
37680d59932Sopenharmony_ci
37780d59932Sopenharmony_ci  // Remove FP16 exponent bias
37880d59932Sopenharmony_ci  int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
37980d59932Sopenharmony_ci
38080d59932Sopenharmony_ci  // Add FP32 exponent bias
38180d59932Sopenharmony_ci  uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
38280d59932Sopenharmony_ci
38380d59932Sopenharmony_ci  // Check for NaN / infinity
38480d59932Sopenharmony_ci  if (h_exp == 0x1F)
38580d59932Sopenharmony_ci  {
38680d59932Sopenharmony_ci    if (h_mant)
38780d59932Sopenharmony_ci    {
38880d59932Sopenharmony_ci      // NaN -> propagate mantissa and silence it
38980d59932Sopenharmony_ci      uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
39080d59932Sopenharmony_ci      f_mant |= 0x400000;
39180d59932Sopenharmony_ci      f32.i = (sign << 31) | 0x7F800000 | f_mant;
39280d59932Sopenharmony_ci      return f32.f;
39380d59932Sopenharmony_ci    }
39480d59932Sopenharmony_ci    else
39580d59932Sopenharmony_ci    {
39680d59932Sopenharmony_ci      // Infinity -> zero mantissa
39780d59932Sopenharmony_ci      f32.i = (sign << 31) | 0x7F800000;
39880d59932Sopenharmony_ci      return f32.f;
39980d59932Sopenharmony_ci    }
40080d59932Sopenharmony_ci  }
40180d59932Sopenharmony_ci
40280d59932Sopenharmony_ci  // Check for zero / denormal
40380d59932Sopenharmony_ci  if (h_exp == 0)
40480d59932Sopenharmony_ci  {
40580d59932Sopenharmony_ci    if (h_mant == 0)
40680d59932Sopenharmony_ci    {
40780d59932Sopenharmony_ci      // Zero -> zero exponent
40880d59932Sopenharmony_ci      f_exp = 0;
40980d59932Sopenharmony_ci    }
41080d59932Sopenharmony_ci    else
41180d59932Sopenharmony_ci    {
41280d59932Sopenharmony_ci      // Denormal -> normalize it
41380d59932Sopenharmony_ci      // - Shift mantissa to make most-significant 1 implicit
41480d59932Sopenharmony_ci      // - Adjust exponent accordingly
41580d59932Sopenharmony_ci      uint32_t shift = 0;
41680d59932Sopenharmony_ci      while ((h_mant & 0x400) == 0)
41780d59932Sopenharmony_ci      {
41880d59932Sopenharmony_ci        h_mant <<= 1;
41980d59932Sopenharmony_ci        shift++;
42080d59932Sopenharmony_ci      }
42180d59932Sopenharmony_ci      h_mant &= 0x3FF;
42280d59932Sopenharmony_ci      f_exp -= shift - 1;
42380d59932Sopenharmony_ci    }
42480d59932Sopenharmony_ci  }
42580d59932Sopenharmony_ci
42680d59932Sopenharmony_ci  f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
42780d59932Sopenharmony_ci  return f32.f;
42880d59932Sopenharmony_ci}
42980d59932Sopenharmony_ci
43080d59932Sopenharmony_ci
43180d59932Sopenharmony_ci#undef CL_HALF_EXP_MASK
43280d59932Sopenharmony_ci#undef CL_HALF_MAX_FINITE_MAG
43380d59932Sopenharmony_ci
43480d59932Sopenharmony_ci
43580d59932Sopenharmony_ci#ifdef __cplusplus
43680d59932Sopenharmony_ci}
43780d59932Sopenharmony_ci#endif
43880d59932Sopenharmony_ci
43980d59932Sopenharmony_ci
44080d59932Sopenharmony_ci#endif  /* OPENCL_CL_HALF_H */
441