compiler/nir/nir_constant_expressions.py

bf215546Sopenharmony_ciimport re
bf215546Sopenharmony_cifrom nir_opcodes import opcodes
bf215546Sopenharmony_cifrom nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
bf215546Sopenharmony_ci
def type_add_size(type_, size):
    """Return ``type_`` with a bit size attached.

    A type name that ``type_has_size`` already reports as sized is returned
    unchanged; otherwise ``size`` is appended to it as a decimal suffix.
    """
    return type_ if type_has_size(type_) else type_ + str(size)
bf215546Sopenharmony_ci
def op_bit_sizes(op):
    """Return the sorted bit sizes shared by every unsized operand of ``op``.

    The output type is inspected first, then each input type.  Every type
    that ``type_has_size`` reports as unsized contributes its ``type_sizes``
    set, and the result is the intersection of those sets as a sorted list.
    Returns ``None`` when the output and all inputs are already sized.
    """
    common = None
    for operand_type in [op.output_type, *op.input_types]:
        if type_has_size(operand_type):
            continue
        candidates = set(type_sizes(operand_type))
        common = candidates if common is None else common & candidates

    return None if common is None else sorted(common)
bf215546Sopenharmony_ci
def get_const_field(type_):
    """Return the ``nir_const_value`` member name used to access ``type_``.

    * 1-bit types use ``b``.
    * Other bool types use the signed integer member of their size.
    * ``float16`` uses ``u16``; the template converts to and from ``float``
      itself when it loads and stores such values.
    * Everything else uses the first letter of its base type followed by
      its size.
    """
    bits = type_size(type_)
    if bits == 1:
        return 'b'

    base = type_base_type(type_)
    if base == 'bool':
        return 'i' + str(bits)
    if type_ == "float16":
        return "u16"
    return base[0] + str(bits)
bf215546Sopenharmony_ci
bf215546Sopenharmony_citemplate = """\
bf215546Sopenharmony_ci/*
bf215546Sopenharmony_ci * Copyright (C) 2014 Intel Corporation
bf215546Sopenharmony_ci *
bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
bf215546Sopenharmony_ci *
bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
bf215546Sopenharmony_ci * Software.
bf215546Sopenharmony_ci *
bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
bf215546Sopenharmony_ci * IN THE SOFTWARE.
bf215546Sopenharmony_ci *
bf215546Sopenharmony_ci * Authors:
bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
bf215546Sopenharmony_ci */
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci#include <math.h>
bf215546Sopenharmony_ci#include "util/rounding.h" /* for _mesa_roundeven */
bf215546Sopenharmony_ci#include "util/half_float.h"
bf215546Sopenharmony_ci#include "util/double.h"
bf215546Sopenharmony_ci#include "util/softfloat.h"
bf215546Sopenharmony_ci#include "util/bigmath.h"
bf215546Sopenharmony_ci#include "util/format/format_utils.h"
bf215546Sopenharmony_ci#include "nir_constant_expressions.h"
bf215546Sopenharmony_ci
/**
 * Flush a denormal floating-point constant to zero, keeping its sign.
 *
 * The value is examined as raw bits of the given bit size: when every
 * exponent bit is clear, all bits other than the sign bit are cleared.
 * Bit sizes other than 16, 32 and 64 leave the value untouched.
 *
 * (No Doxygen backslash commands here: this text lives inside a non-raw
 * Python string, where a backslash followed by 'b' is a backspace escape.)
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
    switch(bit_size) {
    case 64:
        if (0 == (value->u64 & 0x7ff0000000000000))
            value->u64 &= 0x8000000000000000;
        break;
    case 32:
        if (0 == (value->u32 & 0x7f800000))
            value->u32 &= 0x80000000;
        break;
    case 16:
        if (0 == (value->u16 & 0x7c00))
            value->u16 &= 0x8000;
        break;
    default:
        /* Nothing to flush for other bit sizes. */
        break;
    }
}
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci/**
bf215546Sopenharmony_ci * Evaluate one component of packSnorm4x8.
bf215546Sopenharmony_ci */
bf215546Sopenharmony_cistatic uint8_t
bf215546Sopenharmony_cipack_snorm_1x8(float x)
bf215546Sopenharmony_ci{
bf215546Sopenharmony_ci    /* From section 8.4 of the GLSL 4.30 spec:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *    packSnorm4x8
bf215546Sopenharmony_ci     *    ------------
bf215546Sopenharmony_ci     *    The conversion for component c of v to fixed point is done as
bf215546Sopenharmony_ci     *    follows:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     * We must first cast the float to an int, because casting a negative
bf215546Sopenharmony_ci     * float to a uint is undefined.
bf215546Sopenharmony_ci     */
bf215546Sopenharmony_ci   return (uint8_t) (int)
bf215546Sopenharmony_ci          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
bf215546Sopenharmony_ci}
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci/**
bf215546Sopenharmony_ci * Evaluate one component of packSnorm2x16.
bf215546Sopenharmony_ci */
bf215546Sopenharmony_cistatic uint16_t
bf215546Sopenharmony_cipack_snorm_1x16(float x)
bf215546Sopenharmony_ci{
bf215546Sopenharmony_ci    /* From section 8.4 of the GLSL ES 3.00 spec:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *    packSnorm2x16
bf215546Sopenharmony_ci     *    -------------
bf215546Sopenharmony_ci     *    The conversion for component c of v to fixed point is done as
bf215546Sopenharmony_ci     *    follows:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     * We must first cast the float to an int, because casting a negative
bf215546Sopenharmony_ci     * float to a uint is undefined.
bf215546Sopenharmony_ci     */
bf215546Sopenharmony_ci   return (uint16_t) (int)
bf215546Sopenharmony_ci          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
bf215546Sopenharmony_ci}
bf215546Sopenharmony_ci
/**
 * Unpack one signed-normalized 8-bit component as done by unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* Section 8.4 of the GLSL 4.30 spec gives the conversion of an unpacked
    * fixed-point value f to floating point as
    *
    *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    *
    * The byte is reinterpreted as signed before the division.
    */
   const int8_t fixed = (int8_t) u;
   const float scaled = fixed / 127.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
bf215546Sopenharmony_ci
/**
 * Unpack one signed-normalized 16-bit component as done by unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of an
    * unpacked fixed-point value f to floating point as
    *
    *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    *
    * The 16-bit word is reinterpreted as signed before the division.
    */
   const int16_t fixed = (int16_t) u;
   const float scaled = fixed / 32767.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci/**
bf215546Sopenharmony_ci * Evaluate one component packUnorm4x8.
bf215546Sopenharmony_ci */
bf215546Sopenharmony_cistatic uint8_t
bf215546Sopenharmony_cipack_unorm_1x8(float x)
bf215546Sopenharmony_ci{
bf215546Sopenharmony_ci    /* From section 8.4 of the GLSL 4.30 spec:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *    packUnorm4x8
bf215546Sopenharmony_ci     *    ------------
bf215546Sopenharmony_ci     *    The conversion for component c of v to fixed point is done as
bf215546Sopenharmony_ci     *    follows:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
bf215546Sopenharmony_ci     */
bf215546Sopenharmony_ci   return (uint8_t) (int)
bf215546Sopenharmony_ci          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
bf215546Sopenharmony_ci}
bf215546Sopenharmony_ci
bf215546Sopenharmony_ci/**
bf215546Sopenharmony_ci * Evaluate one component packUnorm2x16.
bf215546Sopenharmony_ci */
bf215546Sopenharmony_cistatic uint16_t
bf215546Sopenharmony_cipack_unorm_1x16(float x)
bf215546Sopenharmony_ci{
bf215546Sopenharmony_ci    /* From section 8.4 of the GLSL ES 3.00 spec:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *    packUnorm2x16
bf215546Sopenharmony_ci     *    -------------
bf215546Sopenharmony_ci     *    The conversion for component c of v to fixed point is done as
bf215546Sopenharmony_ci     *    follows:
bf215546Sopenharmony_ci     *
bf215546Sopenharmony_ci     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
bf215546Sopenharmony_ci     */
bf215546Sopenharmony_ci   return (uint16_t) (int)
bf215546Sopenharmony_ci          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
bf215546Sopenharmony_ci}
bf215546Sopenharmony_ci
/**
 * Unpack one unsigned-normalized 8-bit component as done by
 * unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* Section 8.4 of the GLSL 4.30 spec gives the conversion of an unpacked
    * fixed-point value f to floating point as
    *
    *       unpackUnorm4x8: f / 255.0
    */
   const float numerator = (float) u;

   return numerator / 255.0f;
}
bf215546Sopenharmony_ci
/**
 * Unpack one unsigned-normalized 16-bit component as done by
 * unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of an
    * unpacked fixed-point value f to floating point as
    *
    *       unpackUnorm2x16: f / 65535.0
    */
   const float numerator = (float) u;

   return numerator / 65535.0f;
}
bf215546Sopenharmony_ci
/**
 * Convert one float component to its 16-bit half-float bit pattern, as
 * needed for packHalf2x16.  The conversion itself is delegated to
 * _mesa_float_to_half().
 */
static uint16_t
pack_half_1x16(float x)
{
   const uint16_t half_bits = _mesa_float_to_half(x);

   return half_bits;
}
bf215546Sopenharmony_ci
/**
 * Convert one half-float component to float for unpackHalf2x16, flushing
 * the input first: when all exponent bits of the half are clear, only its
 * sign bit is kept before the conversion.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   const bool exponent_is_zero = (u & 0x7c00) == 0;
   const uint16_t flushed = exponent_is_zero ? (u & 0x8000) : u;

   return _mesa_half_to_float(flushed);
}
bf215546Sopenharmony_ci
/**
 * Convert one half-float component to float for unpackHalf2x16, without
 * any flushing of the input.  The conversion itself is delegated to
 * _mesa_half_to_float().
 */
static float
unpack_half_1x16(uint16_t u)
{
   const float unpacked = _mesa_half_to_float(u);

   return unpacked;
}
bf215546Sopenharmony_ci
/* Scalar typedefs that complete the <base><bits>_t naming scheme, followed
 * by one 16-member struct per base type and bit size.  The structs are what
 * lets constant expressions use member accesses such as src0.y.  Note that
 * float16_t is a plain float here: 16-bit values are converted when they
 * are loaded and stored.
 */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for base in ["float", "int", "uint", "bool"]:
% for width in type_sizes(base):
struct ${base}${width}_vec {
% for component in "xyzwefghijklmnop":
   ${base}${width}_t ${component};
% endfor
};
% endfor
% endfor
bf215546Sopenharmony_ci
<%def name="evaluate_op(op, bit_size, execution_mode)">
   ## Emit the C statements that evaluate one opcode at one bit size.
   ##
   ##   op             -- opcode description; this def reads its name,
   ##                     output_type, output_size, input_types, input_sizes,
   ##                     num_inputs and const_expr attributes.
   ##   bit_size       -- size given to unsized types through type_add_size().
   ##   execution_mode -- not read by the template logic; the emitted C
   ##                     refers to the C parameter of the same name.
   ##
   ## The emitted code expects _src, _dst_val, num_components and
   ## execution_mode to be in scope in the surrounding C function.
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      % endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      ## Pad the initializer so that all 16 members are given a value.
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         ## Flush denormal float results when the execution mode asks for
         ## it; fquantize2f16 is excluded from this step.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  ## The counter of the enclosing C loop is _i, so that is
                  ## the component to index here.
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            % endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         ## Same denormal flush as in the per-component path, applied to
         ## component k.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>
bf215546Sopenharmony_ci
## Emit one static evaluate_<opcode>() function per opcode, in sorted name
## order.  When op_bit_sizes() yields a list, the function switches over the
## runtime bit_size with one case per listed size; otherwise the opcode is
## evaluated once with a bit size of 0 and the bit_size parameter is marked
## UNUSED.  The fsat function is wrapped in pragmas that disable
## optimization for MSVC on ARM64.
% for name, op in sorted(opcodes.items()):
<%
   sizes = op_bit_sizes(op)
   is_fsat = op.name == "fsat"
%>
% if is_fsat:
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"" if sizes is not None else "UNUSED"} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if sizes is None:
      ${evaluate_op(op, 0, execution_mode)}
   % else:
      switch (bit_size) {
      % for bit_size in sizes:
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % endif
}
% if is_fsat:
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor
bf215546Sopenharmony_ci
/**
 * Evaluate opcode op on constant sources by dispatching to the matching
 * generated evaluate_<opcode>() function, which writes its result to dest.
 * All remaining arguments are forwarded unchanged.
 */
void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for opcode_name in sorted(opcodes):
   case nir_op_${opcode_name}:
      evaluate_${opcode_name}(dest, num_components, bit_width, src,
         float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""
bf215546Sopenharmony_ci
bf215546Sopenharmony_cifrom mako.template import Template
bf215546Sopenharmony_ci
# Render the template to stdout.  The opcode table and the type helpers are
# handed to Mako under the same names the template refers to them by.
template_globals = {
    "opcodes": opcodes,
    "type_sizes": type_sizes,
    "type_base_type": type_base_type,
    "type_size": type_size,
    "type_has_size": type_has_size,
    "type_add_size": type_add_size,
    "op_bit_sizes": op_bit_sizes,
    "get_const_field": get_const_field,
}

print(Template(template).render(**template_globals))