1bf215546Sopenharmony_ciimport re
2bf215546Sopenharmony_cifrom nir_opcodes import opcodes
3bf215546Sopenharmony_cifrom nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
4bf215546Sopenharmony_ci
def type_add_size(type_, size):
    """Return ``type_`` as a sized type name.

    A name that ``type_has_size`` reports as already sized is returned
    unchanged; otherwise ``size`` is appended to it as decimal text.
    """
    return type_ if type_has_size(type_) else type_ + str(size)
9bf215546Sopenharmony_ci
def op_bit_sizes(op):
    """Return the sorted bit sizes shared by every unsized type of ``op``.

    The output type and each input type are considered, in that order.  Types
    that already carry a size are skipped.  Returns ``None`` when none of the
    op's types is unsized.
    """
    unsized = [
        candidate
        for candidate in [op.output_type] + list(op.input_types)
        if not type_has_size(candidate)
    ]
    if not unsized:
        return None

    # Start from the sizes of the first unsized type and narrow with the rest.
    common = set(type_sizes(unsized[0]))
    for candidate in unsized[1:]:
        common &= set(type_sizes(candidate))
    return sorted(common)
23bf215546Sopenharmony_ci
def get_const_field(type_):
    """Return the field name the template uses to access a ``type_`` constant.

    * any 1-bit type            -> ``b``
    * other ``bool`` base types -> ``i<size>``
    * ``float16``               -> ``u16``
    * everything else           -> first letter of the base type plus size
    """
    bits = type_size(type_)
    if bits == 1:
        return 'b'
    if type_base_type(type_) == 'bool':
        return 'i' + str(bits)
    if type_ == "float16":
        return "u16"
    return type_base_type(type_)[0] + str(bits)
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_citemplate = """\
35bf215546Sopenharmony_ci/*
36bf215546Sopenharmony_ci * Copyright (C) 2014 Intel Corporation
37bf215546Sopenharmony_ci *
38bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
39bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
40bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
41bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
42bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
43bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
44bf215546Sopenharmony_ci *
45bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
46bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
47bf215546Sopenharmony_ci * Software.
48bf215546Sopenharmony_ci *
49bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
52bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55bf215546Sopenharmony_ci * IN THE SOFTWARE.
56bf215546Sopenharmony_ci *
57bf215546Sopenharmony_ci * Authors:
58bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
59bf215546Sopenharmony_ci */
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci#include <math.h>
62bf215546Sopenharmony_ci#include "util/rounding.h" /* for _mesa_roundeven */
63bf215546Sopenharmony_ci#include "util/half_float.h"
64bf215546Sopenharmony_ci#include "util/double.h"
65bf215546Sopenharmony_ci#include "util/softfloat.h"
66bf215546Sopenharmony_ci#include "util/bigmath.h"
67bf215546Sopenharmony_ci#include "util/format/format_utils.h"
68bf215546Sopenharmony_ci#include "nir_constant_expressions.h"
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci/**
71bf215546Sopenharmony_ci * \brief Checks if the provided value is a denorm and flushes it to zero.
72bf215546Sopenharmony_ci */
73bf215546Sopenharmony_cistatic void
74bf215546Sopenharmony_ciconstant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
75bf215546Sopenharmony_ci{
76bf215546Sopenharmony_ci    switch(bit_size) {
77bf215546Sopenharmony_ci    case 64:
78bf215546Sopenharmony_ci        if (0 == (value->u64 & 0x7ff0000000000000))
79bf215546Sopenharmony_ci            value->u64 &= 0x8000000000000000;
80bf215546Sopenharmony_ci        break;
81bf215546Sopenharmony_ci    case 32:
82bf215546Sopenharmony_ci        if (0 == (value->u32 & 0x7f800000))
83bf215546Sopenharmony_ci            value->u32 &= 0x80000000;
84bf215546Sopenharmony_ci        break;
85bf215546Sopenharmony_ci    case 16:
86bf215546Sopenharmony_ci        if (0 == (value->u16 & 0x7c00))
87bf215546Sopenharmony_ci            value->u16 &= 0x8000;
88bf215546Sopenharmony_ci    }
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
/**
 * Convert one float component to the 8-bit signed normalized value used by
 * packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of component
    * c of v to fixed point as:
    *
    *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
    *
    * The rounded float goes through int before the unsigned type, because
    * casting a negative float directly to a uint is undefined.
    */
   const float scaled = CLAMP(x, -1.0f, +1.0f) * 127.0f;
   const int rounded = (int) _mesa_roundevenf(scaled);

   return (uint8_t) rounded;
}
112bf215546Sopenharmony_ci
/**
 * Convert one float component to the 16-bit signed normalized value used by
 * packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of
    * component c of v to fixed point as:
    *
    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
    *
    * The rounded float goes through int before the unsigned type, because
    * casting a negative float directly to a uint is undefined.
    */
   const float scaled = CLAMP(x, -1.0f, +1.0f) * 32767.0f;
   const int rounded = (int) _mesa_roundevenf(scaled);

   return (uint16_t) rounded;
}
134bf215546Sopenharmony_ci
/**
 * Convert one 8-bit signed normalized value back to float, as done per
 * component by unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of the
    * unpacked fixed-point value f to floating point as:
    *
    *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    *
    * The bits are reinterpreted as signed before the division.
    */
   const float scaled = (int8_t) u / 127.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
152bf215546Sopenharmony_ci
/**
 * Convert one 16-bit signed normalized value back to float, as done per
 * component by unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of the
    * unpacked fixed-point value f to floating point as:
    *
    *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    *
    * The bits are reinterpreted as signed before the division.
    */
   const float scaled = (int16_t) u / 32767.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
170bf215546Sopenharmony_ci
/**
 * Convert one float component to the 8-bit unsigned normalized value used
 * by packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of component
    * c of v to fixed point as:
    *
    *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
    */
   const float scaled = CLAMP(x, 0.0f, 1.0f) * 255.0f;
   const int rounded = (int) _mesa_roundevenf(scaled);

   return (uint8_t) rounded;
}
189bf215546Sopenharmony_ci
/**
 * Convert one float component to the 16-bit unsigned normalized value used
 * by packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of
    * component c of v to fixed point as:
    *
    *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
    */
   const float scaled = CLAMP(x, 0.0f, 1.0f) * 65535.0f;
   const int rounded = (int) _mesa_roundevenf(scaled);

   return (uint16_t) rounded;
}
208bf215546Sopenharmony_ci
/**
 * Convert one 8-bit unsigned normalized value back to float, as done per
 * component by unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of the
    * unpacked fixed-point value f to floating point as:
    *
    *       unpackUnorm4x8: f / 255.0
    */
   const float value = (float) u;

   return value / 255.0f;
}
226bf215546Sopenharmony_ci
/**
 * Convert one 16-bit unsigned normalized value back to float, as done per
 * component by unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of the
    * unpacked fixed-point value f to floating point as:
    *
    *       unpackUnorm2x16: f / 65535.0
    */
   const float value = (float) u;

   return value / 65535.0f;
}
244bf215546Sopenharmony_ci
/**
 * Convert one float component to 16-bit half-float bits for packHalf2x16.
 *
 * The conversion itself is done by _mesa_float_to_half().
 */
static uint16_t
pack_half_1x16(float x)
{
   const uint16_t half_bits = _mesa_float_to_half(x);

   return half_bits;
}
253bf215546Sopenharmony_ci
/**
 * Evaluate one component of unpackHalf2x16, flushing denorms to zero.
 *
 * When all exponent bits of the half-float are zero, only the sign bit is
 * kept before the value is converted to float.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   const uint16_t exponent_mask = 0x7c00;
   const uint16_t sign_mask = 0x8000;

   if ((u & exponent_mask) == 0)
      u &= sign_mask;

   return _mesa_half_to_float(u);
}
264bf215546Sopenharmony_ci
/**
 * Convert 16-bit half-float bits to float for one component of
 * unpackHalf2x16.
 *
 * The conversion itself is done by _mesa_half_to_float().
 */
static float
unpack_half_1x16(uint16_t u)
{
   const float unpacked = _mesa_half_to_float(u);

   return unpacked;
}
273bf215546Sopenharmony_ci
/* Scalar typedefs and 16-component vector structures, so that constant
 * expressions can use fields such as src0.y.
 */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
## One struct per (base type, bit size) pair, with one field per component
## letter.
% for base_type in ["float", "int", "uint", "bool"]:
% for bits in type_sizes(base_type):
struct ${base_type}${bits}_vec {
% for component in "xyzwefghijklmnop":
   ${base_type}${bits}_t ${component};
% endfor
};
% endfor
% endfor
307bf215546Sopenharmony_ci
<%def name="evaluate_op(op, bit_size, execution_mode)">
   ## Emit the C statements that evaluate opcode "op" at the given bit size.
   ##
   ## The generated code reads its sources from _src, writes its results to
   ## _dst_val and refers to the C variables num_components and
   ## execution_mode.  The execution_mode argument of this def is never
   ## expanded with an expression; "execution_mode" only appears as literal
   ## C text below.
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a struct variable srcN whose
   ## leading components are filled in with the appropriately-typed source
   ## values; the remaining components (up to 16) are zero.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         ## Flush denormal float results to zero when the execution mode
         ## asks for it.  fquantize2f16 is excluded here.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               ## The loop counter in the generated C is _i, not i.
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements and either assign the result of the
      ## const_expr to its x, y, z and w components, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         ## Flush denormal float results to zero when the execution mode
         ## asks for it.  fquantize2f16 is excluded here.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>
457bf215546Sopenharmony_ci
## Emit one static evaluate_<opcode>() function per opcode, in sorted name
## order.
% for name, op in sorted(opcodes.items()):
## The function generated for fsat is bracketed by pragmas that switch MSVC
## optimization off and back on for ARM64 builds.
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ## bit_size is only marked UNUSED when no switch over it is
                 ## generated below.
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   ## Opcodes with at least one unsized type get one case per bit size
   ## returned by op_bit_sizes().
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ## Every type is already sized, so the bit size passed here (0) is
      ## never appended by type_add_size().
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor
493bf215546Sopenharmony_ci
/**
 * Evaluate opcode op on constant sources by dispatching to the matching
 * generated evaluate_<opcode>() function.
 */
void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
## One case per opcode, in sorted name order.
% for opcode_name in sorted(opcodes):
   case nir_op_${opcode_name}:
      evaluate_${opcode_name}(dest, num_components, bit_width, src,
                              float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""
510bf215546Sopenharmony_ci
from mako.template import Template

# Render the template and write the generated C source to stdout.  The type
# helpers are handed to the template explicitly because it calls them by name.
print(Template(template).render(**{
    "opcodes": opcodes,
    "type_sizes": type_sizes,
    "type_base_type": type_base_type,
    "type_size": type_size,
    "type_has_size": type_has_size,
    "type_add_size": type_add_size,
    "op_bit_sizes": op_bit_sizes,
    "get_const_field": get_const_field,
}))
520