1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "va_compiler.h"
25bf215546Sopenharmony_ci#include "valhall.h"
26bf215546Sopenharmony_ci#include "bi_builder.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/* Only some special immediates are available, as specified in the Table of
29bf215546Sopenharmony_ci * Immediates in the specification. Other immediates must be lowered, either to
30bf215546Sopenharmony_ci * uniforms or to moves.
31bf215546Sopenharmony_ci */
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cistatic bi_index
34bf215546Sopenharmony_civa_mov_imm(bi_builder *b, uint32_t imm)
35bf215546Sopenharmony_ci{
36bf215546Sopenharmony_ci   bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
37bf215546Sopenharmony_ci   return bi_iadd_imm_i32(b, zero, imm);
38bf215546Sopenharmony_ci}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic bi_index
41bf215546Sopenharmony_civa_lut_index_32(uint32_t imm)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
44bf215546Sopenharmony_ci      if (valhall_immediates[i] == imm)
45bf215546Sopenharmony_ci         return va_lut(i);
46bf215546Sopenharmony_ci   }
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   return bi_null();
49bf215546Sopenharmony_ci}
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_cistatic bi_index
52bf215546Sopenharmony_civa_lut_index_16(uint16_t imm)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   uint16_t *arr16 = (uint16_t *) valhall_immediates;
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
57bf215546Sopenharmony_ci      if (arr16[i] == imm)
58bf215546Sopenharmony_ci         return bi_half(va_lut(i >> 1), i & 1);
59bf215546Sopenharmony_ci   }
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   return bi_null();
62bf215546Sopenharmony_ci}
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ciUNUSED static bi_index
65bf215546Sopenharmony_civa_lut_index_8(uint8_t imm)
66bf215546Sopenharmony_ci{
67bf215546Sopenharmony_ci   uint8_t *arr8 = (uint8_t *) valhall_immediates;
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
70bf215546Sopenharmony_ci      if (arr8[i] == imm)
71bf215546Sopenharmony_ci         return bi_byte(va_lut(i >> 2), i & 3);
72bf215546Sopenharmony_ci   }
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   return bi_null();
75bf215546Sopenharmony_ci}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic bi_index
78bf215546Sopenharmony_civa_demote_constant_fp16(uint32_t value)
79bf215546Sopenharmony_ci{
80bf215546Sopenharmony_ci   uint16_t fp16 = _mesa_float_to_half(uif(value));
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   /* Only convert if it is exact */
83bf215546Sopenharmony_ci   if (fui(_mesa_half_to_float(fp16)) == value)
84bf215546Sopenharmony_ci      return va_lut_index_16(fp16);
85bf215546Sopenharmony_ci   else
86bf215546Sopenharmony_ci      return bi_null();
87bf215546Sopenharmony_ci}
88bf215546Sopenharmony_ci
/*
 * Test if a 32-bit word arises as a sign or zero extension of some 8-bit
 * value.
 *
 * Zero extension: the word must fit in 8 bits. Sign extension: the word is
 * either a non-negative int8 (0..127) or has bits 7..31 all set, i.e. lies
 * in the range that a negative int8 wraps to as a uint32_t.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT8_MAX;

   bool non_negative = (x <= (uint32_t) INT8_MAX);
   bool negative = (x >= (uint32_t) INT8_MIN);

   return non_negative || negative;
}
101bf215546Sopenharmony_ci
/*
 * 16-bit counterpart of the byte test: true when the 32-bit word is the zero
 * extension (unsigned case) or sign extension (signed case) of some 16-bit
 * value. For the signed case that means 0..32767, or bits 15..31 all set.
 */
static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT16_MAX;

   bool non_negative = (x <= (uint32_t) INT16_MAX);
   bool negative = (x >= (uint32_t) INT16_MIN);

   return non_negative || negative;
}
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_cistatic bi_index
112bf215546Sopenharmony_civa_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
113bf215546Sopenharmony_ci{
114bf215546Sopenharmony_ci   /* Try the constant as-is */
115bf215546Sopenharmony_ci   if (!staging) {
116bf215546Sopenharmony_ci      bi_index lut = va_lut_index_32(value);
117bf215546Sopenharmony_ci      if (!bi_is_null(lut)) return lut;
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci      /* ...or negated as a FP32 constant */
120bf215546Sopenharmony_ci      if (info.absneg && info.size == VA_SIZE_32) {
121bf215546Sopenharmony_ci         lut = bi_neg(va_lut_index_32(fui(-uif(value))));
122bf215546Sopenharmony_ci         if (!bi_is_null(lut)) return lut;
123bf215546Sopenharmony_ci      }
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci      /* ...or negated as a FP16 constant */
126bf215546Sopenharmony_ci      if (info.absneg && info.size == VA_SIZE_16) {
127bf215546Sopenharmony_ci         lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
128bf215546Sopenharmony_ci         if (!bi_is_null(lut)) return lut;
129bf215546Sopenharmony_ci      }
130bf215546Sopenharmony_ci   }
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   /* Try using a single half of a FP16 constant */
133bf215546Sopenharmony_ci   bool replicated_halves = (value & 0xFFFF) == (value >> 16);
134bf215546Sopenharmony_ci   if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
135bf215546Sopenharmony_ci      bi_index lut = va_lut_index_16(value & 0xFFFF);
136bf215546Sopenharmony_ci      if (!bi_is_null(lut)) return lut;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci      /* ...possibly negated */
139bf215546Sopenharmony_ci      if (info.absneg) {
140bf215546Sopenharmony_ci         lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
141bf215546Sopenharmony_ci         if (!bi_is_null(lut)) return lut;
142bf215546Sopenharmony_ci      }
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   /* Try extending a byte */
146bf215546Sopenharmony_ci   if (!staging && (info.widen || info.lanes) &&
147bf215546Sopenharmony_ci       is_extension_of_8(value, is_signed)) {
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci      bi_index lut = va_lut_index_8(value & 0xFF);
150bf215546Sopenharmony_ci      if (!bi_is_null(lut)) return lut;
151bf215546Sopenharmony_ci   }
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   /* Try extending a halfword */
154bf215546Sopenharmony_ci   if (!staging && info.widen &&
155bf215546Sopenharmony_ci       is_extension_of_16(value, is_signed)) {
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci      bi_index lut = va_lut_index_16(value & 0xFFFF);
158bf215546Sopenharmony_ci      if (!bi_is_null(lut)) return lut;
159bf215546Sopenharmony_ci   }
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   /* Try demoting the constant to FP16 */
162bf215546Sopenharmony_ci   if (!staging && info.swizzle && info.size == VA_SIZE_32) {
163bf215546Sopenharmony_ci      bi_index lut = va_demote_constant_fp16(value);
164bf215546Sopenharmony_ci      if (!bi_is_null(lut)) return lut;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci      if (info.absneg) {
167bf215546Sopenharmony_ci         bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
168bf215546Sopenharmony_ci         if (!bi_is_null(lut)) return lut;
169bf215546Sopenharmony_ci      }
170bf215546Sopenharmony_ci   }
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   /* TODO: Optimize to uniform */
173bf215546Sopenharmony_ci   return va_mov_imm(b, value);
174bf215546Sopenharmony_ci}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_civoid
177bf215546Sopenharmony_civa_lower_constants(bi_context *ctx, bi_instr *I)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   bi_foreach_src(I, s) {
182bf215546Sopenharmony_ci      if (I->src[s].type == BI_INDEX_CONSTANT) {
183bf215546Sopenharmony_ci         /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
184bf215546Sopenharmony_ci         assert(!I->src[s].abs && "redundant .abs modifier");
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci         bool is_signed = valhall_opcodes[I->op].is_signed;
187bf215546Sopenharmony_ci         bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
188bf215546Sopenharmony_ci         struct va_src_info info = va_src_info(I->op, s);
189bf215546Sopenharmony_ci         uint32_t value = I->src[s].value;
190bf215546Sopenharmony_ci         enum bi_swizzle swz = I->src[s].swizzle;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci         /* Resolve any swizzle, keeping in mind the different interpretations
193bf215546Sopenharmony_ci          * swizzles in different contexts.
194bf215546Sopenharmony_ci          */
195bf215546Sopenharmony_ci         if (info.size == VA_SIZE_32) {
196bf215546Sopenharmony_ci            /* Extracting a half from the 32-bit value */
197bf215546Sopenharmony_ci            if (swz == BI_SWIZZLE_H00)
198bf215546Sopenharmony_ci               value = (value & 0xFFFF);
199bf215546Sopenharmony_ci            else if (swz == BI_SWIZZLE_H11)
200bf215546Sopenharmony_ci               value = (value >> 16);
201bf215546Sopenharmony_ci            else
202bf215546Sopenharmony_ci               assert(swz == BI_SWIZZLE_H01);
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci            /* FP16 -> FP32 */
205bf215546Sopenharmony_ci            if (info.swizzle && swz != BI_SWIZZLE_H01)
206bf215546Sopenharmony_ci               value = fui(_mesa_half_to_float(value));
207bf215546Sopenharmony_ci         } else if (info.size == VA_SIZE_16) {
208bf215546Sopenharmony_ci            assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
209bf215546Sopenharmony_ci            value = bi_apply_swizzle(value, swz);
210bf215546Sopenharmony_ci         } else if (info.size == VA_SIZE_8 && info.lanes) {
211bf215546Sopenharmony_ci            /* 8-bit extract */
212bf215546Sopenharmony_ci            unsigned chan = (swz - BI_SWIZZLE_B0000);
213bf215546Sopenharmony_ci            assert(chan < 4);
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci            value = (value >> (8 * chan)) & 0xFF;
216bf215546Sopenharmony_ci         } else {
217bf215546Sopenharmony_ci            /* TODO: Any other special handling? */
218bf215546Sopenharmony_ci            value = bi_apply_swizzle(value, swz);
219bf215546Sopenharmony_ci         }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci         bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
222bf215546Sopenharmony_ci         cons.neg ^= I->src[s].neg;
223bf215546Sopenharmony_ci         I->src[s] = cons;
224bf215546Sopenharmony_ci      }
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci}
227