1/*
2 * Copyright (C) 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "va_compiler.h"
25#include "valhall.h"
26#include "bi_builder.h"
27
28/* Only some special immediates are available, as specified in the Table of
29 * Immediates in the specification. Other immediates must be lowered, either to
30 * uniforms or to moves.
31 */
32
33static bi_index
34va_mov_imm(bi_builder *b, uint32_t imm)
35{
36   bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false);
37   return bi_iadd_imm_i32(b, zero, imm);
38}
39
40static bi_index
41va_lut_index_32(uint32_t imm)
42{
43   for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) {
44      if (valhall_immediates[i] == imm)
45         return va_lut(i);
46   }
47
48   return bi_null();
49}
50
51static bi_index
52va_lut_index_16(uint16_t imm)
53{
54   uint16_t *arr16 = (uint16_t *) valhall_immediates;
55
56   for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
57      if (arr16[i] == imm)
58         return bi_half(va_lut(i >> 1), i & 1);
59   }
60
61   return bi_null();
62}
63
64UNUSED static bi_index
65va_lut_index_8(uint8_t imm)
66{
67   uint8_t *arr8 = (uint8_t *) valhall_immediates;
68
69   for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
70      if (arr8[i] == imm)
71         return bi_byte(va_lut(i >> 2), i & 3);
72   }
73
74   return bi_null();
75}
76
77static bi_index
78va_demote_constant_fp16(uint32_t value)
79{
80   uint16_t fp16 = _mesa_float_to_half(uif(value));
81
82   /* Only convert if it is exact */
83   if (fui(_mesa_half_to_float(fp16)) == value)
84      return va_lut_index_16(fp16);
85   else
86      return bi_null();
87}
88
/*
 * Test if a 32-bit word arises as a sign or zero extension of some 8/16-bit
 * value.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT8_MAX;

   /* Nonnegative 8-bit values zero-extend; negative ones sign-extend, so
    * the top 25 bits (24 extension bits plus the sign bit) are all ones.
    */
   return (x <= INT8_MAX) || ((x >> 7) == ((1u << 25) - 1));
}
101
/* As is_extension_of_8, but for a 16-bit source value. */
static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (!is_signed)
      return x <= UINT16_MAX;

   /* Nonnegative 16-bit values zero-extend; negative ones sign-extend, so
    * the top 17 bits (16 extension bits plus the sign bit) are all ones.
    */
   return (x <= INT16_MAX) || ((x >> 15) == ((1u << 17) - 1));
}
110
/*
 * Produce an encodable source for a 32-bit constant: prefer (in order) a
 * direct immediate-table entry, a negated table entry, a replicated FP16
 * half, a sign/zero-extended byte or halfword, and an exact FP16 demotion;
 * fall back to an explicit move otherwise. Staging sources cannot encode
 * table entries, so they always take the move path. The candidate orderings
 * matter: earlier forms are cheaper or more general, so do not reorder.
 */
static bi_index
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
{
   /* Try the constant as-is */
   if (!staging) {
      bi_index lut = va_lut_index_32(value);
      if (!bi_is_null(lut)) return lut;

      /* ...or negated as a FP32 constant, sourcing the table entry for the
       * negation and cancelling it with the .neg modifier.
       * NOTE(review): assumes bi_neg of a null index stays null for the
       * bi_is_null check below -- confirm bi_neg only sets the neg bit. */
      if (info.absneg && info.size == VA_SIZE_32) {
         lut = bi_neg(va_lut_index_32(fui(-uif(value))));
         if (!bi_is_null(lut)) return lut;
      }

      /* ...or negated as a FP16 constant (flip the sign bit of both packed
       * halves) */
      if (info.absneg && info.size == VA_SIZE_16) {
         lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* Try using a single half of a FP16 constant. Only sound when both
    * halves of the requested value are identical, since the swizzle
    * replicates one table half to both lanes. */
   bool replicated_halves = (value & 0xFFFF) == (value >> 16);
   if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
      bi_index lut = va_lut_index_16(value & 0xFFFF);
      if (!bi_is_null(lut)) return lut;

      /* ...possibly negated (flip the half's sign bit, cancel with .neg) */
      if (info.absneg) {
         lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* Try extending a byte */
   if (!staging && (info.widen || info.lanes) &&
       is_extension_of_8(value, is_signed)) {

      bi_index lut = va_lut_index_8(value & 0xFF);
      if (!bi_is_null(lut)) return lut;
   }

   /* Try extending a halfword */
   if (!staging && info.widen &&
       is_extension_of_16(value, is_signed)) {

      bi_index lut = va_lut_index_16(value & 0xFFFF);
      if (!bi_is_null(lut)) return lut;
   }

   /* Try demoting the constant to FP16 (exact conversions only, see
    * va_demote_constant_fp16) */
   if (!staging && info.swizzle && info.size == VA_SIZE_32) {
      bi_index lut = va_demote_constant_fp16(value);
      if (!bi_is_null(lut)) return lut;

      /* ...possibly negated, as in the FP32 case above */
      if (info.absneg) {
         bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
         if (!bi_is_null(lut)) return lut;
      }
   }

   /* TODO: Optimize to uniform */
   return va_mov_imm(b, value);
}
175
/*
 * Lower the BI_INDEX_CONSTANT sources of an instruction into forms Valhall
 * can encode. For each constant source: fold the source's swizzle into the
 * raw 32-bit value, resolve the value via va_resolve_constant (which may
 * emit a move before the instruction), and carry the source's negate
 * modifier over to the resolved index.
 */
void
va_lower_constants(bi_context *ctx, bi_instr *I)
{
   /* Any materializing moves are inserted immediately before I */
   bi_builder b = bi_init_builder(ctx, bi_before_instr(I));

   bi_foreach_src(I, s) {
      if (I->src[s].type == BI_INDEX_CONSTANT) {
         /* abs(#c) is pointless, but -#c occurs in transcendental sequences */
         assert(!I->src[s].abs && "redundant .abs modifier");

         bool is_signed = valhall_opcodes[I->op].is_signed;
         /* Staging sources (s below the staging count) cannot encode table
          * immediates */
         bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
         struct va_src_info info = va_src_info(I->op, s);
         uint32_t value = I->src[s].value;
         enum bi_swizzle swz = I->src[s].swizzle;

         /* Resolve any swizzle, keeping in mind the different interpretations
          * swizzles in different contexts.
          */
         if (info.size == VA_SIZE_32) {
            /* Extracting a half from the 32-bit value */
            if (swz == BI_SWIZZLE_H00)
               value = (value & 0xFFFF);
            else if (swz == BI_SWIZZLE_H11)
               value = (value >> 16);
            else
               assert(swz == BI_SWIZZLE_H01);

            /* FP16 -> FP32: for float-swizzling sources, a half-select on a
             * 32-bit source converts that half from half precision */
            if (info.swizzle && swz != BI_SWIZZLE_H01)
               value = fui(_mesa_half_to_float(value));
         } else if (info.size == VA_SIZE_16) {
            /* Only halfword swizzles are meaningful at 16-bit */
            assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11);
            value = bi_apply_swizzle(value, swz);
         } else if (info.size == VA_SIZE_8 && info.lanes) {
            /* 8-bit extract: byte swizzles are consecutive enum values
             * starting at B0000 */
            unsigned chan = (swz - BI_SWIZZLE_B0000);
            assert(chan < 4);

            value = (value >> (8 * chan)) & 0xFF;
         } else {
            /* TODO: Any other special handling? */
            value = bi_apply_swizzle(value, swz);
         }

         bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
         /* Preserve the original negate; .abs was asserted away above */
         cons.neg ^= I->src[s].neg;
         I->src[s] = cons;
      }
   }
}
227