1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2014, 2015 Red Hat.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
/* The TGSI that the virgl host understands and the TGSI that current Gallium
 * emits will diverge over time, so add a transform stage that removes
 * anything we do not want to send unless the receiver supports it.
 */
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "tgsi/tgsi_transform.h"
30bf215546Sopenharmony_ci#include "tgsi/tgsi_info.h"
31bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h"
32bf215546Sopenharmony_ci#include "virgl_context.h"
33bf215546Sopenharmony_ci#include "virgl_screen.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cistruct virgl_input_temp {
36bf215546Sopenharmony_ci   enum tgsi_file_type file;
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci   /* Index within in the INPUT or SV files, or ~0 if no DCL of this input */
39bf215546Sopenharmony_ci   unsigned index;
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci   /* TGSI_FILE_TEMPORARY index it will be mapped to. */
42bf215546Sopenharmony_ci   unsigned temp;
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci   bool sint;
45bf215546Sopenharmony_ci};
46bf215546Sopenharmony_ci
/* Slot numbers for the per-input shadow temporaries tracked by the transform
 * context; each enumerator indexes one entry of its input_temp[] array.
 */
enum virgl_input_temps {
   INPUT_TEMP_LAYER = 0,
   INPUT_TEMP_VIEWPORT_INDEX,
   INPUT_TEMP_BLOCK_ID,
   INPUT_TEMP_HELPER_INVOCATION,

   /* Number of slots above; must stay last. */
   INPUT_TEMP_COUNT
};
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_cistruct virgl_transform_context {
56bf215546Sopenharmony_ci   struct tgsi_transform_context base;
57bf215546Sopenharmony_ci   struct tgsi_shader_info info;
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   bool cull_enabled;
60bf215546Sopenharmony_ci   bool has_precise;
61bf215546Sopenharmony_ci   bool fake_fp64;
62bf215546Sopenharmony_ci   bool is_separable;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   unsigned next_temp;
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   unsigned src_temp;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   unsigned writemask_fixup_outs[5];
69bf215546Sopenharmony_ci   unsigned writemask_fixup_temps;
70bf215546Sopenharmony_ci   unsigned num_writemask_fixups;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   struct virgl_input_temp input_temp[INPUT_TEMP_COUNT];
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   uint32_t *precise_flags;
75bf215546Sopenharmony_ci};
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic void
78bf215546Sopenharmony_civirgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration *decl,
79bf215546Sopenharmony_ci                                            struct virgl_input_temp *input_temp,
80bf215546Sopenharmony_ci                                            enum tgsi_semantic semantic_name)
81bf215546Sopenharmony_ci{
82bf215546Sopenharmony_ci   if (decl->Semantic.Name == semantic_name) {
83bf215546Sopenharmony_ci      input_temp->file = decl->Declaration.File;
84bf215546Sopenharmony_ci      input_temp->index = decl->Range.First;
85bf215546Sopenharmony_ci   }
86bf215546Sopenharmony_ci}
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_cistatic void
89bf215546Sopenharmony_civirgl_tgsi_transform_declaration(struct tgsi_transform_context *ctx,
90bf215546Sopenharmony_ci                                 struct tgsi_full_declaration *decl)
91bf215546Sopenharmony_ci{
92bf215546Sopenharmony_ci   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   switch (decl->Declaration.File) {
95bf215546Sopenharmony_ci   case TGSI_FILE_CONSTANT:
96bf215546Sopenharmony_ci      if (decl->Declaration.Dimension) {
97bf215546Sopenharmony_ci         if (decl->Dim.Index2D == 0)
98bf215546Sopenharmony_ci            decl->Declaration.Dimension = 0;
99bf215546Sopenharmony_ci      }
100bf215546Sopenharmony_ci      break;
101bf215546Sopenharmony_ci   case TGSI_FILE_INPUT:
102bf215546Sopenharmony_ci      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_LAYER],
103bf215546Sopenharmony_ci                                                   TGSI_SEMANTIC_LAYER);
104bf215546Sopenharmony_ci      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX],
105bf215546Sopenharmony_ci                                                   TGSI_SEMANTIC_VIEWPORT_INDEX);
106bf215546Sopenharmony_ci      break;
107bf215546Sopenharmony_ci   case TGSI_FILE_SYSTEM_VALUE:
108bf215546Sopenharmony_ci      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_BLOCK_ID],
109bf215546Sopenharmony_ci                                                   TGSI_SEMANTIC_BLOCK_ID);
110bf215546Sopenharmony_ci      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION],
111bf215546Sopenharmony_ci                                                   TGSI_SEMANTIC_HELPER_INVOCATION);
112bf215546Sopenharmony_ci      break;
113bf215546Sopenharmony_ci   case TGSI_FILE_OUTPUT:
114bf215546Sopenharmony_ci      switch (decl->Semantic.Name) {
115bf215546Sopenharmony_ci      case TGSI_SEMANTIC_CLIPDIST:
116bf215546Sopenharmony_ci         vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
117bf215546Sopenharmony_ci         if (decl->Range.Last != decl->Range.First)
118bf215546Sopenharmony_ci            vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.Last;
119bf215546Sopenharmony_ci         break;
120bf215546Sopenharmony_ci      case TGSI_SEMANTIC_CLIPVERTEX:
121bf215546Sopenharmony_ci         vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
122bf215546Sopenharmony_ci         break;
123bf215546Sopenharmony_ci      case TGSI_SEMANTIC_COLOR:
124bf215546Sopenharmony_ci         /* Vertex front/backface color output also has issues with writemasking */
125bf215546Sopenharmony_ci         if (vtctx->base.processor != PIPE_SHADER_FRAGMENT)
126bf215546Sopenharmony_ci            vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
127bf215546Sopenharmony_ci         break;
128bf215546Sopenharmony_ci      }
129bf215546Sopenharmony_ci      break;
130bf215546Sopenharmony_ci   case TGSI_FILE_TEMPORARY:
131bf215546Sopenharmony_ci      vtctx->next_temp = MAX2(vtctx->next_temp, decl->Range.Last + 1);
132bf215546Sopenharmony_ci      break;
133bf215546Sopenharmony_ci   default:
134bf215546Sopenharmony_ci      break;
135bf215546Sopenharmony_ci   }
136bf215546Sopenharmony_ci   assert(vtctx->num_writemask_fixups <= ARRAY_SIZE(vtctx->writemask_fixup_outs));
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   ctx->emit_declaration(ctx, decl);
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci/* for now just strip out the new properties the remote doesn't understand
142bf215546Sopenharmony_ci   yet */
143bf215546Sopenharmony_cistatic void
144bf215546Sopenharmony_civirgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
145bf215546Sopenharmony_ci                              struct tgsi_full_property *prop)
146bf215546Sopenharmony_ci{
147bf215546Sopenharmony_ci   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
148bf215546Sopenharmony_ci   switch (prop->Property.PropertyName) {
149bf215546Sopenharmony_ci   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
150bf215546Sopenharmony_ci   case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
151bf215546Sopenharmony_ci      if (vtctx->cull_enabled)
152bf215546Sopenharmony_ci    ctx->emit_property(ctx, prop);
153bf215546Sopenharmony_ci      break;
154bf215546Sopenharmony_ci   case TGSI_PROPERTY_NEXT_SHADER:
155bf215546Sopenharmony_ci      break;
156bf215546Sopenharmony_ci   default:
157bf215546Sopenharmony_ci      ctx->emit_property(ctx, prop);
158bf215546Sopenharmony_ci      break;
159bf215546Sopenharmony_ci   }
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic void
163bf215546Sopenharmony_civirgl_mov_input_temp_sint(struct tgsi_transform_context * ctx,
164bf215546Sopenharmony_ci                          struct virgl_input_temp *temp)
165bf215546Sopenharmony_ci{
166bf215546Sopenharmony_ci   if (temp->index != ~0) {
167bf215546Sopenharmony_ci      tgsi_transform_op2_inst(ctx, TGSI_OPCODE_IMAX,
168bf215546Sopenharmony_ci                              TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
169bf215546Sopenharmony_ci                              temp->file, temp->index,
170bf215546Sopenharmony_ci                              temp->file, temp->index, 0);
171bf215546Sopenharmony_ci   }
172bf215546Sopenharmony_ci}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_cistatic void
175bf215546Sopenharmony_civirgl_mov_input_temp_uint(struct tgsi_transform_context * ctx,
176bf215546Sopenharmony_ci                          struct virgl_input_temp *temp)
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci   if (temp->index != ~0) {
179bf215546Sopenharmony_ci      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
180bf215546Sopenharmony_ci                              TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
181bf215546Sopenharmony_ci                              temp->file, temp->index);
182bf215546Sopenharmony_ci   }
183bf215546Sopenharmony_ci}
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_cistatic void
186bf215546Sopenharmony_civirgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   if (vtctx->is_separable) {
191bf215546Sopenharmony_ci      struct tgsi_full_property prop = tgsi_default_full_property();
192bf215546Sopenharmony_ci      prop.Property.PropertyName = TGSI_PROPERTY_SEPARABLE_PROGRAM;
193bf215546Sopenharmony_ci      prop.Property.NrTokens += 1;
194bf215546Sopenharmony_ci      prop.u[0].Data = 1;
195bf215546Sopenharmony_ci      ctx->emit_property(ctx, &prop);
196bf215546Sopenharmony_ci   }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   vtctx->src_temp = vtctx->next_temp;
199bf215546Sopenharmony_ci   vtctx->next_temp += 4;
200bf215546Sopenharmony_ci   tgsi_transform_temps_decl(ctx, vtctx->src_temp, vtctx->src_temp + 3);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   if (vtctx->num_writemask_fixups) {
203bf215546Sopenharmony_ci      vtctx->writemask_fixup_temps = vtctx->next_temp;
204bf215546Sopenharmony_ci      vtctx->next_temp += vtctx->num_writemask_fixups;
205bf215546Sopenharmony_ci      tgsi_transform_temps_decl(ctx,
206bf215546Sopenharmony_ci                                vtctx->writemask_fixup_temps,
207bf215546Sopenharmony_ci                                vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups - 1);
208bf215546Sopenharmony_ci   }
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   /* Assign input temps before we emit any instructions, but after we parsed
211bf215546Sopenharmony_ci    * existing temp decls.
212bf215546Sopenharmony_ci    */
213bf215546Sopenharmony_ci   for (int i = 0; i < ARRAY_SIZE(vtctx->input_temp); i++) {
214bf215546Sopenharmony_ci      if (vtctx->input_temp[i].index != ~0) {
215bf215546Sopenharmony_ci         vtctx->input_temp[i].temp = vtctx->next_temp++;
216bf215546Sopenharmony_ci         tgsi_transform_temp_decl(ctx, vtctx->input_temp[i].temp);
217bf215546Sopenharmony_ci      }
218bf215546Sopenharmony_ci   }
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   /* virglrenderer makes mistakes in the types of layer/viewport input
221bf215546Sopenharmony_ci    * references from unsigned ops, so we use a temp that we do a no-op signed
222bf215546Sopenharmony_ci    * op to at the top of the shader.
223bf215546Sopenharmony_ci    *
224bf215546Sopenharmony_ci    * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/615
225bf215546Sopenharmony_ci    */
226bf215546Sopenharmony_ci   virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_LAYER]);
227bf215546Sopenharmony_ci   virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX]);
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   /* virglrenderer also makes mistakes in the types of block id input
230bf215546Sopenharmony_ci    * references from signed ops, so we use a temp that we do a plain MOV to at
231bf215546Sopenharmony_ci    * the top of the shader.  Also, it falls over if an unused channel's swizzle
232bf215546Sopenharmony_ci    * uses the .w of the block id.
233bf215546Sopenharmony_ci    */
234bf215546Sopenharmony_ci   if (vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index != ~0) {
235bf215546Sopenharmony_ci      struct tgsi_full_instruction inst = tgsi_default_full_instruction();
236bf215546Sopenharmony_ci      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
237bf215546Sopenharmony_ci      inst.Instruction.NumDstRegs = 1;
238bf215546Sopenharmony_ci      inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
239bf215546Sopenharmony_ci      inst.Dst[0].Register.Index = vtctx->input_temp[INPUT_TEMP_BLOCK_ID].temp;
240bf215546Sopenharmony_ci      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
241bf215546Sopenharmony_ci      inst.Instruction.NumSrcRegs = 1;
242bf215546Sopenharmony_ci      tgsi_transform_src_reg_xyzw(&inst.Src[0],
243bf215546Sopenharmony_ci                                  vtctx->input_temp[INPUT_TEMP_BLOCK_ID].file,
244bf215546Sopenharmony_ci                                  vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index);
245bf215546Sopenharmony_ci      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
246bf215546Sopenharmony_ci      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
247bf215546Sopenharmony_ci      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
248bf215546Sopenharmony_ci      inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
249bf215546Sopenharmony_ci      ctx->emit_instruction(ctx, &inst);
250bf215546Sopenharmony_ci   }
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   vtctx->precise_flags = calloc((vtctx->next_temp + 7)/8, sizeof(uint32_t));
255bf215546Sopenharmony_ci}
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_cistatic void
258bf215546Sopenharmony_civirgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp *temp, struct tgsi_full_src_register *src)
259bf215546Sopenharmony_ci{
260bf215546Sopenharmony_ci   if (src->Register.File == temp->file && src->Register.Index == temp->index) {
261bf215546Sopenharmony_ci      src->Register.File = TGSI_FILE_TEMPORARY;
262bf215546Sopenharmony_ci      src->Register.Index = temp->temp;
263bf215546Sopenharmony_ci   }
264bf215546Sopenharmony_ci}
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_cistatic void
267bf215546Sopenharmony_civirgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
268bf215546Sopenharmony_ci             struct tgsi_full_instruction *inst)
269bf215546Sopenharmony_ci{
270bf215546Sopenharmony_ci   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
271bf215546Sopenharmony_ci   if (vtctx->fake_fp64 &&
272bf215546Sopenharmony_ci       (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE ||
273bf215546Sopenharmony_ci        tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE)) {
274bf215546Sopenharmony_ci      debug_printf("VIRGL: ARB_gpu_shader_fp64 is exposed but not supported.");
275bf215546Sopenharmony_ci      return;
276bf215546Sopenharmony_ci   }
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   if (!vtctx->has_precise && inst->Instruction.Precise)
279bf215546Sopenharmony_ci      inst->Instruction.Precise = 0;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   /* For outputs NTT adds a final mov op but NIR doesn't propagate precise with moves,
282bf215546Sopenharmony_ci    * so that we don't see whether the assignment is from a precise instruction, but
283bf215546Sopenharmony_ci    * we need to know this to set the output decoration correctly, so propagate the
284bf215546Sopenharmony_ci    * precise flag with TGSI */
285bf215546Sopenharmony_ci   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
286bf215546Sopenharmony_ci      if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) {
287bf215546Sopenharmony_ci         uint32_t index = inst->Dst[i].Register.Index / 8;
288bf215546Sopenharmony_ci         uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8);
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci         /* Since we re-use temps set and clear the precise flag according to the last use
291bf215546Sopenharmony_ci          * for the register index and written components. Since moves are not marked
292bf215546Sopenharmony_ci          * as precise originally, and we may end up with an if/else clause that assignes
293bf215546Sopenharmony_ci          * a precise result in the if branche, but does a simple move from a constant
294bf215546Sopenharmony_ci          * on the else branche, we don't clear the flag when we hit a mov.
295bf215546Sopenharmony_ci          * We do the conservatiove approach here, because virglrenderer emits different temp
296bf215546Sopenharmony_ci          * ranges, and we don't want to mark all temps as precise only because we have
297bf215546Sopenharmony_ci          * one precise output */
298bf215546Sopenharmony_ci         if (inst->Instruction.Precise)
299bf215546Sopenharmony_ci            vtctx->precise_flags[index] |= bits;
300bf215546Sopenharmony_ci      } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
301bf215546Sopenharmony_ci         for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
302bf215546Sopenharmony_ci            if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) {
303bf215546Sopenharmony_ci               uint32_t index = inst->Src[i].Register.Index / 8;
304bf215546Sopenharmony_ci               uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) |
305bf215546Sopenharmony_ci                                    (1 << inst->Src[i].Register.SwizzleY) |
306bf215546Sopenharmony_ci                                    (1 << inst->Src[i].Register.SwizzleZ) |
307bf215546Sopenharmony_ci                                    (1 << inst->Src[i].Register.SwizzleW);
308bf215546Sopenharmony_ci               uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8);
309bf215546Sopenharmony_ci               if (vtctx->precise_flags[index] & bits) {
310bf215546Sopenharmony_ci                  inst->Instruction.Precise = 1;
311bf215546Sopenharmony_ci                  break;
312bf215546Sopenharmony_ci               }
313bf215546Sopenharmony_ci            }
314bf215546Sopenharmony_ci         }
315bf215546Sopenharmony_ci      }
316bf215546Sopenharmony_ci   }
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   /* virglrenderer can run out of space in internal buffers for immediates as
319bf215546Sopenharmony_ci    * tex operands.  Move the first immediate tex arg to a temp to save space in
320bf215546Sopenharmony_ci    * the buffer.
321bf215546Sopenharmony_ci    *
322bf215546Sopenharmony_ci    * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/582
323bf215546Sopenharmony_ci    */
324bf215546Sopenharmony_ci   if (tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
325bf215546Sopenharmony_ci       inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
326bf215546Sopenharmony_ci      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
327bf215546Sopenharmony_ci                              TGSI_FILE_TEMPORARY, vtctx->src_temp,
328bf215546Sopenharmony_ci                              TGSI_WRITEMASK_XYZW,
329bf215546Sopenharmony_ci                              inst->Src[0].Register.File,
330bf215546Sopenharmony_ci                              inst->Src[0].Register.Index);
331bf215546Sopenharmony_ci      inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
332bf215546Sopenharmony_ci      inst->Src[0].Register.Index = vtctx->src_temp;
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
336bf215546Sopenharmony_ci      /* virglrenderer would fail to compile on clipdist, clipvertex, and some
337bf215546Sopenharmony_ci       * two-sided-related color writes without a full writemask.  So, we write
338bf215546Sopenharmony_ci       * to a temp and store that temp with a full writemask.
339bf215546Sopenharmony_ci       *
340bf215546Sopenharmony_ci       * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/616
341bf215546Sopenharmony_ci       */
342bf215546Sopenharmony_ci      if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT) {
343bf215546Sopenharmony_ci         for (int j = 0; j < vtctx->num_writemask_fixups; j++) {
344bf215546Sopenharmony_ci            if (inst->Dst[i].Register.Index == vtctx->writemask_fixup_outs[j]) {
345bf215546Sopenharmony_ci               inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
346bf215546Sopenharmony_ci               inst->Dst[i].Register.Index = vtctx->writemask_fixup_temps + j;
347bf215546Sopenharmony_ci               break;
348bf215546Sopenharmony_ci            }
349bf215546Sopenharmony_ci         }
350bf215546Sopenharmony_ci      }
351bf215546Sopenharmony_ci   }
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
354bf215546Sopenharmony_ci      if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
355bf215546Sopenharmony_ci          inst->Src[i].Register.Dimension &&
356bf215546Sopenharmony_ci          inst->Src[i].Dimension.Index == 0)
357bf215546Sopenharmony_ci         inst->Src[i].Register.Dimension = 0;
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      for (int j = 0; j < ARRAY_SIZE(vtctx->input_temp); j++)
360bf215546Sopenharmony_ci         virgl_tgsi_rewrite_src_for_input_temp(&vtctx->input_temp[j], &inst->Src[i]);
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci      /* virglrenderer double inputs twice, so move them to temps and drop the
363bf215546Sopenharmony_ci       * swizzle from the double op.
364bf215546Sopenharmony_ci       */
365bf215546Sopenharmony_ci      if (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, i) == TGSI_TYPE_DOUBLE) {
366bf215546Sopenharmony_ci         struct tgsi_full_instruction temp_inst = tgsi_default_full_instruction();
367bf215546Sopenharmony_ci         temp_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
368bf215546Sopenharmony_ci         temp_inst.Instruction.NumDstRegs = 1;
369bf215546Sopenharmony_ci         temp_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY,
370bf215546Sopenharmony_ci         temp_inst.Dst[0].Register.Index = vtctx->src_temp + i;
371bf215546Sopenharmony_ci         temp_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
372bf215546Sopenharmony_ci         temp_inst.Instruction.NumSrcRegs = 1;
373bf215546Sopenharmony_ci         tgsi_transform_src_reg_xyzw(&temp_inst.Src[0], inst->Src[i].Register.File, inst->Src[i].Register.Index);
374bf215546Sopenharmony_ci         temp_inst.Src[0].Register.SwizzleX = inst->Src[i].Register.SwizzleX;
375bf215546Sopenharmony_ci         temp_inst.Src[0].Register.SwizzleY = inst->Src[i].Register.SwizzleY;
376bf215546Sopenharmony_ci         temp_inst.Src[0].Register.SwizzleZ = inst->Src[i].Register.SwizzleZ;
377bf215546Sopenharmony_ci         temp_inst.Src[0].Register.SwizzleW = inst->Src[i].Register.SwizzleW;
378bf215546Sopenharmony_ci         ctx->emit_instruction(ctx, &temp_inst);
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci         inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
381bf215546Sopenharmony_ci         inst->Src[i].Register.Index = vtctx->src_temp + i;
382bf215546Sopenharmony_ci         inst->Src[i].Register.SwizzleX = TGSI_SWIZZLE_X;
383bf215546Sopenharmony_ci         inst->Src[i].Register.SwizzleY = TGSI_SWIZZLE_Y;
384bf215546Sopenharmony_ci         inst->Src[i].Register.SwizzleZ = TGSI_SWIZZLE_Z;
385bf215546Sopenharmony_ci         inst->Src[i].Register.SwizzleW = TGSI_SWIZZLE_W;
386bf215546Sopenharmony_ci      }
387bf215546Sopenharmony_ci   }
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   /* virglrenderer doesn't resolve non-float output write properly,
390bf215546Sopenharmony_ci    * so we have to first write to a temporary */
391bf215546Sopenharmony_ci   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV &&
392bf215546Sopenharmony_ci       !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
393bf215546Sopenharmony_ci       !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_store &&
394bf215546Sopenharmony_ci       inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
395bf215546Sopenharmony_ci       tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) != TGSI_TYPE_FLOAT)  {
396bf215546Sopenharmony_ci      struct tgsi_full_instruction op_to_temp = *inst;
397bf215546Sopenharmony_ci      op_to_temp.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
398bf215546Sopenharmony_ci      op_to_temp.Dst[0].Register.Index = vtctx->src_temp;
399bf215546Sopenharmony_ci      op_to_temp.Dst[0].Dimension.Indirect = 0;
400bf215546Sopenharmony_ci      op_to_temp.Dst[0].Register.Indirect = 0;
401bf215546Sopenharmony_ci      ctx->emit_instruction(ctx, &op_to_temp);
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci      inst->Instruction.Opcode = TGSI_OPCODE_MOV;
404bf215546Sopenharmony_ci      inst->Instruction.NumSrcRegs = 1;
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci      memset(&inst->Src[0], 0, sizeof(inst->Src[0]));
407bf215546Sopenharmony_ci      inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
408bf215546Sopenharmony_ci      inst->Src[0].Register.Index = vtctx->src_temp;
409bf215546Sopenharmony_ci      inst->Src[0].Register.SwizzleY = 1;
410bf215546Sopenharmony_ci      inst->Src[0].Register.SwizzleZ = 2;
411bf215546Sopenharmony_ci      inst->Src[0].Register.SwizzleW = 3;
412bf215546Sopenharmony_ci   }
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   ctx->emit_instruction(ctx, inst);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
417bf215546Sopenharmony_ci      if (vtctx->num_writemask_fixups &&
418bf215546Sopenharmony_ci         inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY &&
419bf215546Sopenharmony_ci         inst->Dst[i].Register.Index >= vtctx->writemask_fixup_temps &&
420bf215546Sopenharmony_ci         inst->Dst[i].Register.Index < vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups) {
421bf215546Sopenharmony_ci         /* Emit the fixup MOV from the clipdist/vert temporary to the real output. */
422bf215546Sopenharmony_ci         unsigned real_out = vtctx->writemask_fixup_outs[inst->Dst[i].Register.Index - vtctx->writemask_fixup_temps];
423bf215546Sopenharmony_ci         tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
424bf215546Sopenharmony_ci                                 TGSI_FILE_OUTPUT, real_out, TGSI_WRITEMASK_XYZW,
425bf215546Sopenharmony_ci                                 inst->Dst[i].Register.File, inst->Dst[i].Register.Index);
426bf215546Sopenharmony_ci      }
427bf215546Sopenharmony_ci   }
428bf215546Sopenharmony_ci}
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_cistruct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const struct tgsi_token *tokens_in,
431bf215546Sopenharmony_ci                                        bool is_separable)
432bf215546Sopenharmony_ci{
433bf215546Sopenharmony_ci   struct virgl_transform_context transform;
434bf215546Sopenharmony_ci   const uint newLen = tgsi_num_tokens(tokens_in);
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   memset(&transform, 0, sizeof(transform));
437bf215546Sopenharmony_ci   transform.base.transform_declaration = virgl_tgsi_transform_declaration;
438bf215546Sopenharmony_ci   transform.base.transform_property = virgl_tgsi_transform_property;
439bf215546Sopenharmony_ci   transform.base.transform_instruction = virgl_tgsi_transform_instruction;
440bf215546Sopenharmony_ci   transform.base.prolog = virgl_tgsi_transform_prolog;
441bf215546Sopenharmony_ci   transform.cull_enabled = vscreen->caps.caps.v1.bset.has_cull;
442bf215546Sopenharmony_ci   transform.has_precise = vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_TGSI_PRECISE;
443bf215546Sopenharmony_ci   transform.fake_fp64 =
444bf215546Sopenharmony_ci      vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_FAKE_FP64;
445bf215546Sopenharmony_ci   transform.is_separable = is_separable && (vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_SSO);
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   for (int i = 0; i < ARRAY_SIZE(transform.input_temp); i++)
448bf215546Sopenharmony_ci      transform.input_temp[i].index = ~0;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   tgsi_scan_shader(tokens_in, &transform.info);
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base);
453bf215546Sopenharmony_ci   free(transform.precise_flags);
454bf215546Sopenharmony_ci   return new_tokens;
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci}
457