1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2014, 2015 Red Hat. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/* the virgl hw tgsi vs what the current gallium want will diverge over time. 25bf215546Sopenharmony_ci so add a transform stage to remove things we don't want to send unless 26bf215546Sopenharmony_ci the receiver supports it. 27bf215546Sopenharmony_ci*/ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "tgsi/tgsi_transform.h" 30bf215546Sopenharmony_ci#include "tgsi/tgsi_info.h" 31bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h" 32bf215546Sopenharmony_ci#include "virgl_context.h" 33bf215546Sopenharmony_ci#include "virgl_screen.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistruct virgl_input_temp { 36bf215546Sopenharmony_ci enum tgsi_file_type file; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci /* Index within in the INPUT or SV files, or ~0 if no DCL of this input */ 39bf215546Sopenharmony_ci unsigned index; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci /* TGSI_FILE_TEMPORARY index it will be mapped to. */ 42bf215546Sopenharmony_ci unsigned temp; 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci bool sint; 45bf215546Sopenharmony_ci}; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_cienum virgl_input_temps { 48bf215546Sopenharmony_ci INPUT_TEMP_LAYER, 49bf215546Sopenharmony_ci INPUT_TEMP_VIEWPORT_INDEX, 50bf215546Sopenharmony_ci INPUT_TEMP_BLOCK_ID, 51bf215546Sopenharmony_ci INPUT_TEMP_HELPER_INVOCATION, 52bf215546Sopenharmony_ci INPUT_TEMP_COUNT, 53bf215546Sopenharmony_ci}; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cistruct virgl_transform_context { 56bf215546Sopenharmony_ci struct tgsi_transform_context base; 57bf215546Sopenharmony_ci struct tgsi_shader_info info; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci bool cull_enabled; 60bf215546Sopenharmony_ci bool has_precise; 61bf215546Sopenharmony_ci bool fake_fp64; 62bf215546Sopenharmony_ci bool is_separable; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci unsigned next_temp; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci unsigned src_temp; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci unsigned writemask_fixup_outs[5]; 69bf215546Sopenharmony_ci unsigned writemask_fixup_temps; 70bf215546Sopenharmony_ci unsigned num_writemask_fixups; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci struct virgl_input_temp input_temp[INPUT_TEMP_COUNT]; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci uint32_t *precise_flags; 75bf215546Sopenharmony_ci}; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic void 78bf215546Sopenharmony_civirgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration *decl, 79bf215546Sopenharmony_ci struct virgl_input_temp *input_temp, 80bf215546Sopenharmony_ci enum tgsi_semantic semantic_name) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci if (decl->Semantic.Name == semantic_name) { 83bf215546Sopenharmony_ci input_temp->file = decl->Declaration.File; 84bf215546Sopenharmony_ci input_temp->index = decl->Range.First; 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic void 89bf215546Sopenharmony_civirgl_tgsi_transform_declaration(struct tgsi_transform_context *ctx, 90bf215546Sopenharmony_ci struct tgsi_full_declaration *decl) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci switch (decl->Declaration.File) { 95bf215546Sopenharmony_ci case TGSI_FILE_CONSTANT: 96bf215546Sopenharmony_ci if (decl->Declaration.Dimension) { 97bf215546Sopenharmony_ci if (decl->Dim.Index2D == 0) 98bf215546Sopenharmony_ci decl->Declaration.Dimension = 0; 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci break; 101bf215546Sopenharmony_ci case TGSI_FILE_INPUT: 102bf215546Sopenharmony_ci virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_LAYER], 103bf215546Sopenharmony_ci TGSI_SEMANTIC_LAYER); 104bf215546Sopenharmony_ci virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX], 105bf215546Sopenharmony_ci TGSI_SEMANTIC_VIEWPORT_INDEX); 106bf215546Sopenharmony_ci break; 107bf215546Sopenharmony_ci case TGSI_FILE_SYSTEM_VALUE: 108bf215546Sopenharmony_ci virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_BLOCK_ID], 109bf215546Sopenharmony_ci TGSI_SEMANTIC_BLOCK_ID); 110bf215546Sopenharmony_ci virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION], 111bf215546Sopenharmony_ci TGSI_SEMANTIC_HELPER_INVOCATION); 112bf215546Sopenharmony_ci break; 113bf215546Sopenharmony_ci case TGSI_FILE_OUTPUT: 114bf215546Sopenharmony_ci switch (decl->Semantic.Name) { 115bf215546Sopenharmony_ci case TGSI_SEMANTIC_CLIPDIST: 116bf215546Sopenharmony_ci vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First; 117bf215546Sopenharmony_ci if (decl->Range.Last != decl->Range.First) 118bf215546Sopenharmony_ci vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.Last; 119bf215546Sopenharmony_ci break; 120bf215546Sopenharmony_ci case TGSI_SEMANTIC_CLIPVERTEX: 121bf215546Sopenharmony_ci vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First; 122bf215546Sopenharmony_ci break; 123bf215546Sopenharmony_ci case TGSI_SEMANTIC_COLOR: 124bf215546Sopenharmony_ci /* Vertex front/backface color output also has issues with writemasking */ 125bf215546Sopenharmony_ci if (vtctx->base.processor != PIPE_SHADER_FRAGMENT) 126bf215546Sopenharmony_ci vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First; 127bf215546Sopenharmony_ci break; 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci break; 130bf215546Sopenharmony_ci case TGSI_FILE_TEMPORARY: 131bf215546Sopenharmony_ci vtctx->next_temp = MAX2(vtctx->next_temp, decl->Range.Last + 1); 132bf215546Sopenharmony_ci break; 133bf215546Sopenharmony_ci default: 134bf215546Sopenharmony_ci break; 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci assert(vtctx->num_writemask_fixups <= ARRAY_SIZE(vtctx->writemask_fixup_outs)); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci ctx->emit_declaration(ctx, decl); 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci/* for now just strip out the new properties the remote doesn't understand 142bf215546Sopenharmony_ci yet */ 143bf215546Sopenharmony_cistatic void 144bf215546Sopenharmony_civirgl_tgsi_transform_property(struct tgsi_transform_context *ctx, 145bf215546Sopenharmony_ci struct tgsi_full_property *prop) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx; 148bf215546Sopenharmony_ci switch (prop->Property.PropertyName) { 149bf215546Sopenharmony_ci case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: 150bf215546Sopenharmony_ci case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: 151bf215546Sopenharmony_ci if (vtctx->cull_enabled) 152bf215546Sopenharmony_ci ctx->emit_property(ctx, prop); 153bf215546Sopenharmony_ci break; 154bf215546Sopenharmony_ci case TGSI_PROPERTY_NEXT_SHADER: 155bf215546Sopenharmony_ci break; 156bf215546Sopenharmony_ci default: 157bf215546Sopenharmony_ci ctx->emit_property(ctx, prop); 158bf215546Sopenharmony_ci break; 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic void 163bf215546Sopenharmony_civirgl_mov_input_temp_sint(struct tgsi_transform_context * ctx, 164bf215546Sopenharmony_ci struct virgl_input_temp *temp) 165bf215546Sopenharmony_ci{ 166bf215546Sopenharmony_ci if (temp->index != ~0) { 167bf215546Sopenharmony_ci tgsi_transform_op2_inst(ctx, TGSI_OPCODE_IMAX, 168bf215546Sopenharmony_ci TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW, 169bf215546Sopenharmony_ci temp->file, temp->index, 170bf215546Sopenharmony_ci temp->file, temp->index, 0); 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_cistatic void 175bf215546Sopenharmony_civirgl_mov_input_temp_uint(struct tgsi_transform_context * ctx, 176bf215546Sopenharmony_ci struct virgl_input_temp *temp) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci if (temp->index != ~0) { 179bf215546Sopenharmony_ci tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, 180bf215546Sopenharmony_ci TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW, 181bf215546Sopenharmony_ci temp->file, temp->index); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci} 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_cistatic void 186bf215546Sopenharmony_civirgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci if (vtctx->is_separable) { 191bf215546Sopenharmony_ci struct tgsi_full_property prop = tgsi_default_full_property(); 192bf215546Sopenharmony_ci prop.Property.PropertyName = TGSI_PROPERTY_SEPARABLE_PROGRAM; 193bf215546Sopenharmony_ci prop.Property.NrTokens += 1; 194bf215546Sopenharmony_ci prop.u[0].Data = 1; 195bf215546Sopenharmony_ci ctx->emit_property(ctx, &prop); 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci vtctx->src_temp = vtctx->next_temp; 199bf215546Sopenharmony_ci vtctx->next_temp += 4; 200bf215546Sopenharmony_ci tgsi_transform_temps_decl(ctx, vtctx->src_temp, vtctx->src_temp + 3); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci if (vtctx->num_writemask_fixups) { 203bf215546Sopenharmony_ci vtctx->writemask_fixup_temps = vtctx->next_temp; 204bf215546Sopenharmony_ci vtctx->next_temp += vtctx->num_writemask_fixups; 205bf215546Sopenharmony_ci tgsi_transform_temps_decl(ctx, 206bf215546Sopenharmony_ci vtctx->writemask_fixup_temps, 207bf215546Sopenharmony_ci vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups - 1); 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* Assign input temps before we emit any instructions, but after we parsed 211bf215546Sopenharmony_ci * existing temp decls. 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(vtctx->input_temp); i++) { 214bf215546Sopenharmony_ci if (vtctx->input_temp[i].index != ~0) { 215bf215546Sopenharmony_ci vtctx->input_temp[i].temp = vtctx->next_temp++; 216bf215546Sopenharmony_ci tgsi_transform_temp_decl(ctx, vtctx->input_temp[i].temp); 217bf215546Sopenharmony_ci } 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci /* virglrenderer makes mistakes in the types of layer/viewport input 221bf215546Sopenharmony_ci * references from unsigned ops, so we use a temp that we do a no-op signed 222bf215546Sopenharmony_ci * op to at the top of the shader. 223bf215546Sopenharmony_ci * 224bf215546Sopenharmony_ci * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/615 225bf215546Sopenharmony_ci */ 226bf215546Sopenharmony_ci virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_LAYER]); 227bf215546Sopenharmony_ci virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX]); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci /* virglrenderer also makes mistakes in the types of block id input 230bf215546Sopenharmony_ci * references from signed ops, so we use a temp that we do a plain MOV to at 231bf215546Sopenharmony_ci * the top of the shader. Also, it falls over if an unused channel's swizzle 232bf215546Sopenharmony_ci * uses the .w of the block id. 233bf215546Sopenharmony_ci */ 234bf215546Sopenharmony_ci if (vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index != ~0) { 235bf215546Sopenharmony_ci struct tgsi_full_instruction inst = tgsi_default_full_instruction(); 236bf215546Sopenharmony_ci inst.Instruction.Opcode = TGSI_OPCODE_MOV; 237bf215546Sopenharmony_ci inst.Instruction.NumDstRegs = 1; 238bf215546Sopenharmony_ci inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY, 239bf215546Sopenharmony_ci inst.Dst[0].Register.Index = vtctx->input_temp[INPUT_TEMP_BLOCK_ID].temp; 240bf215546Sopenharmony_ci inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; 241bf215546Sopenharmony_ci inst.Instruction.NumSrcRegs = 1; 242bf215546Sopenharmony_ci tgsi_transform_src_reg_xyzw(&inst.Src[0], 243bf215546Sopenharmony_ci vtctx->input_temp[INPUT_TEMP_BLOCK_ID].file, 244bf215546Sopenharmony_ci vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index); 245bf215546Sopenharmony_ci inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 246bf215546Sopenharmony_ci inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; 247bf215546Sopenharmony_ci inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z; 248bf215546Sopenharmony_ci inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z; 249bf215546Sopenharmony_ci ctx->emit_instruction(ctx, &inst); 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci vtctx->precise_flags = calloc((vtctx->next_temp + 7)/8, sizeof(uint32_t)); 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_cistatic void 258bf215546Sopenharmony_civirgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp *temp, struct tgsi_full_src_register *src) 259bf215546Sopenharmony_ci{ 260bf215546Sopenharmony_ci if (src->Register.File == temp->file && src->Register.Index == temp->index) { 261bf215546Sopenharmony_ci src->Register.File = TGSI_FILE_TEMPORARY; 262bf215546Sopenharmony_ci src->Register.Index = temp->temp; 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci} 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_cistatic void 267bf215546Sopenharmony_civirgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx, 268bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 269bf215546Sopenharmony_ci{ 270bf215546Sopenharmony_ci struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx; 271bf215546Sopenharmony_ci if (vtctx->fake_fp64 && 272bf215546Sopenharmony_ci (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE || 273bf215546Sopenharmony_ci tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE)) { 274bf215546Sopenharmony_ci debug_printf("VIRGL: ARB_gpu_shader_fp64 is exposed but not supported."); 275bf215546Sopenharmony_ci return; 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci if (!vtctx->has_precise && inst->Instruction.Precise) 279bf215546Sopenharmony_ci inst->Instruction.Precise = 0; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci /* For outputs NTT adds a final mov op but NIR doesn't propagate precise with moves, 282bf215546Sopenharmony_ci * so that we don't see whether the assignment is from a precise instruction, but 283bf215546Sopenharmony_ci * we need to know this to set the output decoration correctly, so propagate the 284bf215546Sopenharmony_ci * precise flag with TGSI */ 285bf215546Sopenharmony_ci for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { 286bf215546Sopenharmony_ci if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) { 287bf215546Sopenharmony_ci uint32_t index = inst->Dst[i].Register.Index / 8; 288bf215546Sopenharmony_ci uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8); 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci /* Since we re-use temps set and clear the precise flag according to the last use 291bf215546Sopenharmony_ci * for the register index and written components. Since moves are not marked 292bf215546Sopenharmony_ci * as precise originally, and we may end up with an if/else clause that assignes 293bf215546Sopenharmony_ci * a precise result in the if branche, but does a simple move from a constant 294bf215546Sopenharmony_ci * on the else branche, we don't clear the flag when we hit a mov. 295bf215546Sopenharmony_ci * We do the conservatiove approach here, because virglrenderer emits different temp 296bf215546Sopenharmony_ci * ranges, and we don't want to mark all temps as precise only because we have 297bf215546Sopenharmony_ci * one precise output */ 298bf215546Sopenharmony_ci if (inst->Instruction.Precise) 299bf215546Sopenharmony_ci vtctx->precise_flags[index] |= bits; 300bf215546Sopenharmony_ci } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) { 301bf215546Sopenharmony_ci for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { 302bf215546Sopenharmony_ci if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) { 303bf215546Sopenharmony_ci uint32_t index = inst->Src[i].Register.Index / 8; 304bf215546Sopenharmony_ci uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) | 305bf215546Sopenharmony_ci (1 << inst->Src[i].Register.SwizzleY) | 306bf215546Sopenharmony_ci (1 << inst->Src[i].Register.SwizzleZ) | 307bf215546Sopenharmony_ci (1 << inst->Src[i].Register.SwizzleW); 308bf215546Sopenharmony_ci uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8); 309bf215546Sopenharmony_ci if (vtctx->precise_flags[index] & bits) { 310bf215546Sopenharmony_ci inst->Instruction.Precise = 1; 311bf215546Sopenharmony_ci break; 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* virglrenderer can run out of space in internal buffers for immediates as 319bf215546Sopenharmony_ci * tex operands. Move the first immediate tex arg to a temp to save space in 320bf215546Sopenharmony_ci * the buffer. 321bf215546Sopenharmony_ci * 322bf215546Sopenharmony_ci * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/582 323bf215546Sopenharmony_ci */ 324bf215546Sopenharmony_ci if (tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex && 325bf215546Sopenharmony_ci inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) { 326bf215546Sopenharmony_ci tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, 327bf215546Sopenharmony_ci TGSI_FILE_TEMPORARY, vtctx->src_temp, 328bf215546Sopenharmony_ci TGSI_WRITEMASK_XYZW, 329bf215546Sopenharmony_ci inst->Src[0].Register.File, 330bf215546Sopenharmony_ci inst->Src[0].Register.Index); 331bf215546Sopenharmony_ci inst->Src[0].Register.File = TGSI_FILE_TEMPORARY; 332bf215546Sopenharmony_ci inst->Src[0].Register.Index = vtctx->src_temp; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { 336bf215546Sopenharmony_ci /* virglrenderer would fail to compile on clipdist, clipvertex, and some 337bf215546Sopenharmony_ci * two-sided-related color writes without a full writemask. So, we write 338bf215546Sopenharmony_ci * to a temp and store that temp with a full writemask. 339bf215546Sopenharmony_ci * 340bf215546Sopenharmony_ci * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/616 341bf215546Sopenharmony_ci */ 342bf215546Sopenharmony_ci if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT) { 343bf215546Sopenharmony_ci for (int j = 0; j < vtctx->num_writemask_fixups; j++) { 344bf215546Sopenharmony_ci if (inst->Dst[i].Register.Index == vtctx->writemask_fixup_outs[j]) { 345bf215546Sopenharmony_ci inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY; 346bf215546Sopenharmony_ci inst->Dst[i].Register.Index = vtctx->writemask_fixup_temps + j; 347bf215546Sopenharmony_ci break; 348bf215546Sopenharmony_ci } 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci } 351bf215546Sopenharmony_ci } 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) { 354bf215546Sopenharmony_ci if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT && 355bf215546Sopenharmony_ci inst->Src[i].Register.Dimension && 356bf215546Sopenharmony_ci inst->Src[i].Dimension.Index == 0) 357bf215546Sopenharmony_ci inst->Src[i].Register.Dimension = 0; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci for (int j = 0; j < ARRAY_SIZE(vtctx->input_temp); j++) 360bf215546Sopenharmony_ci virgl_tgsi_rewrite_src_for_input_temp(&vtctx->input_temp[j], &inst->Src[i]); 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci /* virglrenderer double inputs twice, so move them to temps and drop the 363bf215546Sopenharmony_ci * swizzle from the double op. 364bf215546Sopenharmony_ci */ 365bf215546Sopenharmony_ci if (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, i) == TGSI_TYPE_DOUBLE) { 366bf215546Sopenharmony_ci struct tgsi_full_instruction temp_inst = tgsi_default_full_instruction(); 367bf215546Sopenharmony_ci temp_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 368bf215546Sopenharmony_ci temp_inst.Instruction.NumDstRegs = 1; 369bf215546Sopenharmony_ci temp_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY, 370bf215546Sopenharmony_ci temp_inst.Dst[0].Register.Index = vtctx->src_temp + i; 371bf215546Sopenharmony_ci temp_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; 372bf215546Sopenharmony_ci temp_inst.Instruction.NumSrcRegs = 1; 373bf215546Sopenharmony_ci tgsi_transform_src_reg_xyzw(&temp_inst.Src[0], inst->Src[i].Register.File, inst->Src[i].Register.Index); 374bf215546Sopenharmony_ci temp_inst.Src[0].Register.SwizzleX = inst->Src[i].Register.SwizzleX; 375bf215546Sopenharmony_ci temp_inst.Src[0].Register.SwizzleY = inst->Src[i].Register.SwizzleY; 376bf215546Sopenharmony_ci temp_inst.Src[0].Register.SwizzleZ = inst->Src[i].Register.SwizzleZ; 377bf215546Sopenharmony_ci temp_inst.Src[0].Register.SwizzleW = inst->Src[i].Register.SwizzleW; 378bf215546Sopenharmony_ci ctx->emit_instruction(ctx, &temp_inst); 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci inst->Src[i].Register.File = TGSI_FILE_TEMPORARY; 381bf215546Sopenharmony_ci inst->Src[i].Register.Index = vtctx->src_temp + i; 382bf215546Sopenharmony_ci inst->Src[i].Register.SwizzleX = TGSI_SWIZZLE_X; 383bf215546Sopenharmony_ci inst->Src[i].Register.SwizzleY = TGSI_SWIZZLE_Y; 384bf215546Sopenharmony_ci inst->Src[i].Register.SwizzleZ = TGSI_SWIZZLE_Z; 385bf215546Sopenharmony_ci inst->Src[i].Register.SwizzleW = TGSI_SWIZZLE_W; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci } 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci /* virglrenderer doesn't resolve non-float output write properly, 390bf215546Sopenharmony_ci * so we have to first write to a temporary */ 391bf215546Sopenharmony_ci if (inst->Instruction.Opcode != TGSI_OPCODE_MOV && 392bf215546Sopenharmony_ci !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex && 393bf215546Sopenharmony_ci !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_store && 394bf215546Sopenharmony_ci inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && 395bf215546Sopenharmony_ci tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) != TGSI_TYPE_FLOAT) { 396bf215546Sopenharmony_ci struct tgsi_full_instruction op_to_temp = *inst; 397bf215546Sopenharmony_ci op_to_temp.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 398bf215546Sopenharmony_ci op_to_temp.Dst[0].Register.Index = vtctx->src_temp; 399bf215546Sopenharmony_ci op_to_temp.Dst[0].Dimension.Indirect = 0; 400bf215546Sopenharmony_ci op_to_temp.Dst[0].Register.Indirect = 0; 401bf215546Sopenharmony_ci ctx->emit_instruction(ctx, &op_to_temp); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci inst->Instruction.Opcode = TGSI_OPCODE_MOV; 404bf215546Sopenharmony_ci inst->Instruction.NumSrcRegs = 1; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci memset(&inst->Src[0], 0, sizeof(inst->Src[0])); 407bf215546Sopenharmony_ci inst->Src[0].Register.File = TGSI_FILE_TEMPORARY; 408bf215546Sopenharmony_ci inst->Src[0].Register.Index = vtctx->src_temp; 409bf215546Sopenharmony_ci inst->Src[0].Register.SwizzleY = 1; 410bf215546Sopenharmony_ci inst->Src[0].Register.SwizzleZ = 2; 411bf215546Sopenharmony_ci inst->Src[0].Register.SwizzleW = 3; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci ctx->emit_instruction(ctx, inst); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) { 417bf215546Sopenharmony_ci if (vtctx->num_writemask_fixups && 418bf215546Sopenharmony_ci inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY && 419bf215546Sopenharmony_ci inst->Dst[i].Register.Index >= vtctx->writemask_fixup_temps && 420bf215546Sopenharmony_ci inst->Dst[i].Register.Index < vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups) { 421bf215546Sopenharmony_ci /* Emit the fixup MOV from the clipdist/vert temporary to the real output. */ 422bf215546Sopenharmony_ci unsigned real_out = vtctx->writemask_fixup_outs[inst->Dst[i].Register.Index - vtctx->writemask_fixup_temps]; 423bf215546Sopenharmony_ci tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV, 424bf215546Sopenharmony_ci TGSI_FILE_OUTPUT, real_out, TGSI_WRITEMASK_XYZW, 425bf215546Sopenharmony_ci inst->Dst[i].Register.File, inst->Dst[i].Register.Index); 426bf215546Sopenharmony_ci } 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci} 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_cistruct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const struct tgsi_token *tokens_in, 431bf215546Sopenharmony_ci bool is_separable) 432bf215546Sopenharmony_ci{ 433bf215546Sopenharmony_ci struct virgl_transform_context transform; 434bf215546Sopenharmony_ci const uint newLen = tgsi_num_tokens(tokens_in); 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci memset(&transform, 0, sizeof(transform)); 437bf215546Sopenharmony_ci transform.base.transform_declaration = virgl_tgsi_transform_declaration; 438bf215546Sopenharmony_ci transform.base.transform_property = virgl_tgsi_transform_property; 439bf215546Sopenharmony_ci transform.base.transform_instruction = virgl_tgsi_transform_instruction; 440bf215546Sopenharmony_ci transform.base.prolog = virgl_tgsi_transform_prolog; 441bf215546Sopenharmony_ci transform.cull_enabled = vscreen->caps.caps.v1.bset.has_cull; 442bf215546Sopenharmony_ci transform.has_precise = vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_TGSI_PRECISE; 443bf215546Sopenharmony_ci transform.fake_fp64 = 444bf215546Sopenharmony_ci vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_FAKE_FP64; 445bf215546Sopenharmony_ci transform.is_separable = is_separable && (vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_SSO); 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(transform.input_temp); i++) 448bf215546Sopenharmony_ci transform.input_temp[i].index = ~0; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci tgsi_scan_shader(tokens_in, &transform.info); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base); 453bf215546Sopenharmony_ci free(transform.precise_flags); 454bf215546Sopenharmony_ci return new_tokens; 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci} 457