/*
 * Copyright 2014, 2015 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* the virgl hw TGSI vs. what current gallium wants will diverge over time,
   so add a transform stage to remove things we don't want to send unless
   the receiver supports them.
*/

#include "tgsi/tgsi_transform.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_scan.h"
#include "virgl_context.h"
#include "virgl_screen.h"

struct virgl_input_temp {
   enum tgsi_file_type file;

   /* Index within the INPUT or SV files, or ~0 if no DCL of this input */
   unsigned index;

   /* TGSI_FILE_TEMPORARY index it will be mapped to. */
   unsigned temp;

   bool sint;
};

enum virgl_input_temps {
   INPUT_TEMP_LAYER,
   INPUT_TEMP_VIEWPORT_INDEX,
   INPUT_TEMP_BLOCK_ID,
   INPUT_TEMP_HELPER_INVOCATION,
   INPUT_TEMP_COUNT,
};

struct virgl_transform_context {
   struct tgsi_transform_context base;
   struct tgsi_shader_info info;

   bool cull_enabled;
   bool has_precise;
   bool fake_fp64;
   bool is_separable;

   unsigned next_temp;

   unsigned src_temp;

   unsigned writemask_fixup_outs[5];
   unsigned writemask_fixup_temps;
   unsigned num_writemask_fixups;

   struct virgl_input_temp input_temp[INPUT_TEMP_COUNT];

   uint32_t *precise_flags;
};

static void
virgl_tgsi_transform_declaration_input_temp(const struct tgsi_full_declaration *decl,
                                            struct virgl_input_temp *input_temp,
                                            enum tgsi_semantic semantic_name)
{
   if (decl->Semantic.Name == semantic_name) {
      input_temp->file = decl->Declaration.File;
      input_temp->index = decl->Range.First;
   }
}
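
/* Declaration callback: notes which inputs/system values and outputs will need
 * the workarounds applied later (input temps, writemask fixups), drops the 2D
 * dimension on const buffer 0 declarations, and tracks the highest declared
 * temporary so new temps can be allocated past it.
 */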
static void
virgl_tgsi_transform_declaration(struct tgsi_transform_context *ctx,
                                 struct tgsi_full_declaration *decl)
{
   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;

   switch (decl->Declaration.File) {
   case TGSI_FILE_CONSTANT:
      if (decl->Declaration.Dimension) {
         if (decl->Dim.Index2D == 0)
            decl->Declaration.Dimension = 0;
      }
      break;
   case TGSI_FILE_INPUT:
      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_LAYER],
                                                  TGSI_SEMANTIC_LAYER);
      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX],
                                                  TGSI_SEMANTIC_VIEWPORT_INDEX);
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_BLOCK_ID],
                                                  TGSI_SEMANTIC_BLOCK_ID);
      virgl_tgsi_transform_declaration_input_temp(decl, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION],
                                                  TGSI_SEMANTIC_HELPER_INVOCATION);
      break;
   case TGSI_FILE_OUTPUT:
      switch (decl->Semantic.Name) {
      case TGSI_SEMANTIC_CLIPDIST:
         vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
         if (decl->Range.Last != decl->Range.First)
            vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.Last;
         break;
      case TGSI_SEMANTIC_CLIPVERTEX:
         vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
         break;
      case TGSI_SEMANTIC_COLOR:
         /* Vertex front/backface color output also has issues with writemasking */
         if (vtctx->base.processor != PIPE_SHADER_FRAGMENT)
            vtctx->writemask_fixup_outs[vtctx->num_writemask_fixups++] = decl->Range.First;
         break;
      }
      break;
   case TGSI_FILE_TEMPORARY:
      vtctx->next_temp = MAX2(vtctx->next_temp, decl->Range.Last + 1);
      break;
   default:
      break;
   }
   assert(vtctx->num_writemask_fixups <= ARRAY_SIZE(vtctx->writemask_fixup_outs));

   ctx->emit_declaration(ctx, decl);
}

/* for now just strip out the new properties the remote doesn't understand
   yet */
static void
virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
                              struct tgsi_full_property *prop)
{
   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
   case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
      if (vtctx->cull_enabled)
         ctx->emit_property(ctx, prop);
      break;
   case TGSI_PROPERTY_NEXT_SHADER:
      break;
   default:
      ctx->emit_property(ctx, prop);
      break;
   }
}

static void
virgl_mov_input_temp_sint(struct tgsi_transform_context *ctx,
                          struct virgl_input_temp *temp)
{
   if (temp->index != ~0) {
      tgsi_transform_op2_inst(ctx, TGSI_OPCODE_IMAX,
                              TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
                              temp->file, temp->index,
                              temp->file, temp->index, 0);
   }
}

static void
virgl_mov_input_temp_uint(struct tgsi_transform_context *ctx,
                          struct virgl_input_temp *temp)
{
   if (temp->index != ~0) {
      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
                              TGSI_FILE_TEMPORARY, temp->temp, TGSI_WRITEMASK_XYZW,
                              temp->file, temp->index);
   }
}
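
/* Prolog: runs after the existing declarations have been processed and before
 * the first instruction.  Emits the separable-program property when requested,
 * declares the scratch temps used by the instruction workarounds below, and
 * copies the captured inputs/system values into their temps so later reads can
 * be redirected to them.
 */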
static void
virgl_tgsi_transform_prolog(struct tgsi_transform_context *ctx)
{
   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;

   if (vtctx->is_separable) {
      struct tgsi_full_property prop = tgsi_default_full_property();
      prop.Property.PropertyName = TGSI_PROPERTY_SEPARABLE_PROGRAM;
      prop.Property.NrTokens += 1;
      prop.u[0].Data = 1;
      ctx->emit_property(ctx, &prop);
   }

   vtctx->src_temp = vtctx->next_temp;
   vtctx->next_temp += 4;
   tgsi_transform_temps_decl(ctx, vtctx->src_temp, vtctx->src_temp + 3);

   if (vtctx->num_writemask_fixups) {
      vtctx->writemask_fixup_temps = vtctx->next_temp;
      vtctx->next_temp += vtctx->num_writemask_fixups;
      tgsi_transform_temps_decl(ctx,
                                vtctx->writemask_fixup_temps,
                                vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups - 1);
   }

   /* Assign input temps before we emit any instructions, but after we parsed
    * existing temp decls.
    */
   for (int i = 0; i < ARRAY_SIZE(vtctx->input_temp); i++) {
      if (vtctx->input_temp[i].index != ~0) {
         vtctx->input_temp[i].temp = vtctx->next_temp++;
         tgsi_transform_temp_decl(ctx, vtctx->input_temp[i].temp);
      }
   }

   /* virglrenderer makes mistakes in the types of layer/viewport input
    * references from unsigned ops, so we use a temp that we do a no-op signed
    * op to at the top of the shader.
    *
    * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/615
    */
   virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_LAYER]);
   virgl_mov_input_temp_sint(ctx, &vtctx->input_temp[INPUT_TEMP_VIEWPORT_INDEX]);

   /* virglrenderer also makes mistakes in the types of block id input
    * references from signed ops, so we use a temp that we do a plain MOV to at
    * the top of the shader.  Also, it falls over if an unused channel's swizzle
    * uses the .w of the block id.
    */
   if (vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index != ~0) {
      struct tgsi_full_instruction inst = tgsi_default_full_instruction();
      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
      inst.Instruction.NumDstRegs = 1;
      inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
      inst.Dst[0].Register.Index = vtctx->input_temp[INPUT_TEMP_BLOCK_ID].temp;
      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
      inst.Instruction.NumSrcRegs = 1;
      tgsi_transform_src_reg_xyzw(&inst.Src[0],
                                  vtctx->input_temp[INPUT_TEMP_BLOCK_ID].file,
                                  vtctx->input_temp[INPUT_TEMP_BLOCK_ID].index);
      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
      inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
      ctx->emit_instruction(ctx, &inst);
   }

   virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]);

   vtctx->precise_flags = calloc((vtctx->next_temp + 7) / 8, sizeof(uint32_t));
}

static void
virgl_tgsi_rewrite_src_for_input_temp(struct virgl_input_temp *temp, struct tgsi_full_src_register *src)
{
   if (src->Register.File == temp->file && src->Register.Index == temp->index) {
      src->Register.File = TGSI_FILE_TEMPORARY;
      src->Register.Index = temp->temp;
   }
}
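
/* Per-instruction callback: applies the virglrenderer workarounds described
 * inline below (precise-flag propagation, staging immediate tex args, double
 * sources and non-float results through temps, full-writemask output fixups),
 * then emits the possibly rewritten instruction.
 */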
static void
virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
                                 struct tgsi_full_instruction *inst)
{
   struct virgl_transform_context *vtctx = (struct virgl_transform_context *)ctx;
   if (vtctx->fake_fp64 &&
       (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE ||
        tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) == TGSI_TYPE_DOUBLE)) {
      debug_printf("VIRGL: ARB_gpu_shader_fp64 is exposed but not supported.");
      return;
   }

   if (!vtctx->has_precise && inst->Instruction.Precise)
      inst->Instruction.Precise = 0;

   /* For outputs NTT adds a final mov op, but NIR doesn't propagate precise
    * with moves, so we can't see whether the assignment is from a precise
    * instruction.  We need to know this to set the output decoration
    * correctly, so propagate the precise flag within TGSI. */
   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
      if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) {
         uint32_t index = inst->Dst[i].Register.Index / 8;
         uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8);

         /* Since we re-use temps, set and clear the precise flag according to
          * the last use for the register index and written components.  Since
          * moves are not marked as precise originally, and we may end up with
          * an if/else clause that assigns a precise result in the if branch
          * but does a simple move from a constant in the else branch, we don't
          * clear the flag when we hit a mov.  We take the conservative
          * approach here, because virglrenderer emits different temp ranges,
          * and we don't want to mark all temps as precise only because we have
          * one precise output. */
         if (inst->Instruction.Precise)
            vtctx->precise_flags[index] |= bits;
      } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
         for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
            if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) {
               uint32_t index = inst->Src[i].Register.Index / 8;
               uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) |
                                    (1 << inst->Src[i].Register.SwizzleY) |
                                    (1 << inst->Src[i].Register.SwizzleZ) |
                                    (1 << inst->Src[i].Register.SwizzleW);
               uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8);
               if (vtctx->precise_flags[index] & bits) {
                  inst->Instruction.Precise = 1;
                  break;
               }
            }
         }
      }
   }

   /* virglrenderer can run out of space in internal buffers for immediates as
    * tex operands.  Move the first immediate tex arg to a temp to save space in
    * the buffer.
    *
    * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/582
    */
   if (tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
       inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
                              TGSI_FILE_TEMPORARY, vtctx->src_temp,
                              TGSI_WRITEMASK_XYZW,
                              inst->Src[0].Register.File,
                              inst->Src[0].Register.Index);
      inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
      inst->Src[0].Register.Index = vtctx->src_temp;
   }

   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
      /* virglrenderer would fail to compile on clipdist, clipvertex, and some
       * two-sided-related color writes without a full writemask.  So, we write
       * to a temp and store that temp with a full writemask.
       *
       * https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/616
       */
      if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT) {
         for (int j = 0; j < vtctx->num_writemask_fixups; j++) {
            if (inst->Dst[i].Register.Index == vtctx->writemask_fixup_outs[j]) {
               inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
               inst->Dst[i].Register.Index = vtctx->writemask_fixup_temps + j;
               break;
            }
         }
      }
   }
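
   /* Rewrite source operands: drop the 2D dimension on const buffer 0 reads to
    * match the declaration handling above, redirect reads of the captured
    * inputs/system values to their temps, and stage double sources through
    * temps.
    */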
   for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
      if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
          inst->Src[i].Register.Dimension &&
          inst->Src[i].Dimension.Index == 0)
         inst->Src[i].Register.Dimension = 0;

      for (int j = 0; j < ARRAY_SIZE(vtctx->input_temp); j++)
         virgl_tgsi_rewrite_src_for_input_temp(&vtctx->input_temp[j], &inst->Src[i]);

      /* virglrenderer swizzles double inputs twice, so move them to temps and
       * drop the swizzle from the double op.
       */
      if (tgsi_opcode_infer_src_type(inst->Instruction.Opcode, i) == TGSI_TYPE_DOUBLE) {
         struct tgsi_full_instruction temp_inst = tgsi_default_full_instruction();
         temp_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
         temp_inst.Instruction.NumDstRegs = 1;
         temp_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
         temp_inst.Dst[0].Register.Index = vtctx->src_temp + i;
         temp_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
         temp_inst.Instruction.NumSrcRegs = 1;
         tgsi_transform_src_reg_xyzw(&temp_inst.Src[0], inst->Src[i].Register.File,
                                     inst->Src[i].Register.Index);
         temp_inst.Src[0].Register.SwizzleX = inst->Src[i].Register.SwizzleX;
         temp_inst.Src[0].Register.SwizzleY = inst->Src[i].Register.SwizzleY;
         temp_inst.Src[0].Register.SwizzleZ = inst->Src[i].Register.SwizzleZ;
         temp_inst.Src[0].Register.SwizzleW = inst->Src[i].Register.SwizzleW;
         ctx->emit_instruction(ctx, &temp_inst);

         inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
         inst->Src[i].Register.Index = vtctx->src_temp + i;
         inst->Src[i].Register.SwizzleX = TGSI_SWIZZLE_X;
         inst->Src[i].Register.SwizzleY = TGSI_SWIZZLE_Y;
         inst->Src[i].Register.SwizzleZ = TGSI_SWIZZLE_Z;
         inst->Src[i].Register.SwizzleW = TGSI_SWIZZLE_W;
      }
   }

   /* virglrenderer doesn't resolve non-float output writes properly,
    * so we have to first write to a temporary. */
   if (inst->Instruction.Opcode != TGSI_OPCODE_MOV &&
       !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_tex &&
       !tgsi_get_opcode_info(inst->Instruction.Opcode)->is_store &&
       inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
       tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, 0) != TGSI_TYPE_FLOAT) {
      struct tgsi_full_instruction op_to_temp = *inst;
      op_to_temp.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
      op_to_temp.Dst[0].Register.Index = vtctx->src_temp;
      op_to_temp.Dst[0].Dimension.Indirect = 0;
      op_to_temp.Dst[0].Register.Indirect = 0;
      ctx->emit_instruction(ctx, &op_to_temp);

      inst->Instruction.Opcode = TGSI_OPCODE_MOV;
      inst->Instruction.NumSrcRegs = 1;

      memset(&inst->Src[0], 0, sizeof(inst->Src[0]));
      inst->Src[0].Register.File = TGSI_FILE_TEMPORARY;
      inst->Src[0].Register.Index = vtctx->src_temp;
      inst->Src[0].Register.SwizzleY = 1;
      inst->Src[0].Register.SwizzleZ = 2;
      inst->Src[0].Register.SwizzleW = 3;
   }

   ctx->emit_instruction(ctx, inst);

   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
      if (vtctx->num_writemask_fixups &&
          inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY &&
          inst->Dst[i].Register.Index >= vtctx->writemask_fixup_temps &&
          inst->Dst[i].Register.Index < vtctx->writemask_fixup_temps + vtctx->num_writemask_fixups) {
         /* Emit the fixup MOV from the clipdist/vert temporary to the real output. */
         unsigned real_out = vtctx->writemask_fixup_outs[inst->Dst[i].Register.Index - vtctx->writemask_fixup_temps];
         tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
                                 TGSI_FILE_OUTPUT, real_out, TGSI_WRITEMASK_XYZW,
                                 inst->Dst[i].Register.File, inst->Dst[i].Register.Index);
      }
   }
}
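
/* Entry point: scans the incoming tokens, enables each workaround based on the
 * capabilities the host renderer advertises, and runs the transform callbacks
 * above to produce the token stream that is actually sent to the host.
 */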
struct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const struct tgsi_token *tokens_in,
                                        bool is_separable)
{
   struct virgl_transform_context transform;
   const uint newLen = tgsi_num_tokens(tokens_in);

   memset(&transform, 0, sizeof(transform));
   transform.base.transform_declaration = virgl_tgsi_transform_declaration;
   transform.base.transform_property = virgl_tgsi_transform_property;
   transform.base.transform_instruction = virgl_tgsi_transform_instruction;
   transform.base.prolog = virgl_tgsi_transform_prolog;
   transform.cull_enabled = vscreen->caps.caps.v1.bset.has_cull;
   transform.has_precise = vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_TGSI_PRECISE;
   transform.fake_fp64 =
      vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_FAKE_FP64;
   transform.is_separable = is_separable && (vscreen->caps.caps.v2.capability_bits_v2 & VIRGL_CAP_V2_SSO);

   for (int i = 0; i < ARRAY_SIZE(transform.input_temp); i++)
      transform.input_temp[i].index = ~0;

   tgsi_scan_shader(tokens_in, &transform.info);

   struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base);
   free(transform.precise_flags);
   return new_tokens;
}