1/********************************************************** 2 * Copyright 1998-2022 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26/** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 30 * 31 * \author Mingcheng Chen 32 * \author Brian Paul 33 */ 34 35#include "pipe/p_compiler.h" 36#include "pipe/p_shader_tokens.h" 37#include "pipe/p_defines.h" 38#include "tgsi/tgsi_build.h" 39#include "tgsi/tgsi_dump.h" 40#include "tgsi/tgsi_info.h" 41#include "tgsi/tgsi_parse.h" 42#include "tgsi/tgsi_scan.h" 43#include "tgsi/tgsi_strings.h" 44#include "tgsi/tgsi_two_side.h" 45#include "tgsi/tgsi_aa_point.h" 46#include "tgsi/tgsi_util.h" 47#include "util/u_math.h" 48#include "util/u_memory.h" 49#include "util/u_bitmask.h" 50#include "util/u_debug.h" 51#include "util/u_pstipple.h" 52 53#include "svga_context.h" 54#include "svga_debug.h" 55#include "svga_link.h" 56#include "svga_shader.h" 57#include "svga_tgsi.h" 58 59#include "VGPU10ShaderTokens.h" 60 61 62#define INVALID_INDEX 99999 63#define MAX_INTERNAL_TEMPS 4 64#define MAX_SYSTEM_VALUES 4 65#define MAX_IMMEDIATE_COUNT \ 66 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) 67#define MAX_TEMP_ARRAYS 64 /* Enough? */ 68 69/** 70 * Clipping is complicated. There's four different cases which we 71 * handle during VS/GS shader translation: 72 */ 73enum clipping_mode 74{ 75 CLIP_NONE, /**< No clipping enabled */ 76 CLIP_LEGACY, /**< The shader has no clipping declarations or code but 77 * one or more user-defined clip planes are enabled. We 78 * generate extra code to emit clip distances. 79 */ 80 CLIP_DISTANCE, /**< The shader already declares clip distance output 81 * registers and has code to write to them. 82 */ 83 CLIP_VERTEX /**< The shader declares a clip vertex output register and 84 * has code that writes to the register. We convert the 85 * clipvertex position into one or more clip distances. 86 */ 87}; 88 89 90/* Shader signature info */ 91struct svga_shader_signature 92{ 93 SVGA3dDXShaderSignatureHeader header; 94 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS]; 95 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS]; 96 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS]; 97}; 98 99static inline void 100set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e, 101 unsigned index, 102 SVGA3dDXSignatureSemanticName sgnName, 103 unsigned mask, 104 SVGA3dDXSignatureRegisterComponentType compType, 105 SVGA3dDXSignatureMinPrecision minPrecision) 106{ 107 e->registerIndex = index; 108 e->semanticName = sgnName; 109 e->mask = mask; 110 e->componentType = compType; 111 e->minPrecision = minPrecision; 112}; 113 114static const SVGA3dDXSignatureSemanticName 115tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = { 116 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION, 117 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 123 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE, 124 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 125 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID, 126 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, 127 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, 128 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 129 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE, 130 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 137 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX, 138 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX, 139 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX, 140 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 142 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, 143 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, 144 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 161}; 162 163 164/** 165 * Map tgsi semantic name to SVGA signature semantic name 166 */ 167static inline SVGA3dDXSignatureSemanticName 168map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) 169{ 170 assert(name < TGSI_SEMANTIC_COUNT); 171 172 /* Do a few asserts here to spot check the mapping */ 173 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] == 174 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 175 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] == 176 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX); 177 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] == 178 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID); 179 180 return tgsi_semantic_to_sgn_name[name]; 181} 182 183enum reemit_mode { 184 REEMIT_FALSE = 0, 185 REEMIT_TRUE = 1, 186 REEMIT_IN_PROGRESS = 2 187}; 188 189struct svga_raw_buf_tmp { 190 bool indirect; 191 unsigned buffer_index:8; 192 unsigned element_index:8; 193 unsigned element_rel:8; 194}; 195 196struct svga_shader_emitter_v10 197{ 198 /* The token output buffer */ 199 unsigned size; 200 char *buf; 201 char *ptr; 202 203 /* Information about the shader and state (does not change) */ 204 struct svga_compile_key key; 205 struct tgsi_shader_info info; 206 unsigned unit; 207 unsigned version; /**< Either 40, 41, 50 or 51 at this time */ 208 209 unsigned cur_tgsi_token; /**< current tgsi token position */ 210 unsigned inst_start_token; 211 boolean discard_instruction; /**< throw away current instruction? */ 212 boolean reemit_instruction; /**< reemit current instruction */ 213 boolean reemit_tgsi_instruction; /**< reemit current tgsi instruction */ 214 boolean skip_instruction; /**< skip current instruction */ 215 boolean use_sampler_state_mapping; /* use sampler state mapping */ 216 enum reemit_mode reemit_rawbuf_instruction; 217 218 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; 219 double (*immediates_dbl)[2]; 220 unsigned num_immediates; /**< Number of immediates emitted */ 221 unsigned common_immediate_pos[20]; /**< literals for common immediates */ 222 unsigned num_common_immediates; 223 boolean immediates_emitted; 224 225 unsigned num_outputs; /**< include any extra outputs */ 226 /** The first extra output is reserved for 227 * non-adjusted vertex position for 228 * stream output purpose 229 */ 230 231 /* Temporary Registers */ 232 unsigned num_shader_temps; /**< num of temps used by original shader */ 233 unsigned internal_temp_count; /**< currently allocated internal temps */ 234 struct { 235 unsigned start, size; 236 } temp_arrays[MAX_TEMP_ARRAYS]; 237 unsigned num_temp_arrays; 238 239 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ 240 struct { 241 unsigned arrayId, index; 242 boolean initialized; 243 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ 244 245 unsigned initialize_temp_index; 246 247 /** Number of constants used by original shader for each constant buffer. 248 * The size should probably always match with that of svga_state.constbufs. 249 */ 250 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; 251 252 /* Raw constant buffers */ 253 unsigned raw_buf_srv_start_index; /* starting srv index for raw buffers */ 254 unsigned raw_bufs; /* raw buffers bitmask */ 255 unsigned raw_buf_tmp_index; /* starting temp index for raw buffers */ 256 unsigned raw_buf_cur_tmp_index; /* current temp index for raw buffers */ 257 struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */ 258 259 /* Samplers */ 260 unsigned num_samplers; 261 boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ 262 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ 263 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ 264 265 /* Images */ 266 unsigned num_images; 267 unsigned image_mask; 268 struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES]; 269 unsigned image_size_index; /* starting index to cbuf for image size */ 270 271 /* Shader buffers */ 272 unsigned num_shader_bufs; 273 274 /* HW atomic buffers */ 275 unsigned num_atomic_bufs; 276 unsigned atomic_bufs_mask; 277 unsigned max_atomic_counter_index; 278 VGPU10_OPCODE_TYPE cur_atomic_opcode; /* current atomic opcode */ 279 280 boolean uav_declared; /* True if uav is declared */ 281 282 /* Index Range declaration */ 283 struct { 284 unsigned start_index; 285 unsigned count; 286 boolean required; 287 unsigned operandType; 288 unsigned size; 289 unsigned dim; 290 } index_range; 291 292 /* Address regs (really implemented with temps) */ 293 unsigned num_address_regs; 294 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; 295 296 /* Output register usage masks */ 297 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; 298 299 /* To map TGSI system value index to VGPU shader input indexes */ 300 ubyte system_value_indexes[MAX_SYSTEM_VALUES]; 301 302 struct { 303 /* vertex position scale/translation */ 304 unsigned out_index; /**< the real position output reg */ 305 unsigned tmp_index; /**< the fake/temp position output reg */ 306 unsigned so_index; /**< the non-adjusted position output reg */ 307 unsigned prescale_cbuf_index; /* index to the const buf for prescale */ 308 unsigned prescale_scale_index, prescale_trans_index; 309 unsigned num_prescale; /* number of prescale factor in const buf */ 310 unsigned viewport_index; 311 unsigned need_prescale:1; 312 unsigned have_prescale:1; 313 } vposition; 314 315 /* Shader limits */ 316 unsigned max_vs_inputs; 317 unsigned max_vs_outputs; 318 unsigned max_gs_inputs; 319 320 /* For vertex shaders only */ 321 struct { 322 /* viewport constant */ 323 unsigned viewport_index; 324 325 unsigned vertex_id_bias_index; 326 unsigned vertex_id_sys_index; 327 unsigned vertex_id_tmp_index; 328 329 /* temp index of adjusted vertex attributes */ 330 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; 331 } vs; 332 333 /* For fragment shaders only */ 334 struct { 335 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ 336 unsigned num_color_outputs; 337 unsigned color_tmp_index; /**< fake/temp color output reg */ 338 unsigned alpha_ref_index; /**< immediate constant for alpha ref */ 339 340 /* front-face */ 341 unsigned face_input_index; /**< real fragment shader face reg (bool) */ 342 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ 343 344 unsigned pstipple_sampler_unit; 345 unsigned pstipple_sampler_state_index; 346 347 unsigned fragcoord_input_index; /**< real fragment position input reg */ 348 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ 349 350 unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */ 351 352 unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */ 353 unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */ 354 355 /** TGSI index of sample mask input sys value */ 356 unsigned sample_mask_in_sys_index; 357 358 /* layer */ 359 unsigned layer_input_index; /**< TGSI index of layer */ 360 unsigned layer_imm_index; /**< immediate for default layer 0 */ 361 362 boolean forceEarlyDepthStencil; /**< true if Early Depth stencil test is enabled */ 363 } fs; 364 365 /* For geometry shaders only */ 366 struct { 367 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ 368 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ 369 unsigned input_size; /**< size of input arrays */ 370 unsigned prim_id_index; /**< primitive id register index */ 371 unsigned max_out_vertices; /**< maximum number of output vertices */ 372 unsigned invocations; 373 unsigned invocation_id_sys_index; 374 375 unsigned viewport_index_out_index; 376 unsigned viewport_index_tmp_index; 377 } gs; 378 379 /* For tessellation control shaders only */ 380 struct { 381 unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */ 382 unsigned imm_index; /**< immediate for tcs */ 383 unsigned invocation_id_sys_index; /**< invocation id */ 384 unsigned invocation_id_tmp_index; 385 unsigned instruction_token_pos; /* token pos for the first instruction */ 386 unsigned control_point_input_index; /* control point input register index */ 387 unsigned control_point_addr_index; /* control point input address register */ 388 unsigned control_point_out_index; /* control point output register index */ 389 unsigned control_point_tmp_index; /* control point temporary register */ 390 unsigned control_point_out_count; /* control point output count */ 391 boolean control_point_phase; /* true if in control point phase */ 392 boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */ 393 unsigned patch_generic_out_count; /* per-patch generic output count */ 394 unsigned patch_generic_out_index; /* per-patch generic output register index*/ 395 unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ 396 unsigned prim_id_index; /* primitive id */ 397 struct { 398 unsigned out_index; /* real tessinner output register */ 399 unsigned temp_index; /* tessinner temp register */ 400 unsigned tgsi_index; /* tgsi tessinner output register */ 401 } inner; 402 struct { 403 unsigned out_index; /* real tessouter output register */ 404 unsigned temp_index; /* tessouter temp register */ 405 unsigned tgsi_index; /* tgsi tessouter output register */ 406 } outer; 407 } tcs; 408 409 /* For tessellation evaluation shaders only */ 410 struct { 411 enum pipe_prim_type prim_mode; 412 enum pipe_tess_spacing spacing; 413 boolean vertices_order_cw; 414 boolean point_mode; 415 unsigned tesscoord_sys_index; 416 unsigned swizzle_max; 417 unsigned prim_id_index; /* primitive id */ 418 struct { 419 unsigned in_index; /* real tessinner input register */ 420 unsigned temp_index; /* tessinner temp register */ 421 unsigned tgsi_index; /* tgsi tessinner input register */ 422 } inner; 423 struct { 424 unsigned in_index; /* real tessouter input register */ 425 unsigned temp_index; /* tessouter temp register */ 426 unsigned tgsi_index; /* tgsi tessouter input register */ 427 } outer; 428 } tes; 429 430 struct { 431 unsigned block_width; /* thread group size in x dimension */ 432 unsigned block_height; /* thread group size in y dimension */ 433 unsigned block_depth; /* thread group size in z dimension */ 434 unsigned thread_id_index; /* thread id tgsi index */ 435 unsigned block_id_index; /* block id tgsi index */ 436 bool shared_memory_declared; /* set if shared memory is declared */ 437 struct { 438 unsigned tgsi_index; /* grid size tgsi index */ 439 unsigned imm_index; /* grid size imm index */ 440 } grid_size; 441 } cs; 442 443 /* For vertex or geometry shaders */ 444 enum clipping_mode clip_mode; 445 unsigned clip_dist_out_index; /**< clip distance output register index */ 446 unsigned clip_dist_tmp_index; /**< clip distance temporary register */ 447 unsigned clip_dist_so_index; /**< clip distance shadow copy */ 448 449 /** Index of temporary holding the clipvertex coordinate */ 450 unsigned clip_vertex_out_index; /**< clip vertex output register index */ 451 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ 452 453 /* user clip plane constant slot indexes */ 454 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; 455 456 unsigned num_output_writes; 457 boolean constant_color_output; 458 459 boolean uses_flat_interp; 460 461 unsigned reserved_token; /* index to the reserved token */ 462 boolean uses_precise_qualifier; 463 464 /* For all shaders: const reg index for RECT coord scaling */ 465 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; 466 467 /* For all shaders: const reg index for texture buffer size */ 468 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; 469 470 /** Which texture units are doing shadow comparison in the shader code */ 471 unsigned shadow_compare_units; 472 473 /* VS/TCS/TES/GS/FS Linkage info */ 474 struct shader_linkage linkage; 475 struct tgsi_shader_info *prevShaderInfo; 476 477 /* Shader signature */ 478 struct svga_shader_signature signature; 479 480 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ 481 482 /* For util_debug_message */ 483 struct util_debug_callback svga_debug_callback; 484 485 /* current loop depth in shader */ 486 unsigned current_loop_depth; 487}; 488 489 490static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit); 491static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit); 492static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); 493static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit); 494static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); 495static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); 496static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); 497static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); 498static void emit_image_declarations(struct svga_shader_emitter_v10 *emit); 499static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit); 500static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit); 501static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); 502 503static boolean 504emit_post_helpers(struct svga_shader_emitter_v10 *emit); 505 506static boolean 507emit_vertex(struct svga_shader_emitter_v10 *emit, 508 const struct tgsi_full_instruction *inst); 509 510static boolean 511emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 512 unsigned inst_number, 513 const struct tgsi_full_instruction *inst); 514 515static void 516emit_input_declaration(struct svga_shader_emitter_v10 *emit, 517 unsigned opcodeType, unsigned operandType, 518 unsigned dim, unsigned index, unsigned size, 519 unsigned name, unsigned numComp, 520 unsigned selMode, unsigned usageMask, 521 unsigned interpMode, 522 boolean addSignature, 523 SVGA3dDXSignatureSemanticName sgnName); 524 525static boolean 526emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, 527 unsigned inst_number, 528 const struct tgsi_full_instruction *inst); 529 530static void 531create_temp_array(struct svga_shader_emitter_v10 *emit, 532 unsigned arrayID, unsigned first, unsigned count, 533 unsigned startIndex); 534 535static char err_buf[128]; 536 537static boolean 538expand(struct svga_shader_emitter_v10 *emit) 539{ 540 char *new_buf; 541 unsigned newsize = emit->size * 2; 542 543 if (emit->buf != err_buf) 544 new_buf = REALLOC(emit->buf, emit->size, newsize); 545 else 546 new_buf = NULL; 547 548 if (!new_buf) { 549 emit->ptr = err_buf; 550 emit->buf = err_buf; 551 emit->size = sizeof(err_buf); 552 return FALSE; 553 } 554 555 emit->size = newsize; 556 emit->ptr = new_buf + (emit->ptr - emit->buf); 557 emit->buf = new_buf; 558 return TRUE; 559} 560 561/** 562 * Create and initialize a new svga_shader_emitter_v10 object. 563 */ 564static struct svga_shader_emitter_v10 * 565alloc_emitter(void) 566{ 567 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); 568 569 if (!emit) 570 return NULL; 571 572 /* to initialize the output buffer */ 573 emit->size = 512; 574 if (!expand(emit)) { 575 FREE(emit); 576 return NULL; 577 } 578 return emit; 579} 580 581/** 582 * Free an svga_shader_emitter_v10 object. 583 */ 584static void 585free_emitter(struct svga_shader_emitter_v10 *emit) 586{ 587 assert(emit); 588 FREE(emit->buf); /* will be NULL if translation succeeded */ 589 FREE(emit); 590} 591 592static inline boolean 593reserve(struct svga_shader_emitter_v10 *emit, 594 unsigned nr_dwords) 595{ 596 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { 597 if (!expand(emit)) 598 return FALSE; 599 } 600 601 return TRUE; 602} 603 604static boolean 605emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) 606{ 607 if (!reserve(emit, 1)) 608 return FALSE; 609 610 *(uint32 *)emit->ptr = dword; 611 emit->ptr += sizeof dword; 612 return TRUE; 613} 614 615static boolean 616emit_dwords(struct svga_shader_emitter_v10 *emit, 617 const uint32 *dwords, 618 unsigned nr) 619{ 620 if (!reserve(emit, nr)) 621 return FALSE; 622 623 memcpy(emit->ptr, dwords, nr * sizeof *dwords); 624 emit->ptr += nr * sizeof *dwords; 625 return TRUE; 626} 627 628/** Return the number of tokens in the emitter's buffer */ 629static unsigned 630emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) 631{ 632 return (emit->ptr - emit->buf) / sizeof(unsigned); 633} 634 635 636/** 637 * Check for register overflow. If we overflow we'll set an 638 * error flag. This function can be called for register declarations 639 * or use as src/dst instruction operands. 640 * \param type register type. One of VGPU10_OPERAND_TYPE_x 641 or VGPU10_OPCODE_DCL_x 642 * \param index the register index 643 */ 644static void 645check_register_index(struct svga_shader_emitter_v10 *emit, 646 unsigned operandType, unsigned index) 647{ 648 bool overflow_before = emit->register_overflow; 649 650 switch (operandType) { 651 case VGPU10_OPERAND_TYPE_TEMP: 652 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: 653 case VGPU10_OPCODE_DCL_TEMPS: 654 if (index >= VGPU10_MAX_TEMPS) { 655 emit->register_overflow = TRUE; 656 } 657 break; 658 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: 659 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: 660 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 661 emit->register_overflow = TRUE; 662 } 663 break; 664 case VGPU10_OPERAND_TYPE_INPUT: 665 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: 666 case VGPU10_OPCODE_DCL_INPUT: 667 case VGPU10_OPCODE_DCL_INPUT_SGV: 668 case VGPU10_OPCODE_DCL_INPUT_SIV: 669 case VGPU10_OPCODE_DCL_INPUT_PS: 670 case VGPU10_OPCODE_DCL_INPUT_PS_SGV: 671 case VGPU10_OPCODE_DCL_INPUT_PS_SIV: 672 if ((emit->unit == PIPE_SHADER_VERTEX && 673 index >= emit->max_vs_inputs) || 674 (emit->unit == PIPE_SHADER_GEOMETRY && 675 index >= emit->max_gs_inputs) || 676 (emit->unit == PIPE_SHADER_FRAGMENT && 677 index >= VGPU10_MAX_FS_INPUTS) || 678 (emit->unit == PIPE_SHADER_TESS_CTRL && 679 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) || 680 (emit->unit == PIPE_SHADER_TESS_EVAL && 681 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { 682 emit->register_overflow = TRUE; 683 } 684 break; 685 case VGPU10_OPERAND_TYPE_OUTPUT: 686 case VGPU10_OPCODE_DCL_OUTPUT: 687 case VGPU10_OPCODE_DCL_OUTPUT_SGV: 688 case VGPU10_OPCODE_DCL_OUTPUT_SIV: 689 /* Note: we are skipping two output indices in tcs for 690 * tessinner/outer levels. Implementation will not exceed 691 * number of output count but it allows index to go beyond 692 * VGPU11_MAX_HS_OUTPUTS. 693 * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2 694 */ 695 if ((emit->unit == PIPE_SHADER_VERTEX && 696 index >= emit->max_vs_outputs) || 697 (emit->unit == PIPE_SHADER_GEOMETRY && 698 index >= VGPU10_MAX_GS_OUTPUTS) || 699 (emit->unit == PIPE_SHADER_FRAGMENT && 700 index >= VGPU10_MAX_FS_OUTPUTS) || 701 (emit->unit == PIPE_SHADER_TESS_CTRL && 702 index >= VGPU11_MAX_HS_OUTPUTS + 2) || 703 (emit->unit == PIPE_SHADER_TESS_EVAL && 704 index >= VGPU11_MAX_DS_OUTPUTS)) { 705 emit->register_overflow = TRUE; 706 } 707 break; 708 case VGPU10_OPERAND_TYPE_SAMPLER: 709 case VGPU10_OPCODE_DCL_SAMPLER: 710 if (index >= VGPU10_MAX_SAMPLERS) { 711 emit->register_overflow = TRUE; 712 } 713 break; 714 case VGPU10_OPERAND_TYPE_RESOURCE: 715 case VGPU10_OPCODE_DCL_RESOURCE: 716 if (index >= VGPU10_MAX_RESOURCES) { 717 emit->register_overflow = TRUE; 718 } 719 break; 720 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 721 if (index >= MAX_IMMEDIATE_COUNT) { 722 emit->register_overflow = TRUE; 723 } 724 break; 725 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: 726 case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID: 727 case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: 728 case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT: 729 case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT: 730 case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT: 731 case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID: 732 case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: 733 /* nothing */ 734 break; 735 default: 736 assert(0); 737 ; /* nothing */ 738 } 739 740 if (emit->register_overflow && !overflow_before) { 741 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", 742 operandType, index); 743 } 744} 745 746 747/** 748 * Examine misc state to determine the clipping mode. 749 */ 750static void 751determine_clipping_mode(struct svga_shader_emitter_v10 *emit) 752{ 753 /* num_written_clipdistance in the shader info for tessellation 754 * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 755 * is not defined for this shader. So we go through all the output declarations 756 * to set the num_written_clipdistance. This is just to determine the 757 * clipping mode. 758 */ 759 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 760 unsigned i; 761 for (i = 0; i < emit->info.num_outputs; i++) { 762 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { 763 emit->info.num_written_clipdistance = 764 4 * (emit->info.output_semantic_index[i] + 1); 765 } 766 } 767 } 768 769 if (emit->info.num_written_clipdistance > 0) { 770 emit->clip_mode = CLIP_DISTANCE; 771 } 772 else if (emit->info.writes_clipvertex) { 773 emit->clip_mode = CLIP_VERTEX; 774 } 775 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) { 776 /* 777 * Only the last shader in the vertex processing stage needs to 778 * handle the legacy clip mode. 779 */ 780 emit->clip_mode = CLIP_LEGACY; 781 } 782 else { 783 emit->clip_mode = CLIP_NONE; 784 } 785} 786 787 788/** 789 * For clip distance register declarations and clip distance register 790 * writes we need to mask the declaration usage or instruction writemask 791 * (respectively) against the set of the really-enabled clipping planes. 792 * 793 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables 794 * has a VS that writes to all 8 clip distance registers, but the plane enable 795 * flags are a subset of that. 796 * 797 * This function is used to apply the plane enable flags to the register 798 * declaration or instruction writemask. 799 * 800 * \param writemask the declaration usage mask or instruction writemask 801 * \param clip_reg_index which clip plane register is being declared/written. 802 * The legal values are 0 and 1 (two clip planes per 803 * register, for a total of 8 clip planes) 804 */ 805static unsigned 806apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 807 unsigned writemask, unsigned clip_reg_index) 808{ 809 unsigned shift; 810 811 assert(clip_reg_index < 2); 812 813 /* four clip planes per clip register: */ 814 shift = clip_reg_index * 4; 815 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 816 817 return writemask; 818} 819 820 821/** 822 * Translate gallium shader type into VGPU10 type. 823 */ 824static VGPU10_PROGRAM_TYPE 825translate_shader_type(unsigned type) 826{ 827 switch (type) { 828 case PIPE_SHADER_VERTEX: 829 return VGPU10_VERTEX_SHADER; 830 case PIPE_SHADER_GEOMETRY: 831 return VGPU10_GEOMETRY_SHADER; 832 case PIPE_SHADER_FRAGMENT: 833 return VGPU10_PIXEL_SHADER; 834 case PIPE_SHADER_TESS_CTRL: 835 return VGPU10_HULL_SHADER; 836 case PIPE_SHADER_TESS_EVAL: 837 return VGPU10_DOMAIN_SHADER; 838 case PIPE_SHADER_COMPUTE: 839 return VGPU10_COMPUTE_SHADER; 840 default: 841 assert(!"Unexpected shader type"); 842 return VGPU10_VERTEX_SHADER; 843 } 844} 845 846 847/** 848 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 849 * Note: we only need to translate the opcodes for "simple" instructions, 850 * as seen below. All other opcodes are handled/translated specially. 851 */ 852static VGPU10_OPCODE_TYPE 853translate_opcode(enum tgsi_opcode opcode) 854{ 855 switch (opcode) { 856 case TGSI_OPCODE_MOV: 857 return VGPU10_OPCODE_MOV; 858 case TGSI_OPCODE_MUL: 859 return VGPU10_OPCODE_MUL; 860 case TGSI_OPCODE_ADD: 861 return VGPU10_OPCODE_ADD; 862 case TGSI_OPCODE_DP3: 863 return VGPU10_OPCODE_DP3; 864 case TGSI_OPCODE_DP4: 865 return VGPU10_OPCODE_DP4; 866 case TGSI_OPCODE_MIN: 867 return VGPU10_OPCODE_MIN; 868 case TGSI_OPCODE_MAX: 869 return VGPU10_OPCODE_MAX; 870 case TGSI_OPCODE_MAD: 871 return VGPU10_OPCODE_MAD; 872 case TGSI_OPCODE_SQRT: 873 return VGPU10_OPCODE_SQRT; 874 case TGSI_OPCODE_FRC: 875 return VGPU10_OPCODE_FRC; 876 case TGSI_OPCODE_FLR: 877 return VGPU10_OPCODE_ROUND_NI; 878 case TGSI_OPCODE_FSEQ: 879 return VGPU10_OPCODE_EQ; 880 case TGSI_OPCODE_FSGE: 881 return VGPU10_OPCODE_GE; 882 case TGSI_OPCODE_FSNE: 883 return VGPU10_OPCODE_NE; 884 case TGSI_OPCODE_DDX: 885 return VGPU10_OPCODE_DERIV_RTX; 886 case TGSI_OPCODE_DDY: 887 return VGPU10_OPCODE_DERIV_RTY; 888 case TGSI_OPCODE_RET: 889 return VGPU10_OPCODE_RET; 890 case TGSI_OPCODE_DIV: 891 return VGPU10_OPCODE_DIV; 892 case TGSI_OPCODE_IDIV: 893 return VGPU10_OPCODE_VMWARE; 894 case TGSI_OPCODE_DP2: 895 return VGPU10_OPCODE_DP2; 896 case TGSI_OPCODE_BRK: 897 return VGPU10_OPCODE_BREAK; 898 case TGSI_OPCODE_IF: 899 return VGPU10_OPCODE_IF; 900 case TGSI_OPCODE_ELSE: 901 return VGPU10_OPCODE_ELSE; 902 case TGSI_OPCODE_ENDIF: 903 return VGPU10_OPCODE_ENDIF; 904 case TGSI_OPCODE_CEIL: 905 return VGPU10_OPCODE_ROUND_PI; 906 case TGSI_OPCODE_I2F: 907 return VGPU10_OPCODE_ITOF; 908 case TGSI_OPCODE_NOT: 909 return VGPU10_OPCODE_NOT; 910 case TGSI_OPCODE_TRUNC: 911 return VGPU10_OPCODE_ROUND_Z; 912 case TGSI_OPCODE_SHL: 913 return VGPU10_OPCODE_ISHL; 914 case TGSI_OPCODE_AND: 915 return VGPU10_OPCODE_AND; 916 case TGSI_OPCODE_OR: 917 return VGPU10_OPCODE_OR; 918 case TGSI_OPCODE_XOR: 919 return VGPU10_OPCODE_XOR; 920 case TGSI_OPCODE_CONT: 921 return VGPU10_OPCODE_CONTINUE; 922 case TGSI_OPCODE_EMIT: 923 return VGPU10_OPCODE_EMIT; 924 case TGSI_OPCODE_ENDPRIM: 925 return VGPU10_OPCODE_CUT; 926 case TGSI_OPCODE_BGNLOOP: 927 return VGPU10_OPCODE_LOOP; 928 case TGSI_OPCODE_ENDLOOP: 929 return VGPU10_OPCODE_ENDLOOP; 930 case TGSI_OPCODE_ENDSUB: 931 return VGPU10_OPCODE_RET; 932 case TGSI_OPCODE_NOP: 933 return VGPU10_OPCODE_NOP; 934 case TGSI_OPCODE_END: 935 return VGPU10_OPCODE_RET; 936 case TGSI_OPCODE_F2I: 937 return VGPU10_OPCODE_FTOI; 938 case TGSI_OPCODE_IMAX: 939 return VGPU10_OPCODE_IMAX; 940 case TGSI_OPCODE_IMIN: 941 return VGPU10_OPCODE_IMIN; 942 case TGSI_OPCODE_UDIV: 943 case TGSI_OPCODE_UMOD: 944 case TGSI_OPCODE_MOD: 945 return VGPU10_OPCODE_UDIV; 946 case TGSI_OPCODE_IMUL_HI: 947 return VGPU10_OPCODE_IMUL; 948 case TGSI_OPCODE_INEG: 949 return VGPU10_OPCODE_INEG; 950 case TGSI_OPCODE_ISHR: 951 return VGPU10_OPCODE_ISHR; 952 case TGSI_OPCODE_ISGE: 953 return VGPU10_OPCODE_IGE; 954 case TGSI_OPCODE_ISLT: 955 return VGPU10_OPCODE_ILT; 956 case TGSI_OPCODE_F2U: 957 return VGPU10_OPCODE_FTOU; 958 case TGSI_OPCODE_UADD: 959 return VGPU10_OPCODE_IADD; 960 case TGSI_OPCODE_U2F: 961 return VGPU10_OPCODE_UTOF; 962 case TGSI_OPCODE_UCMP: 963 return VGPU10_OPCODE_MOVC; 964 case TGSI_OPCODE_UMAD: 965 return VGPU10_OPCODE_UMAD; 966 case TGSI_OPCODE_UMAX: 967 return VGPU10_OPCODE_UMAX; 968 case TGSI_OPCODE_UMIN: 969 return VGPU10_OPCODE_UMIN; 970 case TGSI_OPCODE_UMUL: 971 case TGSI_OPCODE_UMUL_HI: 972 return VGPU10_OPCODE_UMUL; 973 case TGSI_OPCODE_USEQ: 974 return VGPU10_OPCODE_IEQ; 975 case TGSI_OPCODE_USGE: 976 return VGPU10_OPCODE_UGE; 977 case TGSI_OPCODE_USHR: 978 return VGPU10_OPCODE_USHR; 979 case TGSI_OPCODE_USLT: 980 return VGPU10_OPCODE_ULT; 981 case TGSI_OPCODE_USNE: 982 return VGPU10_OPCODE_INE; 983 case TGSI_OPCODE_SWITCH: 984 return VGPU10_OPCODE_SWITCH; 985 case TGSI_OPCODE_CASE: 986 return VGPU10_OPCODE_CASE; 987 case TGSI_OPCODE_DEFAULT: 988 return VGPU10_OPCODE_DEFAULT; 989 case TGSI_OPCODE_ENDSWITCH: 990 return VGPU10_OPCODE_ENDSWITCH; 991 case TGSI_OPCODE_FSLT: 992 return VGPU10_OPCODE_LT; 993 case TGSI_OPCODE_ROUND: 994 return VGPU10_OPCODE_ROUND_NE; 995 /* Begin SM5 opcodes */ 996 case TGSI_OPCODE_F2D: 997 return VGPU10_OPCODE_FTOD; 998 case TGSI_OPCODE_D2F: 999 return VGPU10_OPCODE_DTOF; 1000 case TGSI_OPCODE_DMUL: 1001 return VGPU10_OPCODE_DMUL; 1002 case TGSI_OPCODE_DADD: 1003 return VGPU10_OPCODE_DADD; 1004 case TGSI_OPCODE_DMAX: 1005 return VGPU10_OPCODE_DMAX; 1006 case TGSI_OPCODE_DMIN: 1007 return VGPU10_OPCODE_DMIN; 1008 case TGSI_OPCODE_DSEQ: 1009 return VGPU10_OPCODE_DEQ; 1010 case TGSI_OPCODE_DSGE: 1011 return VGPU10_OPCODE_DGE; 1012 case TGSI_OPCODE_DSLT: 1013 return VGPU10_OPCODE_DLT; 1014 case TGSI_OPCODE_DSNE: 1015 return VGPU10_OPCODE_DNE; 1016 case TGSI_OPCODE_IBFE: 1017 return VGPU10_OPCODE_IBFE; 1018 case TGSI_OPCODE_UBFE: 1019 return VGPU10_OPCODE_UBFE; 1020 case TGSI_OPCODE_BFI: 1021 return VGPU10_OPCODE_BFI; 1022 case TGSI_OPCODE_BREV: 1023 return VGPU10_OPCODE_BFREV; 1024 case TGSI_OPCODE_POPC: 1025 return VGPU10_OPCODE_COUNTBITS; 1026 case TGSI_OPCODE_LSB: 1027 return VGPU10_OPCODE_FIRSTBIT_LO; 1028 case TGSI_OPCODE_IMSB: 1029 return VGPU10_OPCODE_FIRSTBIT_SHI; 1030 case TGSI_OPCODE_UMSB: 1031 return VGPU10_OPCODE_FIRSTBIT_HI; 1032 case TGSI_OPCODE_INTERP_CENTROID: 1033 return VGPU10_OPCODE_EVAL_CENTROID; 1034 case TGSI_OPCODE_INTERP_SAMPLE: 1035 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; 1036 case TGSI_OPCODE_BARRIER: 1037 return VGPU10_OPCODE_SYNC; 1038 case TGSI_OPCODE_DFMA: 1039 return VGPU10_OPCODE_DFMA; 1040 case TGSI_OPCODE_FMA: 1041 return VGPU10_OPCODE_MAD; 1042 1043 /* DX11.1 Opcodes */ 1044 case TGSI_OPCODE_DDIV: 1045 return VGPU10_OPCODE_DDIV; 1046 case TGSI_OPCODE_DRCP: 1047 return VGPU10_OPCODE_DRCP; 1048 case TGSI_OPCODE_D2I: 1049 return VGPU10_OPCODE_DTOI; 1050 case TGSI_OPCODE_D2U: 1051 return VGPU10_OPCODE_DTOU; 1052 case TGSI_OPCODE_I2D: 1053 return VGPU10_OPCODE_ITOD; 1054 case TGSI_OPCODE_U2D: 1055 return VGPU10_OPCODE_UTOD; 1056 1057 case TGSI_OPCODE_SAMPLE_POS: 1058 /* Note: we never actually get this opcode because there's no GLSL 1059 * function to query multisample resource sample positions. There's 1060 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the 1061 * position of the current sample in the render target. 1062 */ 1063 FALLTHROUGH; 1064 case TGSI_OPCODE_SAMPLE_INFO: 1065 /* NOTE: we never actually get this opcode because the GLSL compiler 1066 * implements the gl_NumSamples variable with a simple constant in the 1067 * constant buffer. 1068 */ 1069 FALLTHROUGH; 1070 default: 1071 assert(!"Unexpected TGSI opcode in translate_opcode()"); 1072 return VGPU10_OPCODE_NOP; 1073 } 1074} 1075 1076 1077/** 1078 * Translate a TGSI register file type into a VGPU10 operand type. 1079 * \param array is the TGSI_FILE_TEMPORARY register an array? 1080 */ 1081static VGPU10_OPERAND_TYPE 1082translate_register_file(enum tgsi_file_type file, boolean array) 1083{ 1084 switch (file) { 1085 case TGSI_FILE_CONSTANT: 1086 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 1087 case TGSI_FILE_INPUT: 1088 return VGPU10_OPERAND_TYPE_INPUT; 1089 case TGSI_FILE_OUTPUT: 1090 return VGPU10_OPERAND_TYPE_OUTPUT; 1091 case TGSI_FILE_TEMPORARY: 1092 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP 1093 : VGPU10_OPERAND_TYPE_TEMP; 1094 case TGSI_FILE_IMMEDIATE: 1095 /* all immediates are 32-bit values at this time so 1096 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 1097 */ 1098 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; 1099 case TGSI_FILE_SAMPLER: 1100 return VGPU10_OPERAND_TYPE_SAMPLER; 1101 case TGSI_FILE_SYSTEM_VALUE: 1102 return VGPU10_OPERAND_TYPE_INPUT; 1103 1104 /* XXX TODO more cases to finish */ 1105 1106 default: 1107 assert(!"Bad tgsi register file!"); 1108 return VGPU10_OPERAND_TYPE_NULL; 1109 } 1110} 1111 1112 1113/** 1114 * Emit a null dst register 1115 */ 1116static void 1117emit_null_dst_register(struct svga_shader_emitter_v10 *emit) 1118{ 1119 VGPU10OperandToken0 operand; 1120 1121 operand.value = 0; 1122 operand.operandType = VGPU10_OPERAND_TYPE_NULL; 1123 operand.numComponents = VGPU10_OPERAND_0_COMPONENT; 1124 1125 emit_dword(emit, operand.value); 1126} 1127 1128 1129/** 1130 * If the given register is a temporary, return the array ID. 1131 * Else return zero. 1132 */ 1133static unsigned 1134get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 1135 enum tgsi_file_type file, unsigned index) 1136{ 1137 if (file == TGSI_FILE_TEMPORARY) { 1138 return emit->temp_map[index].arrayId; 1139 } 1140 else { 1141 return 0; 1142 } 1143} 1144 1145 1146/** 1147 * If the given register is a temporary, convert the index from a TGSI 1148 * TEMPORARY index to a VGPU10 temp index. 1149 */ 1150static unsigned 1151remap_temp_index(const struct svga_shader_emitter_v10 *emit, 1152 enum tgsi_file_type file, unsigned index) 1153{ 1154 if (file == TGSI_FILE_TEMPORARY) { 1155 return emit->temp_map[index].index; 1156 } 1157 else { 1158 return index; 1159 } 1160} 1161 1162 1163/** 1164 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 1165 * Note: the operandType field must already be initialized. 1166 * \param file the register file being accessed 1167 * \param indirect using indirect addressing of the register file? 1168 * \param index2D if true, 2-D indexing is being used (const or temp registers) 1169 * \param indirect2D if true, 2-D indirect indexing being used (for const buf) 1170 */ 1171static VGPU10OperandToken0 1172setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 1173 VGPU10OperandToken0 operand0, 1174 enum tgsi_file_type file, 1175 boolean indirect, 1176 boolean index2D, bool indirect2D) 1177{ 1178 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep; 1179 VGPU10_OPERAND_INDEX_DIMENSION indexDim; 1180 1181 /* 1182 * Compute index dimensions 1183 */ 1184 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 1185 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || 1186 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || 1187 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || 1188 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP || 1189 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) { 1190 /* there's no swizzle for in-line immediates */ 1191 indexDim = VGPU10_OPERAND_INDEX_0D; 1192 assert(operand0.selectionMode == 0); 1193 } 1194 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) { 1195 indexDim = VGPU10_OPERAND_INDEX_0D; 1196 } 1197 else { 1198 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D; 1199 } 1200 1201 /* 1202 * Compute index representation(s) (immediate vs relative). 1203 */ 1204 if (indexDim == VGPU10_OPERAND_INDEX_2D) { 1205 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1206 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1207 1208 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1209 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1210 } 1211 else if (indexDim == VGPU10_OPERAND_INDEX_1D) { 1212 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1213 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1214 1215 index1Rep = 0; 1216 } 1217 else { 1218 index0Rep = 0; 1219 index1Rep = 0; 1220 } 1221 1222 operand0.indexDimension = indexDim; 1223 operand0.index0Representation = index0Rep; 1224 operand0.index1Representation = index1Rep; 1225 1226 return operand0; 1227} 1228 1229 1230/** 1231 * Emit the operand for expressing an address register for indirect indexing. 1232 * Note that the address register is really just a temp register. 1233 * \param addr_reg_index which address register to use 1234 */ 1235static void 1236emit_indirect_register(struct svga_shader_emitter_v10 *emit, 1237 unsigned addr_reg_index) 1238{ 1239 unsigned tmp_reg_index; 1240 VGPU10OperandToken0 operand0; 1241 1242 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 1243 1244 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 1245 1246 /* operand0 is a simple temporary register, selecting one component */ 1247 operand0.value = 0; 1248 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 1249 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1250 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1251 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 1252 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1253 operand0.swizzleX = 0; 1254 operand0.swizzleY = 1; 1255 operand0.swizzleZ = 2; 1256 operand0.swizzleW = 3; 1257 1258 emit_dword(emit, operand0.value); 1259 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 1260} 1261 1262 1263/** 1264 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 1265 * \param emit the emitter context 1266 * \param reg the TGSI dst register to translate 1267 */ 1268static void 1269emit_dst_register(struct svga_shader_emitter_v10 *emit, 1270 const struct tgsi_full_dst_register *reg) 1271{ 1272 enum tgsi_file_type file = reg->Register.File; 1273 unsigned index = reg->Register.Index; 1274 const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index]; 1275 const unsigned sem_index = emit->info.output_semantic_index[index]; 1276 unsigned writemask = reg->Register.WriteMask; 1277 const boolean indirect = reg->Register.Indirect; 1278 unsigned tempArrayId = get_temp_array_id(emit, file, index); 1279 boolean index2d = reg->Register.Dimension || tempArrayId > 0; 1280 VGPU10OperandToken0 operand0; 1281 1282 if (file == TGSI_FILE_TEMPORARY) { 1283 emit->temp_map[index].initialized = TRUE; 1284 } 1285 1286 if (file == TGSI_FILE_OUTPUT) { 1287 if (emit->unit == PIPE_SHADER_VERTEX || 1288 emit->unit == PIPE_SHADER_GEOMETRY || 1289 emit->unit == PIPE_SHADER_TESS_EVAL) { 1290 if (index == emit->vposition.out_index && 1291 emit->vposition.tmp_index != INVALID_INDEX) { 1292 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the 1293 * vertex position result in a temporary so that we can modify 1294 * it in the post_helper() code. 1295 */ 1296 file = TGSI_FILE_TEMPORARY; 1297 index = emit->vposition.tmp_index; 1298 } 1299 else if (sem_name == TGSI_SEMANTIC_CLIPDIST && 1300 emit->clip_dist_tmp_index != INVALID_INDEX) { 1301 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. 1302 * We store the clip distance in a temporary first, then 1303 * we'll copy it to the shadow copy and to CLIPDIST with the 1304 * enabled planes mask in emit_clip_distance_instructions(). 1305 */ 1306 file = TGSI_FILE_TEMPORARY; 1307 index = emit->clip_dist_tmp_index + sem_index; 1308 } 1309 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && 1310 emit->clip_vertex_tmp_index != INVALID_INDEX) { 1311 /* replace the CLIPVERTEX output register with a temporary */ 1312 assert(emit->clip_mode == CLIP_VERTEX); 1313 assert(sem_index == 0); 1314 file = TGSI_FILE_TEMPORARY; 1315 index = emit->clip_vertex_tmp_index; 1316 } 1317 else if (sem_name == TGSI_SEMANTIC_COLOR && 1318 emit->key.clamp_vertex_color) { 1319 1320 /* set the saturate modifier of the instruction 1321 * to clamp the vertex color. 1322 */ 1323 VGPU10OpcodeToken0 *token = 1324 (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token; 1325 token->saturate = TRUE; 1326 } 1327 else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX && 1328 emit->gs.viewport_index_out_index != INVALID_INDEX) { 1329 file = TGSI_FILE_TEMPORARY; 1330 index = emit->gs.viewport_index_tmp_index; 1331 } 1332 } 1333 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 1334 if (sem_name == TGSI_SEMANTIC_POSITION) { 1335 /* Fragment depth output register */ 1336 operand0.value = 0; 1337 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 1338 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1339 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1340 emit_dword(emit, operand0.value); 1341 return; 1342 } 1343 else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) { 1344 /* Fragment sample mask output */ 1345 operand0.value = 0; 1346 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; 1347 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1348 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1349 emit_dword(emit, operand0.value); 1350 return; 1351 } 1352 else if (index == emit->fs.color_out_index[0] && 1353 emit->fs.color_tmp_index != INVALID_INDEX) { 1354 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the 1355 * fragment color result in a temporary so that we can read it 1356 * it in the post_helper() code. 1357 */ 1358 file = TGSI_FILE_TEMPORARY; 1359 index = emit->fs.color_tmp_index; 1360 } 1361 else { 1362 /* Typically, for fragment shaders, the output register index 1363 * matches the color semantic index. But not when we write to 1364 * the fragment depth register. In that case, OUT[0] will be 1365 * fragdepth and OUT[1] will be the 0th color output. We need 1366 * to use the semantic index for color outputs. 1367 */ 1368 assert(sem_name == TGSI_SEMANTIC_COLOR); 1369 index = emit->info.output_semantic_index[index]; 1370 1371 emit->num_output_writes++; 1372 } 1373 } 1374 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 1375 if (index == emit->tcs.inner.tgsi_index) { 1376 /* replace OUTPUT[TESSLEVEL] with temp. We are storing it 1377 * in temporary for now so that will be store into appropriate 1378 * registers in post_helper() in patch constant phase. 1379 */ 1380 if (emit->tcs.control_point_phase) { 1381 /* Discard writing into tessfactor in control point phase */ 1382 emit->discard_instruction = TRUE; 1383 } 1384 else { 1385 file = TGSI_FILE_TEMPORARY; 1386 index = emit->tcs.inner.temp_index; 1387 } 1388 } 1389 else if (index == emit->tcs.outer.tgsi_index) { 1390 /* replace OUTPUT[TESSLEVEL] with temp. We are storing it 1391 * in temporary for now so that will be store into appropriate 1392 * registers in post_helper(). 1393 */ 1394 if (emit->tcs.control_point_phase) { 1395 /* Discard writing into tessfactor in control point phase */ 1396 emit->discard_instruction = TRUE; 1397 } 1398 else { 1399 file = TGSI_FILE_TEMPORARY; 1400 index = emit->tcs.outer.temp_index; 1401 } 1402 } 1403 else if (index >= emit->tcs.patch_generic_out_index && 1404 index < (emit->tcs.patch_generic_out_index + 1405 emit->tcs.patch_generic_out_count)) { 1406 if (emit->tcs.control_point_phase) { 1407 /* Discard writing into generic patch constant outputs in 1408 control point phase */ 1409 emit->discard_instruction = TRUE; 1410 } 1411 else { 1412 if (emit->reemit_instruction) { 1413 /* Store results of reemitted instruction in temporary register. */ 1414 file = TGSI_FILE_TEMPORARY; 1415 index = emit->tcs.patch_generic_tmp_index + 1416 (index - emit->tcs.patch_generic_out_index); 1417 /** 1418 * Temporaries for patch constant data can be done 1419 * as indexable temporaries. 1420 */ 1421 tempArrayId = get_temp_array_id(emit, file, index); 1422 index2d = tempArrayId > 0; 1423 1424 emit->reemit_instruction = FALSE; 1425 } 1426 else { 1427 /* If per-patch outputs is been read in shader, we 1428 * reemit instruction and store results in temporaries in 1429 * patch constant phase. */ 1430 if (emit->info.reads_perpatch_outputs) { 1431 emit->reemit_instruction = TRUE; 1432 } 1433 } 1434 } 1435 } 1436 else if (reg->Register.Dimension) { 1437 /* Only control point outputs are declared 2D in tgsi */ 1438 if (emit->tcs.control_point_phase) { 1439 if (emit->reemit_instruction) { 1440 /* Store results of reemitted instruction in temporary register. */ 1441 index2d = FALSE; 1442 file = TGSI_FILE_TEMPORARY; 1443 index = emit->tcs.control_point_tmp_index + 1444 (index - emit->tcs.control_point_out_index); 1445 emit->reemit_instruction = FALSE; 1446 } 1447 else { 1448 /* The mapped control point outputs are 1-D */ 1449 index2d = FALSE; 1450 if (emit->info.reads_pervertex_outputs) { 1451 /* If per-vertex outputs is been read in shader, we 1452 * reemit instruction and store results in temporaries 1453 * control point phase. */ 1454 emit->reemit_instruction = TRUE; 1455 } 1456 } 1457 1458 if (sem_name == TGSI_SEMANTIC_CLIPDIST && 1459 emit->clip_dist_tmp_index != INVALID_INDEX) { 1460 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. 1461 * We store the clip distance in a temporary first, then 1462 * we'll copy it to the shadow copy and to CLIPDIST with the 1463 * enabled planes mask in emit_clip_distance_instructions(). 1464 */ 1465 file = TGSI_FILE_TEMPORARY; 1466 index = emit->clip_dist_tmp_index + sem_index; 1467 } 1468 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && 1469 emit->clip_vertex_tmp_index != INVALID_INDEX) { 1470 /* replace the CLIPVERTEX output register with a temporary */ 1471 assert(emit->clip_mode == CLIP_VERTEX); 1472 assert(sem_index == 0); 1473 file = TGSI_FILE_TEMPORARY; 1474 index = emit->clip_vertex_tmp_index; 1475 } 1476 } 1477 else { 1478 /* Discard writing into control point outputs in 1479 patch constant phase */ 1480 emit->discard_instruction = TRUE; 1481 } 1482 } 1483 } 1484 } 1485 1486 /* init operand tokens to all zero */ 1487 operand0.value = 0; 1488 1489 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1490 1491 /* the operand has a writemask */ 1492 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 1493 1494 /* Which of the four dest components to write to. Note that we can use a 1495 * simple assignment here since TGSI writemasks match VGPU10 writemasks. 1496 */ 1497 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); 1498 operand0.mask = writemask; 1499 1500 /* translate TGSI register file type to VGPU10 operand type */ 1501 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1502 1503 check_register_index(emit, operand0.operandType, index); 1504 1505 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1506 index2d, FALSE); 1507 1508 /* Emit tokens */ 1509 emit_dword(emit, operand0.value); 1510 if (tempArrayId > 0) { 1511 emit_dword(emit, tempArrayId); 1512 } 1513 1514 emit_dword(emit, remap_temp_index(emit, file, index)); 1515 1516 if (indirect) { 1517 emit_indirect_register(emit, reg->Indirect.Index); 1518 } 1519} 1520 1521 1522/** 1523 * Check if temporary register needs to be initialize when 1524 * shader is not using indirect addressing for temporary and uninitialized 1525 * temporary is not used in loop. In these two scenarios, we cannot 1526 * determine if temporary is initialized or not. 1527 */ 1528static boolean 1529need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, 1530 unsigned index) 1531{ 1532 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) 1533 && emit->current_loop_depth == 0) { 1534 if (!emit->temp_map[index].initialized && 1535 emit->temp_map[index].index < emit->num_shader_temps) { 1536 return TRUE; 1537 } 1538 } 1539 1540 return FALSE; 1541} 1542 1543 1544/** 1545 * Translate a src register of a TGSI instruction and emit VGPU10 tokens. 1546 * In quite a few cases, we do register substitution. For example, if 1547 * the TGSI register is the front/back-face register, we replace that with 1548 * a temp register containing a value we computed earlier. 1549 */ 1550static void 1551emit_src_register(struct svga_shader_emitter_v10 *emit, 1552 const struct tgsi_full_src_register *reg) 1553{ 1554 enum tgsi_file_type file = reg->Register.File; 1555 unsigned index = reg->Register.Index; 1556 boolean indirect = reg->Register.Indirect; 1557 unsigned tempArrayId = get_temp_array_id(emit, file, index); 1558 boolean index2d = (reg->Register.Dimension || 1559 tempArrayId > 0 || 1560 file == TGSI_FILE_CONSTANT); 1561 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; 1562 boolean indirect2d = reg->Dimension.Indirect; 1563 unsigned swizzleX = reg->Register.SwizzleX; 1564 unsigned swizzleY = reg->Register.SwizzleY; 1565 unsigned swizzleZ = reg->Register.SwizzleZ; 1566 unsigned swizzleW = reg->Register.SwizzleW; 1567 const boolean absolute = reg->Register.Absolute; 1568 const boolean negate = reg->Register.Negate; 1569 VGPU10OperandToken0 operand0; 1570 VGPU10OperandToken1 operand1; 1571 1572 operand0.value = operand1.value = 0; 1573 1574 if (emit->unit == PIPE_SHADER_FRAGMENT){ 1575 if (file == TGSI_FILE_INPUT) { 1576 if (index == emit->fs.face_input_index) { 1577 /* Replace INPUT[FACE] with TEMP[FACE] */ 1578 file = TGSI_FILE_TEMPORARY; 1579 index = emit->fs.face_tmp_index; 1580 } 1581 else if (index == emit->fs.fragcoord_input_index) { 1582 /* Replace INPUT[POSITION] with TEMP[POSITION] */ 1583 file = TGSI_FILE_TEMPORARY; 1584 index = emit->fs.fragcoord_tmp_index; 1585 } 1586 else if (index == emit->fs.layer_input_index) { 1587 /* Replace INPUT[LAYER] with zero.x */ 1588 file = TGSI_FILE_IMMEDIATE; 1589 index = emit->fs.layer_imm_index; 1590 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1591 } 1592 else { 1593 /* We remap fragment shader inputs to that FS input indexes 1594 * match up with VS/GS output indexes. 1595 */ 1596 index = emit->linkage.input_map[index]; 1597 } 1598 } 1599 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1600 if (index == emit->fs.sample_pos_sys_index) { 1601 assert(emit->version >= 41); 1602 /* Current sample position is in a temp register */ 1603 file = TGSI_FILE_TEMPORARY; 1604 index = emit->fs.sample_pos_tmp_index; 1605 } 1606 else if (index == emit->fs.sample_mask_in_sys_index) { 1607 /* Emitted as vCoverage0.x */ 1608 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) 1609 * elements where s is the maximum number of color samples supported 1610 * by the implementation. 1611 */ 1612 operand0.value = 0; 1613 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; 1614 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1615 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1616 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1617 emit_dword(emit, operand0.value); 1618 return; 1619 } 1620 else { 1621 /* Map the TGSI system value to a VGPU10 input register */ 1622 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1623 file = TGSI_FILE_INPUT; 1624 index = emit->system_value_indexes[index]; 1625 } 1626 } 1627 } 1628 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 1629 if (file == TGSI_FILE_INPUT) { 1630 if (index == emit->gs.prim_id_index) { 1631 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 1632 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1633 } 1634 index = emit->linkage.input_map[index]; 1635 } 1636 else if (file == TGSI_FILE_SYSTEM_VALUE && 1637 index == emit->gs.invocation_id_sys_index) { 1638 /* Emitted as vGSInstanceID0.x */ 1639 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1640 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID; 1641 index = 0; 1642 } 1643 } 1644 else if (emit->unit == PIPE_SHADER_VERTEX) { 1645 if (file == TGSI_FILE_INPUT) { 1646 /* if input is adjusted... */ 1647 if ((emit->key.vs.adjust_attrib_w_1 | 1648 emit->key.vs.adjust_attrib_itof | 1649 emit->key.vs.adjust_attrib_utof | 1650 emit->key.vs.attrib_is_bgra | 1651 emit->key.vs.attrib_puint_to_snorm | 1652 emit->key.vs.attrib_puint_to_uscaled | 1653 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { 1654 file = TGSI_FILE_TEMPORARY; 1655 index = emit->vs.adjusted_input[index]; 1656 } 1657 } 1658 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1659 if (index == emit->vs.vertex_id_sys_index && 1660 emit->vs.vertex_id_tmp_index != INVALID_INDEX) { 1661 file = TGSI_FILE_TEMPORARY; 1662 index = emit->vs.vertex_id_tmp_index; 1663 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1664 } 1665 else { 1666 /* Map the TGSI system value to a VGPU10 input register */ 1667 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1668 file = TGSI_FILE_INPUT; 1669 index = emit->system_value_indexes[index]; 1670 } 1671 } 1672 } 1673 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 1674 1675 if (file == TGSI_FILE_SYSTEM_VALUE) { 1676 if (index == emit->tcs.vertices_per_patch_index) { 1677 /** 1678 * if source register is the system value for vertices_per_patch, 1679 * replace it with the immediate. 1680 */ 1681 file = TGSI_FILE_IMMEDIATE; 1682 index = emit->tcs.imm_index; 1683 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1684 } 1685 else if (index == emit->tcs.invocation_id_sys_index) { 1686 if (emit->tcs.control_point_phase) { 1687 /** 1688 * Emitted as vOutputControlPointID.x 1689 */ 1690 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1691 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID; 1692 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 1693 operand0.mask = 0; 1694 emit_dword(emit, operand0.value); 1695 return; 1696 } 1697 else { 1698 /* There is no control point ID input declaration in 1699 * the patch constant phase in hull shader. 1700 * Since for now we are emitting all instructions in 1701 * the patch constant phase, we are replacing the 1702 * control point ID reference with the immediate 0. 1703 */ 1704 file = TGSI_FILE_IMMEDIATE; 1705 index = emit->tcs.imm_index; 1706 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W; 1707 } 1708 } 1709 else if (index == emit->tcs.prim_id_index) { 1710 /** 1711 * Emitted as vPrim.x 1712 */ 1713 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1714 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1715 index = 0; 1716 } 1717 } 1718 else if (file == TGSI_FILE_INPUT) { 1719 index = emit->linkage.input_map[index]; 1720 if (!emit->tcs.control_point_phase) { 1721 /* Emitted as vicp */ 1722 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1723 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 1724 assert(reg->Register.Dimension); 1725 } 1726 } 1727 else if (file == TGSI_FILE_OUTPUT) { 1728 if ((index >= emit->tcs.patch_generic_out_index && 1729 index < (emit->tcs.patch_generic_out_index + 1730 emit->tcs.patch_generic_out_count)) || 1731 index == emit->tcs.inner.tgsi_index || 1732 index == emit->tcs.outer.tgsi_index) { 1733 if (emit->tcs.control_point_phase) { 1734 emit->discard_instruction = TRUE; 1735 } 1736 else { 1737 /* Device doesn't allow reading from output so 1738 * use corresponding temporary register as source */ 1739 file = TGSI_FILE_TEMPORARY; 1740 if (index == emit->tcs.inner.tgsi_index) { 1741 index = emit->tcs.inner.temp_index; 1742 } 1743 else if (index == emit->tcs.outer.tgsi_index) { 1744 index = emit->tcs.outer.temp_index; 1745 } 1746 else { 1747 index = emit->tcs.patch_generic_tmp_index + 1748 (index - emit->tcs.patch_generic_out_index); 1749 } 1750 1751 /** 1752 * Temporaries for patch constant data can be done 1753 * as indexable temporaries. 1754 */ 1755 tempArrayId = get_temp_array_id(emit, file, index); 1756 index2d = tempArrayId > 0; 1757 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; 1758 } 1759 } 1760 else if (index2d) { 1761 if (emit->tcs.control_point_phase) { 1762 /* Device doesn't allow reading from output so 1763 * use corresponding temporary register as source */ 1764 file = TGSI_FILE_TEMPORARY; 1765 index2d = FALSE; 1766 index = emit->tcs.control_point_tmp_index + 1767 (index - emit->tcs.control_point_out_index); 1768 } 1769 else { 1770 emit->discard_instruction = TRUE; 1771 } 1772 } 1773 } 1774 } 1775 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 1776 if (file == TGSI_FILE_SYSTEM_VALUE) { 1777 if (index == emit->tes.tesscoord_sys_index) { 1778 /** 1779 * Emitted as vDomain 1780 */ 1781 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1782 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT; 1783 index = 0; 1784 1785 /* Make sure swizzles are of those components allowed according 1786 * to the tessellator domain. 1787 */ 1788 swizzleX = MIN2(swizzleX, emit->tes.swizzle_max); 1789 swizzleY = MIN2(swizzleY, emit->tes.swizzle_max); 1790 swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max); 1791 swizzleW = MIN2(swizzleW, emit->tes.swizzle_max); 1792 } 1793 else if (index == emit->tes.inner.tgsi_index) { 1794 file = TGSI_FILE_TEMPORARY; 1795 index = emit->tes.inner.temp_index; 1796 } 1797 else if (index == emit->tes.outer.tgsi_index) { 1798 file = TGSI_FILE_TEMPORARY; 1799 index = emit->tes.outer.temp_index; 1800 } 1801 else if (index == emit->tes.prim_id_index) { 1802 /** 1803 * Emitted as vPrim.x 1804 */ 1805 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1806 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1807 index = 0; 1808 } 1809 1810 } 1811 else if (file == TGSI_FILE_INPUT) { 1812 if (index2d) { 1813 /* 2D input is emitted as vcp (input control point). */ 1814 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 1815 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1816 1817 /* index specifies the element index and is remapped 1818 * to align with the tcs output index. 1819 */ 1820 index = emit->linkage.input_map[index]; 1821 1822 assert(index2 < emit->key.tes.vertices_per_patch); 1823 } 1824 else { 1825 if (index < emit->key.tes.tessfactor_index) 1826 /* index specifies the generic patch index. 1827 * Remapped to match up with the tcs output index. 1828 */ 1829 index = emit->linkage.input_map[index]; 1830 1831 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; 1832 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1833 } 1834 } 1835 } 1836 else if (emit->unit == PIPE_SHADER_COMPUTE) { 1837 if (file == TGSI_FILE_SYSTEM_VALUE) { 1838 if (index == emit->cs.thread_id_index) { 1839 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1840 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP; 1841 index = 0; 1842 } else if (index == emit->cs.block_id_index) { 1843 operand0.value = 0; 1844 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1845 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID; 1846 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1847 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1848 operand0.swizzleX = swizzleX; 1849 operand0.swizzleY = swizzleY; 1850 operand0.swizzleZ = swizzleZ; 1851 operand0.swizzleW = swizzleW; 1852 emit_dword(emit, operand0.value); 1853 return; 1854 } else if (index == emit->cs.grid_size.tgsi_index) { 1855 file = TGSI_FILE_IMMEDIATE; 1856 index = emit->cs.grid_size.imm_index; 1857 } 1858 } 1859 } 1860 1861 if (file == TGSI_FILE_ADDRESS) { 1862 index = emit->address_reg_index[index]; 1863 file = TGSI_FILE_TEMPORARY; 1864 } 1865 1866 if (file == TGSI_FILE_CONSTANT) { 1867 /** 1868 * If this constant buffer is to be bound as srv raw buffer, 1869 * then we have to load the constant to a temp first before 1870 * it can be used as a source in the instruction. 1871 * This is accomplished in two passes. The first pass is to 1872 * identify if there is any constbuf to rawbuf translation. 1873 * If there isn't, emit the instruction as usual. 1874 * If there is, then we save the constant buffer reference info, 1875 * and then instead of emitting the instruction at the end 1876 * of the instruction, it will trigger a second pass of parsing 1877 * this instruction. Before it starts the parsing, it will 1878 * load the referenced raw buffer elements to temporaries. 1879 * Then it will emit the instruction that replaces the 1880 * constant buffer replaces with the corresponding temporaries. 1881 */ 1882 if (emit->raw_bufs & (1 << index2)) { 1883 if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) { 1884 unsigned tmpIdx = emit->raw_buf_cur_tmp_index; 1885 1886 emit->raw_buf_tmp[tmpIdx].buffer_index = index2; 1887 1888 /* Save whether the element index is indirect indexing */ 1889 emit->raw_buf_tmp[tmpIdx].indirect = indirect; 1890 1891 /* If it is indirect index, save the temporary 1892 * address index, otherwise, save the immediate index. 1893 */ 1894 if (indirect) { 1895 emit->raw_buf_tmp[tmpIdx].element_index = 1896 emit->address_reg_index[reg->Indirect.Index]; 1897 emit->raw_buf_tmp[tmpIdx].element_rel = 1898 reg->Register.Index; 1899 } 1900 else { 1901 emit->raw_buf_tmp[tmpIdx].element_index = index; 1902 emit->raw_buf_tmp[tmpIdx].element_rel = 0; 1903 } 1904 1905 emit->raw_buf_cur_tmp_index++; 1906 emit->reemit_rawbuf_instruction = REEMIT_TRUE; 1907 emit->discard_instruction = TRUE; 1908 emit->reemit_tgsi_instruction = TRUE; 1909 } 1910 else { 1911 /* In the reemitting process, replace the constant buffer 1912 * reference with temporary. 1913 */ 1914 file = TGSI_FILE_TEMPORARY; 1915 index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index; 1916 index2d = FALSE; 1917 indirect = FALSE; 1918 emit->raw_buf_cur_tmp_index++; 1919 } 1920 } 1921 } 1922 1923 if (file == TGSI_FILE_TEMPORARY) { 1924 if (need_temp_reg_initialization(emit, index)) { 1925 emit->initialize_temp_index = index; 1926 emit->discard_instruction = TRUE; 1927 } 1928 } 1929 1930 if (operand0.value == 0) { 1931 /* if operand0 was not set above for a special case, do the general 1932 * case now. 1933 */ 1934 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1935 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1936 } 1937 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1938 index2d, indirect2d); 1939 1940 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && 1941 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 1942 /* there's no swizzle for in-line immediates */ 1943 if (swizzleX == swizzleY && 1944 swizzleX == swizzleZ && 1945 swizzleX == swizzleW) { 1946 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1947 } 1948 else { 1949 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1950 } 1951 1952 operand0.swizzleX = swizzleX; 1953 operand0.swizzleY = swizzleY; 1954 operand0.swizzleZ = swizzleZ; 1955 operand0.swizzleW = swizzleW; 1956 1957 if (absolute || negate) { 1958 operand0.extended = 1; 1959 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; 1960 if (absolute && !negate) 1961 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; 1962 if (!absolute && negate) 1963 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; 1964 if (absolute && negate) 1965 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; 1966 } 1967 } 1968 1969 check_register_index(emit, operand0.operandType, index); 1970 1971 /* Emit the operand tokens */ 1972 emit_dword(emit, operand0.value); 1973 if (operand0.extended) 1974 emit_dword(emit, operand1.value); 1975 1976 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { 1977 /* Emit the four float/int in-line immediate values */ 1978 unsigned *c; 1979 assert(index < ARRAY_SIZE(emit->immediates)); 1980 assert(file == TGSI_FILE_IMMEDIATE); 1981 assert(swizzleX < 4); 1982 assert(swizzleY < 4); 1983 assert(swizzleZ < 4); 1984 assert(swizzleW < 4); 1985 c = (unsigned *) emit->immediates[index]; 1986 emit_dword(emit, c[swizzleX]); 1987 emit_dword(emit, c[swizzleY]); 1988 emit_dword(emit, c[swizzleZ]); 1989 emit_dword(emit, c[swizzleW]); 1990 } 1991 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { 1992 /* Emit the register index(es) */ 1993 if (index2d) { 1994 emit_dword(emit, index2); 1995 1996 if (indirect2d) { 1997 emit_indirect_register(emit, reg->DimIndirect.Index); 1998 } 1999 } 2000 2001 emit_dword(emit, remap_temp_index(emit, file, index)); 2002 2003 if (indirect) { 2004 assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP); 2005 emit_indirect_register(emit, reg->Indirect.Index); 2006 } 2007 } 2008} 2009 2010 2011/** 2012 * Emit a resource operand (for use with a SAMPLE instruction). 2013 */ 2014static void 2015emit_resource_register(struct svga_shader_emitter_v10 *emit, 2016 unsigned resource_number) 2017{ 2018 VGPU10OperandToken0 operand0; 2019 2020 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); 2021 2022 /* init */ 2023 operand0.value = 0; 2024 2025 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 2026 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2027 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2028 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2029 operand0.swizzleX = VGPU10_COMPONENT_X; 2030 operand0.swizzleY = VGPU10_COMPONENT_Y; 2031 operand0.swizzleZ = VGPU10_COMPONENT_Z; 2032 operand0.swizzleW = VGPU10_COMPONENT_W; 2033 2034 emit_dword(emit, operand0.value); 2035 emit_dword(emit, resource_number); 2036} 2037 2038 2039/** 2040 * Emit a sampler operand (for use with a SAMPLE instruction). 2041 */ 2042static void 2043emit_sampler_register(struct svga_shader_emitter_v10 *emit, 2044 unsigned unit) 2045{ 2046 VGPU10OperandToken0 operand0; 2047 unsigned sampler_number; 2048 2049 sampler_number = emit->key.tex[unit].sampler_index; 2050 2051 if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping) 2052 sampler_number++; 2053 2054 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); 2055 2056 /* init */ 2057 operand0.value = 0; 2058 2059 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 2060 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2061 2062 emit_dword(emit, operand0.value); 2063 emit_dword(emit, sampler_number); 2064} 2065 2066 2067/** 2068 * Emit an operand which reads the IS_FRONT_FACING register. 2069 */ 2070static void 2071emit_face_register(struct svga_shader_emitter_v10 *emit) 2072{ 2073 VGPU10OperandToken0 operand0; 2074 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 2075 2076 /* init */ 2077 operand0.value = 0; 2078 2079 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 2080 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2081 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 2082 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2083 2084 operand0.swizzleX = VGPU10_COMPONENT_X; 2085 operand0.swizzleY = VGPU10_COMPONENT_X; 2086 operand0.swizzleZ = VGPU10_COMPONENT_X; 2087 operand0.swizzleW = VGPU10_COMPONENT_X; 2088 2089 emit_dword(emit, operand0.value); 2090 emit_dword(emit, index); 2091} 2092 2093 2094/** 2095 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS 2096 * instruction. 2097 */ 2098static void 2099emit_rasterizer_register(struct svga_shader_emitter_v10 *emit) 2100{ 2101 VGPU10OperandToken0 operand0; 2102 2103 /* init */ 2104 operand0.value = 0; 2105 2106 /* No register index for rasterizer index (there's only one) */ 2107 operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER; 2108 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2109 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2110 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2111 operand0.swizzleX = VGPU10_COMPONENT_X; 2112 operand0.swizzleY = VGPU10_COMPONENT_Y; 2113 operand0.swizzleZ = VGPU10_COMPONENT_Z; 2114 operand0.swizzleW = VGPU10_COMPONENT_W; 2115 2116 emit_dword(emit, operand0.value); 2117} 2118 2119 2120/** 2121 * Emit tokens for the "stream" register used by the 2122 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions. 2123 */ 2124static void 2125emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index) 2126{ 2127 VGPU10OperandToken0 operand0; 2128 2129 /* init */ 2130 operand0.value = 0; 2131 2132 /* No register index for rasterizer index (there's only one) */ 2133 operand0.operandType = VGPU10_OPERAND_TYPE_STREAM; 2134 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2135 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2136 2137 emit_dword(emit, operand0.value); 2138 emit_dword(emit, index); 2139} 2140 2141 2142/** 2143 * Emit the token for a VGPU10 opcode, with precise parameter. 2144 * \param saturate clamp result to [0,1]? 2145 */ 2146static void 2147emit_opcode_precise(struct svga_shader_emitter_v10 *emit, 2148 unsigned vgpu10_opcode, boolean saturate, boolean precise) 2149{ 2150 VGPU10OpcodeToken0 token0; 2151 2152 token0.value = 0; /* init all fields to zero */ 2153 token0.opcodeType = vgpu10_opcode; 2154 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 2155 token0.saturate = saturate; 2156 2157 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for 2158 * 'invariant' declarations. Only set preciseValues=1 if we have SM5. 2159 */ 2160 token0.preciseValues = precise && emit->version >= 50; 2161 2162 emit_dword(emit, token0.value); 2163 2164 emit->uses_precise_qualifier |= token0.preciseValues; 2165} 2166 2167 2168/** 2169 * Emit the token for a VGPU10 opcode. 2170 * \param saturate clamp result to [0,1]? 2171 */ 2172static void 2173emit_opcode(struct svga_shader_emitter_v10 *emit, 2174 unsigned vgpu10_opcode, boolean saturate) 2175{ 2176 emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE); 2177} 2178 2179 2180/** 2181 * Emit the token for a VGPU10 resinfo instruction. 2182 * \param modifier return type modifier, _uint or _rcpFloat. 2183 * TODO: We may want to remove this parameter if it will 2184 * only ever be used as _uint. 2185 */ 2186static void 2187emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 2188 VGPU10_RESINFO_RETURN_TYPE modifier) 2189{ 2190 VGPU10OpcodeToken0 token0; 2191 2192 token0.value = 0; /* init all fields to zero */ 2193 token0.opcodeType = VGPU10_OPCODE_RESINFO; 2194 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 2195 token0.resinfoReturnType = modifier; 2196 2197 emit_dword(emit, token0.value); 2198} 2199 2200 2201/** 2202 * Emit opcode tokens for a texture sample instruction. Texture instructions 2203 * can be rather complicated (texel offsets, etc) so we have this specialized 2204 * function. 2205 */ 2206static void 2207emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 2208 unsigned vgpu10_opcode, boolean saturate, 2209 const int offsets[3]) 2210{ 2211 VGPU10OpcodeToken0 token0; 2212 VGPU10OpcodeToken1 token1; 2213 2214 token0.value = 0; /* init all fields to zero */ 2215 token0.opcodeType = vgpu10_opcode; 2216 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 2217 token0.saturate = saturate; 2218 2219 if (offsets[0] || offsets[1] || offsets[2]) { 2220 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2221 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2222 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2223 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2224 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2225 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2226 2227 token0.extended = 1; 2228 token1.value = 0; 2229 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 2230 token1.offsetU = offsets[0]; 2231 token1.offsetV = offsets[1]; 2232 token1.offsetW = offsets[2]; 2233 } 2234 2235 emit_dword(emit, token0.value); 2236 if (token0.extended) { 2237 emit_dword(emit, token1.value); 2238 } 2239} 2240 2241 2242/** 2243 * Emit a DISCARD opcode token. 2244 * If nonzero is set, we'll discard the fragment if the X component is not 0. 2245 * Otherwise, we'll discard the fragment if the X component is 0. 2246 */ 2247static void 2248emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) 2249{ 2250 VGPU10OpcodeToken0 opcode0; 2251 2252 opcode0.value = 0; 2253 opcode0.opcodeType = VGPU10_OPCODE_DISCARD; 2254 if (nonzero) 2255 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 2256 2257 emit_dword(emit, opcode0.value); 2258} 2259 2260 2261/** 2262 * We need to call this before we begin emitting a VGPU10 instruction. 2263 */ 2264static void 2265begin_emit_instruction(struct svga_shader_emitter_v10 *emit) 2266{ 2267 assert(emit->inst_start_token == 0); 2268 /* Save location of the instruction's VGPU10OpcodeToken0 token. 2269 * Note, we can't save a pointer because it would become invalid if 2270 * we have to realloc the output buffer. 2271 */ 2272 emit->inst_start_token = emit_get_num_tokens(emit); 2273} 2274 2275 2276/** 2277 * We need to call this after we emit the last token of a VGPU10 instruction. 2278 * This function patches in the opcode token's instructionLength field. 2279 */ 2280static void 2281end_emit_instruction(struct svga_shader_emitter_v10 *emit) 2282{ 2283 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; 2284 unsigned inst_length; 2285 2286 assert(emit->inst_start_token > 0); 2287 2288 if (emit->discard_instruction) { 2289 /* Back up the emit->ptr to where this instruction started so 2290 * that we discard the current instruction. 2291 */ 2292 emit->ptr = (char *) (tokens + emit->inst_start_token); 2293 } 2294 else { 2295 /* Compute instruction length and patch that into the start of 2296 * the instruction. 2297 */ 2298 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; 2299 2300 assert(inst_length > 0); 2301 2302 tokens[emit->inst_start_token].instructionLength = inst_length; 2303 } 2304 2305 emit->inst_start_token = 0; /* reset to zero for error checking */ 2306 emit->discard_instruction = FALSE; 2307} 2308 2309 2310/** 2311 * Return index for a free temporary register. 2312 */ 2313static unsigned 2314get_temp_index(struct svga_shader_emitter_v10 *emit) 2315{ 2316 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); 2317 return emit->num_shader_temps + emit->internal_temp_count++; 2318} 2319 2320 2321/** 2322 * Release the temporaries which were generated by get_temp_index(). 2323 */ 2324static void 2325free_temp_indexes(struct svga_shader_emitter_v10 *emit) 2326{ 2327 emit->internal_temp_count = 0; 2328} 2329 2330 2331/** 2332 * Create a tgsi_full_src_register. 2333 */ 2334static struct tgsi_full_src_register 2335make_src_reg(enum tgsi_file_type file, unsigned index) 2336{ 2337 struct tgsi_full_src_register reg; 2338 2339 memset(®, 0, sizeof(reg)); 2340 reg.Register.File = file; 2341 reg.Register.Index = index; 2342 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 2343 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 2344 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 2345 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 2346 return reg; 2347} 2348 2349 2350/** 2351 * Create a tgsi_full_src_register with a swizzle such that all four 2352 * vector components have the same scalar value. 2353 */ 2354static struct tgsi_full_src_register 2355make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component) 2356{ 2357 struct tgsi_full_src_register reg; 2358 2359 assert(component >= TGSI_SWIZZLE_X); 2360 assert(component <= TGSI_SWIZZLE_W); 2361 2362 memset(®, 0, sizeof(reg)); 2363 reg.Register.File = file; 2364 reg.Register.Index = index; 2365 reg.Register.SwizzleX = 2366 reg.Register.SwizzleY = 2367 reg.Register.SwizzleZ = 2368 reg.Register.SwizzleW = component; 2369 return reg; 2370} 2371 2372 2373/** 2374 * Create a tgsi_full_src_register for a temporary. 2375 */ 2376static struct tgsi_full_src_register 2377make_src_temp_reg(unsigned index) 2378{ 2379 return make_src_reg(TGSI_FILE_TEMPORARY, index); 2380} 2381 2382 2383/** 2384 * Create a tgsi_full_src_register for a constant. 2385 */ 2386static struct tgsi_full_src_register 2387make_src_const_reg(unsigned index) 2388{ 2389 return make_src_reg(TGSI_FILE_CONSTANT, index); 2390} 2391 2392 2393/** 2394 * Create a tgsi_full_src_register for an immediate constant. 2395 */ 2396static struct tgsi_full_src_register 2397make_src_immediate_reg(unsigned index) 2398{ 2399 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 2400} 2401 2402 2403/** 2404 * Create a tgsi_full_dst_register. 2405 */ 2406static struct tgsi_full_dst_register 2407make_dst_reg(enum tgsi_file_type file, unsigned index) 2408{ 2409 struct tgsi_full_dst_register reg; 2410 2411 memset(®, 0, sizeof(reg)); 2412 reg.Register.File = file; 2413 reg.Register.Index = index; 2414 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 2415 return reg; 2416} 2417 2418 2419/** 2420 * Create a tgsi_full_dst_register for a temporary. 2421 */ 2422static struct tgsi_full_dst_register 2423make_dst_temp_reg(unsigned index) 2424{ 2425 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 2426} 2427 2428 2429/** 2430 * Create a tgsi_full_dst_register for an output. 2431 */ 2432static struct tgsi_full_dst_register 2433make_dst_output_reg(unsigned index) 2434{ 2435 return make_dst_reg(TGSI_FILE_OUTPUT, index); 2436} 2437 2438 2439/** 2440 * Create negated tgsi_full_src_register. 2441 */ 2442static struct tgsi_full_src_register 2443negate_src(const struct tgsi_full_src_register *reg) 2444{ 2445 struct tgsi_full_src_register neg = *reg; 2446 neg.Register.Negate = !reg->Register.Negate; 2447 return neg; 2448} 2449 2450/** 2451 * Create absolute value of a tgsi_full_src_register. 2452 */ 2453static struct tgsi_full_src_register 2454absolute_src(const struct tgsi_full_src_register *reg) 2455{ 2456 struct tgsi_full_src_register absolute = *reg; 2457 absolute.Register.Absolute = 1; 2458 return absolute; 2459} 2460 2461 2462/** Return the named swizzle term from the src register */ 2463static inline unsigned 2464get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term) 2465{ 2466 switch (term) { 2467 case TGSI_SWIZZLE_X: 2468 return reg->Register.SwizzleX; 2469 case TGSI_SWIZZLE_Y: 2470 return reg->Register.SwizzleY; 2471 case TGSI_SWIZZLE_Z: 2472 return reg->Register.SwizzleZ; 2473 case TGSI_SWIZZLE_W: 2474 return reg->Register.SwizzleW; 2475 default: 2476 assert(!"Bad swizzle"); 2477 return TGSI_SWIZZLE_X; 2478 } 2479} 2480 2481 2482/** 2483 * Create swizzled tgsi_full_src_register. 2484 */ 2485static struct tgsi_full_src_register 2486swizzle_src(const struct tgsi_full_src_register *reg, 2487 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY, 2488 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW) 2489{ 2490 struct tgsi_full_src_register swizzled = *reg; 2491 /* Note: we swizzle the current swizzle */ 2492 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 2493 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 2494 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 2495 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 2496 return swizzled; 2497} 2498 2499 2500/** 2501 * Create swizzled tgsi_full_src_register where all the swizzle 2502 * terms are the same. 2503 */ 2504static struct tgsi_full_src_register 2505scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle) 2506{ 2507 struct tgsi_full_src_register swizzled = *reg; 2508 /* Note: we swizzle the current swizzle */ 2509 swizzled.Register.SwizzleX = 2510 swizzled.Register.SwizzleY = 2511 swizzled.Register.SwizzleZ = 2512 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 2513 return swizzled; 2514} 2515 2516 2517/** 2518 * Create new tgsi_full_dst_register with writemask. 2519 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 2520 */ 2521static struct tgsi_full_dst_register 2522writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 2523{ 2524 struct tgsi_full_dst_register masked = *reg; 2525 masked.Register.WriteMask = mask; 2526 return masked; 2527} 2528 2529 2530/** 2531 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 2532 */ 2533static boolean 2534same_swizzle_terms(const struct tgsi_full_src_register *reg) 2535{ 2536 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 2537 reg->Register.SwizzleY == reg->Register.SwizzleZ && 2538 reg->Register.SwizzleZ == reg->Register.SwizzleW); 2539} 2540 2541 2542/** 2543 * Search the vector for the value 'x' and return its position. 2544 */ 2545static int 2546find_imm_in_vec4(const union tgsi_immediate_data vec[4], 2547 union tgsi_immediate_data x) 2548{ 2549 unsigned i; 2550 for (i = 0; i < 4; i++) { 2551 if (vec[i].Int == x.Int) 2552 return i; 2553 } 2554 return -1; 2555} 2556 2557 2558/** 2559 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 2560 */ 2561static int 2562find_immediate(struct svga_shader_emitter_v10 *emit, 2563 union tgsi_immediate_data x, unsigned startIndex) 2564{ 2565 const unsigned endIndex = emit->num_immediates; 2566 unsigned i; 2567 2568 assert(emit->immediates_emitted); 2569 2570 /* Search immediates for x, y, z, w */ 2571 for (i = startIndex; i < endIndex; i++) { 2572 if (x.Int == emit->immediates[i][0].Int || 2573 x.Int == emit->immediates[i][1].Int || 2574 x.Int == emit->immediates[i][2].Int || 2575 x.Int == emit->immediates[i][3].Int) { 2576 return i; 2577 } 2578 } 2579 /* Should never try to use an immediate value that wasn't pre-declared */ 2580 assert(!"find_immediate() failed!"); 2581 return -1; 2582} 2583 2584 2585/** 2586 * As above, but search for a double[2] pair. 2587 */ 2588static int 2589find_immediate_dbl(struct svga_shader_emitter_v10 *emit, 2590 double x, double y) 2591{ 2592 const unsigned endIndex = emit->num_immediates; 2593 unsigned i; 2594 2595 assert(emit->immediates_emitted); 2596 2597 /* Search immediates for x, y, z, w */ 2598 for (i = 0; i < endIndex; i++) { 2599 if (x == emit->immediates_dbl[i][0] && 2600 y == emit->immediates_dbl[i][1]) { 2601 return i; 2602 } 2603 } 2604 /* Should never try to use an immediate value that wasn't pre-declared */ 2605 assert(!"find_immediate_dbl() failed!"); 2606 return -1; 2607} 2608 2609 2610 2611/** 2612 * Return a tgsi_full_src_register for an immediate/literal 2613 * union tgsi_immediate_data[4] value. 2614 * Note: the values must have been previously declared/allocated in 2615 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same 2616 * vec4 immediate. 2617 */ 2618static struct tgsi_full_src_register 2619make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, 2620 const union tgsi_immediate_data imm[4]) 2621{ 2622 struct tgsi_full_src_register reg; 2623 unsigned i; 2624 2625 for (i = 0; i < emit->num_common_immediates; i++) { 2626 /* search for first component value */ 2627 int immpos = find_immediate(emit, imm[0], i); 2628 int x, y, z, w; 2629 2630 assert(immpos >= 0); 2631 2632 /* find remaining components within the immediate vector */ 2633 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); 2634 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); 2635 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); 2636 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); 2637 2638 if (x >=0 && y >= 0 && z >= 0 && w >= 0) { 2639 /* found them all */ 2640 memset(®, 0, sizeof(reg)); 2641 reg.Register.File = TGSI_FILE_IMMEDIATE; 2642 reg.Register.Index = immpos; 2643 reg.Register.SwizzleX = x; 2644 reg.Register.SwizzleY = y; 2645 reg.Register.SwizzleZ = z; 2646 reg.Register.SwizzleW = w; 2647 return reg; 2648 } 2649 /* else, keep searching */ 2650 } 2651 2652 assert(!"Failed to find immediate register!"); 2653 2654 /* Just return IMM[0].xxxx */ 2655 memset(®, 0, sizeof(reg)); 2656 reg.Register.File = TGSI_FILE_IMMEDIATE; 2657 return reg; 2658} 2659 2660 2661/** 2662 * Return a tgsi_full_src_register for an immediate/literal 2663 * union tgsi_immediate_data value of the form {value, value, value, value}. 2664 * \sa make_immediate_reg_4() regarding allowed values. 2665 */ 2666static struct tgsi_full_src_register 2667make_immediate_reg(struct svga_shader_emitter_v10 *emit, 2668 union tgsi_immediate_data value) 2669{ 2670 struct tgsi_full_src_register reg; 2671 int immpos = find_immediate(emit, value, 0); 2672 2673 assert(immpos >= 0); 2674 2675 memset(®, 0, sizeof(reg)); 2676 reg.Register.File = TGSI_FILE_IMMEDIATE; 2677 reg.Register.Index = immpos; 2678 reg.Register.SwizzleX = 2679 reg.Register.SwizzleY = 2680 reg.Register.SwizzleZ = 2681 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); 2682 2683 return reg; 2684} 2685 2686 2687/** 2688 * Return a tgsi_full_src_register for an immediate/literal float[4] value. 2689 * \sa make_immediate_reg_4() regarding allowed values. 2690 */ 2691static struct tgsi_full_src_register 2692make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, 2693 float x, float y, float z, float w) 2694{ 2695 union tgsi_immediate_data imm[4]; 2696 imm[0].Float = x; 2697 imm[1].Float = y; 2698 imm[2].Float = z; 2699 imm[3].Float = w; 2700 return make_immediate_reg_4(emit, imm); 2701} 2702 2703 2704/** 2705 * Return a tgsi_full_src_register for an immediate/literal float value 2706 * of the form {value, value, value, value}. 2707 * \sa make_immediate_reg_4() regarding allowed values. 2708 */ 2709static struct tgsi_full_src_register 2710make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 2711{ 2712 union tgsi_immediate_data imm; 2713 imm.Float = value; 2714 return make_immediate_reg(emit, imm); 2715} 2716 2717 2718/** 2719 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 2720 */ 2721static struct tgsi_full_src_register 2722make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 2723 int x, int y, int z, int w) 2724{ 2725 union tgsi_immediate_data imm[4]; 2726 imm[0].Int = x; 2727 imm[1].Int = y; 2728 imm[2].Int = z; 2729 imm[3].Int = w; 2730 return make_immediate_reg_4(emit, imm); 2731} 2732 2733 2734/** 2735 * Return a tgsi_full_src_register for an immediate/literal int value 2736 * of the form {value, value, value, value}. 2737 * \sa make_immediate_reg_4() regarding allowed values. 2738 */ 2739static struct tgsi_full_src_register 2740make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 2741{ 2742 union tgsi_immediate_data imm; 2743 imm.Int = value; 2744 return make_immediate_reg(emit, imm); 2745} 2746 2747 2748static struct tgsi_full_src_register 2749make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value) 2750{ 2751 struct tgsi_full_src_register reg; 2752 int immpos = find_immediate_dbl(emit, value, value); 2753 2754 assert(immpos >= 0); 2755 2756 memset(®, 0, sizeof(reg)); 2757 reg.Register.File = TGSI_FILE_IMMEDIATE; 2758 reg.Register.Index = immpos; 2759 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 2760 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 2761 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 2762 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 2763 2764 return reg; 2765} 2766 2767 2768/** 2769 * Allocate space for a union tgsi_immediate_data[4] immediate. 2770 * \return the index/position of the immediate. 2771 */ 2772static unsigned 2773alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 2774 const union tgsi_immediate_data imm[4]) 2775{ 2776 unsigned n = emit->num_immediates++; 2777 assert(!emit->immediates_emitted); 2778 assert(n < ARRAY_SIZE(emit->immediates)); 2779 emit->immediates[n][0] = imm[0]; 2780 emit->immediates[n][1] = imm[1]; 2781 emit->immediates[n][2] = imm[2]; 2782 emit->immediates[n][3] = imm[3]; 2783 return n; 2784} 2785 2786 2787/** 2788 * Allocate space for a float[4] immediate. 2789 * \return the index/position of the immediate. 2790 */ 2791static unsigned 2792alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 2793 float x, float y, float z, float w) 2794{ 2795 union tgsi_immediate_data imm[4]; 2796 imm[0].Float = x; 2797 imm[1].Float = y; 2798 imm[2].Float = z; 2799 imm[3].Float = w; 2800 return alloc_immediate_4(emit, imm); 2801} 2802 2803 2804/** 2805 * Allocate space for an int[4] immediate. 2806 * \return the index/position of the immediate. 2807 */ 2808static unsigned 2809alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 2810 int x, int y, int z, int w) 2811{ 2812 union tgsi_immediate_data imm[4]; 2813 imm[0].Int = x; 2814 imm[1].Int = y; 2815 imm[2].Int = z; 2816 imm[3].Int = w; 2817 return alloc_immediate_4(emit, imm); 2818} 2819 2820 2821static unsigned 2822alloc_immediate_double2(struct svga_shader_emitter_v10 *emit, 2823 double x, double y) 2824{ 2825 unsigned n = emit->num_immediates++; 2826 assert(!emit->immediates_emitted); 2827 assert(n < ARRAY_SIZE(emit->immediates)); 2828 emit->immediates_dbl[n][0] = x; 2829 emit->immediates_dbl[n][1] = y; 2830 return n; 2831 2832} 2833 2834 2835/** 2836 * Allocate a shader input to store a system value. 2837 */ 2838static unsigned 2839alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 2840{ 2841 const unsigned n = emit->linkage.input_map_max + 1 + index; 2842 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 2843 emit->system_value_indexes[index] = n; 2844 return n; 2845} 2846 2847 2848/** 2849 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 2850 */ 2851static boolean 2852emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 2853 const struct tgsi_full_immediate *imm) 2854{ 2855 /* We don't actually emit any code here. We just save the 2856 * immediate values and emit them later. 2857 */ 2858 alloc_immediate_4(emit, imm->u); 2859 return TRUE; 2860} 2861 2862 2863/** 2864 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 2865 * containing all the immediate values previously allocated 2866 * with alloc_immediate_4(). 2867 */ 2868static boolean 2869emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 2870{ 2871 VGPU10OpcodeToken0 token; 2872 2873 assert(!emit->immediates_emitted); 2874 2875 token.value = 0; 2876 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 2877 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 2878 2879 /* Note: no begin/end_emit_instruction() calls */ 2880 emit_dword(emit, token.value); 2881 emit_dword(emit, 2 + 4 * emit->num_immediates); 2882 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 2883 2884 emit->immediates_emitted = TRUE; 2885 2886 return TRUE; 2887} 2888 2889 2890/** 2891 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 2892 * interpolation mode. 2893 * \return a VGPU10_INTERPOLATION_x value 2894 */ 2895static unsigned 2896translate_interpolation(const struct svga_shader_emitter_v10 *emit, 2897 enum tgsi_interpolate_mode interp, 2898 enum tgsi_interpolate_loc interpolate_loc) 2899{ 2900 if (interp == TGSI_INTERPOLATE_COLOR) { 2901 interp = emit->key.fs.flatshade ? 2902 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 2903 } 2904 2905 switch (interp) { 2906 case TGSI_INTERPOLATE_CONSTANT: 2907 return VGPU10_INTERPOLATION_CONSTANT; 2908 case TGSI_INTERPOLATE_LINEAR: 2909 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { 2910 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID; 2911 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && 2912 emit->version >= 41) { 2913 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE; 2914 } else { 2915 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 2916 } 2917 break; 2918 case TGSI_INTERPOLATE_PERSPECTIVE: 2919 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { 2920 return VGPU10_INTERPOLATION_LINEAR_CENTROID; 2921 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && 2922 emit->version >= 41) { 2923 return VGPU10_INTERPOLATION_LINEAR_SAMPLE; 2924 } else { 2925 return VGPU10_INTERPOLATION_LINEAR; 2926 } 2927 break; 2928 default: 2929 assert(!"Unexpected interpolation mode"); 2930 return VGPU10_INTERPOLATION_CONSTANT; 2931 } 2932} 2933 2934 2935/** 2936 * Translate a TGSI property to VGPU10. 2937 * Don't emit any instructions yet, only need to gather the primitive property 2938 * information. The output primitive topology might be changed later. The 2939 * final property instructions will be emitted as part of the pre-helper code. 2940 */ 2941static boolean 2942emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 2943 const struct tgsi_full_property *prop) 2944{ 2945 static const VGPU10_PRIMITIVE primType[] = { 2946 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 2947 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 2948 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 2949 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 2950 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 2951 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 2952 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 2953 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 2954 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 2955 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 2956 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 2957 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 2958 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 2959 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 2960 }; 2961 2962 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 2963 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 2964 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 2965 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 2966 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 2967 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 2968 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 2969 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 2970 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 2971 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 2972 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 2973 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 2974 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 2975 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 2976 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 2977 }; 2978 2979 static const unsigned inputArraySize[] = { 2980 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 2981 1, /* VGPU10_PRIMITIVE_POINT */ 2982 2, /* VGPU10_PRIMITIVE_LINE */ 2983 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 2984 0, 2985 0, 2986 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 2987 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 2988 }; 2989 2990 switch (prop->Property.PropertyName) { 2991 case TGSI_PROPERTY_GS_INPUT_PRIM: 2992 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 2993 emit->gs.prim_type = primType[prop->u[0].Data]; 2994 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 2995 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 2996 break; 2997 2998 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 2999 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 3000 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 3001 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 3002 break; 3003 3004 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 3005 emit->gs.max_out_vertices = prop->u[0].Data; 3006 break; 3007 3008 case TGSI_PROPERTY_GS_INVOCATIONS: 3009 emit->gs.invocations = prop->u[0].Data; 3010 break; 3011 3012 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 3013 case TGSI_PROPERTY_NEXT_SHADER: 3014 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: 3015 /* no-op */ 3016 break; 3017 3018 case TGSI_PROPERTY_TCS_VERTICES_OUT: 3019 /* This info is already captured in the shader key */ 3020 break; 3021 3022 case TGSI_PROPERTY_TES_PRIM_MODE: 3023 emit->tes.prim_mode = prop->u[0].Data; 3024 break; 3025 3026 case TGSI_PROPERTY_TES_SPACING: 3027 emit->tes.spacing = prop->u[0].Data; 3028 break; 3029 3030 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: 3031 emit->tes.vertices_order_cw = prop->u[0].Data; 3032 break; 3033 3034 case TGSI_PROPERTY_TES_POINT_MODE: 3035 emit->tes.point_mode = prop->u[0].Data; 3036 break; 3037 3038 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: 3039 emit->cs.block_width = prop->u[0].Data; 3040 break; 3041 3042 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT: 3043 emit->cs.block_height = prop->u[0].Data; 3044 break; 3045 3046 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH: 3047 emit->cs.block_depth = prop->u[0].Data; 3048 break; 3049 3050 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: 3051 emit->fs.forceEarlyDepthStencil = TRUE; 3052 break; 3053 3054 default: 3055 debug_printf("Unexpected TGSI property %s\n", 3056 tgsi_property_names[prop->Property.PropertyName]); 3057 } 3058 3059 return TRUE; 3060} 3061 3062 3063static void 3064emit_property_instruction(struct svga_shader_emitter_v10 *emit, 3065 VGPU10OpcodeToken0 opcode0, unsigned nData, 3066 unsigned data) 3067{ 3068 begin_emit_instruction(emit); 3069 emit_dword(emit, opcode0.value); 3070 if (nData) 3071 emit_dword(emit, data); 3072 end_emit_instruction(emit); 3073} 3074 3075 3076/** 3077 * Emit property instructions 3078 */ 3079static void 3080emit_property_instructions(struct svga_shader_emitter_v10 *emit) 3081{ 3082 VGPU10OpcodeToken0 opcode0; 3083 3084 assert(emit->unit == PIPE_SHADER_GEOMETRY); 3085 3086 /* emit input primitive type declaration */ 3087 opcode0.value = 0; 3088 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; 3089 opcode0.primitive = emit->gs.prim_type; 3090 emit_property_instruction(emit, opcode0, 0, 0); 3091 3092 /* emit max output vertices */ 3093 opcode0.value = 0; 3094 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; 3095 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); 3096 3097 if (emit->version >= 50 && emit->gs.invocations > 0) { 3098 opcode0.value = 0; 3099 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT; 3100 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations); 3101 } 3102} 3103 3104 3105/** 3106 * A helper function to declare tessellator domain in a hull shader or 3107 * in the domain shader. 3108 */ 3109static void 3110emit_tessellator_domain(struct svga_shader_emitter_v10 *emit, 3111 enum pipe_prim_type prim_mode) 3112{ 3113 VGPU10OpcodeToken0 opcode0; 3114 3115 opcode0.value = 0; 3116 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN; 3117 switch (prim_mode) { 3118 case PIPE_PRIM_QUADS: 3119 case PIPE_PRIM_LINES: 3120 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD; 3121 break; 3122 case PIPE_PRIM_TRIANGLES: 3123 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI; 3124 break; 3125 default: 3126 debug_printf("Invalid tessellator prim mode %d\n", prim_mode); 3127 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED; 3128 } 3129 begin_emit_instruction(emit); 3130 emit_dword(emit, opcode0.value); 3131 end_emit_instruction(emit); 3132} 3133 3134 3135/** 3136 * Emit domain shader declarations. 3137 */ 3138static void 3139emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) 3140{ 3141 VGPU10OpcodeToken0 opcode0; 3142 3143 assert(emit->unit == PIPE_SHADER_TESS_EVAL); 3144 3145 /* Emit the input control point count */ 3146 assert(emit->key.tes.vertices_per_patch >= 0 && 3147 emit->key.tes.vertices_per_patch <= 32); 3148 3149 opcode0.value = 0; 3150 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; 3151 opcode0.controlPointCount = emit->key.tes.vertices_per_patch; 3152 begin_emit_instruction(emit); 3153 emit_dword(emit, opcode0.value); 3154 end_emit_instruction(emit); 3155 3156 emit_tessellator_domain(emit, emit->tes.prim_mode); 3157 3158 /* Specify a max for swizzles of the domain point according to the 3159 * tessellator domain type. 3160 */ 3161 emit->tes.swizzle_max = emit->tes.prim_mode == PIPE_PRIM_TRIANGLES ? 3162 TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y; 3163} 3164 3165 3166/** 3167 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed 3168 * to implement some instructions. We pre-allocate those values here 3169 * in the immediate constant buffer. 3170 */ 3171static void 3172alloc_common_immediates(struct svga_shader_emitter_v10 *emit) 3173{ 3174 unsigned n = 0; 3175 3176 emit->common_immediate_pos[n++] = 3177 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); 3178 3179 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) { 3180 emit->common_immediate_pos[n++] = 3181 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f); 3182 } 3183 3184 emit->common_immediate_pos[n++] = 3185 alloc_immediate_int4(emit, 0, 1, 2, -1); 3186 3187 emit->common_immediate_pos[n++] = 3188 alloc_immediate_int4(emit, 3, 4, 5, 6); 3189 3190 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || 3191 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { 3192 emit->common_immediate_pos[n++] = 3193 alloc_immediate_int4(emit, 31, 0, 0, 0); 3194 } 3195 3196 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 || 3197 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 || 3198 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) { 3199 emit->common_immediate_pos[n++] = 3200 alloc_immediate_int4(emit, 32, 0, 0, 0); 3201 } 3202 3203 if (emit->key.vs.attrib_puint_to_snorm) { 3204 emit->common_immediate_pos[n++] = 3205 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f); 3206 } 3207 3208 if (emit->key.vs.attrib_puint_to_uscaled) { 3209 emit->common_immediate_pos[n++] = 3210 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); 3211 } 3212 3213 if (emit->key.vs.attrib_puint_to_sscaled) { 3214 emit->common_immediate_pos[n++] = 3215 alloc_immediate_int4(emit, 22, 12, 2, 0); 3216 3217 emit->common_immediate_pos[n++] = 3218 alloc_immediate_int4(emit, 22, 30, 0, 0); 3219 } 3220 3221 if (emit->vposition.num_prescale > 1) { 3222 unsigned i; 3223 for (i = 0; i < emit->vposition.num_prescale; i+=4) { 3224 emit->common_immediate_pos[n++] = 3225 alloc_immediate_int4(emit, i, i+1, i+2, i+3); 3226 } 3227 } 3228 3229 emit->immediates_dbl = (double (*)[2]) emit->immediates; 3230 3231 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) { 3232 emit->common_immediate_pos[n++] = 3233 alloc_immediate_double2(emit, -1.0, -1.0); 3234 } 3235 3236 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 || 3237 emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) { 3238 emit->common_immediate_pos[n++] = 3239 alloc_immediate_double2(emit, 0.0, 0.0); 3240 emit->common_immediate_pos[n++] = 3241 alloc_immediate_double2(emit, 1.0, 1.0); 3242 } 3243 3244 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) { 3245 emit->common_immediate_pos[n++] = 3246 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0); 3247 } 3248 3249 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 3250 3251 unsigned i; 3252 3253 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { 3254 if (emit->key.tex[i].texel_bias) { 3255 /* Replace 0.0f if more immediate float value is needed */ 3256 emit->common_immediate_pos[n++] = 3257 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f); 3258 break; 3259 } 3260 } 3261 3262 /** TODO: allocate immediates for all possible element byte offset? 3263 */ 3264 if (emit->raw_bufs) { 3265 unsigned i; 3266 for (i = 7; i < 12; i+=4) { 3267 emit->common_immediate_pos[n++] = 3268 alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); 3269 } 3270 } 3271 3272 if (emit->info.indirect_files & 3273 (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) { 3274 unsigned i; 3275 for (i = 7; i < 8; i+=4) { 3276 emit->common_immediate_pos[n++] = 3277 alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); 3278 } 3279 } 3280 3281 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 3282 emit->num_common_immediates = n; 3283} 3284 3285 3286/** 3287 * Emit hull shader declarations. 3288*/ 3289static void 3290emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) 3291{ 3292 VGPU10OpcodeToken0 opcode0; 3293 3294 /* Emit the input control point count */ 3295 assert(emit->key.tcs.vertices_per_patch > 0 && 3296 emit->key.tcs.vertices_per_patch <= 32); 3297 3298 opcode0.value = 0; 3299 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; 3300 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch; 3301 begin_emit_instruction(emit); 3302 emit_dword(emit, opcode0.value); 3303 end_emit_instruction(emit); 3304 3305 /* Emit the output control point count */ 3306 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32); 3307 3308 opcode0.value = 0; 3309 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT; 3310 opcode0.controlPointCount = emit->key.tcs.vertices_out; 3311 begin_emit_instruction(emit); 3312 emit_dword(emit, opcode0.value); 3313 end_emit_instruction(emit); 3314 3315 /* Emit tessellator domain */ 3316 emit_tessellator_domain(emit, emit->key.tcs.prim_mode); 3317 3318 /* Emit tessellator output primitive */ 3319 opcode0.value = 0; 3320 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE; 3321 if (emit->key.tcs.point_mode) { 3322 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT; 3323 } 3324 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { 3325 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE; 3326 } 3327 else { 3328 assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS || 3329 emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES); 3330 3331 if (emit->key.tcs.vertices_order_cw) 3332 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW; 3333 else 3334 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW; 3335 } 3336 begin_emit_instruction(emit); 3337 emit_dword(emit, opcode0.value); 3338 end_emit_instruction(emit); 3339 3340 /* Emit tessellator partitioning */ 3341 opcode0.value = 0; 3342 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING; 3343 switch (emit->key.tcs.spacing) { 3344 case PIPE_TESS_SPACING_FRACTIONAL_ODD: 3345 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; 3346 break; 3347 case PIPE_TESS_SPACING_FRACTIONAL_EVEN: 3348 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; 3349 break; 3350 case PIPE_TESS_SPACING_EQUAL: 3351 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER; 3352 break; 3353 default: 3354 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing); 3355 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED; 3356 } 3357 begin_emit_instruction(emit); 3358 emit_dword(emit, opcode0.value); 3359 end_emit_instruction(emit); 3360 3361 alloc_common_immediates(emit); 3362 3363 /* Declare constant registers */ 3364 emit_constant_declaration(emit); 3365 3366 /* Declare samplers and resources */ 3367 emit_sampler_declarations(emit); 3368 emit_resource_declarations(emit); 3369 3370 /* Declare images */ 3371 emit_image_declarations(emit); 3372 3373 /* Declare shader buffers */ 3374 emit_shader_buf_declarations(emit); 3375 3376 /* Declare atomic buffers */ 3377 emit_atomic_buf_declarations(emit); 3378 3379 int nVertices = emit->key.tcs.vertices_per_patch; 3380 emit->tcs.imm_index = 3381 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0); 3382 3383 /* Now, emit the constant block containing all the immediates 3384 * declared by shader, as well as the extra ones seen above. 3385 */ 3386 emit_vgpu10_immediates_block(emit); 3387 3388} 3389 3390 3391/** 3392 * A helper function to determine if control point phase is needed. 3393 * Returns TRUE if there is control point output. 3394 */ 3395static boolean 3396needs_control_point_phase(struct svga_shader_emitter_v10 *emit) 3397{ 3398 unsigned i; 3399 3400 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 3401 3402 /* If output control point count does not match the input count, 3403 * we need a control point phase to explicitly set the output control 3404 * points. 3405 */ 3406 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) && 3407 emit->key.tcs.vertices_out) 3408 return TRUE; 3409 3410 for (i = 0; i < emit->info.num_outputs; i++) { 3411 switch (emit->info.output_semantic_name[i]) { 3412 case TGSI_SEMANTIC_PATCH: 3413 case TGSI_SEMANTIC_TESSOUTER: 3414 case TGSI_SEMANTIC_TESSINNER: 3415 break; 3416 default: 3417 return TRUE; 3418 } 3419 } 3420 return FALSE; 3421} 3422 3423 3424/** 3425 * A helper function to add shader signature for passthrough control point 3426 * phase. This signature is also generated for passthrough control point 3427 * phase from HLSL compiler and is needed by Metal Renderer. 3428 */ 3429static void 3430emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit) 3431{ 3432 struct svga_shader_signature *sgn = &emit->signature; 3433 SVGA3dDXShaderSignatureEntry *sgnEntry; 3434 unsigned i; 3435 3436 for (i = 0; i < emit->info.num_inputs; i++) { 3437 unsigned index = emit->linkage.input_map[i]; 3438 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i]; 3439 3440 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++]; 3441 3442 set_shader_signature_entry(sgnEntry, index, 3443 tgsi_semantic_to_sgn_name[sem_name], 3444 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3445 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3446 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3447 3448 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; 3449 3450 set_shader_signature_entry(sgnEntry, i, 3451 tgsi_semantic_to_sgn_name[sem_name], 3452 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3453 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3454 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3455 } 3456} 3457 3458 3459/** 3460 * A helper function to emit an instruction to start the control point phase 3461 * in the hull shader. 3462 */ 3463static void 3464emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit) 3465{ 3466 VGPU10OpcodeToken0 opcode0; 3467 3468 opcode0.value = 0; 3469 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE; 3470 begin_emit_instruction(emit); 3471 emit_dword(emit, opcode0.value); 3472 end_emit_instruction(emit); 3473} 3474 3475 3476/** 3477 * Start the hull shader control point phase 3478 */ 3479static boolean 3480emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) 3481{ 3482 /* If there is no control point output, skip the control point phase. */ 3483 if (!needs_control_point_phase(emit)) { 3484 if (!emit->key.tcs.vertices_out) { 3485 /** 3486 * If the tcs does not explicitly generate any control point output 3487 * and the tes does not use any input control point, then 3488 * emit an empty control point phase with zero output control 3489 * point count. 3490 */ 3491 emit_control_point_phase_instruction(emit); 3492 3493 /** 3494 * Since this is an empty control point phase, we will need to 3495 * add input signatures when we parse the tcs again in the 3496 * patch constant phase. 3497 */ 3498 emit->tcs.fork_phase_add_signature = TRUE; 3499 } 3500 else { 3501 /** 3502 * Before skipping the control point phase, add the signature for 3503 * the passthrough control point. 3504 */ 3505 emit_passthrough_control_point_signature(emit); 3506 } 3507 return FALSE; 3508 } 3509 3510 /* Start the control point phase in the hull shader */ 3511 emit_control_point_phase_instruction(emit); 3512 3513 /* Declare the output control point ID */ 3514 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) { 3515 /* Add invocation id declaration if it does not exist */ 3516 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1; 3517 } 3518 3519 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3520 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, 3521 VGPU10_OPERAND_INDEX_0D, 3522 0, 1, 3523 VGPU10_NAME_UNDEFINED, 3524 VGPU10_OPERAND_0_COMPONENT, 0, 3525 0, 3526 VGPU10_INTERPOLATION_CONSTANT, TRUE, 3527 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 3528 3529 if (emit->tcs.prim_id_index != INVALID_INDEX) { 3530 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3531 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 3532 VGPU10_OPERAND_INDEX_0D, 3533 0, 1, 3534 VGPU10_NAME_UNDEFINED, 3535 VGPU10_OPERAND_0_COMPONENT, 3536 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 3537 0, 3538 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 3539 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 3540 } 3541 3542 return TRUE; 3543} 3544 3545 3546/** 3547 * Start the hull shader patch constant phase and 3548 * do the second pass of the tcs translation and emit 3549 * the relevant declarations and instructions for this phase. 3550 */ 3551static boolean 3552emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, 3553 struct tgsi_parse_context *parse) 3554{ 3555 unsigned inst_number = 0; 3556 boolean ret = TRUE; 3557 VGPU10OpcodeToken0 opcode0; 3558 3559 emit->skip_instruction = FALSE; 3560 3561 /* Start the patch constant phase */ 3562 opcode0.value = 0; 3563 opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE; 3564 begin_emit_instruction(emit); 3565 emit_dword(emit, opcode0.value); 3566 end_emit_instruction(emit); 3567 3568 /* Set the current phase to patch constant phase */ 3569 emit->tcs.control_point_phase = FALSE; 3570 3571 if (emit->tcs.prim_id_index != INVALID_INDEX) { 3572 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3573 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 3574 VGPU10_OPERAND_INDEX_0D, 3575 0, 1, 3576 VGPU10_NAME_UNDEFINED, 3577 VGPU10_OPERAND_0_COMPONENT, 3578 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 3579 0, 3580 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 3581 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 3582 } 3583 3584 /* Emit declarations for this phase */ 3585 emit->index_range.required = 3586 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; 3587 emit_tcs_input_declarations(emit); 3588 3589 if (emit->index_range.start_index != INVALID_INDEX) { 3590 emit_index_range_declaration(emit); 3591 } 3592 3593 emit->index_range.required = 3594 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; 3595 emit_tcs_output_declarations(emit); 3596 3597 if (emit->index_range.start_index != INVALID_INDEX) { 3598 emit_index_range_declaration(emit); 3599 } 3600 emit->index_range.required = FALSE; 3601 3602 emit_temporaries_declaration(emit); 3603 3604 /* Reset the token position to the first instruction token 3605 * in preparation for the second pass of the shader 3606 */ 3607 parse->Position = emit->tcs.instruction_token_pos; 3608 3609 while (!tgsi_parse_end_of_tokens(parse)) { 3610 tgsi_parse_token(parse); 3611 3612 assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); 3613 ret = emit_vgpu10_instruction(emit, inst_number++, 3614 &parse->FullToken.FullInstruction); 3615 3616 /* Usually this applies to TCS only. If shader is reading output of 3617 * patch constant in fork phase, we should reemit all instructions 3618 * which are writting into output of patch constant in fork phase 3619 * to store results into temporaries. 3620 */ 3621 assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction)); 3622 if (emit->reemit_instruction) { 3623 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 3624 ret = emit_vgpu10_instruction(emit, inst_number, 3625 &parse->FullToken.FullInstruction); 3626 } else if (emit->reemit_rawbuf_instruction) { 3627 ret = emit_rawbuf_instruction(emit, inst_number, 3628 &parse->FullToken.FullInstruction); 3629 } 3630 3631 if (!ret) 3632 return FALSE; 3633 } 3634 3635 return TRUE; 3636} 3637 3638 3639/** 3640 * Emit the thread group declaration for compute shader. 3641 */ 3642static void 3643emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit) 3644{ 3645 VGPU10OpcodeToken0 opcode0; 3646 3647 opcode0.value = 0; 3648 opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP; 3649 begin_emit_instruction(emit); 3650 emit_dword(emit, opcode0.value); 3651 emit_dword(emit, emit->cs.block_width); 3652 emit_dword(emit, emit->cs.block_height); 3653 emit_dword(emit, emit->cs.block_depth); 3654 end_emit_instruction(emit); 3655} 3656 3657 3658/** 3659 * Emit index range declaration. 3660 */ 3661static boolean 3662emit_index_range_declaration(struct svga_shader_emitter_v10 *emit) 3663{ 3664 if (emit->version < 50) 3665 return TRUE; 3666 3667 assert(emit->index_range.start_index != INVALID_INDEX); 3668 assert(emit->index_range.count != 0); 3669 assert(emit->index_range.required); 3670 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS); 3671 assert(emit->index_range.dim != 0); 3672 assert(emit->index_range.size != 0); 3673 3674 VGPU10OpcodeToken0 opcode0; 3675 VGPU10OperandToken0 operand0; 3676 3677 opcode0.value = 0; 3678 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE; 3679 3680 operand0.value = 0; 3681 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 3682 operand0.indexDimension = emit->index_range.dim; 3683 operand0.operandType = emit->index_range.operandType; 3684 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3685 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3686 3687 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) 3688 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3689 3690 begin_emit_instruction(emit); 3691 emit_dword(emit, opcode0.value); 3692 emit_dword(emit, operand0.value); 3693 3694 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) { 3695 emit_dword(emit, emit->index_range.size); 3696 emit_dword(emit, emit->index_range.start_index); 3697 emit_dword(emit, emit->index_range.count); 3698 } 3699 else { 3700 emit_dword(emit, emit->index_range.start_index); 3701 emit_dword(emit, emit->index_range.count); 3702 } 3703 3704 end_emit_instruction(emit); 3705 3706 /* Reset fields in emit->index_range struct except 3707 * emit->index_range.required which will be reset afterwards 3708 */ 3709 emit->index_range.count = 0; 3710 emit->index_range.operandType = VGPU10_NUM_OPERANDS; 3711 emit->index_range.start_index = INVALID_INDEX; 3712 emit->index_range.size = 0; 3713 emit->index_range.dim = 0; 3714 3715 return TRUE; 3716} 3717 3718 3719/** 3720 * Emit a vgpu10 declaration "instruction". 3721 * \param index the register index 3722 * \param size array size of the operand. In most cases, it is 1, 3723 * but for inputs to geometry shader, the array size varies 3724 * depending on the primitive type. 3725 */ 3726static void 3727emit_decl_instruction(struct svga_shader_emitter_v10 *emit, 3728 VGPU10OpcodeToken0 opcode0, 3729 VGPU10OperandToken0 operand0, 3730 VGPU10NameToken name_token, 3731 unsigned index, unsigned size) 3732{ 3733 assert(opcode0.opcodeType); 3734 assert(operand0.mask || 3735 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) || 3736 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || 3737 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) || 3738 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || 3739 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) || 3740 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) || 3741 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) || 3742 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM)); 3743 3744 begin_emit_instruction(emit); 3745 emit_dword(emit, opcode0.value); 3746 3747 emit_dword(emit, operand0.value); 3748 3749 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { 3750 /* Next token is the index of the register to declare */ 3751 emit_dword(emit, index); 3752 } 3753 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { 3754 /* Next token is the size of the register */ 3755 emit_dword(emit, size); 3756 3757 /* Followed by the index of the register */ 3758 emit_dword(emit, index); 3759 } 3760 3761 if (name_token.value) { 3762 emit_dword(emit, name_token.value); 3763 } 3764 3765 end_emit_instruction(emit); 3766} 3767 3768 3769/** 3770 * Emit the declaration for a shader input. 3771 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx 3772 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x 3773 * \param dim index dimension 3774 * \param index the input register index 3775 * \param size array size of the operand. In most cases, it is 1, 3776 * but for inputs to geometry shader, the array size varies 3777 * depending on the primitive type. For tessellation control 3778 * shader, the array size is the vertex count per patch. 3779 * \param name one of VGPU10_NAME_x 3780 * \parma numComp number of components 3781 * \param selMode component selection mode 3782 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 3783 * \param interpMode interpolation mode 3784 */ 3785static void 3786emit_input_declaration(struct svga_shader_emitter_v10 *emit, 3787 VGPU10_OPCODE_TYPE opcodeType, 3788 VGPU10_OPERAND_TYPE operandType, 3789 VGPU10_OPERAND_INDEX_DIMENSION dim, 3790 unsigned index, unsigned size, 3791 VGPU10_SYSTEM_NAME name, 3792 VGPU10_OPERAND_NUM_COMPONENTS numComp, 3793 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, 3794 unsigned usageMask, 3795 VGPU10_INTERPOLATION_MODE interpMode, 3796 boolean addSignature, 3797 SVGA3dDXSignatureSemanticName sgnName) 3798{ 3799 VGPU10OpcodeToken0 opcode0; 3800 VGPU10OperandToken0 operand0; 3801 VGPU10NameToken name_token; 3802 3803 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 3804 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || 3805 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || 3806 opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV || 3807 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || 3808 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV || 3809 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); 3810 assert(operandType == VGPU10_OPERAND_TYPE_INPUT || 3811 operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || 3812 operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK || 3813 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || 3814 operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID || 3815 operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT || 3816 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT || 3817 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT || 3818 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || 3819 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID || 3820 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP); 3821 3822 assert(numComp <= VGPU10_OPERAND_4_COMPONENT); 3823 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); 3824 assert(dim <= VGPU10_OPERAND_INDEX_3D); 3825 assert(name == VGPU10_NAME_UNDEFINED || 3826 name == VGPU10_NAME_POSITION || 3827 name == VGPU10_NAME_INSTANCE_ID || 3828 name == VGPU10_NAME_VERTEX_ID || 3829 name == VGPU10_NAME_PRIMITIVE_ID || 3830 name == VGPU10_NAME_IS_FRONT_FACE || 3831 name == VGPU10_NAME_SAMPLE_INDEX || 3832 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 3833 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX); 3834 3835 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || 3836 interpMode == VGPU10_INTERPOLATION_CONSTANT || 3837 interpMode == VGPU10_INTERPOLATION_LINEAR || 3838 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || 3839 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || 3840 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID || 3841 interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE || 3842 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE); 3843 3844 check_register_index(emit, opcodeType, index); 3845 3846 opcode0.value = operand0.value = name_token.value = 0; 3847 3848 opcode0.opcodeType = opcodeType; 3849 opcode0.interpolationMode = interpMode; 3850 3851 operand0.operandType = operandType; 3852 operand0.numComponents = numComp; 3853 operand0.selectionMode = selMode; 3854 operand0.mask = usageMask; 3855 operand0.indexDimension = dim; 3856 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3857 if (dim == VGPU10_OPERAND_INDEX_2D) 3858 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3859 3860 name_token.name = name; 3861 3862 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); 3863 3864 if (addSignature) { 3865 struct svga_shader_signature *sgn = &emit->signature; 3866 if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) { 3867 /* Set patch constant signature */ 3868 SVGA3dDXShaderSignatureEntry *sgnEntry = 3869 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 3870 set_shader_signature_entry(sgnEntry, index, 3871 sgnName, usageMask, 3872 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3873 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3874 3875 } else if (operandType == VGPU10_OPERAND_TYPE_INPUT || 3876 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) { 3877 /* Set input signature */ 3878 SVGA3dDXShaderSignatureEntry *sgnEntry = 3879 &sgn->inputs[sgn->header.numInputSignatures++]; 3880 set_shader_signature_entry(sgnEntry, index, 3881 sgnName, usageMask, 3882 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3883 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3884 } 3885 } 3886 3887 if (emit->index_range.required) { 3888 /* Here, index_range declaration is only applicable for opcodeType 3889 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and 3890 * for operandType VGPU10_OPERAND_TYPE_INPUT, 3891 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and 3892 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT. 3893 */ 3894 if ((opcodeType != VGPU10_OPCODE_DCL_INPUT && 3895 opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) || 3896 (operandType != VGPU10_OPERAND_TYPE_INPUT && 3897 operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT && 3898 operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) { 3899 if (emit->index_range.start_index != INVALID_INDEX) { 3900 emit_index_range_declaration(emit); 3901 } 3902 return; 3903 } 3904 3905 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { 3906 /* Need record new index_range */ 3907 emit->index_range.count = 1; 3908 emit->index_range.operandType = operandType; 3909 emit->index_range.start_index = index; 3910 emit->index_range.size = size; 3911 emit->index_range.dim = dim; 3912 } 3913 else if (index != 3914 (emit->index_range.start_index + emit->index_range.count) || 3915 emit->index_range.operandType != operandType) { 3916 /* Input index is not contiguous with index range or operandType is 3917 * different from index range's operandType. We need to emit current 3918 * index_range first and then start recording next index range. 3919 */ 3920 emit_index_range_declaration(emit); 3921 3922 emit->index_range.count = 1; 3923 emit->index_range.operandType = operandType; 3924 emit->index_range.start_index = index; 3925 emit->index_range.size = size; 3926 emit->index_range.dim = dim; 3927 } 3928 else if (emit->index_range.operandType == operandType) { 3929 /* Since input index is contiguous with index range and operandType 3930 * is same as index range's operandType, increment index range count. 3931 */ 3932 emit->index_range.count++; 3933 } 3934 } 3935} 3936 3937 3938/** 3939 * Emit the declaration for a shader output. 3940 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx 3941 * \param index the output register index 3942 * \param name one of VGPU10_NAME_x 3943 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 3944 */ 3945static void 3946emit_output_declaration(struct svga_shader_emitter_v10 *emit, 3947 VGPU10_OPCODE_TYPE type, unsigned index, 3948 VGPU10_SYSTEM_NAME name, 3949 unsigned writemask, 3950 boolean addSignature, 3951 SVGA3dDXSignatureSemanticName sgnName) 3952{ 3953 VGPU10OpcodeToken0 opcode0; 3954 VGPU10OperandToken0 operand0; 3955 VGPU10NameToken name_token; 3956 3957 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 3958 assert(type == VGPU10_OPCODE_DCL_OUTPUT || 3959 type == VGPU10_OPCODE_DCL_OUTPUT_SGV || 3960 type == VGPU10_OPCODE_DCL_OUTPUT_SIV); 3961 assert(name == VGPU10_NAME_UNDEFINED || 3962 name == VGPU10_NAME_POSITION || 3963 name == VGPU10_NAME_PRIMITIVE_ID || 3964 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 3965 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX || 3966 name == VGPU10_NAME_CLIP_DISTANCE); 3967 3968 check_register_index(emit, type, index); 3969 3970 opcode0.value = operand0.value = name_token.value = 0; 3971 3972 opcode0.opcodeType = type; 3973 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 3974 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 3975 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 3976 operand0.mask = writemask; 3977 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3978 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3979 3980 name_token.name = name; 3981 3982 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 3983 3984 /* Capture output signature */ 3985 if (addSignature) { 3986 struct svga_shader_signature *sgn = &emit->signature; 3987 SVGA3dDXShaderSignatureEntry *sgnEntry = 3988 &sgn->outputs[sgn->header.numOutputSignatures++]; 3989 set_shader_signature_entry(sgnEntry, index, 3990 sgnName, writemask, 3991 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3992 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3993 } 3994 3995 if (emit->index_range.required) { 3996 /* Here, index_range declaration is only applicable for opcodeType 3997 * VGPU10_OPCODE_DCL_OUTPUT and for operandType 3998 * VGPU10_OPERAND_TYPE_OUTPUT. 3999 */ 4000 if (type != VGPU10_OPCODE_DCL_OUTPUT) { 4001 if (emit->index_range.start_index != INVALID_INDEX) { 4002 emit_index_range_declaration(emit); 4003 } 4004 return; 4005 } 4006 4007 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { 4008 /* Need record new index_range */ 4009 emit->index_range.count = 1; 4010 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 4011 emit->index_range.start_index = index; 4012 emit->index_range.size = 1; 4013 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; 4014 } 4015 else if (index != 4016 (emit->index_range.start_index + emit->index_range.count)) { 4017 /* Output index is not contiguous with index range. We need to 4018 * emit current index_range first and then start recording next 4019 * index range. 4020 */ 4021 emit_index_range_declaration(emit); 4022 4023 emit->index_range.count = 1; 4024 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 4025 emit->index_range.start_index = index; 4026 emit->index_range.size = 1; 4027 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; 4028 } 4029 else { 4030 /* Since output index is contiguous with index range, increment 4031 * index range count. 4032 */ 4033 emit->index_range.count++; 4034 } 4035 } 4036} 4037 4038 4039/** 4040 * Emit the declaration for the fragment depth output. 4041 */ 4042static void 4043emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 4044{ 4045 VGPU10OpcodeToken0 opcode0; 4046 VGPU10OperandToken0 operand0; 4047 VGPU10NameToken name_token; 4048 4049 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4050 4051 opcode0.value = operand0.value = name_token.value = 0; 4052 4053 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 4054 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 4055 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 4056 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 4057 operand0.mask = 0; 4058 4059 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 4060} 4061 4062 4063/** 4064 * Emit the declaration for the fragment sample mask/coverage output. 4065 */ 4066static void 4067emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) 4068{ 4069 VGPU10OpcodeToken0 opcode0; 4070 VGPU10OperandToken0 operand0; 4071 VGPU10NameToken name_token; 4072 4073 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4074 assert(emit->version >= 41); 4075 4076 opcode0.value = operand0.value = name_token.value = 0; 4077 4078 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 4079 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; 4080 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 4081 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 4082 operand0.mask = 0; 4083 4084 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 4085} 4086 4087 4088/** 4089 * Emit output declarations for fragment shader. 4090 */ 4091static void 4092emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) 4093{ 4094 unsigned int i; 4095 4096 for (i = 0; i < emit->info.num_outputs; i++) { 4097 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 4098 const enum tgsi_semantic semantic_name = 4099 emit->info.output_semantic_name[i]; 4100 const unsigned semantic_index = emit->info.output_semantic_index[i]; 4101 unsigned index = i; 4102 4103 if (semantic_name == TGSI_SEMANTIC_COLOR) { 4104 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); 4105 4106 emit->fs.color_out_index[semantic_index] = index; 4107 4108 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs, 4109 index + 1); 4110 4111 /* The semantic index is the shader's color output/buffer index */ 4112 emit_output_declaration(emit, 4113 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 4114 VGPU10_NAME_UNDEFINED, 4115 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4116 TRUE, 4117 map_tgsi_semantic_to_sgn_name(semantic_name)); 4118 4119 if (semantic_index == 0) { 4120 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 4121 /* Emit declarations for the additional color outputs 4122 * for broadcasting. 4123 */ 4124 unsigned j; 4125 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 4126 /* Allocate a new output index */ 4127 unsigned idx = emit->info.num_outputs + j - 1; 4128 emit->fs.color_out_index[j] = idx; 4129 emit_output_declaration(emit, 4130 VGPU10_OPCODE_DCL_OUTPUT, idx, 4131 VGPU10_NAME_UNDEFINED, 4132 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4133 TRUE, 4134 map_tgsi_semantic_to_sgn_name(semantic_name)); 4135 emit->info.output_semantic_index[idx] = j; 4136 } 4137 4138 emit->fs.num_color_outputs = 4139 emit->key.fs.write_color0_to_n_cbufs; 4140 } 4141 } 4142 } 4143 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 4144 /* Fragment depth output */ 4145 emit_fragdepth_output_declaration(emit); 4146 } 4147 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { 4148 /* Sample mask output */ 4149 emit_samplemask_output_declaration(emit); 4150 } 4151 else { 4152 assert(!"Bad output semantic name"); 4153 } 4154 } 4155} 4156 4157 4158/** 4159 * Emit common output declaration for vertex processing. 4160 */ 4161static void 4162emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, 4163 unsigned index, unsigned writemask, 4164 boolean addSignature) 4165{ 4166 const enum tgsi_semantic semantic_name = 4167 emit->info.output_semantic_name[index]; 4168 const unsigned semantic_index = emit->info.output_semantic_index[index]; 4169 unsigned name, type; 4170 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 4171 4172 assert(emit->unit != PIPE_SHADER_FRAGMENT && 4173 emit->unit != PIPE_SHADER_COMPUTE); 4174 4175 switch (semantic_name) { 4176 case TGSI_SEMANTIC_POSITION: 4177 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 4178 /* position will be declared in control point only */ 4179 assert(emit->tcs.control_point_phase); 4180 type = VGPU10_OPCODE_DCL_OUTPUT; 4181 name = VGPU10_NAME_UNDEFINED; 4182 emit_output_declaration(emit, type, index, name, final_mask, TRUE, 4183 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4184 return; 4185 } 4186 else { 4187 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 4188 name = VGPU10_NAME_POSITION; 4189 } 4190 /* Save the index of the vertex position output register */ 4191 emit->vposition.out_index = index; 4192 break; 4193 case TGSI_SEMANTIC_CLIPDIST: 4194 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 4195 name = VGPU10_NAME_CLIP_DISTANCE; 4196 /* save the starting index of the clip distance output register */ 4197 if (semantic_index == 0) 4198 emit->clip_dist_out_index = index; 4199 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index); 4200 if (final_mask == 0x0) 4201 return; /* discard this do-nothing declaration */ 4202 break; 4203 case TGSI_SEMANTIC_CLIPVERTEX: 4204 type = VGPU10_OPCODE_DCL_OUTPUT; 4205 name = VGPU10_NAME_UNDEFINED; 4206 emit->clip_vertex_out_index = index; 4207 break; 4208 default: 4209 /* generic output */ 4210 type = VGPU10_OPCODE_DCL_OUTPUT; 4211 name = VGPU10_NAME_UNDEFINED; 4212 } 4213 4214 emit_output_declaration(emit, type, index, name, final_mask, addSignature, 4215 map_tgsi_semantic_to_sgn_name(semantic_name)); 4216} 4217 4218 4219/** 4220 * Emit declaration for outputs in vertex shader. 4221 */ 4222static void 4223emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) 4224{ 4225 unsigned i; 4226 for (i = 0; i < emit->info.num_outputs; i++) { 4227 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); 4228 } 4229} 4230 4231 4232/** 4233 * A helper function to determine the writemask for an output 4234 * for the specified stream. 4235 */ 4236static unsigned 4237output_writemask_for_stream(unsigned stream, ubyte output_streams, 4238 ubyte output_usagemask) 4239{ 4240 unsigned i; 4241 unsigned writemask = 0; 4242 4243 for (i = 0; i < 4; i++) { 4244 if ((output_streams & 0x3) == stream) 4245 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i); 4246 output_streams >>= 2; 4247 } 4248 return writemask & output_usagemask; 4249} 4250 4251 4252/** 4253 * Emit declaration for outputs in geometry shader. 4254 */ 4255static void 4256emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) 4257{ 4258 unsigned i; 4259 VGPU10OpcodeToken0 opcode0; 4260 unsigned numStreamsSupported = 1; 4261 int s; 4262 4263 if (emit->version >= 50) { 4264 numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components); 4265 } 4266 4267 /** 4268 * Start emitting from the last stream first, so we end with 4269 * stream 0, so any of the auxiliary output declarations will 4270 * go to stream 0. 4271 */ 4272 for (s = numStreamsSupported-1; s >= 0; s--) { 4273 4274 if (emit->info.num_stream_output_components[s] == 0) 4275 continue; 4276 4277 if (emit->version >= 50) { 4278 /* DCL_STREAM stream */ 4279 begin_emit_instruction(emit); 4280 emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE); 4281 emit_stream_register(emit, s); 4282 end_emit_instruction(emit); 4283 } 4284 4285 /* emit output primitive topology declaration */ 4286 opcode0.value = 0; 4287 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; 4288 opcode0.primitiveTopology = emit->gs.prim_topology; 4289 emit_property_instruction(emit, opcode0, 0, 0); 4290 4291 for (i = 0; i < emit->info.num_outputs; i++) { 4292 unsigned writemask; 4293 4294 /* find out the writemask for this stream */ 4295 writemask = output_writemask_for_stream(s, emit->info.output_streams[i], 4296 emit->output_usage_mask[i]); 4297 4298 if (writemask) { 4299 enum tgsi_semantic semantic_name = 4300 emit->info.output_semantic_name[i]; 4301 4302 /* TODO: Still need to take care of a special case where a 4303 * single varying spans across multiple output registers. 4304 */ 4305 switch(semantic_name) { 4306 case TGSI_SEMANTIC_PRIMID: 4307 emit_output_declaration(emit, 4308 VGPU10_OPCODE_DCL_OUTPUT_SGV, i, 4309 VGPU10_NAME_PRIMITIVE_ID, 4310 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4311 FALSE, 4312 map_tgsi_semantic_to_sgn_name(semantic_name)); 4313 break; 4314 case TGSI_SEMANTIC_LAYER: 4315 emit_output_declaration(emit, 4316 VGPU10_OPCODE_DCL_OUTPUT_SIV, i, 4317 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX, 4318 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4319 FALSE, 4320 map_tgsi_semantic_to_sgn_name(semantic_name)); 4321 break; 4322 case TGSI_SEMANTIC_VIEWPORT_INDEX: 4323 emit_output_declaration(emit, 4324 VGPU10_OPCODE_DCL_OUTPUT_SIV, i, 4325 VGPU10_NAME_VIEWPORT_ARRAY_INDEX, 4326 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4327 FALSE, 4328 map_tgsi_semantic_to_sgn_name(semantic_name)); 4329 emit->gs.viewport_index_out_index = i; 4330 break; 4331 default: 4332 emit_vertex_output_declaration(emit, i, writemask, FALSE); 4333 } 4334 } 4335 } 4336 } 4337 4338 /* For geometry shader outputs, it is possible the same register is 4339 * declared multiple times for different streams. So to avoid 4340 * redundant signature entries, geometry shader output signature is done 4341 * outside of the declaration. 4342 */ 4343 struct svga_shader_signature *sgn = &emit->signature; 4344 SVGA3dDXShaderSignatureEntry *sgnEntry; 4345 4346 for (i = 0; i < emit->info.num_outputs; i++) { 4347 if (emit->output_usage_mask[i]) { 4348 enum tgsi_semantic sem_name = emit->info.output_semantic_name[i]; 4349 4350 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; 4351 set_shader_signature_entry(sgnEntry, i, 4352 map_tgsi_semantic_to_sgn_name(sem_name), 4353 emit->output_usage_mask[i], 4354 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4355 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4356 } 4357 } 4358} 4359 4360 4361/** 4362 * Emit the declaration for the tess inner/outer output. 4363 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV 4364 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT 4365 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value 4366 */ 4367static void 4368emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, 4369 unsigned index, unsigned opcodeType, 4370 unsigned operandType, VGPU10_SYSTEM_NAME name, 4371 SVGA3dDXSignatureSemanticName sgnName) 4372{ 4373 VGPU10OpcodeToken0 opcode0; 4374 VGPU10OperandToken0 operand0; 4375 VGPU10NameToken name_token; 4376 4377 assert(emit->version >= 50); 4378 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR || 4379 (emit->key.tcs.prim_mode == PIPE_PRIM_LINES && 4380 name == VGPU10_NAME_UNDEFINED)); 4381 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR); 4382 4383 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT || 4384 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT); 4385 4386 opcode0.value = operand0.value = name_token.value = 0; 4387 4388 opcode0.opcodeType = opcodeType; 4389 operand0.operandType = operandType; 4390 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 4391 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 4392 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 4393 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 4394 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 4395 4396 name_token.name = name; 4397 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 4398 4399 /* Capture patch constant signature */ 4400 struct svga_shader_signature *sgn = &emit->signature; 4401 SVGA3dDXShaderSignatureEntry *sgnEntry = 4402 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 4403 set_shader_signature_entry(sgnEntry, index, 4404 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X, 4405 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4406 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4407} 4408 4409 4410/** 4411 * Emit output declarations for tessellation control shader. 4412 */ 4413static void 4414emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) 4415{ 4416 unsigned int i; 4417 unsigned outputIndex = emit->num_outputs; 4418 struct svga_shader_signature *sgn = &emit->signature; 4419 4420 /** 4421 * Initialize patch_generic_out_count so it won't be counted twice 4422 * since this function is called twice, one for control point phase 4423 * and another time for patch constant phase. 4424 */ 4425 emit->tcs.patch_generic_out_count = 0; 4426 4427 for (i = 0; i < emit->info.num_outputs; i++) { 4428 unsigned index = i; 4429 const enum tgsi_semantic semantic_name = 4430 emit->info.output_semantic_name[i]; 4431 4432 switch (semantic_name) { 4433 case TGSI_SEMANTIC_TESSINNER: 4434 emit->tcs.inner.tgsi_index = i; 4435 4436 /* skip per-patch output declarations in control point phase */ 4437 if (emit->tcs.control_point_phase) 4438 break; 4439 4440 emit->tcs.inner.out_index = outputIndex; 4441 switch (emit->key.tcs.prim_mode) { 4442 case PIPE_PRIM_QUADS: 4443 emit_tesslevel_declaration(emit, outputIndex++, 4444 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4445 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 4446 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 4447 4448 emit_tesslevel_declaration(emit, outputIndex++, 4449 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4450 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 4451 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 4452 break; 4453 case PIPE_PRIM_TRIANGLES: 4454 emit_tesslevel_declaration(emit, outputIndex++, 4455 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4456 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 4457 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 4458 break; 4459 case PIPE_PRIM_LINES: 4460 break; 4461 default: 4462 debug_printf("Unsupported primitive type"); 4463 } 4464 break; 4465 4466 case TGSI_SEMANTIC_TESSOUTER: 4467 emit->tcs.outer.tgsi_index = i; 4468 4469 /* skip per-patch output declarations in control point phase */ 4470 if (emit->tcs.control_point_phase) 4471 break; 4472 4473 emit->tcs.outer.out_index = outputIndex; 4474 switch (emit->key.tcs.prim_mode) { 4475 case PIPE_PRIM_QUADS: 4476 for (int j = 0; j < 4; j++) { 4477 emit_tesslevel_declaration(emit, outputIndex++, 4478 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4479 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j, 4480 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j); 4481 } 4482 break; 4483 case PIPE_PRIM_TRIANGLES: 4484 for (int j = 0; j < 3; j++) { 4485 emit_tesslevel_declaration(emit, outputIndex++, 4486 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4487 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j, 4488 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j); 4489 } 4490 break; 4491 case PIPE_PRIM_LINES: 4492 for (int j = 0; j < 2; j++) { 4493 emit_tesslevel_declaration(emit, outputIndex++, 4494 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4495 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j, 4496 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j); 4497 } 4498 break; 4499 default: 4500 debug_printf("Unsupported primitive type"); 4501 } 4502 break; 4503 4504 case TGSI_SEMANTIC_PATCH: 4505 if (emit->tcs.patch_generic_out_index == INVALID_INDEX) 4506 emit->tcs.patch_generic_out_index= i; 4507 emit->tcs.patch_generic_out_count++; 4508 4509 /* skip per-patch output declarations in control point phase */ 4510 if (emit->tcs.control_point_phase) 4511 break; 4512 4513 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index, 4514 VGPU10_NAME_UNDEFINED, 4515 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4516 FALSE, 4517 map_tgsi_semantic_to_sgn_name(semantic_name)); 4518 4519 SVGA3dDXShaderSignatureEntry *sgnEntry = 4520 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 4521 set_shader_signature_entry(sgnEntry, index, 4522 map_tgsi_semantic_to_sgn_name(semantic_name), 4523 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4524 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4525 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4526 4527 break; 4528 4529 default: 4530 /* save the starting index of control point outputs */ 4531 if (emit->tcs.control_point_out_index == INVALID_INDEX) 4532 emit->tcs.control_point_out_index = i; 4533 emit->tcs.control_point_out_count++; 4534 4535 /* skip control point output declarations in patch constant phase */ 4536 if (!emit->tcs.control_point_phase) 4537 break; 4538 4539 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], 4540 TRUE); 4541 4542 } 4543 } 4544 4545 if (emit->tcs.control_point_phase) { 4546 /** 4547 * Add missing control point output in control point phase. 4548 */ 4549 if (emit->tcs.control_point_out_index == INVALID_INDEX) { 4550 /* use register index after tessellation factors */ 4551 switch (emit->key.tcs.prim_mode) { 4552 case PIPE_PRIM_QUADS: 4553 emit->tcs.control_point_out_index = outputIndex + 6; 4554 break; 4555 case PIPE_PRIM_TRIANGLES: 4556 emit->tcs.control_point_out_index = outputIndex + 4; 4557 break; 4558 default: 4559 emit->tcs.control_point_out_index = outputIndex + 2; 4560 break; 4561 } 4562 emit->tcs.control_point_out_count++; 4563 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, 4564 emit->tcs.control_point_out_index, 4565 VGPU10_NAME_POSITION, 4566 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4567 TRUE, 4568 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); 4569 4570 /* If tcs does not output any control point output, 4571 * we can end the hull shader control point phase here 4572 * after emitting the default control point output. 4573 */ 4574 emit->skip_instruction = TRUE; 4575 } 4576 } 4577 else { 4578 if (emit->tcs.outer.out_index == INVALID_INDEX) { 4579 /* since the TCS did not declare out outer tess level output register, 4580 * we declare it here for patch constant phase only. 4581 */ 4582 emit->tcs.outer.out_index = outputIndex; 4583 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 4584 for (int i = 0; i < 4; i++) { 4585 emit_tesslevel_declaration(emit, outputIndex++, 4586 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4587 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, 4588 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); 4589 } 4590 } 4591 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 4592 for (int i = 0; i < 3; i++) { 4593 emit_tesslevel_declaration(emit, outputIndex++, 4594 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4595 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, 4596 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); 4597 } 4598 } 4599 } 4600 4601 if (emit->tcs.inner.out_index == INVALID_INDEX) { 4602 /* since the TCS did not declare out inner tess level output register, 4603 * we declare it here 4604 */ 4605 emit->tcs.inner.out_index = outputIndex; 4606 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 4607 emit_tesslevel_declaration(emit, outputIndex++, 4608 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4609 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 4610 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 4611 emit_tesslevel_declaration(emit, outputIndex++, 4612 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4613 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 4614 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 4615 } 4616 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 4617 emit_tesslevel_declaration(emit, outputIndex++, 4618 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4619 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 4620 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 4621 } 4622 } 4623 } 4624 emit->num_outputs = outputIndex; 4625} 4626 4627 4628/** 4629 * Emit output declarations for tessellation evaluation shader. 4630 */ 4631static void 4632emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit) 4633{ 4634 unsigned int i; 4635 4636 for (i = 0; i < emit->info.num_outputs; i++) { 4637 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); 4638 } 4639} 4640 4641 4642/** 4643 * Emit the declaration for a system value input/output. 4644 */ 4645static void 4646emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 4647 enum tgsi_semantic semantic_name, unsigned index) 4648{ 4649 switch (semantic_name) { 4650 case TGSI_SEMANTIC_INSTANCEID: 4651 index = alloc_system_value_index(emit, index); 4652 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 4653 VGPU10_OPERAND_TYPE_INPUT, 4654 VGPU10_OPERAND_INDEX_1D, 4655 index, 1, 4656 VGPU10_NAME_INSTANCE_ID, 4657 VGPU10_OPERAND_4_COMPONENT, 4658 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4659 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4660 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4661 map_tgsi_semantic_to_sgn_name(semantic_name)); 4662 break; 4663 case TGSI_SEMANTIC_VERTEXID: 4664 emit->vs.vertex_id_sys_index = index; 4665 index = alloc_system_value_index(emit, index); 4666 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 4667 VGPU10_OPERAND_TYPE_INPUT, 4668 VGPU10_OPERAND_INDEX_1D, 4669 index, 1, 4670 VGPU10_NAME_VERTEX_ID, 4671 VGPU10_OPERAND_4_COMPONENT, 4672 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4673 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4674 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4675 map_tgsi_semantic_to_sgn_name(semantic_name)); 4676 break; 4677 case TGSI_SEMANTIC_SAMPLEID: 4678 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4679 emit->fs.sample_id_sys_index = index; 4680 index = alloc_system_value_index(emit, index); 4681 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, 4682 VGPU10_OPERAND_TYPE_INPUT, 4683 VGPU10_OPERAND_INDEX_1D, 4684 index, 1, 4685 VGPU10_NAME_SAMPLE_INDEX, 4686 VGPU10_OPERAND_4_COMPONENT, 4687 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4688 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4689 VGPU10_INTERPOLATION_CONSTANT, TRUE, 4690 map_tgsi_semantic_to_sgn_name(semantic_name)); 4691 break; 4692 case TGSI_SEMANTIC_SAMPLEPOS: 4693 /* This system value contains the position of the current sample 4694 * when using per-sample shading. We implement this by calling 4695 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample 4696 * index as the argument. See emit_sample_position_instructions(). 4697 */ 4698 assert(emit->version >= 41); 4699 emit->fs.sample_pos_sys_index = index; 4700 index = alloc_system_value_index(emit, index); 4701 break; 4702 case TGSI_SEMANTIC_INVOCATIONID: 4703 /* Note: invocation id input is mapped to different register depending 4704 * on the shader type. In GS, it will be mapped to vGSInstanceID#. 4705 * In TCS, it will be mapped to vOutputControlPointID#. 4706 * Since in both cases, the mapped name is unique rather than 4707 * just a generic input name ("v#"), so there is no need to remap 4708 * the index value. 4709 */ 4710 assert(emit->unit == PIPE_SHADER_GEOMETRY || 4711 emit->unit == PIPE_SHADER_TESS_CTRL); 4712 assert(emit->version >= 50); 4713 4714 if (emit->unit == PIPE_SHADER_GEOMETRY) { 4715 emit->gs.invocation_id_sys_index = index; 4716 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4717 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, 4718 VGPU10_OPERAND_INDEX_0D, 4719 index, 1, 4720 VGPU10_NAME_UNDEFINED, 4721 VGPU10_OPERAND_0_COMPONENT, 4722 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4723 0, 4724 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4725 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4726 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 4727 /* The emission of the control point id will be done 4728 * in the control point phase in emit_hull_shader_control_point_phase(). 4729 */ 4730 emit->tcs.invocation_id_sys_index = index; 4731 } 4732 break; 4733 case TGSI_SEMANTIC_SAMPLEMASK: 4734 /* Note: the PS sample mask input has a unique name ("vCoverage#") 4735 * rather than just a generic input name ("v#") so no need to remap the 4736 * index value. 4737 */ 4738 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4739 assert(emit->version >= 50); 4740 emit->fs.sample_mask_in_sys_index = index; 4741 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4742 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK, 4743 VGPU10_OPERAND_INDEX_0D, 4744 index, 1, 4745 VGPU10_NAME_UNDEFINED, 4746 VGPU10_OPERAND_1_COMPONENT, 4747 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4748 0, 4749 VGPU10_INTERPOLATION_CONSTANT, TRUE, 4750 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4751 break; 4752 case TGSI_SEMANTIC_TESSCOORD: 4753 assert(emit->version >= 50); 4754 4755 unsigned usageMask = 0; 4756 4757 if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { 4758 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ; 4759 } 4760 else if (emit->tes.prim_mode == PIPE_PRIM_LINES || 4761 emit->tes.prim_mode == PIPE_PRIM_QUADS) { 4762 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY; 4763 } 4764 4765 emit->tes.tesscoord_sys_index = index; 4766 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4767 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT, 4768 VGPU10_OPERAND_INDEX_0D, 4769 index, 1, 4770 VGPU10_NAME_UNDEFINED, 4771 VGPU10_OPERAND_4_COMPONENT, 4772 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4773 usageMask, 4774 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4775 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4776 break; 4777 case TGSI_SEMANTIC_TESSINNER: 4778 assert(emit->version >= 50); 4779 emit->tes.inner.tgsi_index = index; 4780 break; 4781 case TGSI_SEMANTIC_TESSOUTER: 4782 assert(emit->version >= 50); 4783 emit->tes.outer.tgsi_index = index; 4784 break; 4785 case TGSI_SEMANTIC_VERTICESIN: 4786 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 4787 assert(emit->version >= 50); 4788 4789 /* save the system value index */ 4790 emit->tcs.vertices_per_patch_index = index; 4791 break; 4792 case TGSI_SEMANTIC_PRIMID: 4793 assert(emit->version >= 50); 4794 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 4795 emit->tcs.prim_id_index = index; 4796 } 4797 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 4798 emit->tes.prim_id_index = index; 4799 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4800 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 4801 VGPU10_OPERAND_INDEX_0D, 4802 index, 1, 4803 VGPU10_NAME_UNDEFINED, 4804 VGPU10_OPERAND_0_COMPONENT, 4805 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4806 0, 4807 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4808 map_tgsi_semantic_to_sgn_name(semantic_name)); 4809 } 4810 break; 4811 case TGSI_SEMANTIC_THREAD_ID: 4812 assert(emit->unit >= PIPE_SHADER_COMPUTE); 4813 assert(emit->version >= 50); 4814 emit->cs.thread_id_index = index; 4815 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4816 VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP, 4817 VGPU10_OPERAND_INDEX_0D, 4818 index, 1, 4819 VGPU10_NAME_UNDEFINED, 4820 VGPU10_OPERAND_4_COMPONENT, 4821 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4822 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4823 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4824 map_tgsi_semantic_to_sgn_name(semantic_name)); 4825 break; 4826 case TGSI_SEMANTIC_BLOCK_ID: 4827 assert(emit->unit >= PIPE_SHADER_COMPUTE); 4828 assert(emit->version >= 50); 4829 emit->cs.block_id_index = index; 4830 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4831 VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID, 4832 VGPU10_OPERAND_INDEX_0D, 4833 index, 1, 4834 VGPU10_NAME_UNDEFINED, 4835 VGPU10_OPERAND_4_COMPONENT, 4836 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4837 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4838 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4839 map_tgsi_semantic_to_sgn_name(semantic_name)); 4840 break; 4841 case TGSI_SEMANTIC_GRID_SIZE: 4842 assert(emit->unit == PIPE_SHADER_COMPUTE); 4843 assert(emit->version >= 50); 4844 emit->cs.grid_size.tgsi_index = index; 4845 break; 4846 default: 4847 debug_printf("unexpected system value semantic index %u / %s\n", 4848 semantic_name, tgsi_semantic_names[semantic_name]); 4849 } 4850} 4851 4852/** 4853 * Translate a TGSI declaration to VGPU10. 4854 */ 4855static boolean 4856emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 4857 const struct tgsi_full_declaration *decl) 4858{ 4859 switch (decl->Declaration.File) { 4860 case TGSI_FILE_INPUT: 4861 /* do nothing - see emit_input_declarations() */ 4862 return TRUE; 4863 4864 case TGSI_FILE_OUTPUT: 4865 assert(decl->Range.First == decl->Range.Last); 4866 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 4867 return TRUE; 4868 4869 case TGSI_FILE_TEMPORARY: 4870 /* Don't declare the temps here. Just keep track of how many 4871 * and emit the declaration later. 4872 */ 4873 if (decl->Declaration.Array) { 4874 /* Indexed temporary array. Save the start index of the array 4875 * and the size of the array. 4876 */ 4877 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 4878 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 4879 4880 /* Save this array so we can emit the declaration for it later */ 4881 create_temp_array(emit, arrayID, decl->Range.First, 4882 decl->Range.Last - decl->Range.First + 1, 4883 decl->Range.First); 4884 } 4885 4886 /* for all temps, indexed or not, keep track of highest index */ 4887 emit->num_shader_temps = MAX2(emit->num_shader_temps, 4888 decl->Range.Last + 1); 4889 return TRUE; 4890 4891 case TGSI_FILE_CONSTANT: 4892 /* Don't declare constants here. Just keep track and emit later. */ 4893 { 4894 unsigned constbuf = 0, num_consts; 4895 if (decl->Declaration.Dimension) { 4896 constbuf = decl->Dim.Index2D; 4897 } 4898 /* We throw an assertion here when, in fact, the shader should never 4899 * have linked due to constbuf index out of bounds, so we shouldn't 4900 * have reached here. 4901 */ 4902 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); 4903 4904 num_consts = MAX2(emit->num_shader_consts[constbuf], 4905 decl->Range.Last + 1); 4906 4907 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 4908 debug_printf("Warning: constant buffer is declared to size [%u]" 4909 " but [%u] is the limit.\n", 4910 num_consts, 4911 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 4912 emit->register_overflow = TRUE; 4913 } 4914 /* The linker doesn't enforce the max UBO size so we clamp here */ 4915 emit->num_shader_consts[constbuf] = 4916 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 4917 } 4918 return TRUE; 4919 4920 case TGSI_FILE_IMMEDIATE: 4921 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 4922 return FALSE; 4923 4924 case TGSI_FILE_SYSTEM_VALUE: 4925 emit_system_value_declaration(emit, decl->Semantic.Name, 4926 decl->Range.First); 4927 return TRUE; 4928 4929 case TGSI_FILE_SAMPLER: 4930 /* Don't declare samplers here. Just keep track and emit later. */ 4931 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 4932 return TRUE; 4933 4934#if 0 4935 case TGSI_FILE_RESOURCE: 4936 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 4937 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 4938 assert(!"TGSI_FILE_RESOURCE not handled yet"); 4939 return FALSE; 4940#endif 4941 4942 case TGSI_FILE_ADDRESS: 4943 emit->num_address_regs = MAX2(emit->num_address_regs, 4944 decl->Range.Last + 1); 4945 return TRUE; 4946 4947 case TGSI_FILE_SAMPLER_VIEW: 4948 { 4949 unsigned unit = decl->Range.First; 4950 assert(decl->Range.First == decl->Range.Last); 4951 emit->sampler_target[unit] = decl->SamplerView.Resource; 4952 4953 /* Note: we can ignore YZW return types for now */ 4954 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; 4955 emit->sampler_view[unit] = TRUE; 4956 } 4957 return TRUE; 4958 4959 case TGSI_FILE_IMAGE: 4960 { 4961 unsigned unit = decl->Range.First; 4962 assert(decl->Range.First == decl->Range.Last); 4963 assert(unit < PIPE_MAX_SHADER_IMAGES); 4964 emit->image[unit] = decl->Image; 4965 emit->image_mask |= 1 << unit; 4966 emit->num_images++; 4967 } 4968 return TRUE; 4969 4970 case TGSI_FILE_HW_ATOMIC: 4971 /* Declare the atomic buffer if it is not already declared. */ 4972 if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) { 4973 emit->num_atomic_bufs++; 4974 emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D); 4975 } 4976 4977 /* Remember the maximum atomic counter index encountered */ 4978 emit->max_atomic_counter_index = 4979 MAX2(emit->max_atomic_counter_index, decl->Range.Last); 4980 return TRUE; 4981 4982 case TGSI_FILE_MEMORY: 4983 /* Record memory has been used. */ 4984 if (emit->unit == PIPE_SHADER_COMPUTE && 4985 decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) { 4986 emit->cs.shared_memory_declared = TRUE; 4987 } 4988 4989 return TRUE; 4990 4991 case TGSI_FILE_BUFFER: 4992 assert(emit->version >= 50); 4993 emit->num_shader_bufs++; 4994 return TRUE; 4995 4996 default: 4997 assert(!"Unexpected type of declaration"); 4998 return FALSE; 4999 } 5000} 5001 5002 5003/** 5004 * Emit input declarations for fragment shader. 5005 */ 5006static void 5007emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit) 5008{ 5009 unsigned i; 5010 5011 for (i = 0; i < emit->linkage.num_inputs; i++) { 5012 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 5013 unsigned usage_mask = emit->info.input_usage_mask[i]; 5014 unsigned index = emit->linkage.input_map[i]; 5015 unsigned type, interpolationMode, name; 5016 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 5017 5018 if (usage_mask == 0) 5019 continue; /* register is not actually used */ 5020 5021 if (semantic_name == TGSI_SEMANTIC_POSITION) { 5022 /* fragment position input */ 5023 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5024 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 5025 name = VGPU10_NAME_POSITION; 5026 if (usage_mask & TGSI_WRITEMASK_W) { 5027 /* we need to replace use of 'w' with '1/w' */ 5028 emit->fs.fragcoord_input_index = i; 5029 } 5030 } 5031 else if (semantic_name == TGSI_SEMANTIC_FACE) { 5032 /* fragment front-facing input */ 5033 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5034 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 5035 name = VGPU10_NAME_IS_FRONT_FACE; 5036 emit->fs.face_input_index = i; 5037 } 5038 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 5039 /* primitive ID */ 5040 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5041 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 5042 name = VGPU10_NAME_PRIMITIVE_ID; 5043 } 5044 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { 5045 /* sample index / ID */ 5046 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5047 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 5048 name = VGPU10_NAME_SAMPLE_INDEX; 5049 } 5050 else if (semantic_name == TGSI_SEMANTIC_LAYER) { 5051 /* render target array index */ 5052 if (emit->key.fs.layer_to_zero) { 5053 /** 5054 * The shader from the previous stage does not write to layer, 5055 * so reading the layer index in fragment shader should return 0. 5056 */ 5057 emit->fs.layer_input_index = i; 5058 continue; 5059 } else { 5060 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5061 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 5062 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 5063 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 5064 } 5065 } 5066 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { 5067 /* viewport index */ 5068 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 5069 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 5070 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX; 5071 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 5072 } 5073 else { 5074 /* general fragment input */ 5075 type = VGPU10_OPCODE_DCL_INPUT_PS; 5076 interpolationMode = 5077 translate_interpolation(emit, 5078 emit->info.input_interpolate[i], 5079 emit->info.input_interpolate_loc[i]); 5080 5081 /* keeps track if flat interpolation mode is being used */ 5082 emit->uses_flat_interp = emit->uses_flat_interp || 5083 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 5084 5085 name = VGPU10_NAME_UNDEFINED; 5086 } 5087 5088 emit_input_declaration(emit, type, 5089 VGPU10_OPERAND_TYPE_INPUT, 5090 VGPU10_OPERAND_INDEX_1D, index, 1, 5091 name, 5092 VGPU10_OPERAND_4_COMPONENT, 5093 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5094 mask, 5095 interpolationMode, TRUE, 5096 map_tgsi_semantic_to_sgn_name(semantic_name)); 5097 } 5098} 5099 5100 5101/** 5102 * Emit input declarations for vertex shader. 5103 */ 5104static void 5105emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit) 5106{ 5107 unsigned i; 5108 5109 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { 5110 unsigned usage_mask = emit->info.input_usage_mask[i]; 5111 unsigned index = i; 5112 5113 if (usage_mask == 0) 5114 continue; /* register is not actually used */ 5115 5116 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5117 VGPU10_OPERAND_TYPE_INPUT, 5118 VGPU10_OPERAND_INDEX_1D, index, 1, 5119 VGPU10_NAME_UNDEFINED, 5120 VGPU10_OPERAND_4_COMPONENT, 5121 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5122 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5123 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 5124 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 5125 } 5126} 5127 5128 5129/** 5130 * Emit input declarations for geometry shader. 5131 */ 5132static void 5133emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit) 5134{ 5135 unsigned i; 5136 5137 for (i = 0; i < emit->info.num_inputs; i++) { 5138 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 5139 unsigned usage_mask = emit->info.input_usage_mask[i]; 5140 unsigned index = emit->linkage.input_map[i]; 5141 unsigned opcodeType, operandType; 5142 unsigned numComp, selMode; 5143 unsigned name; 5144 unsigned dim; 5145 5146 if (usage_mask == 0) 5147 continue; /* register is not actually used */ 5148 5149 opcodeType = VGPU10_OPCODE_DCL_INPUT; 5150 operandType = VGPU10_OPERAND_TYPE_INPUT; 5151 numComp = VGPU10_OPERAND_4_COMPONENT; 5152 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 5153 name = VGPU10_NAME_UNDEFINED; 5154 5155 /* all geometry shader inputs are two dimensional except 5156 * gl_PrimitiveID 5157 */ 5158 dim = VGPU10_OPERAND_INDEX_2D; 5159 5160 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 5161 /* Primitive ID */ 5162 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 5163 dim = VGPU10_OPERAND_INDEX_0D; 5164 numComp = VGPU10_OPERAND_0_COMPONENT; 5165 selMode = 0; 5166 5167 /* also save the register index so we can check for 5168 * primitive id when emit src register. We need to modify the 5169 * operand type, index dimension when emit primitive id src reg. 5170 */ 5171 emit->gs.prim_id_index = i; 5172 } 5173 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 5174 /* vertex position input */ 5175 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 5176 name = VGPU10_NAME_POSITION; 5177 } 5178 5179 emit_input_declaration(emit, opcodeType, operandType, 5180 dim, index, 5181 emit->gs.input_size, 5182 name, 5183 numComp, selMode, 5184 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5185 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 5186 map_tgsi_semantic_to_sgn_name(semantic_name)); 5187 } 5188} 5189 5190 5191/** 5192 * Emit input declarations for tessellation control shader. 5193 */ 5194static void 5195emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) 5196{ 5197 unsigned i; 5198 unsigned size = emit->key.tcs.vertices_per_patch; 5199 unsigned indicesMask = 0; 5200 boolean addSignature = TRUE; 5201 5202 if (!emit->tcs.control_point_phase) 5203 addSignature = emit->tcs.fork_phase_add_signature; 5204 5205 for (i = 0; i < emit->info.num_inputs; i++) { 5206 unsigned usage_mask = emit->info.input_usage_mask[i]; 5207 unsigned index = emit->linkage.input_map[i]; 5208 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 5209 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED; 5210 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT; 5211 SVGA3dDXSignatureSemanticName sgn_name = 5212 map_tgsi_semantic_to_sgn_name(semantic_name); 5213 5214 /* indices that are declared */ 5215 indicesMask |= 1 << index; 5216 5217 if (semantic_name == TGSI_SEMANTIC_POSITION || 5218 index == emit->linkage.position_index) { 5219 /* save the input control point index for later use */ 5220 emit->tcs.control_point_input_index = i; 5221 } 5222 else if (usage_mask == 0) { 5223 continue; /* register is not actually used */ 5224 } 5225 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { 5226 /* The shadow copy is being used here. So set the signature name 5227 * to UNDEFINED. 5228 */ 5229 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; 5230 } 5231 5232 /* input control points in the patch constant phase are emitted in the 5233 * vicp register rather than the v register. 5234 */ 5235 if (!emit->tcs.control_point_phase) { 5236 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 5237 } 5238 5239 /* Tessellation control shader inputs are two dimensional. 5240 * The array size is determined by the patch vertex count. 5241 */ 5242 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5243 operandType, 5244 VGPU10_OPERAND_INDEX_2D, 5245 index, size, name, 5246 VGPU10_OPERAND_4_COMPONENT, 5247 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5248 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5249 VGPU10_INTERPOLATION_UNDEFINED, 5250 addSignature, sgn_name); 5251 } 5252 5253 if (emit->tcs.control_point_phase) { 5254 5255 /* Also add an address register for the indirection to the 5256 * input control points 5257 */ 5258 emit->tcs.control_point_addr_index = emit->num_address_regs++; 5259 } 5260} 5261 5262 5263static void 5264emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) 5265{ 5266 5267 /* In tcs, tess factors are emitted as extra outputs. 5268 * The starting register index for the tess factors is captured 5269 * in the compile key. 5270 */ 5271 unsigned inputIndex = emit->key.tes.tessfactor_index; 5272 5273 if (emit->tes.prim_mode == PIPE_PRIM_QUADS) { 5274 if (emit->key.tes.need_tessouter) { 5275 emit->tes.outer.in_index = inputIndex; 5276 for (int i = 0; i < 4; i++) { 5277 emit_tesslevel_declaration(emit, inputIndex++, 5278 VGPU10_OPCODE_DCL_INPUT_SIV, 5279 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5280 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, 5281 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); 5282 } 5283 } 5284 5285 if (emit->key.tes.need_tessinner) { 5286 emit->tes.inner.in_index = inputIndex; 5287 emit_tesslevel_declaration(emit, inputIndex++, 5288 VGPU10_OPCODE_DCL_INPUT_SIV, 5289 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5290 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 5291 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 5292 5293 emit_tesslevel_declaration(emit, inputIndex++, 5294 VGPU10_OPCODE_DCL_INPUT_SIV, 5295 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5296 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 5297 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 5298 } 5299 } 5300 else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { 5301 if (emit->key.tes.need_tessouter) { 5302 emit->tes.outer.in_index = inputIndex; 5303 for (int i = 0; i < 3; i++) { 5304 emit_tesslevel_declaration(emit, inputIndex++, 5305 VGPU10_OPCODE_DCL_INPUT_SIV, 5306 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5307 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, 5308 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); 5309 } 5310 } 5311 5312 if (emit->key.tes.need_tessinner) { 5313 emit->tes.inner.in_index = inputIndex; 5314 emit_tesslevel_declaration(emit, inputIndex++, 5315 VGPU10_OPCODE_DCL_INPUT_SIV, 5316 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5317 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 5318 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 5319 } 5320 } 5321 else if (emit->tes.prim_mode == PIPE_PRIM_LINES) { 5322 if (emit->key.tes.need_tessouter) { 5323 emit->tes.outer.in_index = inputIndex; 5324 emit_tesslevel_declaration(emit, inputIndex++, 5325 VGPU10_OPCODE_DCL_INPUT_SIV, 5326 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5327 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR, 5328 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR); 5329 5330 emit_tesslevel_declaration(emit, inputIndex++, 5331 VGPU10_OPCODE_DCL_INPUT_SIV, 5332 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5333 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR, 5334 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR); 5335 } 5336 } 5337} 5338 5339 5340/** 5341 * Emit input declarations for tessellation evaluation shader. 5342 */ 5343static void 5344emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) 5345{ 5346 unsigned i; 5347 5348 for (i = 0; i < emit->info.num_inputs; i++) { 5349 unsigned usage_mask = emit->info.input_usage_mask[i]; 5350 unsigned index = emit->linkage.input_map[i]; 5351 unsigned size; 5352 const enum tgsi_semantic semantic_name = 5353 emit->info.input_semantic_name[i]; 5354 SVGA3dDXSignatureSemanticName sgn_name; 5355 VGPU10_OPERAND_TYPE operandType; 5356 VGPU10_OPERAND_INDEX_DIMENSION dim; 5357 5358 if (usage_mask == 0) 5359 usage_mask = 1; /* at least set usage mask to one */ 5360 5361 if (semantic_name == TGSI_SEMANTIC_PATCH) { 5362 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; 5363 dim = VGPU10_OPERAND_INDEX_1D; 5364 size = 1; 5365 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name); 5366 } 5367 else { 5368 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 5369 dim = VGPU10_OPERAND_INDEX_2D; 5370 size = emit->key.tes.vertices_per_patch; 5371 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; 5372 } 5373 5374 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType, 5375 dim, index, size, VGPU10_NAME_UNDEFINED, 5376 VGPU10_OPERAND_4_COMPONENT, 5377 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5378 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5379 VGPU10_INTERPOLATION_UNDEFINED, 5380 TRUE, sgn_name); 5381 } 5382 5383 emit_tessfactor_input_declarations(emit); 5384 5385 /* DX spec requires DS input controlpoint/patch-constant signatures to match 5386 * the HS output controlpoint/patch-constant signatures exactly. 5387 * Add missing input declarations even if they are not used in the shader. 5388 */ 5389 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) { 5390 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo; 5391 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) { 5392 5393 /* If a tcs output does not have a corresponding input register in 5394 * tes, add one. 5395 */ 5396 if (emit->linkage.prevShader.output_map[i] > 5397 emit->linkage.input_map_max) { 5398 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i]; 5399 5400 if (sem_name == TGSI_SEMANTIC_PATCH) { 5401 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5402 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5403 VGPU10_OPERAND_INDEX_1D, 5404 i, 1, VGPU10_NAME_UNDEFINED, 5405 VGPU10_OPERAND_4_COMPONENT, 5406 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5407 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5408 VGPU10_INTERPOLATION_UNDEFINED, 5409 TRUE, 5410 map_tgsi_semantic_to_sgn_name(sem_name)); 5411 5412 } else if (sem_name != TGSI_SEMANTIC_TESSINNER && 5413 sem_name != TGSI_SEMANTIC_TESSOUTER) { 5414 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5415 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT, 5416 VGPU10_OPERAND_INDEX_2D, 5417 i, emit->key.tes.vertices_per_patch, 5418 VGPU10_NAME_UNDEFINED, 5419 VGPU10_OPERAND_4_COMPONENT, 5420 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5421 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5422 VGPU10_INTERPOLATION_UNDEFINED, 5423 TRUE, 5424 map_tgsi_semantic_to_sgn_name(sem_name)); 5425 } 5426 /* tessellation factors are taken care of in 5427 * emit_tessfactor_input_declarations(). 5428 */ 5429 } 5430 } 5431 } 5432} 5433 5434 5435/** 5436 * Emit all input declarations. 5437 */ 5438static boolean 5439emit_input_declarations(struct svga_shader_emitter_v10 *emit) 5440{ 5441 emit->index_range.required = 5442 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; 5443 5444 switch (emit->unit) { 5445 case PIPE_SHADER_FRAGMENT: 5446 emit_fs_input_declarations(emit); 5447 break; 5448 case PIPE_SHADER_GEOMETRY: 5449 emit_gs_input_declarations(emit); 5450 break; 5451 case PIPE_SHADER_VERTEX: 5452 emit_vs_input_declarations(emit); 5453 break; 5454 case PIPE_SHADER_TESS_CTRL: 5455 emit_tcs_input_declarations(emit); 5456 break; 5457 case PIPE_SHADER_TESS_EVAL: 5458 emit_tes_input_declarations(emit); 5459 break; 5460 case PIPE_SHADER_COMPUTE: 5461 //XXX emit_cs_input_declarations(emit); 5462 break; 5463 default: 5464 assert(0); 5465 } 5466 5467 if (emit->index_range.start_index != INVALID_INDEX) { 5468 emit_index_range_declaration(emit); 5469 } 5470 emit->index_range.required = FALSE; 5471 return TRUE; 5472} 5473 5474 5475/** 5476 * Emit all output declarations. 5477 */ 5478static boolean 5479emit_output_declarations(struct svga_shader_emitter_v10 *emit) 5480{ 5481 emit->index_range.required = 5482 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; 5483 5484 switch (emit->unit) { 5485 case PIPE_SHADER_FRAGMENT: 5486 emit_fs_output_declarations(emit); 5487 break; 5488 case PIPE_SHADER_GEOMETRY: 5489 emit_gs_output_declarations(emit); 5490 break; 5491 case PIPE_SHADER_VERTEX: 5492 emit_vs_output_declarations(emit); 5493 break; 5494 case PIPE_SHADER_TESS_CTRL: 5495 emit_tcs_output_declarations(emit); 5496 break; 5497 case PIPE_SHADER_TESS_EVAL: 5498 emit_tes_output_declarations(emit); 5499 break; 5500 case PIPE_SHADER_COMPUTE: 5501 //XXX emit_cs_output_declarations(emit); 5502 break; 5503 default: 5504 assert(0); 5505 } 5506 5507 if (emit->vposition.so_index != INVALID_INDEX && 5508 emit->vposition.out_index != INVALID_INDEX) { 5509 5510 assert(emit->unit != PIPE_SHADER_FRAGMENT); 5511 5512 /* Emit the declaration for the non-adjusted vertex position 5513 * for stream output purpose 5514 */ 5515 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5516 emit->vposition.so_index, 5517 VGPU10_NAME_UNDEFINED, 5518 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5519 TRUE, 5520 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); 5521 } 5522 5523 if (emit->clip_dist_so_index != INVALID_INDEX && 5524 emit->clip_dist_out_index != INVALID_INDEX) { 5525 5526 assert(emit->unit != PIPE_SHADER_FRAGMENT); 5527 5528 /* Emit the declaration for the clip distance shadow copy which 5529 * will be used for stream output purpose and for clip distance 5530 * varying variable. Note all clip distances 5531 * will be written regardless of the enabled clipping planes. 5532 */ 5533 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5534 emit->clip_dist_so_index, 5535 VGPU10_NAME_UNDEFINED, 5536 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5537 TRUE, 5538 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 5539 5540 if (emit->info.num_written_clipdistance > 4) { 5541 /* for the second clip distance register, each handles 4 planes */ 5542 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5543 emit->clip_dist_so_index + 1, 5544 VGPU10_NAME_UNDEFINED, 5545 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5546 TRUE, 5547 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 5548 } 5549 } 5550 5551 if (emit->index_range.start_index != INVALID_INDEX) { 5552 emit_index_range_declaration(emit); 5553 } 5554 emit->index_range.required = FALSE; 5555 return TRUE; 5556} 5557 5558 5559/** 5560 * A helper function to create a temporary indexable array 5561 * and initialize the corresponding entries in the temp_map array. 5562 */ 5563static void 5564create_temp_array(struct svga_shader_emitter_v10 *emit, 5565 unsigned arrayID, unsigned first, unsigned count, 5566 unsigned startIndex) 5567{ 5568 unsigned i, tempIndex = startIndex; 5569 5570 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 5571 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 5572 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 5573 5574 emit->temp_arrays[arrayID].start = first; 5575 emit->temp_arrays[arrayID].size = count; 5576 5577 /* Fill in the temp_map entries for this temp array */ 5578 for (i = 0; i < count; i++, tempIndex++) { 5579 emit->temp_map[tempIndex].arrayId = arrayID; 5580 emit->temp_map[tempIndex].index = i; 5581 } 5582} 5583 5584 5585/** 5586 * Emit the declaration for the temporary registers. 5587 */ 5588static boolean 5589emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 5590{ 5591 unsigned total_temps, reg, i; 5592 5593 total_temps = emit->num_shader_temps; 5594 5595 /* If there is indirect access to non-indexable temps in the shader, 5596 * convert those temps to indexable temps. This works around a bug 5597 * in the GLSL->TGSI translator exposed in piglit test 5598 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. 5599 * Internal temps added by the driver remain as non-indexable temps. 5600 */ 5601 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && 5602 emit->num_temp_arrays == 0) { 5603 create_temp_array(emit, 1, 0, total_temps, 0); 5604 } 5605 5606 /* Allocate extra temps for specially-implemented instructions, 5607 * such as LIT. 5608 */ 5609 total_temps += MAX_INTERNAL_TEMPS; 5610 5611 /* Allocate extra temps for clip distance or clip vertex. 5612 */ 5613 if (emit->clip_mode == CLIP_DISTANCE) { 5614 /* We need to write the clip distance to a temporary register 5615 * first. Then it will be copied to the shadow copy for 5616 * the clip distance varying variable and stream output purpose. 5617 * It will also be copied to the actual CLIPDIST register 5618 * according to the enabled clip planes 5619 */ 5620 emit->clip_dist_tmp_index = total_temps++; 5621 if (emit->info.num_written_clipdistance > 4) 5622 total_temps++; /* second clip register */ 5623 } 5624 else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) { 5625 /* If the current shader is in the last vertex processing stage, 5626 * We need to convert the TGSI CLIPVERTEX output to one or more 5627 * clip distances. Allocate a temp reg for the clipvertex here. 5628 */ 5629 assert(emit->info.writes_clipvertex > 0); 5630 emit->clip_vertex_tmp_index = total_temps; 5631 total_temps++; 5632 } 5633 5634 if (emit->info.uses_vertexid) { 5635 assert(emit->unit == PIPE_SHADER_VERTEX); 5636 emit->vs.vertex_id_tmp_index = total_temps++; 5637 } 5638 5639 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 5640 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 5641 emit->key.clip_plane_enable || 5642 emit->vposition.so_index != INVALID_INDEX) { 5643 emit->vposition.tmp_index = total_temps; 5644 total_temps += 1; 5645 } 5646 5647 if (emit->vposition.need_prescale) { 5648 emit->vposition.prescale_scale_index = total_temps++; 5649 emit->vposition.prescale_trans_index = total_temps++; 5650 } 5651 5652 if (emit->unit == PIPE_SHADER_VERTEX) { 5653 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 5654 emit->key.vs.adjust_attrib_itof | 5655 emit->key.vs.adjust_attrib_utof | 5656 emit->key.vs.attrib_is_bgra | 5657 emit->key.vs.attrib_puint_to_snorm | 5658 emit->key.vs.attrib_puint_to_uscaled | 5659 emit->key.vs.attrib_puint_to_sscaled); 5660 while (attrib_mask) { 5661 unsigned index = u_bit_scan(&attrib_mask); 5662 emit->vs.adjusted_input[index] = total_temps++; 5663 } 5664 } 5665 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 5666 if (emit->key.gs.writes_viewport_index) 5667 emit->gs.viewport_index_tmp_index = total_temps++; 5668 } 5669 } 5670 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 5671 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 5672 emit->key.fs.write_color0_to_n_cbufs > 1) { 5673 /* Allocate a temp to hold the output color */ 5674 emit->fs.color_tmp_index = total_temps; 5675 total_temps += 1; 5676 } 5677 5678 if (emit->fs.face_input_index != INVALID_INDEX) { 5679 /* Allocate a temp for the +/-1 face register */ 5680 emit->fs.face_tmp_index = total_temps; 5681 total_temps += 1; 5682 } 5683 5684 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 5685 /* Allocate a temp for modified fragment position register */ 5686 emit->fs.fragcoord_tmp_index = total_temps; 5687 total_temps += 1; 5688 } 5689 5690 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { 5691 /* Allocate a temp for the sample position */ 5692 emit->fs.sample_pos_tmp_index = total_temps++; 5693 } 5694 } 5695 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 5696 if (emit->vposition.need_prescale) { 5697 emit->vposition.tmp_index = total_temps++; 5698 emit->vposition.prescale_scale_index = total_temps++; 5699 emit->vposition.prescale_trans_index = total_temps++; 5700 } 5701 5702 if (emit->tes.inner.tgsi_index) { 5703 emit->tes.inner.temp_index = total_temps; 5704 total_temps += 1; 5705 } 5706 5707 if (emit->tes.outer.tgsi_index) { 5708 emit->tes.outer.temp_index = total_temps; 5709 total_temps += 1; 5710 } 5711 } 5712 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 5713 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) { 5714 if (!emit->tcs.control_point_phase) { 5715 emit->tcs.inner.temp_index = total_temps; 5716 total_temps += 1; 5717 } 5718 } 5719 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { 5720 if (!emit->tcs.control_point_phase) { 5721 emit->tcs.outer.temp_index = total_temps; 5722 total_temps += 1; 5723 } 5724 } 5725 5726 if (emit->tcs.control_point_phase && 5727 emit->info.reads_pervertex_outputs) { 5728 emit->tcs.control_point_tmp_index = total_temps; 5729 total_temps += emit->tcs.control_point_out_count; 5730 } 5731 else if (!emit->tcs.control_point_phase && 5732 emit->info.reads_perpatch_outputs) { 5733 5734 /* If there is indirect access to the patch constant outputs 5735 * in the control point phase, then an indexable temporary array 5736 * will be created for these patch constant outputs. 5737 * Note, indirect access can only be applicable to 5738 * patch constant outputs in the control point phase. 5739 */ 5740 if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 5741 unsigned arrayID = 5742 emit->num_temp_arrays ? emit->num_temp_arrays : 1; 5743 create_temp_array(emit, arrayID, 0, 5744 emit->tcs.patch_generic_out_count, total_temps); 5745 } 5746 emit->tcs.patch_generic_tmp_index = total_temps; 5747 total_temps += emit->tcs.patch_generic_out_count; 5748 } 5749 5750 emit->tcs.invocation_id_tmp_index = total_temps++; 5751 } 5752 5753 if (emit->raw_bufs) { 5754 /** 5755 * Add 3 more temporaries if we need to translate constant buffer 5756 * to srv raw buffer. Since we need to load the value to a temporary 5757 * before it can be used as a source. There could be three source 5758 * register in an instruction. 5759 */ 5760 emit->raw_buf_tmp_index = total_temps; 5761 total_temps+=3; 5762 } 5763 5764 for (i = 0; i < emit->num_address_regs; i++) { 5765 emit->address_reg_index[i] = total_temps++; 5766 } 5767 5768 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 5769 * temp indexes. Basically, we compact all the non-array temp register 5770 * indexes into a consecutive series. 5771 * 5772 * Before, we may have some TGSI declarations like: 5773 * DCL TEMP[0..1], LOCAL 5774 * DCL TEMP[2..4], ARRAY(1), LOCAL 5775 * DCL TEMP[5..7], ARRAY(2), LOCAL 5776 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 5777 * 5778 * After, we'll have a map like this: 5779 * temp_map[0] = { array 0, index 0 } 5780 * temp_map[1] = { array 0, index 1 } 5781 * temp_map[2] = { array 1, index 0 } 5782 * temp_map[3] = { array 1, index 1 } 5783 * temp_map[4] = { array 1, index 2 } 5784 * temp_map[5] = { array 2, index 0 } 5785 * temp_map[6] = { array 2, index 1 } 5786 * temp_map[7] = { array 2, index 2 } 5787 * temp_map[8] = { array 0, index 2 } 5788 * temp_map[9] = { array 0, index 3 } 5789 * 5790 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 5791 * temps numbered 0..3 5792 * 5793 * Any time we emit a temporary register index, we'll have to use the 5794 * temp_map[] table to convert the TGSI index to the VGPU10 index. 5795 * 5796 * Finally, we recompute the total_temps value here. 5797 */ 5798 reg = 0; 5799 for (i = 0; i < total_temps; i++) { 5800 if (emit->temp_map[i].arrayId == 0) { 5801 emit->temp_map[i].index = reg++; 5802 } 5803 } 5804 5805 if (0) { 5806 debug_printf("total_temps %u\n", total_temps); 5807 for (i = 0; i < total_temps; i++) { 5808 debug_printf("temp %u -> array %u index %u\n", 5809 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 5810 } 5811 } 5812 5813 total_temps = reg; 5814 5815 /* Emit declaration of ordinary temp registers */ 5816 if (total_temps > 0) { 5817 VGPU10OpcodeToken0 opcode0; 5818 5819 opcode0.value = 0; 5820 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 5821 5822 begin_emit_instruction(emit); 5823 emit_dword(emit, opcode0.value); 5824 emit_dword(emit, total_temps); 5825 end_emit_instruction(emit); 5826 } 5827 5828 /* Emit declarations for indexable temp arrays. Skip 0th entry since 5829 * it's unused. 5830 */ 5831 for (i = 1; i < emit->num_temp_arrays; i++) { 5832 unsigned num_temps = emit->temp_arrays[i].size; 5833 5834 if (num_temps > 0) { 5835 VGPU10OpcodeToken0 opcode0; 5836 5837 opcode0.value = 0; 5838 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 5839 5840 begin_emit_instruction(emit); 5841 emit_dword(emit, opcode0.value); 5842 emit_dword(emit, i); /* which array */ 5843 emit_dword(emit, num_temps); 5844 emit_dword(emit, 4); /* num components */ 5845 end_emit_instruction(emit); 5846 5847 total_temps += num_temps; 5848 } 5849 } 5850 5851 /* Check that the grand total of all regular and indexed temps is 5852 * under the limit. 5853 */ 5854 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 5855 5856 return TRUE; 5857} 5858 5859 5860static boolean 5861emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 5862{ 5863 VGPU10OpcodeToken0 opcode0; 5864 VGPU10OperandToken0 operand0; 5865 unsigned total_consts, i; 5866 5867 opcode0.value = 0; 5868 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 5869 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 5870 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 5871 5872 operand0.value = 0; 5873 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 5874 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 5875 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5876 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5877 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 5878 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 5879 operand0.swizzleX = 0; 5880 operand0.swizzleY = 1; 5881 operand0.swizzleZ = 2; 5882 operand0.swizzleW = 3; 5883 5884 /** 5885 * Emit declaration for constant buffer [0]. We also allocate 5886 * room for the extra constants here. 5887 */ 5888 total_consts = emit->num_shader_consts[0]; 5889 5890 /* Now, allocate constant slots for the "extra" constants. 5891 * Note: it's critical that these extra constant locations 5892 * exactly match what's emitted by the "extra" constants code 5893 * in svga_state_constants.c 5894 */ 5895 5896 /* Vertex position scale/translation */ 5897 if (emit->vposition.need_prescale) { 5898 emit->vposition.prescale_cbuf_index = total_consts; 5899 total_consts += (2 * emit->vposition.num_prescale); 5900 } 5901 5902 if (emit->unit == PIPE_SHADER_VERTEX) { 5903 if (emit->key.vs.undo_viewport) { 5904 emit->vs.viewport_index = total_consts++; 5905 } 5906 if (emit->key.vs.need_vertex_id_bias) { 5907 emit->vs.vertex_id_bias_index = total_consts++; 5908 } 5909 } 5910 5911 /* user-defined clip planes */ 5912 if (emit->key.clip_plane_enable) { 5913 unsigned n = util_bitcount(emit->key.clip_plane_enable); 5914 assert(emit->unit != PIPE_SHADER_FRAGMENT && 5915 emit->unit != PIPE_SHADER_COMPUTE); 5916 for (i = 0; i < n; i++) { 5917 emit->clip_plane_const[i] = total_consts++; 5918 } 5919 } 5920 5921 for (i = 0; i < emit->num_samplers; i++) { 5922 5923 if (emit->key.tex[i].sampler_view) { 5924 /* Texcoord scale factors for RECT textures */ 5925 if (emit->key.tex[i].unnormalized) { 5926 emit->texcoord_scale_index[i] = total_consts++; 5927 } 5928 5929 /* Texture buffer sizes */ 5930 if (emit->key.tex[i].target == PIPE_BUFFER) { 5931 emit->texture_buffer_size_index[i] = total_consts++; 5932 } 5933 } 5934 } 5935 if (emit->key.image_size_used) { 5936 emit->image_size_index = total_consts; 5937 total_consts += emit->num_images; 5938 } 5939 5940 if (total_consts > 0) { 5941 if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 5942 debug_printf("Warning: Too many constants [%u] declared in constant" 5943 " buffer 0. %u is the limit.\n", 5944 total_consts, 5945 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 5946 total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT; 5947 emit->register_overflow = TRUE; 5948 } 5949 begin_emit_instruction(emit); 5950 emit_dword(emit, opcode0.value); 5951 emit_dword(emit, operand0.value); 5952 emit_dword(emit, 0); /* which const buffer slot */ 5953 emit_dword(emit, total_consts); 5954 end_emit_instruction(emit); 5955 } 5956 5957 /* Declare remaining constant buffers (UBOs) */ 5958 5959 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { 5960 if (emit->num_shader_consts[i] > 0) { 5961 if (emit->raw_bufs & (1 << i)) { 5962 /* UBO declared as srv raw buffer */ 5963 5964 VGPU10OpcodeToken0 opcode1; 5965 VGPU10OperandToken0 operand1; 5966 5967 opcode1.value = 0; 5968 opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW; 5969 opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN; 5970 5971 operand1.value = 0; 5972 operand1.numComponents = VGPU10_OPERAND_0_COMPONENT; 5973 operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 5974 operand1.indexDimension = VGPU10_OPERAND_INDEX_1D; 5975 operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5976 5977 begin_emit_instruction(emit); 5978 emit_dword(emit, opcode1.value); 5979 emit_dword(emit, operand1.value); 5980 emit_dword(emit, i + emit->raw_buf_srv_start_index); 5981 end_emit_instruction(emit); 5982 } 5983 else { 5984 5985 /* UBO declared as const buffer */ 5986 begin_emit_instruction(emit); 5987 emit_dword(emit, opcode0.value); 5988 emit_dword(emit, operand0.value); 5989 emit_dword(emit, i); /* which const buffer slot */ 5990 emit_dword(emit, emit->num_shader_consts[i]); 5991 end_emit_instruction(emit); 5992 } 5993 } 5994 } 5995 5996 return TRUE; 5997} 5998 5999 6000/** 6001 * Emit declarations for samplers. 6002 */ 6003static boolean 6004emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 6005{ 6006 unsigned i; 6007 6008 for (i = 0; i < emit->key.num_samplers; i++) { 6009 6010 VGPU10OpcodeToken0 opcode0; 6011 VGPU10OperandToken0 operand0; 6012 6013 opcode0.value = 0; 6014 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 6015 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 6016 6017 operand0.value = 0; 6018 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6019 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 6020 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6021 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6022 6023 begin_emit_instruction(emit); 6024 emit_dword(emit, opcode0.value); 6025 emit_dword(emit, operand0.value); 6026 emit_dword(emit, i); 6027 end_emit_instruction(emit); 6028 } 6029 6030 return TRUE; 6031} 6032 6033 6034/** 6035 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. 6036 */ 6037static unsigned 6038pipe_texture_to_resource_dimension(enum tgsi_texture_type target, 6039 unsigned num_samples, 6040 boolean is_array, 6041 boolean is_uav) 6042{ 6043 switch (target) { 6044 case PIPE_BUFFER: 6045 return VGPU10_RESOURCE_DIMENSION_BUFFER; 6046 case PIPE_TEXTURE_1D: 6047 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 6048 case PIPE_TEXTURE_2D: 6049 return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS : 6050 VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6051 case PIPE_TEXTURE_RECT: 6052 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6053 case PIPE_TEXTURE_3D: 6054 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 6055 case PIPE_TEXTURE_CUBE: 6056 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 6057 case PIPE_TEXTURE_1D_ARRAY: 6058 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 6059 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 6060 case PIPE_TEXTURE_2D_ARRAY: 6061 if (num_samples > 2 && is_array) 6062 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY; 6063 else if (is_array) 6064 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; 6065 else 6066 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6067 case PIPE_TEXTURE_CUBE_ARRAY: 6068 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : 6069 (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : 6070 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); 6071 default: 6072 assert(!"Unexpected resource type"); 6073 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6074 } 6075} 6076 6077 6078/** 6079 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. 6080 */ 6081static unsigned 6082tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, 6083 unsigned num_samples, 6084 boolean is_array, 6085 boolean is_uav) 6086{ 6087 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { 6088 target = TGSI_TEXTURE_2D; 6089 } 6090 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) { 6091 target = TGSI_TEXTURE_2D_ARRAY; 6092 } 6093 6094 switch (target) { 6095 case TGSI_TEXTURE_BUFFER: 6096 return VGPU10_RESOURCE_DIMENSION_BUFFER; 6097 case TGSI_TEXTURE_1D: 6098 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 6099 case TGSI_TEXTURE_2D: 6100 case TGSI_TEXTURE_RECT: 6101 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6102 case TGSI_TEXTURE_3D: 6103 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 6104 case TGSI_TEXTURE_CUBE: 6105 case TGSI_TEXTURE_SHADOWCUBE: 6106 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : 6107 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 6108 case TGSI_TEXTURE_SHADOW1D: 6109 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 6110 case TGSI_TEXTURE_SHADOW2D: 6111 case TGSI_TEXTURE_SHADOWRECT: 6112 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6113 case TGSI_TEXTURE_1D_ARRAY: 6114 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6115 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 6116 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 6117 case TGSI_TEXTURE_2D_ARRAY: 6118 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6119 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY 6120 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6121 case TGSI_TEXTURE_2D_MSAA: 6122 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 6123 case TGSI_TEXTURE_2D_ARRAY_MSAA: 6124 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 6125 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 6126 case TGSI_TEXTURE_CUBE_ARRAY: 6127 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : 6128 (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : 6129 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); 6130 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 6131 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY 6132 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 6133 default: 6134 assert(!"Unexpected resource type"); 6135 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 6136 } 6137} 6138 6139 6140/** 6141 * Given a tgsi_return_type, return true iff it is an integer type. 6142 */ 6143static boolean 6144is_integer_type(enum tgsi_return_type type) 6145{ 6146 switch (type) { 6147 case TGSI_RETURN_TYPE_SINT: 6148 case TGSI_RETURN_TYPE_UINT: 6149 return TRUE; 6150 case TGSI_RETURN_TYPE_FLOAT: 6151 case TGSI_RETURN_TYPE_UNORM: 6152 case TGSI_RETURN_TYPE_SNORM: 6153 return FALSE; 6154 case TGSI_RETURN_TYPE_COUNT: 6155 default: 6156 assert(!"is_integer_type: Unknown tgsi_return_type"); 6157 return FALSE; 6158 } 6159} 6160 6161 6162/** 6163 * Emit declarations for resources. 6164 * XXX When we're sure that all TGSI shaders will be generated with 6165 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 6166 * rework this code. 6167 */ 6168static boolean 6169emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 6170{ 6171 unsigned i; 6172 6173 /* Emit resource decl for each sampler */ 6174 for (i = 0; i < emit->num_samplers; i++) { 6175 if (!(emit->info.samplers_declared & (1 << i))) 6176 continue; 6177 6178 VGPU10OpcodeToken0 opcode0; 6179 VGPU10OperandToken0 operand0; 6180 VGPU10ResourceReturnTypeToken return_type; 6181 VGPU10_RESOURCE_RETURN_TYPE rt; 6182 6183 opcode0.value = 0; 6184 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 6185 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) { 6186 opcode0.resourceDimension = 6187 tgsi_texture_to_resource_dimension(emit->sampler_target[i], 6188 emit->key.tex[i].num_samples, 6189 emit->key.tex[i].is_array, 6190 FALSE); 6191 } 6192 else { 6193 opcode0.resourceDimension = 6194 pipe_texture_to_resource_dimension(emit->key.tex[i].target, 6195 emit->key.tex[i].num_samples, 6196 emit->key.tex[i].is_array, 6197 FALSE); 6198 } 6199 opcode0.sampleCount = emit->key.tex[i].num_samples; 6200 operand0.value = 0; 6201 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6202 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 6203 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6204 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6205 6206#if 1 6207 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 6208 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 6209 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 6210 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 6211 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 6212 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 6213 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); 6214 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) { 6215 rt = emit->sampler_return_type[i] + 1; 6216 } 6217 else { 6218 rt = emit->key.tex[i].sampler_return_type; 6219 } 6220#else 6221 switch (emit->sampler_return_type[i]) { 6222 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 6223 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 6224 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 6225 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 6226 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 6227 case TGSI_RETURN_TYPE_COUNT: 6228 default: 6229 rt = VGPU10_RETURN_TYPE_FLOAT; 6230 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 6231 } 6232#endif 6233 6234 return_type.value = 0; 6235 return_type.component0 = rt; 6236 return_type.component1 = rt; 6237 return_type.component2 = rt; 6238 return_type.component3 = rt; 6239 6240 begin_emit_instruction(emit); 6241 emit_dword(emit, opcode0.value); 6242 emit_dword(emit, operand0.value); 6243 emit_dword(emit, i); 6244 emit_dword(emit, return_type.value); 6245 end_emit_instruction(emit); 6246 } 6247 6248 return TRUE; 6249} 6250 6251 6252/** 6253 * Emit instruction to declare uav for the shader image 6254 */ 6255static void 6256emit_image_declarations(struct svga_shader_emitter_v10 *emit) 6257{ 6258 unsigned i = 0; 6259 unsigned unit = 0; 6260 unsigned uav_mask = 0; 6261 6262 /* Emit uav decl for each image */ 6263 for (i = 0; i < emit->num_images; i++, unit++) { 6264 6265 /* Find the unit index of the next declared image. 6266 */ 6267 while (!(emit->image_mask & (1 << unit))) { 6268 unit++; 6269 } 6270 6271 VGPU10OpcodeToken0 opcode0; 6272 VGPU10OperandToken0 operand0; 6273 VGPU10ResourceReturnTypeToken return_type; 6274 6275 /* If the corresponding uav for the image is already declared, 6276 * skip this image declaration. 6277 */ 6278 if (uav_mask & (1 << emit->key.images[unit].uav_index)) 6279 continue; 6280 6281 opcode0.value = 0; 6282 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED; 6283 opcode0.uavResourceDimension = 6284 tgsi_texture_to_resource_dimension(emit->image[unit].Resource, 6285 0, emit->key.images[unit].is_array, 6286 TRUE); 6287 6288 if (emit->key.images[unit].is_single_layer && 6289 emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) { 6290 opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 6291 } 6292 6293 /* Declare the uav as global coherent if the shader includes memory 6294 * barrier instructions. 6295 */ 6296 opcode0.globallyCoherent = 6297 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; 6298 6299 operand0.value = 0; 6300 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6301 operand0.operandType = VGPU10_OPERAND_TYPE_UAV; 6302 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6303 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6304 6305 return_type.value = 0; 6306 return_type.component0 = 6307 return_type.component1 = 6308 return_type.component2 = 6309 return_type.component3 = emit->key.images[unit].return_type + 1; 6310 6311 assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID); 6312 begin_emit_instruction(emit); 6313 emit_dword(emit, opcode0.value); 6314 emit_dword(emit, operand0.value); 6315 emit_dword(emit, emit->key.images[unit].uav_index); 6316 emit_dword(emit, return_type.value); 6317 end_emit_instruction(emit); 6318 6319 /* Mark the uav is already declared */ 6320 uav_mask |= 1 << emit->key.images[unit].uav_index; 6321 } 6322 6323 emit->uav_declared |= uav_mask; 6324} 6325 6326 6327/** 6328 * Emit instruction to declare uav for the shader buffer 6329 */ 6330static void 6331emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit) 6332{ 6333 unsigned i; 6334 unsigned uav_mask = 0; 6335 6336 /* Emit uav decl for each shader buffer */ 6337 for (i = 0; i < emit->num_shader_bufs; i++) { 6338 VGPU10OpcodeToken0 opcode0; 6339 VGPU10OperandToken0 operand0; 6340 6341 /* If the corresponding uav for the shader buf is already declared, 6342 * skip this shader buffer declaration. 6343 */ 6344 if (uav_mask & (1 << emit->key.shader_buf_uav_index[i])) 6345 continue; 6346 6347 opcode0.value = 0; 6348 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; 6349 6350 /* Declare the uav as global coherent if the shader includes memory 6351 * barrier instructions. 6352 */ 6353 opcode0.globallyCoherent = 6354 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; 6355 6356 operand0.value = 0; 6357 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6358 operand0.operandType = VGPU10_OPERAND_TYPE_UAV; 6359 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6360 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6361 6362 assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID); 6363 begin_emit_instruction(emit); 6364 emit_dword(emit, opcode0.value); 6365 emit_dword(emit, operand0.value); 6366 emit_dword(emit, emit->key.shader_buf_uav_index[i]); 6367 end_emit_instruction(emit); 6368 6369 /* Mark the uav is already declared */ 6370 uav_mask |= 1 << emit->key.shader_buf_uav_index[i]; 6371 } 6372 6373 emit->uav_declared |= uav_mask; 6374} 6375 6376 6377/** 6378 * Emit instruction to declare thread group shared memory(tgsm) for shared memory 6379 */ 6380static void 6381emit_memory_declarations(struct svga_shader_emitter_v10 *emit) 6382{ 6383 if (emit->cs.shared_memory_declared) { 6384 VGPU10OpcodeToken0 opcode0; 6385 VGPU10OperandToken0 operand0; 6386 6387 opcode0.value = 0; 6388 opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW; 6389 6390 /* Declare the uav as global coherent if the shader includes memory 6391 * barrier instructions. 6392 */ 6393 opcode0.globallyCoherent = 6394 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; 6395 6396 operand0.value = 0; 6397 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6398 operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; 6399 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6400 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6401 6402 begin_emit_instruction(emit); 6403 emit_dword(emit, opcode0.value); 6404 emit_dword(emit, operand0.value); 6405 6406 /* Current state tracker only declares one shared memory for GLSL. 6407 * Use index 0 for this shared memory. 6408 */ 6409 emit_dword(emit, 0); 6410 emit_dword(emit, emit->key.cs.mem_size); /* byte Count */ 6411 end_emit_instruction(emit); 6412 } 6413} 6414 6415 6416/** 6417 * Emit instruction to declare uav for atomic buffers 6418 */ 6419static void 6420emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit) 6421{ 6422 unsigned atomic_bufs_mask = emit->atomic_bufs_mask; 6423 unsigned uav_mask = 0; 6424 6425 /* Emit uav decl for each atomic buffer */ 6426 while (atomic_bufs_mask) { 6427 unsigned buf_index = u_bit_scan(&atomic_bufs_mask); 6428 unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index]; 6429 6430 /* If the corresponding uav for the shader buf is already declared, 6431 * skip this shader buffer declaration. 6432 */ 6433 if (uav_mask & (1 << uav_index)) 6434 continue; 6435 6436 VGPU10OpcodeToken0 opcode0; 6437 VGPU10OperandToken0 operand0; 6438 6439 assert(uav_index != SVGA3D_INVALID_ID); 6440 6441 opcode0.value = 0; 6442 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; 6443 opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER; 6444 6445 /* Declare the uav as global coherent if the shader includes memory 6446 * barrier instructions. 6447 */ 6448 opcode0.globallyCoherent = 6449 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; 6450 opcode0.uavHasCounter = 1; 6451 6452 operand0.value = 0; 6453 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 6454 operand0.operandType = VGPU10_OPERAND_TYPE_UAV; 6455 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 6456 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 6457 6458 begin_emit_instruction(emit); 6459 emit_dword(emit, opcode0.value); 6460 emit_dword(emit, operand0.value); 6461 emit_dword(emit, uav_index); 6462 end_emit_instruction(emit); 6463 6464 /* Mark the uav is already declared */ 6465 uav_mask |= 1 << uav_index; 6466 } 6467 6468 emit->uav_declared |= uav_mask; 6469 6470 /* Allocate immediates to be used for index to the atomic buffers */ 6471 unsigned j = 0; 6472 for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) { 6473 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); 6474 } 6475 6476 /* Allocate immediates for the atomic counter index */ 6477 for (; j <= emit->max_atomic_counter_index; j+=4) { 6478 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); 6479 } 6480} 6481 6482 6483/** 6484 * Emit instruction with n=1, 2 or 3 source registers. 6485 */ 6486static void 6487emit_instruction_opn(struct svga_shader_emitter_v10 *emit, 6488 unsigned opcode, 6489 const struct tgsi_full_dst_register *dst, 6490 const struct tgsi_full_src_register *src1, 6491 const struct tgsi_full_src_register *src2, 6492 const struct tgsi_full_src_register *src3, 6493 boolean saturate, bool precise) 6494{ 6495 begin_emit_instruction(emit); 6496 emit_opcode_precise(emit, opcode, saturate, precise); 6497 emit_dst_register(emit, dst); 6498 emit_src_register(emit, src1); 6499 if (src2) { 6500 emit_src_register(emit, src2); 6501 } 6502 if (src3) { 6503 emit_src_register(emit, src3); 6504 } 6505 end_emit_instruction(emit); 6506} 6507 6508static void 6509emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 6510 unsigned opcode, 6511 const struct tgsi_full_dst_register *dst, 6512 const struct tgsi_full_src_register *src) 6513{ 6514 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE); 6515} 6516 6517static void 6518emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 6519 VGPU10_OPCODE_TYPE opcode, 6520 const struct tgsi_full_dst_register *dst, 6521 const struct tgsi_full_src_register *src1, 6522 const struct tgsi_full_src_register *src2) 6523{ 6524 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE); 6525} 6526 6527static void 6528emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 6529 VGPU10_OPCODE_TYPE opcode, 6530 const struct tgsi_full_dst_register *dst, 6531 const struct tgsi_full_src_register *src1, 6532 const struct tgsi_full_src_register *src2, 6533 const struct tgsi_full_src_register *src3) 6534{ 6535 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE); 6536} 6537 6538static void 6539emit_instruction_op0(struct svga_shader_emitter_v10 *emit, 6540 VGPU10_OPCODE_TYPE opcode) 6541{ 6542 begin_emit_instruction(emit); 6543 emit_opcode(emit, opcode, FALSE); 6544 end_emit_instruction(emit); 6545} 6546 6547/** 6548 * Tessellation inner/outer levels needs to be store into its 6549 * appropriate registers depending on prim_mode. 6550 */ 6551static void 6552store_tesslevels(struct svga_shader_emitter_v10 *emit) 6553{ 6554 int i; 6555 6556 /* tessellation levels are required input/out in hull shader. 6557 * emitting the inner/outer tessellation levels, either from 6558 * values provided in tcs or fallback default values which is 1.0 6559 */ 6560 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 6561 struct tgsi_full_src_register temp_src; 6562 6563 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) 6564 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); 6565 else 6566 temp_src = make_immediate_reg_float(emit, 1.0f); 6567 6568 for (i = 0; i < 2; i++) { 6569 struct tgsi_full_src_register src = 6570 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6571 struct tgsi_full_dst_register dst = 6572 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i); 6573 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6574 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6575 } 6576 6577 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) 6578 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); 6579 else 6580 temp_src = make_immediate_reg_float(emit, 1.0f); 6581 6582 for (i = 0; i < 4; i++) { 6583 struct tgsi_full_src_register src = 6584 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6585 struct tgsi_full_dst_register dst = 6586 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); 6587 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6588 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6589 } 6590 } 6591 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 6592 struct tgsi_full_src_register temp_src; 6593 6594 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) 6595 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); 6596 else 6597 temp_src = make_immediate_reg_float(emit, 1.0f); 6598 6599 struct tgsi_full_src_register src = 6600 scalar_src(&temp_src, TGSI_SWIZZLE_X); 6601 struct tgsi_full_dst_register dst = 6602 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index); 6603 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6604 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6605 6606 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) 6607 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); 6608 else 6609 temp_src = make_immediate_reg_float(emit, 1.0f); 6610 6611 for (i = 0; i < 3; i++) { 6612 struct tgsi_full_src_register src = 6613 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6614 struct tgsi_full_dst_register dst = 6615 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); 6616 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6617 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6618 } 6619 } 6620 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { 6621 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { 6622 struct tgsi_full_src_register temp_src = 6623 make_src_temp_reg(emit->tcs.outer.temp_index); 6624 for (i = 0; i < 2; i++) { 6625 struct tgsi_full_src_register src = 6626 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6627 struct tgsi_full_dst_register dst = 6628 make_dst_reg(TGSI_FILE_OUTPUT, 6629 emit->tcs.outer.out_index + i); 6630 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6631 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6632 } 6633 } 6634 } 6635 else { 6636 debug_printf("Unsupported primitive type"); 6637 } 6638} 6639 6640 6641/** 6642 * Emit the actual clip distance instructions to be used for clipping 6643 * by copying the clip distance from the temporary registers to the 6644 * CLIPDIST registers written with the enabled planes mask. 6645 * Also copy the clip distance from the temporary to the clip distance 6646 * shadow copy register which will be referenced by the input shader 6647 */ 6648static void 6649emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) 6650{ 6651 struct tgsi_full_src_register tmp_clip_dist_src; 6652 struct tgsi_full_dst_register clip_dist_dst; 6653 6654 unsigned i; 6655 unsigned clip_plane_enable = emit->key.clip_plane_enable; 6656 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; 6657 int num_written_clipdist = emit->info.num_written_clipdistance; 6658 6659 assert(emit->clip_dist_out_index != INVALID_INDEX); 6660 assert(emit->clip_dist_tmp_index != INVALID_INDEX); 6661 6662 /** 6663 * Temporary reset the temporary clip dist register index so 6664 * that the copy to the real clip dist register will not 6665 * attempt to copy to the temporary register again 6666 */ 6667 emit->clip_dist_tmp_index = INVALID_INDEX; 6668 6669 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { 6670 6671 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); 6672 6673 /** 6674 * copy to the shadow copy for use by varying variable and 6675 * stream output. All clip distances 6676 * will be written regardless of the enabled clipping planes. 6677 */ 6678 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 6679 emit->clip_dist_so_index + i); 6680 6681 /* MOV clip_dist_so, tmp_clip_dist */ 6682 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 6683 &tmp_clip_dist_src); 6684 6685 /** 6686 * copy those clip distances to enabled clipping planes 6687 * to CLIPDIST registers for clipping 6688 */ 6689 if (clip_plane_enable & 0xf) { 6690 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 6691 emit->clip_dist_out_index + i); 6692 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); 6693 6694 /* MOV CLIPDIST, tmp_clip_dist */ 6695 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 6696 &tmp_clip_dist_src); 6697 } 6698 /* four clip planes per clip register */ 6699 clip_plane_enable >>= 4; 6700 } 6701 /** 6702 * set the temporary clip dist register index back to the 6703 * temporary index for the next vertex 6704 */ 6705 emit->clip_dist_tmp_index = clip_dist_tmp_index; 6706} 6707 6708/* Declare clip distance output registers for user-defined clip planes 6709 * or the TGSI_CLIPVERTEX output. 6710 */ 6711static void 6712emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) 6713{ 6714 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 6715 unsigned index = emit->num_outputs; 6716 unsigned plane_mask; 6717 6718 assert(emit->unit != PIPE_SHADER_FRAGMENT); 6719 assert(num_clip_planes <= 8); 6720 6721 if (emit->clip_mode != CLIP_LEGACY && 6722 emit->clip_mode != CLIP_VERTEX) { 6723 return; 6724 } 6725 6726 if (num_clip_planes == 0) 6727 return; 6728 6729 /* Convert clip vertex to clip distances only in the last vertex stage */ 6730 if (!emit->key.last_vertex_stage) 6731 return; 6732 6733 /* Declare one or two clip output registers. The number of components 6734 * in the mask reflects the number of clip planes. For example, if 5 6735 * clip planes are needed, we'll declare outputs similar to: 6736 * dcl_output_siv o2.xyzw, clip_distance 6737 * dcl_output_siv o3.x, clip_distance 6738 */ 6739 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ 6740 6741 plane_mask = (1 << num_clip_planes) - 1; 6742 if (plane_mask & 0xf) { 6743 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 6744 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, 6745 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, 6746 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); 6747 emit->num_outputs++; 6748 } 6749 if (plane_mask & 0xf0) { 6750 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 6751 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, 6752 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, 6753 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); 6754 emit->num_outputs++; 6755 } 6756} 6757 6758 6759/** 6760 * Emit the instructions for writing to the clip distance registers 6761 * to handle legacy/automatic clip planes. 6762 * For each clip plane, the distance is the dot product of the vertex 6763 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. 6764 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE 6765 * output registers already declared. 6766 */ 6767static void 6768emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 6769 unsigned vpos_tmp_index) 6770{ 6771 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 6772 6773 assert(emit->clip_mode == CLIP_LEGACY); 6774 assert(num_clip_planes <= 8); 6775 6776 assert(emit->unit == PIPE_SHADER_VERTEX || 6777 emit->unit == PIPE_SHADER_GEOMETRY || 6778 emit->unit == PIPE_SHADER_TESS_EVAL); 6779 6780 for (i = 0; i < num_clip_planes; i++) { 6781 struct tgsi_full_dst_register dst; 6782 struct tgsi_full_src_register plane_src, vpos_src; 6783 unsigned reg_index = emit->clip_dist_out_index + i / 4; 6784 unsigned comp = i % 4; 6785 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 6786 6787 /* create dst, src regs */ 6788 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 6789 dst = writemask_dst(&dst, writemask); 6790 6791 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 6792 vpos_src = make_src_temp_reg(vpos_tmp_index); 6793 6794 /* DP4 clip_dist, plane, vpos */ 6795 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 6796 &plane_src, &vpos_src); 6797 } 6798} 6799 6800 6801/** 6802 * Emit the instructions for computing the clip distance results from 6803 * the clip vertex temporary. 6804 * For each clip plane, the distance is the dot product of the clip vertex 6805 * position (found in a temp reg) and the clip plane coefficients. 6806 */ 6807static void 6808emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6809{ 6810 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); 6811 unsigned i; 6812 struct tgsi_full_dst_register dst; 6813 struct tgsi_full_src_register clipvert_src; 6814 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; 6815 6816 assert(emit->unit == PIPE_SHADER_VERTEX || 6817 emit->unit == PIPE_SHADER_GEOMETRY || 6818 emit->unit == PIPE_SHADER_TESS_EVAL); 6819 6820 assert(emit->clip_mode == CLIP_VERTEX); 6821 6822 clipvert_src = make_src_temp_reg(clip_vertex_tmp); 6823 6824 for (i = 0; i < num_clip; i++) { 6825 struct tgsi_full_src_register plane_src; 6826 unsigned reg_index = emit->clip_dist_out_index + i / 4; 6827 unsigned comp = i % 4; 6828 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 6829 6830 /* create dst, src regs */ 6831 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 6832 dst = writemask_dst(&dst, writemask); 6833 6834 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 6835 6836 /* DP4 clip_dist, plane, vpos */ 6837 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 6838 &plane_src, &clipvert_src); 6839 } 6840 6841 /* copy temporary clip vertex register to the clip vertex register */ 6842 6843 assert(emit->clip_vertex_out_index != INVALID_INDEX); 6844 6845 /** 6846 * temporary reset the temporary clip vertex register index so 6847 * that copy to the clip vertex register will not attempt 6848 * to copy to the temporary register again 6849 */ 6850 emit->clip_vertex_tmp_index = INVALID_INDEX; 6851 6852 /* MOV clip_vertex, clip_vertex_tmp */ 6853 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); 6854 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6855 &dst, &clipvert_src); 6856 6857 /** 6858 * set the temporary clip vertex register index back to the 6859 * temporary index for the next vertex 6860 */ 6861 emit->clip_vertex_tmp_index = clip_vertex_tmp; 6862} 6863 6864/** 6865 * Emit code to convert RGBA to BGRA 6866 */ 6867static void 6868emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 6869 const struct tgsi_full_dst_register *dst, 6870 const struct tgsi_full_src_register *src) 6871{ 6872 struct tgsi_full_src_register bgra_src = 6873 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 6874 6875 begin_emit_instruction(emit); 6876 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 6877 emit_dst_register(emit, dst); 6878 emit_src_register(emit, &bgra_src); 6879 end_emit_instruction(emit); 6880} 6881 6882 6883/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 6884static void 6885emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 6886 const struct tgsi_full_dst_register *dst, 6887 const struct tgsi_full_src_register *src) 6888{ 6889 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 6890 struct tgsi_full_src_register two = 6891 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 6892 struct tgsi_full_src_register neg_two = 6893 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 6894 6895 unsigned val_tmp = get_temp_index(emit); 6896 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 6897 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 6898 6899 unsigned bias_tmp = get_temp_index(emit); 6900 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 6901 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 6902 6903 /* val = src * 2.0 */ 6904 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two); 6905 6906 /* bias = src > 0.5 */ 6907 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half); 6908 6909 /* bias = bias & -2.0 */ 6910 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 6911 &bias_src, &neg_two); 6912 6913 /* dst = val + bias */ 6914 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 6915 &val_src, &bias_src); 6916 6917 free_temp_indexes(emit); 6918} 6919 6920 6921/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 6922static void 6923emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 6924 const struct tgsi_full_dst_register *dst, 6925 const struct tgsi_full_src_register *src) 6926{ 6927 struct tgsi_full_src_register scale = 6928 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 6929 6930 /* dst = src * scale */ 6931 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale); 6932} 6933 6934 6935/** Convert from R32_UINT to 10_10_10_2_sscaled */ 6936static void 6937emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 6938 const struct tgsi_full_dst_register *dst, 6939 const struct tgsi_full_src_register *src) 6940{ 6941 struct tgsi_full_src_register lshift = 6942 make_immediate_reg_int4(emit, 22, 12, 2, 0); 6943 struct tgsi_full_src_register rshift = 6944 make_immediate_reg_int4(emit, 22, 22, 22, 30); 6945 6946 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 6947 6948 unsigned tmp = get_temp_index(emit); 6949 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6950 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6951 6952 /* 6953 * r = (pixel << 22) >> 22; # signed int in [511, -512] 6954 * g = (pixel << 12) >> 22; # signed int in [511, -512] 6955 * b = (pixel << 2) >> 22; # signed int in [511, -512] 6956 * a = (pixel << 0) >> 30; # signed int in [1, -2] 6957 * dst = i_to_f(r,g,b,a); # convert to float 6958 */ 6959 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 6960 &src_xxxx, &lshift); 6961 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 6962 &tmp_src, &rshift); 6963 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src); 6964 6965 free_temp_indexes(emit); 6966} 6967 6968 6969/** 6970 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 6971 */ 6972static boolean 6973emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 6974 const struct tgsi_full_instruction *inst) 6975{ 6976 unsigned index = inst->Dst[0].Register.Index; 6977 struct tgsi_full_dst_register dst; 6978 VGPU10_OPCODE_TYPE opcode; 6979 6980 assert(index < MAX_VGPU10_ADDR_REGS); 6981 dst = make_dst_temp_reg(emit->address_reg_index[index]); 6982 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask); 6983 6984 /* ARL dst, s0 6985 * Translates into: 6986 * FTOI address_tmp, s0 6987 * 6988 * UARL dst, s0 6989 * Translates into: 6990 * MOV address_tmp, s0 6991 */ 6992 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 6993 opcode = VGPU10_OPCODE_FTOI; 6994 else 6995 opcode = VGPU10_OPCODE_MOV; 6996 6997 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]); 6998 6999 return TRUE; 7000} 7001 7002 7003/** 7004 * Emit code for TGSI_OPCODE_CAL instruction. 7005 */ 7006static boolean 7007emit_cal(struct svga_shader_emitter_v10 *emit, 7008 const struct tgsi_full_instruction *inst) 7009{ 7010 unsigned label = inst->Label.Label; 7011 VGPU10OperandToken0 operand; 7012 operand.value = 0; 7013 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 7014 7015 begin_emit_instruction(emit); 7016 emit_dword(emit, operand.value); 7017 emit_dword(emit, label); 7018 end_emit_instruction(emit); 7019 7020 return TRUE; 7021} 7022 7023 7024/** 7025 * Emit code for TGSI_OPCODE_IABS instruction. 7026 */ 7027static boolean 7028emit_iabs(struct svga_shader_emitter_v10 *emit, 7029 const struct tgsi_full_instruction *inst) 7030{ 7031 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 7032 * dst.y = (src0.y < 0) ? -src0.y : src0.y 7033 * dst.z = (src0.z < 0) ? -src0.z : src0.z 7034 * dst.w = (src0.w < 0) ? -src0.w : src0.w 7035 * 7036 * Translates into 7037 * IMAX dst, src, neg(src) 7038 */ 7039 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 7040 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 7041 &inst->Src[0], &neg_src); 7042 7043 return TRUE; 7044} 7045 7046 7047/** 7048 * Emit code for TGSI_OPCODE_CMP instruction. 7049 */ 7050static boolean 7051emit_cmp(struct svga_shader_emitter_v10 *emit, 7052 const struct tgsi_full_instruction *inst) 7053{ 7054 /* dst.x = (src0.x < 0) ? src1.x : src2.x 7055 * dst.y = (src0.y < 0) ? src1.y : src2.y 7056 * dst.z = (src0.z < 0) ? src1.z : src2.z 7057 * dst.w = (src0.w < 0) ? src1.w : src2.w 7058 * 7059 * Translates into 7060 * LT tmp, src0, 0.0 7061 * MOVC dst, tmp, src1, src2 7062 */ 7063 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7064 unsigned tmp = get_temp_index(emit); 7065 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7066 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7067 7068 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst, 7069 &inst->Src[0], &zero, NULL, FALSE, 7070 inst->Instruction.Precise); 7071 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 7072 &tmp_src, &inst->Src[1], &inst->Src[2], 7073 inst->Instruction.Saturate, FALSE); 7074 7075 free_temp_indexes(emit); 7076 7077 return TRUE; 7078} 7079 7080 7081/** 7082 * Emit code for TGSI_OPCODE_DST instruction. 7083 */ 7084static boolean 7085emit_dst(struct svga_shader_emitter_v10 *emit, 7086 const struct tgsi_full_instruction *inst) 7087{ 7088 /* 7089 * dst.x = 1 7090 * dst.y = src0.y * src1.y 7091 * dst.z = src0.z 7092 * dst.w = src1.w 7093 */ 7094 7095 struct tgsi_full_src_register s0_yyyy = 7096 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 7097 struct tgsi_full_src_register s0_zzzz = 7098 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 7099 struct tgsi_full_src_register s1_yyyy = 7100 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 7101 struct tgsi_full_src_register s1_wwww = 7102 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); 7103 7104 /* 7105 * If dst and either src0 and src1 are the same we need 7106 * to create a temporary for it and insert a extra move. 7107 */ 7108 unsigned tmp_move = get_temp_index(emit); 7109 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 7110 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 7111 7112 /* MOV dst.x, 1.0 */ 7113 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 7114 struct tgsi_full_dst_register dst_x = 7115 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 7116 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7117 7118 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); 7119 } 7120 7121 /* MUL dst.y, s0.y, s1.y */ 7122 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 7123 struct tgsi_full_dst_register dst_y = 7124 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 7125 7126 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, 7127 &s1_yyyy, NULL, inst->Instruction.Saturate, 7128 inst->Instruction.Precise); 7129 } 7130 7131 /* MOV dst.z, s0.z */ 7132 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 7133 struct tgsi_full_dst_register dst_z = 7134 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 7135 7136 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7137 &dst_z, &s0_zzzz, NULL, NULL, 7138 inst->Instruction.Saturate, 7139 inst->Instruction.Precise); 7140 } 7141 7142 /* MOV dst.w, s1.w */ 7143 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 7144 struct tgsi_full_dst_register dst_w = 7145 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 7146 7147 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7148 &dst_w, &s1_wwww, NULL, NULL, 7149 inst->Instruction.Saturate, 7150 inst->Instruction.Precise); 7151 } 7152 7153 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 7154 free_temp_indexes(emit); 7155 7156 return TRUE; 7157} 7158 7159 7160/** 7161 * A helper function to return the stream index as specified in 7162 * the immediate register 7163 */ 7164static inline unsigned 7165find_stream_index(struct svga_shader_emitter_v10 *emit, 7166 const struct tgsi_full_src_register *src) 7167{ 7168 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int; 7169} 7170 7171 7172/** 7173 * Emit code for TGSI_OPCODE_ENDPRIM (GS only) 7174 */ 7175static boolean 7176emit_endprim(struct svga_shader_emitter_v10 *emit, 7177 const struct tgsi_full_instruction *inst) 7178{ 7179 assert(emit->unit == PIPE_SHADER_GEOMETRY); 7180 7181 begin_emit_instruction(emit); 7182 if (emit->version >= 50) { 7183 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); 7184 7185 if (emit->info.num_stream_output_components[streamIndex] == 0) { 7186 /** 7187 * If there is no output for this stream, discard this instruction. 7188 */ 7189 emit->discard_instruction = TRUE; 7190 } 7191 else { 7192 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE); 7193 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); 7194 emit_stream_register(emit, streamIndex); 7195 } 7196 } 7197 else { 7198 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); 7199 } 7200 end_emit_instruction(emit); 7201 return TRUE; 7202} 7203 7204 7205/** 7206 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. 7207 */ 7208static boolean 7209emit_ex2(struct svga_shader_emitter_v10 *emit, 7210 const struct tgsi_full_instruction *inst) 7211{ 7212 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x 7213 * while VGPU10 computes four values. 7214 * 7215 * dst = EX2(src): 7216 * dst.xyzw = 2.0 ^ src.x 7217 */ 7218 7219 struct tgsi_full_src_register src_xxxx = 7220 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 7221 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 7222 7223 /* EXP tmp, s0.xxxx */ 7224 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, 7225 NULL, NULL, 7226 inst->Instruction.Saturate, 7227 inst->Instruction.Precise); 7228 7229 return TRUE; 7230} 7231 7232 7233/** 7234 * Emit code for TGSI_OPCODE_EXP instruction. 7235 */ 7236static boolean 7237emit_exp(struct svga_shader_emitter_v10 *emit, 7238 const struct tgsi_full_instruction *inst) 7239{ 7240 /* 7241 * dst.x = 2 ^ floor(s0.x) 7242 * dst.y = s0.x - floor(s0.x) 7243 * dst.z = 2 ^ s0.x 7244 * dst.w = 1.0 7245 */ 7246 7247 struct tgsi_full_src_register src_xxxx = 7248 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 7249 unsigned tmp = get_temp_index(emit); 7250 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7251 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7252 7253 /* 7254 * If dst and src are the same we need to create 7255 * a temporary for it and insert a extra move. 7256 */ 7257 unsigned tmp_move = get_temp_index(emit); 7258 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 7259 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 7260 7261 /* only use X component of temp reg */ 7262 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7263 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7264 7265 /* ROUND_NI tmp.x, s0.x */ 7266 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 7267 &src_xxxx); /* round to -infinity */ 7268 7269 /* EXP dst.x, tmp.x */ 7270 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 7271 struct tgsi_full_dst_register dst_x = 7272 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 7273 7274 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, 7275 NULL, NULL, 7276 inst->Instruction.Saturate, 7277 inst->Instruction.Precise); 7278 } 7279 7280 /* ADD dst.y, s0.x, -tmp */ 7281 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 7282 struct tgsi_full_dst_register dst_y = 7283 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 7284 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); 7285 7286 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, 7287 &neg_tmp_src, NULL, 7288 inst->Instruction.Saturate, 7289 inst->Instruction.Precise); 7290 } 7291 7292 /* EXP dst.z, s0.x */ 7293 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 7294 struct tgsi_full_dst_register dst_z = 7295 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 7296 7297 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, 7298 NULL, NULL, 7299 inst->Instruction.Saturate, 7300 inst->Instruction.Precise); 7301 } 7302 7303 /* MOV dst.w, 1.0 */ 7304 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 7305 struct tgsi_full_dst_register dst_w = 7306 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 7307 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7308 7309 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 7310 } 7311 7312 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 7313 7314 free_temp_indexes(emit); 7315 7316 return TRUE; 7317} 7318 7319 7320/** 7321 * Emit code for TGSI_OPCODE_IF instruction. 7322 */ 7323static boolean 7324emit_if(struct svga_shader_emitter_v10 *emit, 7325 const struct tgsi_full_src_register *src) 7326{ 7327 VGPU10OpcodeToken0 opcode0; 7328 7329 /* The src register should be a scalar */ 7330 assert(src->Register.SwizzleX == src->Register.SwizzleY && 7331 src->Register.SwizzleX == src->Register.SwizzleZ && 7332 src->Register.SwizzleX == src->Register.SwizzleW); 7333 7334 /* The only special thing here is that we need to set the 7335 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if 7336 * src.x is non-zero. 7337 */ 7338 opcode0.value = 0; 7339 opcode0.opcodeType = VGPU10_OPCODE_IF; 7340 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 7341 7342 begin_emit_instruction(emit); 7343 emit_dword(emit, opcode0.value); 7344 emit_src_register(emit, src); 7345 end_emit_instruction(emit); 7346 7347 return TRUE; 7348} 7349 7350 7351/** 7352 * Emit code for conditional discard instruction (discard fragment if any of 7353 * the register components are negative). 7354 */ 7355static boolean 7356emit_cond_discard(struct svga_shader_emitter_v10 *emit, 7357 const struct tgsi_full_instruction *inst) 7358{ 7359 unsigned tmp = get_temp_index(emit); 7360 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7361 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7362 7363 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7364 7365 struct tgsi_full_dst_register tmp_dst_x = 7366 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7367 struct tgsi_full_src_register tmp_src_xxxx = 7368 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7369 7370 /* tmp = src[0] < 0.0 */ 7371 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero); 7372 7373 if (!same_swizzle_terms(&inst->Src[0])) { 7374 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 7375 * logically OR the swizzle terms. Most uses of this conditional 7376 * discard instruction only test one channel so it's good to 7377 * avoid these extra steps. 7378 */ 7379 struct tgsi_full_src_register tmp_src_yyyy = 7380 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 7381 struct tgsi_full_src_register tmp_src_zzzz = 7382 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 7383 struct tgsi_full_src_register tmp_src_wwww = 7384 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 7385 7386 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 7387 &tmp_src_yyyy); 7388 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 7389 &tmp_src_zzzz); 7390 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 7391 &tmp_src_wwww); 7392 } 7393 7394 begin_emit_instruction(emit); 7395 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 7396 emit_src_register(emit, &tmp_src_xxxx); 7397 end_emit_instruction(emit); 7398 7399 free_temp_indexes(emit); 7400 7401 return TRUE; 7402} 7403 7404 7405/** 7406 * Emit code for the unconditional discard instruction. 7407 */ 7408static boolean 7409emit_discard(struct svga_shader_emitter_v10 *emit, 7410 const struct tgsi_full_instruction *inst) 7411{ 7412 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7413 7414 /* DISCARD if 0.0 is zero */ 7415 begin_emit_instruction(emit); 7416 emit_discard_opcode(emit, FALSE); 7417 emit_src_register(emit, &zero); 7418 end_emit_instruction(emit); 7419 7420 return TRUE; 7421} 7422 7423 7424/** 7425 * Emit code for TGSI_OPCODE_LG2 instruction. 7426 */ 7427static boolean 7428emit_lg2(struct svga_shader_emitter_v10 *emit, 7429 const struct tgsi_full_instruction *inst) 7430{ 7431 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x 7432 * while VGPU10 computes four values. 7433 * 7434 * dst = LG2(src): 7435 * dst.xyzw = log2(src.x) 7436 */ 7437 7438 struct tgsi_full_src_register src_xxxx = 7439 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 7440 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 7441 7442 /* LOG tmp, s0.xxxx */ 7443 emit_instruction_opn(emit, VGPU10_OPCODE_LOG, 7444 &inst->Dst[0], &src_xxxx, NULL, NULL, 7445 inst->Instruction.Saturate, 7446 inst->Instruction.Precise); 7447 7448 return TRUE; 7449} 7450 7451 7452/** 7453 * Emit code for TGSI_OPCODE_LIT instruction. 7454 */ 7455static boolean 7456emit_lit(struct svga_shader_emitter_v10 *emit, 7457 const struct tgsi_full_instruction *inst) 7458{ 7459 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7460 7461 /* 7462 * If dst and src are the same we need to create 7463 * a temporary for it and insert a extra move. 7464 */ 7465 unsigned tmp_move = get_temp_index(emit); 7466 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 7467 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 7468 7469 /* 7470 * dst.x = 1 7471 * dst.y = max(src.x, 0) 7472 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 7473 * dst.w = 1 7474 */ 7475 7476 /* MOV dst.x, 1.0 */ 7477 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 7478 struct tgsi_full_dst_register dst_x = 7479 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 7480 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); 7481 } 7482 7483 /* MOV dst.w, 1.0 */ 7484 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 7485 struct tgsi_full_dst_register dst_w = 7486 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 7487 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 7488 } 7489 7490 /* MAX dst.y, src.x, 0.0 */ 7491 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 7492 struct tgsi_full_dst_register dst_y = 7493 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 7494 struct tgsi_full_src_register zero = 7495 make_immediate_reg_float(emit, 0.0f); 7496 struct tgsi_full_src_register src_xxxx = 7497 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 7498 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 7499 7500 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 7501 &zero, NULL, inst->Instruction.Saturate, FALSE); 7502 } 7503 7504 /* 7505 * tmp1 = clamp(src.w, -128, 128); 7506 * MAX tmp1, src.w, -128 7507 * MIN tmp1, tmp1, 128 7508 * 7509 * tmp2 = max(tmp2, 0); 7510 * MAX tmp2, src.y, 0 7511 * 7512 * tmp1 = pow(tmp2, tmp1); 7513 * LOG tmp2, tmp2 7514 * MUL tmp1, tmp2, tmp1 7515 * EXP tmp1, tmp1 7516 * 7517 * tmp1 = (src.w == 0) ? 1 : tmp1; 7518 * EQ tmp2, 0, src.w 7519 * MOVC tmp1, tmp2, 1.0, tmp1 7520 * 7521 * dst.z = (0 < src.x) ? tmp1 : 0; 7522 * LT tmp2, 0, src.x 7523 * MOVC dst.z, tmp2, tmp1, 0.0 7524 */ 7525 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 7526 struct tgsi_full_dst_register dst_z = 7527 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 7528 7529 unsigned tmp1 = get_temp_index(emit); 7530 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 7531 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 7532 unsigned tmp2 = get_temp_index(emit); 7533 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 7534 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 7535 7536 struct tgsi_full_src_register src_xxxx = 7537 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 7538 struct tgsi_full_src_register src_yyyy = 7539 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 7540 struct tgsi_full_src_register src_wwww = 7541 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 7542 7543 struct tgsi_full_src_register zero = 7544 make_immediate_reg_float(emit, 0.0f); 7545 struct tgsi_full_src_register lowerbound = 7546 make_immediate_reg_float(emit, -128.0f); 7547 struct tgsi_full_src_register upperbound = 7548 make_immediate_reg_float(emit, 128.0f); 7549 7550 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 7551 &lowerbound); 7552 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 7553 &upperbound); 7554 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 7555 &zero); 7556 7557 /* POW tmp1, tmp2, tmp1 */ 7558 /* LOG tmp2, tmp2 */ 7559 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src); 7560 7561 /* MUL tmp1, tmp2, tmp1 */ 7562 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 7563 &tmp1_src); 7564 7565 /* EXP tmp1, tmp1 */ 7566 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src); 7567 7568 /* EQ tmp2, 0, src.w */ 7569 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww); 7570 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ 7571 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 7572 &tmp2_src, &one, &tmp1_src); 7573 7574 /* LT tmp2, 0, src.x */ 7575 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx); 7576 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 7577 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 7578 &tmp2_src, &tmp1_src, &zero); 7579 } 7580 7581 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 7582 free_temp_indexes(emit); 7583 7584 return TRUE; 7585} 7586 7587 7588/** 7589 * Emit Level Of Detail Query (LODQ) instruction. 7590 */ 7591static boolean 7592emit_lodq(struct svga_shader_emitter_v10 *emit, 7593 const struct tgsi_full_instruction *inst) 7594{ 7595 const uint unit = inst->Src[1].Register.Index; 7596 7597 assert(emit->version >= 41); 7598 7599 /* LOD dst, coord, resource, sampler */ 7600 begin_emit_instruction(emit); 7601 emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); 7602 emit_dst_register(emit, &inst->Dst[0]); 7603 emit_src_register(emit, &inst->Src[0]); /* coord */ 7604 emit_resource_register(emit, unit); 7605 emit_sampler_register(emit, unit); 7606 end_emit_instruction(emit); 7607 7608 return TRUE; 7609} 7610 7611 7612/** 7613 * Emit code for TGSI_OPCODE_LOG instruction. 7614 */ 7615static boolean 7616emit_log(struct svga_shader_emitter_v10 *emit, 7617 const struct tgsi_full_instruction *inst) 7618{ 7619 /* 7620 * dst.x = floor(lg2(abs(s0.x))) 7621 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 7622 * dst.z = lg2(abs(s0.x)) 7623 * dst.w = 1.0 7624 */ 7625 7626 struct tgsi_full_src_register src_xxxx = 7627 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 7628 unsigned tmp = get_temp_index(emit); 7629 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7630 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7631 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 7632 7633 /* only use X component of temp reg */ 7634 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7635 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7636 7637 /* LOG tmp.x, abs(s0.x) */ 7638 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 7639 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx); 7640 } 7641 7642 /* MOV dst.z, tmp.x */ 7643 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 7644 struct tgsi_full_dst_register dst_z = 7645 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 7646 7647 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7648 &dst_z, &tmp_src, NULL, NULL, 7649 inst->Instruction.Saturate, FALSE); 7650 } 7651 7652 /* FLR tmp.x, tmp.x */ 7653 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 7654 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src); 7655 } 7656 7657 /* MOV dst.x, tmp.x */ 7658 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 7659 struct tgsi_full_dst_register dst_x = 7660 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 7661 7662 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7663 &dst_x, &tmp_src, NULL, NULL, 7664 inst->Instruction.Saturate, FALSE); 7665 } 7666 7667 /* EXP tmp.x, tmp.x */ 7668 /* DIV dst.y, abs(s0.x), tmp.x */ 7669 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 7670 struct tgsi_full_dst_register dst_y = 7671 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 7672 7673 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src); 7674 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 7675 &tmp_src, NULL, inst->Instruction.Saturate, FALSE); 7676 } 7677 7678 /* MOV dst.w, 1.0 */ 7679 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 7680 struct tgsi_full_dst_register dst_w = 7681 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 7682 struct tgsi_full_src_register one = 7683 make_immediate_reg_float(emit, 1.0f); 7684 7685 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 7686 } 7687 7688 free_temp_indexes(emit); 7689 7690 return TRUE; 7691} 7692 7693 7694/** 7695 * Emit code for TGSI_OPCODE_LRP instruction. 7696 */ 7697static boolean 7698emit_lrp(struct svga_shader_emitter_v10 *emit, 7699 const struct tgsi_full_instruction *inst) 7700{ 7701 /* dst = LRP(s0, s1, s2): 7702 * dst = s0 * (s1 - s2) + s2 7703 * Translates into: 7704 * SUB tmp, s1, s2; tmp = s1 - s2 7705 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 7706 */ 7707 unsigned tmp = get_temp_index(emit); 7708 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 7709 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 7710 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 7711 7712 /* ADD tmp, s1, -s2 */ 7713 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp, 7714 &inst->Src[1], &neg_src2, NULL, FALSE, 7715 inst->Instruction.Precise); 7716 7717 /* MAD dst, s1, tmp, s3 */ 7718 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 7719 &inst->Src[0], &src_tmp, &inst->Src[2], 7720 inst->Instruction.Saturate, 7721 inst->Instruction.Precise); 7722 7723 free_temp_indexes(emit); 7724 7725 return TRUE; 7726} 7727 7728 7729/** 7730 * Emit code for TGSI_OPCODE_POW instruction. 7731 */ 7732static boolean 7733emit_pow(struct svga_shader_emitter_v10 *emit, 7734 const struct tgsi_full_instruction *inst) 7735{ 7736 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 7737 * src1.x while VGPU10 computes four values. 7738 * 7739 * dst = POW(src0, src1): 7740 * dst.xyzw = src0.x ^ src1.x 7741 */ 7742 unsigned tmp = get_temp_index(emit); 7743 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7744 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7745 struct tgsi_full_src_register src0_xxxx = 7746 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 7747 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 7748 struct tgsi_full_src_register src1_xxxx = 7749 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 7750 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 7751 7752 /* LOG tmp, s0.xxxx */ 7753 emit_instruction_opn(emit, VGPU10_OPCODE_LOG, 7754 &tmp_dst, &src0_xxxx, NULL, NULL, 7755 FALSE, inst->Instruction.Precise); 7756 7757 /* MUL tmp, tmp, s1.xxxx */ 7758 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, 7759 &tmp_dst, &tmp_src, &src1_xxxx, NULL, 7760 FALSE, inst->Instruction.Precise); 7761 7762 /* EXP tmp, s0.xxxx */ 7763 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, 7764 &inst->Dst[0], &tmp_src, NULL, NULL, 7765 inst->Instruction.Saturate, 7766 inst->Instruction.Precise); 7767 7768 /* free tmp */ 7769 free_temp_indexes(emit); 7770 7771 return TRUE; 7772} 7773 7774 7775/** 7776 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 7777 */ 7778static boolean 7779emit_rcp(struct svga_shader_emitter_v10 *emit, 7780 const struct tgsi_full_instruction *inst) 7781{ 7782 if (emit->version >= 50) { 7783 /* use new RCP instruction. But VGPU10_OPCODE_RCP is component-wise 7784 * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need 7785 * to manipulate the src register's swizzle. 7786 */ 7787 struct tgsi_full_src_register src = inst->Src[0]; 7788 src.Register.SwizzleY = 7789 src.Register.SwizzleZ = 7790 src.Register.SwizzleW = src.Register.SwizzleX; 7791 7792 begin_emit_instruction(emit); 7793 emit_opcode_precise(emit, VGPU10_OPCODE_RCP, 7794 inst->Instruction.Saturate, 7795 inst->Instruction.Precise); 7796 emit_dst_register(emit, &inst->Dst[0]); 7797 emit_src_register(emit, &src); 7798 end_emit_instruction(emit); 7799 } 7800 else { 7801 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7802 7803 unsigned tmp = get_temp_index(emit); 7804 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7805 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7806 7807 struct tgsi_full_dst_register tmp_dst_x = 7808 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7809 struct tgsi_full_src_register tmp_src_xxxx = 7810 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7811 7812 /* DIV tmp.x, 1.0, s0 */ 7813 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, 7814 &tmp_dst_x, &one, &inst->Src[0], NULL, 7815 FALSE, inst->Instruction.Precise); 7816 7817 /* MOV dst, tmp.xxxx */ 7818 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7819 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 7820 inst->Instruction.Saturate, 7821 inst->Instruction.Precise); 7822 7823 free_temp_indexes(emit); 7824 } 7825 7826 return TRUE; 7827} 7828 7829 7830/** 7831 * Emit code for TGSI_OPCODE_RSQ instruction. 7832 */ 7833static boolean 7834emit_rsq(struct svga_shader_emitter_v10 *emit, 7835 const struct tgsi_full_instruction *inst) 7836{ 7837 /* dst = RSQ(src): 7838 * dst.xyzw = 1 / sqrt(src.x) 7839 * Translates into: 7840 * RSQ tmp, src.x 7841 * MOV dst, tmp.xxxx 7842 */ 7843 7844 unsigned tmp = get_temp_index(emit); 7845 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7846 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7847 7848 struct tgsi_full_dst_register tmp_dst_x = 7849 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7850 struct tgsi_full_src_register tmp_src_xxxx = 7851 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7852 7853 /* RSQ tmp, src.x */ 7854 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ, 7855 &tmp_dst_x, &inst->Src[0], NULL, NULL, 7856 FALSE, inst->Instruction.Precise); 7857 7858 /* MOV dst, tmp.xxxx */ 7859 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7860 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 7861 inst->Instruction.Saturate, 7862 inst->Instruction.Precise); 7863 7864 /* free tmp */ 7865 free_temp_indexes(emit); 7866 7867 return TRUE; 7868} 7869 7870 7871/** 7872 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 7873 */ 7874static boolean 7875emit_seq(struct svga_shader_emitter_v10 *emit, 7876 const struct tgsi_full_instruction *inst) 7877{ 7878 /* dst = SEQ(s0, s1): 7879 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 7880 * Translates into: 7881 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 7882 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7883 */ 7884 unsigned tmp = get_temp_index(emit); 7885 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7886 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7887 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7888 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7889 7890 /* EQ tmp, s0, s1 */ 7891 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 7892 &inst->Src[1]); 7893 7894 /* MOVC dst, tmp, one, zero */ 7895 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7896 &one, &zero); 7897 7898 free_temp_indexes(emit); 7899 7900 return TRUE; 7901} 7902 7903 7904/** 7905 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 7906 */ 7907static boolean 7908emit_sge(struct svga_shader_emitter_v10 *emit, 7909 const struct tgsi_full_instruction *inst) 7910{ 7911 /* dst = SGE(s0, s1): 7912 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 7913 * Translates into: 7914 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 7915 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7916 */ 7917 unsigned tmp = get_temp_index(emit); 7918 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7919 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7920 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7921 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7922 7923 /* GE tmp, s0, s1 */ 7924 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 7925 &inst->Src[1]); 7926 7927 /* MOVC dst, tmp, one, zero */ 7928 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7929 &one, &zero); 7930 7931 free_temp_indexes(emit); 7932 7933 return TRUE; 7934} 7935 7936 7937/** 7938 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 7939 */ 7940static boolean 7941emit_sgt(struct svga_shader_emitter_v10 *emit, 7942 const struct tgsi_full_instruction *inst) 7943{ 7944 /* dst = SGT(s0, s1): 7945 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 7946 * Translates into: 7947 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 7948 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7949 */ 7950 unsigned tmp = get_temp_index(emit); 7951 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7952 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7953 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7954 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7955 7956 /* LT tmp, s1, s0 */ 7957 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 7958 &inst->Src[0]); 7959 7960 /* MOVC dst, tmp, one, zero */ 7961 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7962 &one, &zero); 7963 7964 free_temp_indexes(emit); 7965 7966 return TRUE; 7967} 7968 7969 7970/** 7971 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 7972 */ 7973static boolean 7974emit_sincos(struct svga_shader_emitter_v10 *emit, 7975 const struct tgsi_full_instruction *inst) 7976{ 7977 unsigned tmp = get_temp_index(emit); 7978 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7979 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7980 7981 struct tgsi_full_src_register tmp_src_xxxx = 7982 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7983 struct tgsi_full_dst_register tmp_dst_x = 7984 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7985 7986 begin_emit_instruction(emit); 7987 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 7988 7989 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 7990 { 7991 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 7992 emit_null_dst_register(emit); /* second destination register */ 7993 } 7994 else { 7995 emit_null_dst_register(emit); 7996 emit_dst_register(emit, &tmp_dst_x); 7997 } 7998 7999 emit_src_register(emit, &inst->Src[0]); 8000 end_emit_instruction(emit); 8001 8002 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 8003 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 8004 inst->Instruction.Saturate, 8005 inst->Instruction.Precise); 8006 8007 free_temp_indexes(emit); 8008 8009 return TRUE; 8010} 8011 8012 8013/** 8014 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 8015 */ 8016static boolean 8017emit_sle(struct svga_shader_emitter_v10 *emit, 8018 const struct tgsi_full_instruction *inst) 8019{ 8020 /* dst = SLE(s0, s1): 8021 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 8022 * Translates into: 8023 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 8024 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 8025 */ 8026 unsigned tmp = get_temp_index(emit); 8027 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8028 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8029 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 8030 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 8031 8032 /* GE tmp, s1, s0 */ 8033 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 8034 &inst->Src[0]); 8035 8036 /* MOVC dst, tmp, one, zero */ 8037 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 8038 &one, &zero); 8039 8040 free_temp_indexes(emit); 8041 8042 return TRUE; 8043} 8044 8045 8046/** 8047 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 8048 */ 8049static boolean 8050emit_slt(struct svga_shader_emitter_v10 *emit, 8051 const struct tgsi_full_instruction *inst) 8052{ 8053 /* dst = SLT(s0, s1): 8054 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 8055 * Translates into: 8056 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 8057 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 8058 */ 8059 unsigned tmp = get_temp_index(emit); 8060 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8061 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8062 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 8063 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 8064 8065 /* LT tmp, s0, s1 */ 8066 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 8067 &inst->Src[1]); 8068 8069 /* MOVC dst, tmp, one, zero */ 8070 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 8071 &one, &zero); 8072 8073 free_temp_indexes(emit); 8074 8075 return TRUE; 8076} 8077 8078 8079/** 8080 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 8081 */ 8082static boolean 8083emit_sne(struct svga_shader_emitter_v10 *emit, 8084 const struct tgsi_full_instruction *inst) 8085{ 8086 /* dst = SNE(s0, s1): 8087 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 8088 * Translates into: 8089 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 8090 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 8091 */ 8092 unsigned tmp = get_temp_index(emit); 8093 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8094 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8095 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 8096 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 8097 8098 /* NE tmp, s0, s1 */ 8099 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 8100 &inst->Src[1]); 8101 8102 /* MOVC dst, tmp, one, zero */ 8103 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 8104 &one, &zero); 8105 8106 free_temp_indexes(emit); 8107 8108 return TRUE; 8109} 8110 8111 8112/** 8113 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 8114 */ 8115static boolean 8116emit_ssg(struct svga_shader_emitter_v10 *emit, 8117 const struct tgsi_full_instruction *inst) 8118{ 8119 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 8120 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 8121 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 8122 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 8123 * Translates into: 8124 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 8125 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 8126 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 8127 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) 8128 */ 8129 struct tgsi_full_src_register zero = 8130 make_immediate_reg_float(emit, 0.0f); 8131 struct tgsi_full_src_register one = 8132 make_immediate_reg_float(emit, 1.0f); 8133 struct tgsi_full_src_register neg_one = 8134 make_immediate_reg_float(emit, -1.0f); 8135 8136 unsigned tmp1 = get_temp_index(emit); 8137 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 8138 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 8139 8140 unsigned tmp2 = get_temp_index(emit); 8141 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 8142 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 8143 8144 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 8145 &zero); 8146 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 8147 &neg_one, &zero); 8148 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 8149 &inst->Src[0]); 8150 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 8151 &one, &tmp2_src); 8152 8153 free_temp_indexes(emit); 8154 8155 return TRUE; 8156} 8157 8158 8159/** 8160 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 8161 */ 8162static boolean 8163emit_issg(struct svga_shader_emitter_v10 *emit, 8164 const struct tgsi_full_instruction *inst) 8165{ 8166 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 8167 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 8168 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 8169 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 8170 * Translates into: 8171 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 8172 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) 8173 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 8174 */ 8175 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 8176 8177 unsigned tmp1 = get_temp_index(emit); 8178 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 8179 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 8180 8181 unsigned tmp2 = get_temp_index(emit); 8182 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 8183 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 8184 8185 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 8186 8187 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 8188 &inst->Src[0], &zero); 8189 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 8190 &zero, &inst->Src[0]); 8191 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 8192 &tmp1_src, &neg_tmp2); 8193 8194 free_temp_indexes(emit); 8195 8196 return TRUE; 8197} 8198 8199 8200/** 8201 * Emit a comparison instruction. The dest register will get 8202 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 8203 */ 8204static void 8205emit_comparison(struct svga_shader_emitter_v10 *emit, 8206 SVGA3dCmpFunc func, 8207 const struct tgsi_full_dst_register *dst, 8208 const struct tgsi_full_src_register *src0, 8209 const struct tgsi_full_src_register *src1) 8210{ 8211 struct tgsi_full_src_register immediate; 8212 VGPU10OpcodeToken0 opcode0; 8213 boolean swapSrc = FALSE; 8214 8215 /* Sanity checks for svga vs. gallium enums */ 8216 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 8217 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 8218 8219 opcode0.value = 0; 8220 8221 switch (func) { 8222 case SVGA3D_CMP_NEVER: 8223 immediate = make_immediate_reg_int(emit, 0); 8224 /* MOV dst, {0} */ 8225 begin_emit_instruction(emit); 8226 emit_dword(emit, VGPU10_OPCODE_MOV); 8227 emit_dst_register(emit, dst); 8228 emit_src_register(emit, &immediate); 8229 end_emit_instruction(emit); 8230 return; 8231 case SVGA3D_CMP_ALWAYS: 8232 immediate = make_immediate_reg_int(emit, -1); 8233 /* MOV dst, {-1} */ 8234 begin_emit_instruction(emit); 8235 emit_dword(emit, VGPU10_OPCODE_MOV); 8236 emit_dst_register(emit, dst); 8237 emit_src_register(emit, &immediate); 8238 end_emit_instruction(emit); 8239 return; 8240 case SVGA3D_CMP_LESS: 8241 opcode0.opcodeType = VGPU10_OPCODE_LT; 8242 break; 8243 case SVGA3D_CMP_EQUAL: 8244 opcode0.opcodeType = VGPU10_OPCODE_EQ; 8245 break; 8246 case SVGA3D_CMP_LESSEQUAL: 8247 opcode0.opcodeType = VGPU10_OPCODE_GE; 8248 swapSrc = TRUE; 8249 break; 8250 case SVGA3D_CMP_GREATER: 8251 opcode0.opcodeType = VGPU10_OPCODE_LT; 8252 swapSrc = TRUE; 8253 break; 8254 case SVGA3D_CMP_NOTEQUAL: 8255 opcode0.opcodeType = VGPU10_OPCODE_NE; 8256 break; 8257 case SVGA3D_CMP_GREATEREQUAL: 8258 opcode0.opcodeType = VGPU10_OPCODE_GE; 8259 break; 8260 default: 8261 assert(!"Unexpected comparison mode"); 8262 opcode0.opcodeType = VGPU10_OPCODE_EQ; 8263 } 8264 8265 begin_emit_instruction(emit); 8266 emit_dword(emit, opcode0.value); 8267 emit_dst_register(emit, dst); 8268 if (swapSrc) { 8269 emit_src_register(emit, src1); 8270 emit_src_register(emit, src0); 8271 } 8272 else { 8273 emit_src_register(emit, src0); 8274 emit_src_register(emit, src1); 8275 } 8276 end_emit_instruction(emit); 8277} 8278 8279 8280/** 8281 * Get texel/address offsets for a texture instruction. 8282 */ 8283static void 8284get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 8285 const struct tgsi_full_instruction *inst, int offsets[3]) 8286{ 8287 if (inst->Texture.NumOffsets == 1) { 8288 /* According to OpenGL Shader Language spec the offsets are only 8289 * fetched from a previously-declared immediate/literal. 8290 */ 8291 const struct tgsi_texture_offset *off = inst->TexOffsets; 8292 const unsigned index = off[0].Index; 8293 const unsigned swizzleX = off[0].SwizzleX; 8294 const unsigned swizzleY = off[0].SwizzleY; 8295 const unsigned swizzleZ = off[0].SwizzleZ; 8296 const union tgsi_immediate_data *imm = emit->immediates[index]; 8297 8298 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 8299 8300 offsets[0] = imm[swizzleX].Int; 8301 offsets[1] = imm[swizzleY].Int; 8302 offsets[2] = imm[swizzleZ].Int; 8303 } 8304 else { 8305 offsets[0] = offsets[1] = offsets[2] = 0; 8306 } 8307} 8308 8309 8310/** 8311 * Set up the coordinate register for texture sampling. 8312 * When we're sampling from a RECT texture we have to scale the 8313 * unnormalized coordinate to a normalized coordinate. 8314 * We do that by multiplying the coordinate by an "extra" constant. 8315 * An alternative would be to use the RESINFO instruction to query the 8316 * texture's size. 8317 */ 8318static struct tgsi_full_src_register 8319setup_texcoord(struct svga_shader_emitter_v10 *emit, 8320 unsigned unit, 8321 const struct tgsi_full_src_register *coord) 8322{ 8323 if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) { 8324 unsigned scale_index = emit->texcoord_scale_index[unit]; 8325 unsigned tmp = get_temp_index(emit); 8326 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8327 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8328 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 8329 8330 if (emit->key.tex[unit].texel_bias) { 8331 /* to fix texture coordinate rounding issue, 0.0001 offset is 8332 * been added. This fixes piglit test fbo-blit-scaled-linear. */ 8333 struct tgsi_full_src_register offset = 8334 make_immediate_reg_float(emit, 0.0001f); 8335 8336 /* ADD tmp, coord, offset */ 8337 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst, 8338 coord, &offset); 8339 /* MUL tmp, tmp, scale */ 8340 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 8341 &tmp_src, &scale_src); 8342 } 8343 else { 8344 /* MUL tmp, coord, const[] */ 8345 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 8346 coord, &scale_src); 8347 } 8348 return tmp_src; 8349 } 8350 else { 8351 /* use texcoord as-is */ 8352 return *coord; 8353 } 8354} 8355 8356 8357/** 8358 * For SAMPLE_C instructions, emit the extra src register which indicates 8359 * the reference/comparision value. 8360 */ 8361static void 8362emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 8363 enum tgsi_texture_type target, 8364 const struct tgsi_full_src_register *coord) 8365{ 8366 struct tgsi_full_src_register coord_src_ref; 8367 int component; 8368 8369 assert(tgsi_is_shadow_target(target)); 8370 8371 component = tgsi_util_get_shadow_ref_src_index(target) % 4; 8372 assert(component >= 0); 8373 8374 coord_src_ref = scalar_src(coord, component); 8375 8376 emit_src_register(emit, &coord_src_ref); 8377} 8378 8379 8380/** 8381 * Info for implementing texture swizzles. 8382 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 8383 * functions use this to encapsulate the extra steps needed to perform 8384 * a texture swizzle, or shadow/depth comparisons. 8385 * The shadow/depth comparison is only done here if for the cases where 8386 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). 8387 */ 8388struct tex_swizzle_info 8389{ 8390 boolean swizzled; 8391 boolean shadow_compare; 8392 unsigned unit; 8393 enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */ 8394 struct tgsi_full_src_register tmp_src; 8395 struct tgsi_full_dst_register tmp_dst; 8396 const struct tgsi_full_dst_register *inst_dst; 8397 const struct tgsi_full_src_register *coord_src; 8398}; 8399 8400 8401/** 8402 * Do setup for handling texture swizzles or shadow compares. 8403 * \param unit the texture unit 8404 * \param inst the TGSI texture instruction 8405 * \param shadow_compare do shadow/depth comparison? 8406 * \param swz returns the swizzle info 8407 */ 8408static void 8409begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 8410 unsigned unit, 8411 const struct tgsi_full_instruction *inst, 8412 boolean shadow_compare, 8413 struct tex_swizzle_info *swz) 8414{ 8415 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 8416 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 8417 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 8418 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 8419 8420 swz->shadow_compare = shadow_compare; 8421 swz->texture_target = inst->Texture.Texture; 8422 8423 if (swz->swizzled || shadow_compare) { 8424 /* Allocate temp register for the result of the SAMPLE instruction 8425 * and the source of the MOV/compare/swizzle instructions. 8426 */ 8427 unsigned tmp = get_temp_index(emit); 8428 swz->tmp_src = make_src_temp_reg(tmp); 8429 swz->tmp_dst = make_dst_temp_reg(tmp); 8430 8431 swz->unit = unit; 8432 } 8433 swz->inst_dst = &inst->Dst[0]; 8434 swz->coord_src = &inst->Src[0]; 8435 8436 emit->shadow_compare_units |= shadow_compare << unit; 8437} 8438 8439 8440/** 8441 * Returns the register to put the SAMPLE instruction results into. 8442 * This will either be the original instruction dst reg (if no swizzle 8443 * and no shadow comparison) or a temporary reg if there is a swizzle. 8444 */ 8445static const struct tgsi_full_dst_register * 8446get_tex_swizzle_dst(const struct tex_swizzle_info *swz) 8447{ 8448 return (swz->swizzled || swz->shadow_compare) 8449 ? &swz->tmp_dst : swz->inst_dst; 8450} 8451 8452 8453/** 8454 * This emits the MOV instruction that actually implements a texture swizzle 8455 * and/or shadow comparison. 8456 */ 8457static void 8458end_tex_swizzle(struct svga_shader_emitter_v10 *emit, 8459 const struct tex_swizzle_info *swz) 8460{ 8461 if (swz->shadow_compare) { 8462 /* Emit extra instructions to compare the fetched texel value against 8463 * a texture coordinate component. The result of the comparison 8464 * is 0.0 or 1.0. 8465 */ 8466 struct tgsi_full_src_register coord_src; 8467 struct tgsi_full_src_register texel_src = 8468 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); 8469 struct tgsi_full_src_register one = 8470 make_immediate_reg_float(emit, 1.0f); 8471 /* convert gallium comparison func to SVGA comparison func */ 8472 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; 8473 8474 int component = 8475 tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4; 8476 assert(component >= 0); 8477 coord_src = scalar_src(swz->coord_src, component); 8478 8479 /* COMPARE tmp, coord, texel */ 8480 emit_comparison(emit, compare_func, 8481 &swz->tmp_dst, &coord_src, &texel_src); 8482 8483 /* AND dest, tmp, {1.0} */ 8484 begin_emit_instruction(emit); 8485 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); 8486 if (swz->swizzled) { 8487 emit_dst_register(emit, &swz->tmp_dst); 8488 } 8489 else { 8490 emit_dst_register(emit, swz->inst_dst); 8491 } 8492 emit_src_register(emit, &swz->tmp_src); 8493 emit_src_register(emit, &one); 8494 end_emit_instruction(emit); 8495 } 8496 8497 if (swz->swizzled) { 8498 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; 8499 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; 8500 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; 8501 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; 8502 unsigned writemask_0 = 0, writemask_1 = 0; 8503 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); 8504 8505 /* Swizzle w/out zero/one terms */ 8506 struct tgsi_full_src_register src_swizzled = 8507 swizzle_src(&swz->tmp_src, 8508 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, 8509 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, 8510 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, 8511 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); 8512 8513 /* MOV dst, color(tmp).<swizzle> */ 8514 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 8515 swz->inst_dst, &src_swizzled); 8516 8517 /* handle swizzle zero terms */ 8518 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | 8519 ((swz_g == PIPE_SWIZZLE_0) << 1) | 8520 ((swz_b == PIPE_SWIZZLE_0) << 2) | 8521 ((swz_a == PIPE_SWIZZLE_0) << 3)); 8522 writemask_0 &= swz->inst_dst->Register.WriteMask; 8523 8524 if (writemask_0) { 8525 struct tgsi_full_src_register zero = int_tex ? 8526 make_immediate_reg_int(emit, 0) : 8527 make_immediate_reg_float(emit, 0.0f); 8528 struct tgsi_full_dst_register dst = 8529 writemask_dst(swz->inst_dst, writemask_0); 8530 8531 /* MOV dst.writemask_0, {0,0,0,0} */ 8532 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero); 8533 } 8534 8535 /* handle swizzle one terms */ 8536 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | 8537 ((swz_g == PIPE_SWIZZLE_1) << 1) | 8538 ((swz_b == PIPE_SWIZZLE_1) << 2) | 8539 ((swz_a == PIPE_SWIZZLE_1) << 3)); 8540 writemask_1 &= swz->inst_dst->Register.WriteMask; 8541 8542 if (writemask_1) { 8543 struct tgsi_full_src_register one = int_tex ? 8544 make_immediate_reg_int(emit, 1) : 8545 make_immediate_reg_float(emit, 1.0f); 8546 struct tgsi_full_dst_register dst = 8547 writemask_dst(swz->inst_dst, writemask_1); 8548 8549 /* MOV dst.writemask_1, {1,1,1,1} */ 8550 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one); 8551 } 8552 } 8553} 8554 8555 8556/** 8557 * Emit code for TGSI_OPCODE_SAMPLE instruction. 8558 */ 8559static boolean 8560emit_sample(struct svga_shader_emitter_v10 *emit, 8561 const struct tgsi_full_instruction *inst) 8562{ 8563 const unsigned resource_unit = inst->Src[1].Register.Index; 8564 const unsigned sampler_unit = inst->Src[2].Register.Index; 8565 struct tgsi_full_src_register coord; 8566 int offsets[3]; 8567 struct tex_swizzle_info swz_info; 8568 8569 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); 8570 8571 get_texel_offsets(emit, inst, offsets); 8572 8573 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); 8574 8575 /* SAMPLE dst, coord(s0), resource, sampler */ 8576 begin_emit_instruction(emit); 8577 8578 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L 8579 * with LOD=0. But our virtual GPU accepts this as-is. 8580 */ 8581 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, 8582 inst->Instruction.Saturate, offsets); 8583 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8584 emit_src_register(emit, &coord); 8585 emit_resource_register(emit, resource_unit); 8586 emit_sampler_register(emit, sampler_unit); 8587 end_emit_instruction(emit); 8588 8589 end_tex_swizzle(emit, &swz_info); 8590 8591 free_temp_indexes(emit); 8592 8593 return TRUE; 8594} 8595 8596 8597/** 8598 * Check if a texture instruction is valid. 8599 * An example of an invalid texture instruction is doing shadow comparison 8600 * with an integer-valued texture. 8601 * If we detect an invalid texture instruction, we replace it with: 8602 * MOV dst, {1,1,1,1}; 8603 * \return TRUE if valid, FALSE if invalid. 8604 */ 8605static boolean 8606is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 8607 const struct tgsi_full_instruction *inst) 8608{ 8609 const unsigned unit = inst->Src[1].Register.Index; 8610 const enum tgsi_texture_type target = inst->Texture.Texture; 8611 boolean valid = TRUE; 8612 8613 if (tgsi_is_shadow_target(target) && 8614 is_integer_type(emit->sampler_return_type[unit])) { 8615 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 8616 valid = FALSE; 8617 } 8618 /* XXX might check for other conditions in the future here */ 8619 8620 if (!valid) { 8621 /* emit a MOV dst, {1,1,1,1} instruction. */ 8622 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 8623 begin_emit_instruction(emit); 8624 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 8625 emit_dst_register(emit, &inst->Dst[0]); 8626 emit_src_register(emit, &one); 8627 end_emit_instruction(emit); 8628 } 8629 8630 return valid; 8631} 8632 8633 8634/** 8635 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 8636 */ 8637static boolean 8638emit_tex(struct svga_shader_emitter_v10 *emit, 8639 const struct tgsi_full_instruction *inst) 8640{ 8641 const uint unit = inst->Src[1].Register.Index; 8642 const enum tgsi_texture_type target = inst->Texture.Texture; 8643 VGPU10_OPCODE_TYPE opcode; 8644 struct tgsi_full_src_register coord; 8645 int offsets[3]; 8646 struct tex_swizzle_info swz_info; 8647 boolean compare_in_shader; 8648 8649 /* check that the sampler returns a float */ 8650 if (!is_valid_tex_instruction(emit, inst)) 8651 return TRUE; 8652 8653 compare_in_shader = tgsi_is_shadow_target(target) && 8654 emit->key.tex[unit].compare_in_shader; 8655 8656 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); 8657 8658 get_texel_offsets(emit, inst, offsets); 8659 8660 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8661 8662 /* SAMPLE dst, coord(s0), resource, sampler */ 8663 begin_emit_instruction(emit); 8664 8665 if (tgsi_is_shadow_target(target) && !compare_in_shader) 8666 opcode = VGPU10_OPCODE_SAMPLE_C; 8667 else 8668 opcode = VGPU10_OPCODE_SAMPLE; 8669 8670 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8671 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8672 emit_src_register(emit, &coord); 8673 emit_resource_register(emit, unit); 8674 emit_sampler_register(emit, unit); 8675 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 8676 emit_tex_compare_refcoord(emit, target, &coord); 8677 } 8678 end_emit_instruction(emit); 8679 8680 end_tex_swizzle(emit, &swz_info); 8681 8682 free_temp_indexes(emit); 8683 8684 return TRUE; 8685} 8686 8687/** 8688 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) 8689 */ 8690static boolean 8691emit_tg4(struct svga_shader_emitter_v10 *emit, 8692 const struct tgsi_full_instruction *inst) 8693{ 8694 const uint unit = inst->Src[2].Register.Index; 8695 struct tgsi_full_src_register src; 8696 struct tgsi_full_src_register offset_src, sampler, ref; 8697 int offsets[3]; 8698 8699 /* check that the sampler returns a float */ 8700 if (!is_valid_tex_instruction(emit, inst)) 8701 return TRUE; 8702 8703 if (emit->version >= 50) { 8704 unsigned target = inst->Texture.Texture; 8705 int index = inst->Src[1].Register.Index; 8706 const union tgsi_immediate_data *imm = emit->immediates[index]; 8707 int select_comp = imm[inst->Src[1].Register.SwizzleX].Int; 8708 unsigned select_swizzle = PIPE_SWIZZLE_X; 8709 8710 if (!tgsi_is_shadow_target(target)) { 8711 switch (select_comp) { 8712 case 0: 8713 select_swizzle = emit->key.tex[unit].swizzle_r; 8714 break; 8715 case 1: 8716 select_swizzle = emit->key.tex[unit].swizzle_g; 8717 break; 8718 case 2: 8719 select_swizzle = emit->key.tex[unit].swizzle_b; 8720 break; 8721 case 3: 8722 select_swizzle = emit->key.tex[unit].swizzle_a; 8723 break; 8724 default: 8725 assert(!"Unexpected component in texture gather swizzle"); 8726 } 8727 } 8728 else { 8729 select_swizzle = emit->key.tex[unit].swizzle_r; 8730 } 8731 8732 if (select_swizzle == PIPE_SWIZZLE_1) { 8733 src = make_immediate_reg_float(emit, 1.0); 8734 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8735 return TRUE; 8736 } 8737 else if (select_swizzle == PIPE_SWIZZLE_0) { 8738 src = make_immediate_reg_float(emit, 0.0); 8739 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8740 return TRUE; 8741 } 8742 8743 src = setup_texcoord(emit, unit, &inst->Src[0]); 8744 8745 /* GATHER4 dst, coord, resource, sampler */ 8746 /* GATHER4_C dst, coord, resource, sampler ref */ 8747 /* GATHER4_PO dst, coord, offset resource, sampler */ 8748 /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */ 8749 begin_emit_instruction(emit); 8750 if (inst->Texture.NumOffsets == 1) { 8751 if (tgsi_is_shadow_target(target)) { 8752 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C, 8753 inst->Instruction.Saturate); 8754 } 8755 else { 8756 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO, 8757 inst->Instruction.Saturate); 8758 } 8759 } 8760 else { 8761 if (tgsi_is_shadow_target(target)) { 8762 emit_opcode(emit, VGPU10_OPCODE_GATHER4_C, 8763 inst->Instruction.Saturate); 8764 } 8765 else { 8766 emit_opcode(emit, VGPU10_OPCODE_GATHER4, 8767 inst->Instruction.Saturate); 8768 } 8769 } 8770 8771 emit_dst_register(emit, &inst->Dst[0]); 8772 emit_src_register(emit, &src); 8773 if (inst->Texture.NumOffsets == 1) { 8774 /* offset */ 8775 offset_src = make_src_reg(inst->TexOffsets[0].File, 8776 inst->TexOffsets[0].Index); 8777 offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX, 8778 inst->TexOffsets[0].SwizzleY, 8779 inst->TexOffsets[0].SwizzleZ, 8780 TGSI_SWIZZLE_W); 8781 emit_src_register(emit, &offset_src); 8782 } 8783 8784 /* resource */ 8785 emit_resource_register(emit, unit); 8786 8787 /* sampler */ 8788 sampler = make_src_reg(TGSI_FILE_SAMPLER, 8789 emit->key.tex[unit].sampler_index); 8790 sampler.Register.SwizzleX = 8791 sampler.Register.SwizzleY = 8792 sampler.Register.SwizzleZ = 8793 sampler.Register.SwizzleW = select_swizzle; 8794 emit_src_register(emit, &sampler); 8795 8796 if (tgsi_is_shadow_target(target)) { 8797 /* ref */ 8798 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 8799 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8800 emit_tex_compare_refcoord(emit, target, &ref); 8801 } 8802 else { 8803 emit_tex_compare_refcoord(emit, target, &src); 8804 } 8805 } 8806 8807 end_emit_instruction(emit); 8808 free_temp_indexes(emit); 8809 } 8810 else { 8811 /* Only a single channel is supported in SM4_1 and we report 8812 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. 8813 * Only the 0th component will be gathered. 8814 */ 8815 switch (emit->key.tex[unit].swizzle_r) { 8816 case PIPE_SWIZZLE_X: 8817 get_texel_offsets(emit, inst, offsets); 8818 src = setup_texcoord(emit, unit, &inst->Src[0]); 8819 8820 /* Gather dst, coord, resource, sampler */ 8821 begin_emit_instruction(emit); 8822 emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, 8823 inst->Instruction.Saturate, offsets); 8824 emit_dst_register(emit, &inst->Dst[0]); 8825 emit_src_register(emit, &src); 8826 emit_resource_register(emit, unit); 8827 8828 /* sampler */ 8829 sampler = make_src_reg(TGSI_FILE_SAMPLER, 8830 emit->key.tex[unit].sampler_index); 8831 sampler.Register.SwizzleX = 8832 sampler.Register.SwizzleY = 8833 sampler.Register.SwizzleZ = 8834 sampler.Register.SwizzleW = PIPE_SWIZZLE_X; 8835 emit_src_register(emit, &sampler); 8836 8837 end_emit_instruction(emit); 8838 break; 8839 case PIPE_SWIZZLE_W: 8840 case PIPE_SWIZZLE_1: 8841 src = make_immediate_reg_float(emit, 1.0); 8842 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8843 break; 8844 case PIPE_SWIZZLE_Y: 8845 case PIPE_SWIZZLE_Z: 8846 case PIPE_SWIZZLE_0: 8847 default: 8848 src = make_immediate_reg_float(emit, 0.0); 8849 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8850 break; 8851 } 8852 } 8853 8854 return TRUE; 8855} 8856 8857 8858 8859/** 8860 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) 8861 */ 8862static boolean 8863emit_tex2(struct svga_shader_emitter_v10 *emit, 8864 const struct tgsi_full_instruction *inst) 8865{ 8866 const uint unit = inst->Src[2].Register.Index; 8867 unsigned target = inst->Texture.Texture; 8868 struct tgsi_full_src_register coord, ref; 8869 int offsets[3]; 8870 struct tex_swizzle_info swz_info; 8871 VGPU10_OPCODE_TYPE opcode; 8872 boolean compare_in_shader; 8873 8874 /* check that the sampler returns a float */ 8875 if (!is_valid_tex_instruction(emit, inst)) 8876 return TRUE; 8877 8878 compare_in_shader = emit->key.tex[unit].compare_in_shader; 8879 if (compare_in_shader) 8880 opcode = VGPU10_OPCODE_SAMPLE; 8881 else 8882 opcode = VGPU10_OPCODE_SAMPLE_C; 8883 8884 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); 8885 8886 get_texel_offsets(emit, inst, offsets); 8887 8888 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8889 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8890 8891 /* SAMPLE_C dst, coord, resource, sampler, ref */ 8892 begin_emit_instruction(emit); 8893 emit_sample_opcode(emit, opcode, 8894 inst->Instruction.Saturate, offsets); 8895 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8896 emit_src_register(emit, &coord); 8897 emit_resource_register(emit, unit); 8898 emit_sampler_register(emit, unit); 8899 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 8900 emit_tex_compare_refcoord(emit, target, &ref); 8901 } 8902 end_emit_instruction(emit); 8903 8904 end_tex_swizzle(emit, &swz_info); 8905 8906 free_temp_indexes(emit); 8907 8908 return TRUE; 8909} 8910 8911 8912/** 8913 * Emit code for TGSI_OPCODE_TXP (projective texture) 8914 */ 8915static boolean 8916emit_txp(struct svga_shader_emitter_v10 *emit, 8917 const struct tgsi_full_instruction *inst) 8918{ 8919 const uint unit = inst->Src[1].Register.Index; 8920 const enum tgsi_texture_type target = inst->Texture.Texture; 8921 VGPU10_OPCODE_TYPE opcode; 8922 int offsets[3]; 8923 unsigned tmp = get_temp_index(emit); 8924 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8925 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8926 struct tgsi_full_src_register src0_wwww = 8927 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 8928 struct tgsi_full_src_register coord; 8929 struct tex_swizzle_info swz_info; 8930 boolean compare_in_shader; 8931 8932 /* check that the sampler returns a float */ 8933 if (!is_valid_tex_instruction(emit, inst)) 8934 return TRUE; 8935 8936 compare_in_shader = tgsi_is_shadow_target(target) && 8937 emit->key.tex[unit].compare_in_shader; 8938 8939 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); 8940 8941 get_texel_offsets(emit, inst, offsets); 8942 8943 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8944 8945 /* DIV tmp, coord, coord.wwww */ 8946 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 8947 &coord, &src0_wwww); 8948 8949 /* SAMPLE dst, coord(tmp), resource, sampler */ 8950 begin_emit_instruction(emit); 8951 8952 if (tgsi_is_shadow_target(target) && !compare_in_shader) 8953 /* NOTE: for non-fragment shaders, we should use 8954 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. 8955 */ 8956 opcode = VGPU10_OPCODE_SAMPLE_C; 8957 else 8958 opcode = VGPU10_OPCODE_SAMPLE; 8959 8960 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8961 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8962 emit_src_register(emit, &tmp_src); /* projected coord */ 8963 emit_resource_register(emit, unit); 8964 emit_sampler_register(emit, unit); 8965 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 8966 emit_tex_compare_refcoord(emit, target, &tmp_src); 8967 } 8968 end_emit_instruction(emit); 8969 8970 end_tex_swizzle(emit, &swz_info); 8971 8972 free_temp_indexes(emit); 8973 8974 return TRUE; 8975} 8976 8977 8978/** 8979 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 8980 */ 8981static boolean 8982emit_txd(struct svga_shader_emitter_v10 *emit, 8983 const struct tgsi_full_instruction *inst) 8984{ 8985 const uint unit = inst->Src[3].Register.Index; 8986 const enum tgsi_texture_type target = inst->Texture.Texture; 8987 int offsets[3]; 8988 struct tgsi_full_src_register coord; 8989 struct tex_swizzle_info swz_info; 8990 8991 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 8992 &swz_info); 8993 8994 get_texel_offsets(emit, inst, offsets); 8995 8996 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8997 8998 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 8999 begin_emit_instruction(emit); 9000 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 9001 inst->Instruction.Saturate, offsets); 9002 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 9003 emit_src_register(emit, &coord); 9004 emit_resource_register(emit, unit); 9005 emit_sampler_register(emit, unit); 9006 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 9007 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 9008 end_emit_instruction(emit); 9009 9010 end_tex_swizzle(emit, &swz_info); 9011 9012 free_temp_indexes(emit); 9013 9014 return TRUE; 9015} 9016 9017 9018/** 9019 * Emit code for TGSI_OPCODE_TXF (texel fetch) 9020 */ 9021static boolean 9022emit_txf(struct svga_shader_emitter_v10 *emit, 9023 const struct tgsi_full_instruction *inst) 9024{ 9025 const uint unit = inst->Src[1].Register.Index; 9026 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) 9027 && emit->key.tex[unit].num_samples > 1; 9028 int offsets[3]; 9029 struct tex_swizzle_info swz_info; 9030 9031 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 9032 9033 get_texel_offsets(emit, inst, offsets); 9034 9035 if (msaa) { 9036 assert(emit->key.tex[unit].num_samples > 1); 9037 9038 /* Fetch one sample from an MSAA texture */ 9039 struct tgsi_full_src_register sampleIndex = 9040 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 9041 /* LD_MS dst, coord(s0), resource, sampleIndex */ 9042 begin_emit_instruction(emit); 9043 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 9044 inst->Instruction.Saturate, offsets); 9045 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 9046 emit_src_register(emit, &inst->Src[0]); 9047 emit_resource_register(emit, unit); 9048 emit_src_register(emit, &sampleIndex); 9049 end_emit_instruction(emit); 9050 } 9051 else { 9052 /* Fetch one texel specified by integer coordinate */ 9053 /* LD dst, coord(s0), resource */ 9054 begin_emit_instruction(emit); 9055 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 9056 inst->Instruction.Saturate, offsets); 9057 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 9058 emit_src_register(emit, &inst->Src[0]); 9059 emit_resource_register(emit, unit); 9060 end_emit_instruction(emit); 9061 } 9062 9063 end_tex_swizzle(emit, &swz_info); 9064 9065 free_temp_indexes(emit); 9066 9067 return TRUE; 9068} 9069 9070 9071/** 9072 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 9073 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 9074 */ 9075static boolean 9076emit_txl_txb(struct svga_shader_emitter_v10 *emit, 9077 const struct tgsi_full_instruction *inst) 9078{ 9079 const enum tgsi_texture_type target = inst->Texture.Texture; 9080 VGPU10_OPCODE_TYPE opcode; 9081 unsigned unit; 9082 int offsets[3]; 9083 struct tgsi_full_src_register coord, lod_bias; 9084 struct tex_swizzle_info swz_info; 9085 9086 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 9087 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 9088 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 9089 9090 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 9091 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 9092 unit = inst->Src[2].Register.Index; 9093 } 9094 else { 9095 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 9096 unit = inst->Src[1].Register.Index; 9097 } 9098 9099 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 9100 &swz_info); 9101 9102 get_texel_offsets(emit, inst, offsets); 9103 9104 coord = setup_texcoord(emit, unit, &inst->Src[0]); 9105 9106 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 9107 begin_emit_instruction(emit); 9108 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 9109 opcode = VGPU10_OPCODE_SAMPLE_L; 9110 } 9111 else { 9112 opcode = VGPU10_OPCODE_SAMPLE_B; 9113 } 9114 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 9115 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 9116 emit_src_register(emit, &coord); 9117 emit_resource_register(emit, unit); 9118 emit_sampler_register(emit, unit); 9119 emit_src_register(emit, &lod_bias); 9120 end_emit_instruction(emit); 9121 9122 end_tex_swizzle(emit, &swz_info); 9123 9124 free_temp_indexes(emit); 9125 9126 return TRUE; 9127} 9128 9129 9130/** 9131 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. 9132 */ 9133static boolean 9134emit_txl2(struct svga_shader_emitter_v10 *emit, 9135 const struct tgsi_full_instruction *inst) 9136{ 9137 unsigned target = inst->Texture.Texture; 9138 unsigned opcode, unit; 9139 int offsets[3]; 9140 struct tgsi_full_src_register coord, lod; 9141 struct tex_swizzle_info swz_info; 9142 9143 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2); 9144 9145 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 9146 unit = inst->Src[2].Register.Index; 9147 9148 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 9149 &swz_info); 9150 9151 get_texel_offsets(emit, inst, offsets); 9152 9153 coord = setup_texcoord(emit, unit, &inst->Src[0]); 9154 9155 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */ 9156 begin_emit_instruction(emit); 9157 opcode = VGPU10_OPCODE_SAMPLE_L; 9158 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 9159 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 9160 emit_src_register(emit, &coord); 9161 emit_resource_register(emit, unit); 9162 emit_sampler_register(emit, unit); 9163 emit_src_register(emit, &lod); 9164 end_emit_instruction(emit); 9165 9166 end_tex_swizzle(emit, &swz_info); 9167 9168 free_temp_indexes(emit); 9169 9170 return TRUE; 9171} 9172 9173 9174/** 9175 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 9176 */ 9177static boolean 9178emit_txq(struct svga_shader_emitter_v10 *emit, 9179 const struct tgsi_full_instruction *inst) 9180{ 9181 const uint unit = inst->Src[1].Register.Index; 9182 9183 if (emit->key.tex[unit].target == PIPE_BUFFER) { 9184 /* RESINFO does not support querying texture buffers, so we instead 9185 * store texture buffer sizes in shader constants, then copy them to 9186 * implement TXQ instead of emitting RESINFO. 9187 * MOV dst, const[texture_buffer_size_index[unit]] 9188 */ 9189 struct tgsi_full_src_register size_src = 9190 make_src_const_reg(emit->texture_buffer_size_index[unit]); 9191 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src); 9192 } else { 9193 /* RESINFO dst, srcMipLevel, resource */ 9194 begin_emit_instruction(emit); 9195 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 9196 emit_dst_register(emit, &inst->Dst[0]); 9197 emit_src_register(emit, &inst->Src[0]); 9198 emit_resource_register(emit, unit); 9199 end_emit_instruction(emit); 9200 } 9201 9202 free_temp_indexes(emit); 9203 9204 return TRUE; 9205} 9206 9207 9208/** 9209 * Does this opcode produce a double-precision result? 9210 * XXX perhaps move this to a TGSI utility. 9211 */ 9212static bool 9213opcode_has_dbl_dst(unsigned opcode) 9214{ 9215 switch (opcode) { 9216 case TGSI_OPCODE_F2D: 9217 case TGSI_OPCODE_DABS: 9218 case TGSI_OPCODE_DADD: 9219 case TGSI_OPCODE_DFRAC: 9220 case TGSI_OPCODE_DMAX: 9221 case TGSI_OPCODE_DMIN: 9222 case TGSI_OPCODE_DMUL: 9223 case TGSI_OPCODE_DNEG: 9224 case TGSI_OPCODE_I2D: 9225 case TGSI_OPCODE_U2D: 9226 case TGSI_OPCODE_DFMA: 9227 // XXX more TBD 9228 return true; 9229 default: 9230 return false; 9231 } 9232} 9233 9234 9235/** 9236 * Does this opcode use double-precision source registers? 9237 */ 9238static bool 9239opcode_has_dbl_src(unsigned opcode) 9240{ 9241 switch (opcode) { 9242 case TGSI_OPCODE_D2F: 9243 case TGSI_OPCODE_DABS: 9244 case TGSI_OPCODE_DADD: 9245 case TGSI_OPCODE_DFRAC: 9246 case TGSI_OPCODE_DMAX: 9247 case TGSI_OPCODE_DMIN: 9248 case TGSI_OPCODE_DMUL: 9249 case TGSI_OPCODE_DNEG: 9250 case TGSI_OPCODE_D2I: 9251 case TGSI_OPCODE_D2U: 9252 case TGSI_OPCODE_DFMA: 9253 case TGSI_OPCODE_DSLT: 9254 case TGSI_OPCODE_DSGE: 9255 case TGSI_OPCODE_DSEQ: 9256 case TGSI_OPCODE_DSNE: 9257 case TGSI_OPCODE_DRCP: 9258 case TGSI_OPCODE_DSQRT: 9259 case TGSI_OPCODE_DMAD: 9260 case TGSI_OPCODE_DLDEXP: 9261 case TGSI_OPCODE_DFRACEXP: 9262 case TGSI_OPCODE_DRSQ: 9263 case TGSI_OPCODE_DTRUNC: 9264 case TGSI_OPCODE_DCEIL: 9265 case TGSI_OPCODE_DFLR: 9266 case TGSI_OPCODE_DROUND: 9267 case TGSI_OPCODE_DSSG: 9268 return true; 9269 default: 9270 return false; 9271 } 9272} 9273 9274 9275/** 9276 * Check that the swizzle for reading from a double-precision register 9277 * is valid. If not valid, move the source to a temporary register first. 9278 */ 9279static struct tgsi_full_src_register 9280check_double_src(struct svga_shader_emitter_v10 *emit, 9281 const struct tgsi_full_src_register *reg) 9282{ 9283 struct tgsi_full_src_register src; 9284 9285 if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X && 9286 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || 9287 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && 9288 reg->Register.SwizzleY == PIPE_SWIZZLE_W)) && 9289 ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && 9290 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || 9291 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && 9292 reg->Register.SwizzleW == PIPE_SWIZZLE_W))) { 9293 src = *reg; 9294 } else { 9295 /* move the src to a temporary to fix the swizzle */ 9296 unsigned tmp = get_temp_index(emit); 9297 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 9298 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 9299 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg); 9300 src = tmp_src; 9301 9302 /* The temporary index will be released in the caller */ 9303 } 9304 return src; 9305} 9306 9307/** 9308 * Check that the writemask for a double-precision instruction is valid. 9309 */ 9310static void 9311check_double_dst_writemask(const struct tgsi_full_instruction *inst) 9312{ 9313 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask; 9314 9315 switch (inst->Instruction.Opcode) { 9316 case TGSI_OPCODE_DABS: 9317 case TGSI_OPCODE_DADD: 9318 case TGSI_OPCODE_DFRAC: 9319 case TGSI_OPCODE_DNEG: 9320 case TGSI_OPCODE_DMAD: 9321 case TGSI_OPCODE_DMAX: 9322 case TGSI_OPCODE_DMIN: 9323 case TGSI_OPCODE_DMUL: 9324 case TGSI_OPCODE_DRCP: 9325 case TGSI_OPCODE_DSQRT: 9326 case TGSI_OPCODE_F2D: 9327 case TGSI_OPCODE_DFMA: 9328 assert(writemask == TGSI_WRITEMASK_XYZW || 9329 writemask == TGSI_WRITEMASK_XY || 9330 writemask == TGSI_WRITEMASK_ZW); 9331 break; 9332 case TGSI_OPCODE_DSEQ: 9333 case TGSI_OPCODE_DSGE: 9334 case TGSI_OPCODE_DSNE: 9335 case TGSI_OPCODE_DSLT: 9336 case TGSI_OPCODE_D2I: 9337 case TGSI_OPCODE_D2U: 9338 /* Write to 1 or 2 components only */ 9339 assert(util_bitcount(writemask) <= 2); 9340 break; 9341 default: 9342 /* XXX this list may be incomplete */ 9343 ; 9344 } 9345} 9346 9347 9348/** 9349 * Double-precision absolute value. 9350 */ 9351static boolean 9352emit_dabs(struct svga_shader_emitter_v10 *emit, 9353 const struct tgsi_full_instruction *inst) 9354{ 9355 assert(emit->version >= 50); 9356 9357 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); 9358 check_double_dst_writemask(inst); 9359 9360 struct tgsi_full_src_register abs_src = absolute_src(&src); 9361 9362 /* DMOV dst, |src| */ 9363 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src); 9364 9365 free_temp_indexes(emit); 9366 return TRUE; 9367} 9368 9369 9370/** 9371 * Double-precision negation 9372 */ 9373static boolean 9374emit_dneg(struct svga_shader_emitter_v10 *emit, 9375 const struct tgsi_full_instruction *inst) 9376{ 9377 assert(emit->version >= 50); 9378 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); 9379 check_double_dst_writemask(inst); 9380 9381 struct tgsi_full_src_register neg_src = negate_src(&src); 9382 9383 /* DMOV dst, -src */ 9384 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src); 9385 9386 free_temp_indexes(emit); 9387 return TRUE; 9388} 9389 9390 9391/** 9392 * SM5 has no DMAD opcode. Implement negation with DMUL/DADD. 9393 */ 9394static boolean 9395emit_dmad(struct svga_shader_emitter_v10 *emit, 9396 const struct tgsi_full_instruction *inst) 9397{ 9398 assert(emit->version >= 50); 9399 struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]); 9400 struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]); 9401 struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]); 9402 check_double_dst_writemask(inst); 9403 9404 unsigned tmp = get_temp_index(emit); 9405 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 9406 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 9407 9408 /* DMUL tmp, src[0], src[1] */ 9409 emit_instruction_opn(emit, VGPU10_OPCODE_DMUL, 9410 &tmp_dst, &src0, &src1, NULL, 9411 FALSE, inst->Instruction.Precise); 9412 9413 /* DADD dst, tmp, src[2] */ 9414 emit_instruction_opn(emit, VGPU10_OPCODE_DADD, 9415 &inst->Dst[0], &tmp_src, &src2, NULL, 9416 inst->Instruction.Saturate, inst->Instruction.Precise); 9417 free_temp_indexes(emit); 9418 9419 return TRUE; 9420} 9421 9422 9423/** 9424 * Double precision reciprocal square root 9425 */ 9426static boolean 9427emit_drsq(struct svga_shader_emitter_v10 *emit, 9428 const struct tgsi_full_dst_register *dst, 9429 const struct tgsi_full_src_register *src) 9430{ 9431 assert(emit->version >= 50); 9432 9433 VGPU10OpcodeToken0 token0; 9434 struct tgsi_full_src_register dsrc = check_double_src(emit, src); 9435 9436 begin_emit_instruction(emit); 9437 9438 token0.value = 0; 9439 token0.opcodeType = VGPU10_OPCODE_VMWARE; 9440 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ; 9441 emit_dword(emit, token0.value); 9442 emit_dst_register(emit, dst); 9443 emit_src_register(emit, &dsrc); 9444 end_emit_instruction(emit); 9445 9446 free_temp_indexes(emit); 9447 9448 return TRUE; 9449} 9450 9451 9452/** 9453 * There is no SM5 opcode for double precision square root. 9454 * It will be implemented with DRSQ. 9455 * dst = src * DRSQ(src) 9456 */ 9457static boolean 9458emit_dsqrt(struct svga_shader_emitter_v10 *emit, 9459 const struct tgsi_full_instruction *inst) 9460{ 9461 assert(emit->version >= 50); 9462 9463 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); 9464 9465 /* temporary register to hold the source */ 9466 unsigned tmp = get_temp_index(emit); 9467 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 9468 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 9469 9470 /* temporary register to hold the DEQ result */ 9471 unsigned tmp_cond = get_temp_index(emit); 9472 struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond); 9473 struct tgsi_full_dst_register tmp_cond_dst_xy = 9474 writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); 9475 struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond); 9476 struct tgsi_full_src_register tmp_cond_src_xy = 9477 swizzle_src(&tmp_cond_src, 9478 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 9479 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); 9480 9481 /* The reciprocal square root of zero yields INF. 9482 * So if the source is 0, we replace it with 1 in the tmp register. 9483 * The later multiplication of zero in the original source will yield 0 9484 * in the result. 9485 */ 9486 9487 /* tmp1 = (src == 0) ? 1 : src; 9488 * EQ tmp1, 0, src 9489 * MOVC tmp, tmp1, 1.0, src 9490 */ 9491 struct tgsi_full_src_register zero = 9492 make_immediate_reg_double(emit, 0); 9493 9494 struct tgsi_full_src_register one = 9495 make_immediate_reg_double(emit, 1.0); 9496 9497 emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy, 9498 &zero, &src); 9499 emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst, 9500 &tmp_cond_src_xy, &one, &src); 9501 9502 struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp); 9503 struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp); 9504 9505 /* DRSQ tmp_rsq, tmp */ 9506 emit_drsq(emit, &tmp_rsq_dst, &tmp_src); 9507 9508 /* DMUL dst, tmp_rsq, src[0] */ 9509 emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0], 9510 &tmp_rsq_src, &src); 9511 9512 free_temp_indexes(emit); 9513 9514 return TRUE; 9515} 9516 9517 9518/** 9519 * glsl-nir path does not lower DTRUNC, so we need to 9520 * add the translation here. 9521 * 9522 * frac = DFRAC(src) 9523 * tmp = src - frac 9524 * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1)) 9525 */ 9526static boolean 9527emit_dtrunc(struct svga_shader_emitter_v10 *emit, 9528 const struct tgsi_full_instruction *inst) 9529{ 9530 assert(emit->version >= 50); 9531 9532 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); 9533 9534 /* frac = DFRAC(src) */ 9535 unsigned frac_index = get_temp_index(emit); 9536 struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index); 9537 struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index); 9538 9539 VGPU10OpcodeToken0 token0; 9540 begin_emit_instruction(emit); 9541 token0.value = 0; 9542 token0.opcodeType = VGPU10_OPCODE_VMWARE; 9543 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC; 9544 emit_dword(emit, token0.value); 9545 emit_dst_register(emit, &frac_dst); 9546 emit_src_register(emit, &src); 9547 end_emit_instruction(emit); 9548 9549 /* tmp = src - frac */ 9550 unsigned tmp_index = get_temp_index(emit); 9551 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); 9552 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); 9553 struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src); 9554 emit_instruction_opn(emit, VGPU10_OPCODE_DADD, 9555 &tmp_dst, &src, &negate_frac_src, NULL, 9556 inst->Instruction.Saturate, inst->Instruction.Precise); 9557 9558 /* cond = frac==0 */ 9559 unsigned cond_index = get_temp_index(emit); 9560 struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index); 9561 struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index); 9562 struct tgsi_full_src_register zero = 9563 make_immediate_reg_double(emit, 0); 9564 9565 /* Only use one or two components for double opcode */ 9566 cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); 9567 9568 emit_instruction_opn(emit, VGPU10_OPCODE_DEQ, 9569 &cond_dst, &frac_src, &zero, NULL, 9570 inst->Instruction.Saturate, inst->Instruction.Precise); 9571 9572 /* tmp2 = cond ? 0 : 1 */ 9573 unsigned tmp2_index = get_temp_index(emit); 9574 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index); 9575 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index); 9576 struct tgsi_full_src_register cond_src_xy = 9577 swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 9578 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); 9579 struct tgsi_full_src_register one = 9580 make_immediate_reg_double(emit, 1.0); 9581 9582 emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, 9583 &tmp2_dst, &cond_src_xy, &zero, &one, 9584 inst->Instruction.Saturate, inst->Instruction.Precise); 9585 9586 /* tmp2 = tmp + tmp2 */ 9587 emit_instruction_opn(emit, VGPU10_OPCODE_DADD, 9588 &tmp2_dst, &tmp_src, &tmp2_src, NULL, 9589 inst->Instruction.Saturate, inst->Instruction.Precise); 9590 9591 /* cond = src>=0 */ 9592 emit_instruction_opn(emit, VGPU10_OPCODE_DGE, 9593 &cond_dst, &src, &zero, NULL, 9594 inst->Instruction.Saturate, inst->Instruction.Precise); 9595 9596 /* dst = cond ? tmp : tmp2 */ 9597 emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, 9598 &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src, 9599 inst->Instruction.Saturate, inst->Instruction.Precise); 9600 9601 free_temp_indexes(emit); 9602 return TRUE; 9603} 9604 9605 9606static boolean 9607emit_interp_offset(struct svga_shader_emitter_v10 *emit, 9608 const struct tgsi_full_instruction *inst) 9609{ 9610 assert(emit->version >= 50); 9611 9612 /* The src1.xy offset is a float with values in the range [-0.5, 0.5] 9613 * where (0,0) is the center of the pixel. We need to translate that 9614 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16]. 9615 * Also need to flip the Y axis (I think). 9616 */ 9617 unsigned tmp = get_temp_index(emit); 9618 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 9619 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 9620 struct tgsi_full_dst_register tmp_dst_xy = 9621 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); 9622 struct tgsi_full_src_register const16 = 9623 make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0); 9624 9625 /* MUL tmp.xy, src1, {16, -16, 0, 0} */ 9626 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, 9627 &tmp_dst_xy, &inst->Src[1], &const16); 9628 9629 /* FTOI tmp.xy, tmp */ 9630 emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src); 9631 9632 /* EVAL_SNAPPED dst, src0, tmp */ 9633 emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED, 9634 &inst->Dst[0], &inst->Src[0], &tmp_src); 9635 9636 free_temp_indexes(emit); 9637 9638 return TRUE; 9639} 9640 9641 9642/** 9643 * Emit a simple instruction (like ADD, MUL, MIN, etc). 9644 */ 9645static boolean 9646emit_simple(struct svga_shader_emitter_v10 *emit, 9647 const struct tgsi_full_instruction *inst) 9648{ 9649 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9650 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 9651 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); 9652 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); 9653 unsigned i; 9654 9655 struct tgsi_full_src_register src[3]; 9656 9657 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { 9658 emit->current_loop_depth++; 9659 } 9660 else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { 9661 emit->current_loop_depth--; 9662 } 9663 9664 for (i = 0; i < op->num_src; i++) { 9665 if (dbl_src) 9666 src[i] = check_double_src(emit, &inst->Src[i]); 9667 else 9668 src[i] = inst->Src[i]; 9669 } 9670 9671 begin_emit_instruction(emit); 9672 emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode), 9673 inst->Instruction.Saturate, 9674 inst->Instruction.Precise); 9675 for (i = 0; i < op->num_dst; i++) { 9676 if (dbl_dst) { 9677 check_double_dst_writemask(inst); 9678 } 9679 emit_dst_register(emit, &inst->Dst[i]); 9680 } 9681 for (i = 0; i < op->num_src; i++) { 9682 emit_src_register(emit, &src[i]); 9683 } 9684 end_emit_instruction(emit); 9685 9686 free_temp_indexes(emit); 9687 return TRUE; 9688} 9689 9690 9691/** 9692 * Emit MSB instruction (like IMSB, UMSB). 9693 * 9694 * GLSL returns the index starting from the LSB; 9695 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB. 9696 * To get correct location as per glsl from SM5 device, we should 9697 * return (31 - index) if returned index is not -1. 9698 */ 9699static boolean 9700emit_msb(struct svga_shader_emitter_v10 *emit, 9701 const struct tgsi_full_instruction *inst) 9702{ 9703 const struct tgsi_full_dst_register *index_dst = &inst->Dst[0]; 9704 9705 assert(index_dst->Register.File != TGSI_FILE_OUTPUT); 9706 9707 struct tgsi_full_src_register index_src = 9708 make_src_reg(index_dst->Register.File, index_dst->Register.Index); 9709 struct tgsi_full_src_register imm31 = 9710 make_immediate_reg_int(emit, 31); 9711 imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X); 9712 struct tgsi_full_src_register neg_one = 9713 make_immediate_reg_int(emit, -1); 9714 neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X); 9715 unsigned tmp = get_temp_index(emit); 9716 const struct tgsi_full_dst_register tmp_dst = 9717 make_dst_temp_reg(tmp); 9718 const struct tgsi_full_dst_register tmp_dst_x = 9719 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 9720 const struct tgsi_full_src_register tmp_src_x = 9721 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X); 9722 int writemask = TGSI_WRITEMASK_X; 9723 int src_swizzle = TGSI_SWIZZLE_X; 9724 int dst_writemask = index_dst->Register.WriteMask; 9725 9726 emit_simple(emit, inst); 9727 9728 /* index conversion from SM5 to GLSL */ 9729 while (writemask & dst_writemask) { 9730 struct tgsi_full_src_register index_src_comp = 9731 scalar_src(&index_src, src_swizzle); 9732 struct tgsi_full_dst_register index_dst_comp = 9733 writemask_dst(index_dst, writemask); 9734 9735 /* check if index_src_comp != -1 */ 9736 emit_instruction_op2(emit, VGPU10_OPCODE_INE, 9737 &tmp_dst_x, &index_src_comp, &neg_one); 9738 9739 /* if */ 9740 emit_if(emit, &tmp_src_x); 9741 9742 index_src_comp = negate_src(&index_src_comp); 9743 /* SUB DST, IMM{31}, DST */ 9744 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, 9745 &index_dst_comp, &imm31, &index_src_comp); 9746 9747 /* endif */ 9748 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 9749 9750 writemask = writemask << 1; 9751 src_swizzle = src_swizzle + 1; 9752 } 9753 free_temp_indexes(emit); 9754 return TRUE; 9755} 9756 9757 9758/** 9759 * Emit a BFE instruction (like UBFE, IBFE). 9760 * tgsi representation: 9761 * U/IBFE dst, value, offset, width 9762 * SM5 representation: 9763 * U/IBFE dst, width, offset, value 9764 * Note: SM5 has width & offset range (0-31); 9765 * whereas GLSL has width & offset range (0-32) 9766 */ 9767static boolean 9768emit_bfe(struct svga_shader_emitter_v10 *emit, 9769 const struct tgsi_full_instruction *inst) 9770{ 9771 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9772 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); 9773 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); 9774 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); 9775 zero = scalar_src(&zero, TGSI_SWIZZLE_X); 9776 9777 unsigned tmp1 = get_temp_index(emit); 9778 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); 9779 const struct tgsi_full_dst_register cond1_dst_x = 9780 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); 9781 const struct tgsi_full_src_register cond1_src_x = 9782 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); 9783 9784 unsigned tmp2 = get_temp_index(emit); 9785 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); 9786 const struct tgsi_full_dst_register cond2_dst_x = 9787 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); 9788 const struct tgsi_full_src_register cond2_src_x = 9789 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); 9790 9791 /** 9792 * In SM5, when width = 32 and offset = 0, it returns 0. 9793 * On the other hand GLSL, expects value to be copied as it is, to dst. 9794 */ 9795 9796 /* cond1 = width ! = 32 */ 9797 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9798 &cond1_dst_x, &inst->Src[2], &imm32); 9799 9800 /* cond2 = offset ! = 0 */ 9801 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9802 &cond2_dst_x, &inst->Src[1], &zero); 9803 9804 /* cond 2 = cond1 & cond 2 */ 9805 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x, 9806 &cond2_src_x, 9807 &cond1_src_x); 9808 /* IF */ 9809 emit_if(emit, &cond2_src_x); 9810 9811 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 9812 &inst->Src[0]); 9813 9814 /* ELSE */ 9815 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 9816 9817 /* U/IBFE dst, width, offset, value */ 9818 emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0], 9819 &inst->Src[2], &inst->Src[1], &inst->Src[0]); 9820 9821 /* ENDIF */ 9822 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 9823 9824 free_temp_indexes(emit); 9825 return TRUE; 9826} 9827 9828 9829/** 9830 * Emit BFI instruction 9831 * tgsi representation: 9832 * BFI dst, base, insert, offset, width 9833 * SM5 representation: 9834 * BFI dst, width, offset, insert, base 9835 * Note: SM5 has width & offset range (0-31); 9836 * whereas GLSL has width & offset range (0-32) 9837 */ 9838static boolean 9839emit_bfi(struct svga_shader_emitter_v10 *emit, 9840 const struct tgsi_full_instruction *inst) 9841{ 9842 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9843 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); 9844 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); 9845 9846 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); 9847 zero = scalar_src(&zero, TGSI_SWIZZLE_X); 9848 9849 unsigned tmp1 = get_temp_index(emit); 9850 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); 9851 const struct tgsi_full_dst_register cond1_dst_x = 9852 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); 9853 const struct tgsi_full_src_register cond1_src_x = 9854 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); 9855 9856 unsigned tmp2 = get_temp_index(emit); 9857 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); 9858 const struct tgsi_full_dst_register cond2_dst_x = 9859 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); 9860 const struct tgsi_full_src_register cond2_src_x = 9861 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); 9862 9863 /** 9864 * In SM5, when width = 32 and offset = 0, it returns 0. 9865 * On the other hand GLSL, expects insert to be copied as it is, to dst. 9866 */ 9867 9868 /* cond1 = width == 32 */ 9869 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9870 &cond1_dst_x, &inst->Src[3], &imm32); 9871 9872 /* cond1 = offset == 0 */ 9873 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9874 &cond2_dst_x, &inst->Src[2], &zero); 9875 9876 /* cond2 = cond1 & cond2 */ 9877 emit_instruction_op2(emit, VGPU10_OPCODE_AND, 9878 &cond2_dst_x, &cond2_src_x, &cond1_src_x); 9879 9880 /* if */ 9881 emit_if(emit, &cond2_src_x); 9882 9883 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 9884 &inst->Src[1]); 9885 9886 /* else */ 9887 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 9888 9889 /* BFI dst, width, offset, insert, base */ 9890 begin_emit_instruction(emit); 9891 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); 9892 emit_dst_register(emit, &inst->Dst[0]); 9893 emit_src_register(emit, &inst->Src[3]); 9894 emit_src_register(emit, &inst->Src[2]); 9895 emit_src_register(emit, &inst->Src[1]); 9896 emit_src_register(emit, &inst->Src[0]); 9897 end_emit_instruction(emit); 9898 9899 /* endif */ 9900 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 9901 9902 free_temp_indexes(emit); 9903 return TRUE; 9904} 9905 9906 9907/** 9908 * We only special case the MOV instruction to try to detect constant 9909 * color writes in the fragment shader. 9910 */ 9911static boolean 9912emit_mov(struct svga_shader_emitter_v10 *emit, 9913 const struct tgsi_full_instruction *inst) 9914{ 9915 const struct tgsi_full_src_register *src = &inst->Src[0]; 9916 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 9917 9918 if (emit->unit == PIPE_SHADER_FRAGMENT && 9919 dst->Register.File == TGSI_FILE_OUTPUT && 9920 dst->Register.Index == 0 && 9921 src->Register.File == TGSI_FILE_CONSTANT && 9922 !src->Register.Indirect) { 9923 emit->constant_color_output = TRUE; 9924 } 9925 9926 return emit_simple(emit, inst); 9927} 9928 9929 9930/** 9931 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 9932 * where TGSI only uses one dest register. 9933 */ 9934static boolean 9935emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 9936 const struct tgsi_full_instruction *inst, 9937 unsigned dst_count, 9938 unsigned dst_index) 9939{ 9940 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9941 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 9942 unsigned i; 9943 9944 begin_emit_instruction(emit); 9945 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); 9946 9947 for (i = 0; i < dst_count; i++) { 9948 if (i == dst_index) { 9949 emit_dst_register(emit, &inst->Dst[0]); 9950 } else { 9951 emit_null_dst_register(emit); 9952 } 9953 } 9954 9955 for (i = 0; i < op->num_src; i++) { 9956 emit_src_register(emit, &inst->Src[i]); 9957 } 9958 end_emit_instruction(emit); 9959 9960 return TRUE; 9961} 9962 9963 9964/** 9965 * Emit a vmware specific VGPU10 instruction. 9966 */ 9967static boolean 9968emit_vmware(struct svga_shader_emitter_v10 *emit, 9969 const struct tgsi_full_instruction *inst, 9970 VGPU10_VMWARE_OPCODE_TYPE subopcode) 9971{ 9972 VGPU10OpcodeToken0 token0; 9973 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9974 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 9975 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); 9976 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); 9977 unsigned i; 9978 struct tgsi_full_src_register src[3]; 9979 9980 for (i = 0; i < op->num_src; i++) { 9981 if (dbl_src) 9982 src[i] = check_double_src(emit, &inst->Src[i]); 9983 else 9984 src[i] = inst->Src[i]; 9985 } 9986 9987 begin_emit_instruction(emit); 9988 9989 assert((subopcode > 0 && emit->version >= 50) || subopcode == 0); 9990 9991 token0.value = 0; 9992 token0.opcodeType = VGPU10_OPCODE_VMWARE; 9993 token0.vmwareOpcodeType = subopcode; 9994 emit_dword(emit, token0.value); 9995 9996 if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) { 9997 /* IDIV only uses the first dest register. */ 9998 emit_dst_register(emit, &inst->Dst[0]); 9999 emit_null_dst_register(emit); 10000 } else { 10001 for (i = 0; i < op->num_dst; i++) { 10002 if (dbl_dst) { 10003 check_double_dst_writemask(inst); 10004 } 10005 emit_dst_register(emit, &inst->Dst[i]); 10006 } 10007 } 10008 10009 for (i = 0; i < op->num_src; i++) { 10010 emit_src_register(emit, &src[i]); 10011 } 10012 end_emit_instruction(emit); 10013 10014 free_temp_indexes(emit); 10015 return TRUE; 10016} 10017 10018/** 10019 * Emit a memory register 10020 */ 10021 10022typedef enum { 10023 MEM_STORE = 0, 10024 MEM_LOAD = 1, 10025 MEM_ATOMIC_COUNTER 10026} memory_op; 10027 10028static void 10029emit_memory_register(struct svga_shader_emitter_v10 *emit, 10030 memory_op mem_op, 10031 const struct tgsi_full_instruction *inst, 10032 unsigned regIndex, unsigned writemask) 10033{ 10034 VGPU10OperandToken0 operand0; 10035 unsigned resIndex = 0; 10036 10037 operand0.value = 0; 10038 operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; 10039 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 10040 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 10041 10042 switch (mem_op) { 10043 case MEM_ATOMIC_COUNTER: 10044 { 10045 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 10046 resIndex = inst->Src[regIndex].Register.Index; 10047 break; 10048 } 10049 case MEM_STORE: 10050 { 10051 const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex]; 10052 10053 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 10054 operand0.mask = writemask; 10055 resIndex = reg->Register.Index; 10056 break; 10057 } 10058 case MEM_LOAD: 10059 { 10060 const struct tgsi_full_src_register *reg = &inst->Src[regIndex]; 10061 10062 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 10063 operand0.swizzleX = reg->Register.SwizzleX; 10064 operand0.swizzleY = reg->Register.SwizzleY; 10065 operand0.swizzleZ = reg->Register.SwizzleZ; 10066 operand0.swizzleW = reg->Register.SwizzleW; 10067 resIndex = reg->Register.Index; 10068 break; 10069 } 10070 default: 10071 assert(!"Unexpected memory opcode"); 10072 break; 10073 } 10074 10075 emit_dword(emit, operand0.value); 10076 emit_dword(emit, resIndex); 10077} 10078 10079 10080typedef enum { 10081 UAV_STORE = 0, 10082 UAV_LOAD = 1, 10083 UAV_ATOMIC = 2, 10084 UAV_RESQ = 3, 10085} UAV_OP; 10086 10087 10088/** 10089 * Emit a uav register 10090 * \param uav_index index of resource register 10091 * \param uav_op UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode 10092 * \param resourceType resource file type 10093 * \param writemask resource writemask 10094 */ 10095 10096static void 10097emit_uav_register(struct svga_shader_emitter_v10 *emit, 10098 unsigned res_index, UAV_OP uav_op, 10099 enum tgsi_file_type resourceType, unsigned writemask) 10100{ 10101 VGPU10OperandToken0 operand0; 10102 unsigned uav_index = INVALID_INDEX; 10103 10104 operand0.value = 0; 10105 operand0.operandType = VGPU10_OPERAND_TYPE_UAV; 10106 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 10107 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 10108 10109 switch (resourceType) { 10110 case TGSI_FILE_IMAGE: 10111 uav_index = emit->key.images[res_index].uav_index; 10112 break; 10113 case TGSI_FILE_BUFFER: 10114 uav_index = emit->key.shader_buf_uav_index[res_index]; 10115 break; 10116 case TGSI_FILE_HW_ATOMIC: 10117 uav_index = emit->key.atomic_buf_uav_index[res_index]; 10118 break; 10119 default: 10120 assert(0); 10121 } 10122 10123 switch (uav_op) { 10124 case UAV_ATOMIC: 10125 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 10126 break; 10127 10128 case UAV_STORE: 10129 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 10130 operand0.mask = writemask; 10131 break; 10132 10133 case UAV_LOAD: 10134 case UAV_RESQ: 10135 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 10136 operand0.swizzleX = VGPU10_COMPONENT_X; 10137 operand0.swizzleY = VGPU10_COMPONENT_Y; 10138 operand0.swizzleZ = VGPU10_COMPONENT_Z; 10139 operand0.swizzleW = VGPU10_COMPONENT_W; 10140 break; 10141 10142 default: 10143 break; 10144 } 10145 10146 emit_dword(emit, operand0.value); 10147 emit_dword(emit, uav_index); 10148} 10149 10150 10151/** 10152 * A helper function to emit the uav address. 10153 * For memory, buffer, and image resource, it is set to the specified address. 10154 * For HW atomic counter, the address is the sum of the address offset and the 10155 * offset into the HW atomic buffer as specified by the register index. 10156 * It is also possible to specify the counter index as an indirect address. 10157 * And in this case, the uav address will be the sum of the address offset and the 10158 * counter index specified in the indirect address. 10159 */ 10160static 10161struct tgsi_full_src_register 10162emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit, 10163 enum tgsi_file_type resourceType, 10164 unsigned resourceIndex, 10165 unsigned resourceIndirect, 10166 unsigned resourceIndirectIndex, 10167 const struct tgsi_full_src_register *addr_reg) 10168{ 10169 unsigned addr_tmp; 10170 struct tgsi_full_dst_register addr_dst; 10171 struct tgsi_full_src_register addr_src; 10172 struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2); 10173 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); 10174 10175 addr_tmp = get_temp_index(emit); 10176 addr_dst = make_dst_temp_reg(addr_tmp); 10177 addr_src = make_src_temp_reg(addr_tmp); 10178 10179 /* specified address offset */ 10180 if (addr_reg) 10181 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg); 10182 else 10183 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero); 10184 10185 /* For HW atomic counter, we need to find the index to the 10186 * HW atomic buffer. 10187 */ 10188 if (resourceType == TGSI_FILE_HW_ATOMIC) { 10189 if (resourceIndirect) { 10190 10191 /** 10192 * uav addr offset = counter layout offset + 10193 * counter indirect index address + address offset 10194 */ 10195 10196 /* counter layout offset */ 10197 struct tgsi_full_src_register layout_offset; 10198 layout_offset = 10199 make_immediate_reg_int(emit, resourceIndex); 10200 10201 /* counter layout offset + address offset */ 10202 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, 10203 &addr_src, &layout_offset); 10204 10205 /* counter indirect index address */ 10206 unsigned indirect_addr = 10207 emit->address_reg_index[resourceIndirectIndex]; 10208 10209 struct tgsi_full_src_register indirect_addr_src = 10210 make_src_temp_reg(indirect_addr); 10211 10212 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); 10213 10214 /* counter layout offset + address offset + counter indirect address */ 10215 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, 10216 &addr_src, &indirect_addr_src); 10217 10218 } else { 10219 struct tgsi_full_src_register index_src; 10220 10221 index_src = make_immediate_reg_int(emit, resourceIndex); 10222 10223 /* uav addr offset = counter index address + address offset */ 10224 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst, 10225 &addr_src, &index_src); 10226 } 10227 10228 /* HW atomic buffer is declared as raw buffer, so the buffer address is 10229 * the byte offset, so we need to multiple the counter addr offset by 4. 10230 */ 10231 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst, 10232 &addr_src, &two); 10233 } 10234 else if (resourceType == TGSI_FILE_IMAGE) { 10235 if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D) 10236 && emit->key.images[resourceIndex].is_single_layer) { 10237 10238 struct tgsi_full_dst_register addr_dst_z = 10239 writemask_dst(&addr_dst, TGSI_WRITEMASK_Z); 10240 10241 /* For non-layered 3D texture image view, we have to make sure the z 10242 * component of the address offset is set to 0. 10243 */ 10244 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z, 10245 &zero); 10246 } 10247 } 10248 10249 return addr_src; 10250} 10251 10252 10253 10254/** 10255 * A helper function to expand indirect indexing to uav resource 10256 * by looping through the resource array, compare the indirect index and 10257 * emit the instruction for each resource in the array. 10258 */ 10259static void 10260loop_instruction(unsigned index, unsigned count, 10261 struct tgsi_full_src_register *addr_index, 10262 void (*fb)(struct svga_shader_emitter_v10 *, 10263 const struct tgsi_full_instruction *, unsigned), 10264 struct svga_shader_emitter_v10 *emit, 10265 const struct tgsi_full_instruction *inst) 10266{ 10267 if (count == 0) 10268 return; 10269 10270 if (index > 0) { 10271 /* ELSE */ 10272 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 10273 } 10274 10275 struct tgsi_full_src_register index_src = 10276 make_immediate_reg_int(emit, index); 10277 10278 unsigned tmp_index = get_temp_index(emit); 10279 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); 10280 struct tgsi_full_src_register tmp_src_x = 10281 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 10282 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); 10283 10284 /* IEQ tmp, addr_tmp_index, index */ 10285 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst, 10286 addr_index, &index_src); 10287 10288 /* IF tmp */ 10289 emit_if(emit, &tmp_src_x); 10290 10291 free_temp_indexes(emit); 10292 10293 (*fb)(emit, inst, index); 10294 10295 loop_instruction(index+1, count-1, addr_index, fb, emit, inst); 10296 10297 /* ENDIF */ 10298 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 10299} 10300 10301 10302/** 10303 * A helper function to emit the load instruction. 10304 */ 10305static void 10306emit_load_instruction(struct svga_shader_emitter_v10 *emit, 10307 const struct tgsi_full_instruction *inst, 10308 unsigned resourceIndex) 10309{ 10310 VGPU10OpcodeToken0 token0; 10311 struct tgsi_full_src_register addr_src; 10312 enum tgsi_file_type resourceType = inst->Src[0].Register.File; 10313 10314 /* Resolve the resource address for this resource first */ 10315 addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex, 10316 inst->Src[0].Register.Indirect, 10317 inst->Src[0].Indirect.Index, 10318 &inst->Src[1]); 10319 10320 /* LOAD resource, address, src */ 10321 begin_emit_instruction(emit); 10322 10323 token0.value = 0; 10324 10325 if (resourceType == TGSI_FILE_MEMORY || 10326 resourceType == TGSI_FILE_BUFFER || 10327 resourceType == TGSI_FILE_HW_ATOMIC) { 10328 token0.opcodeType = VGPU10_OPCODE_LD_RAW; 10329 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); 10330 } 10331 else { 10332 token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; 10333 } 10334 10335 token0.saturate = inst->Instruction.Saturate, 10336 emit_dword(emit, token0.value); 10337 10338 emit_dst_register(emit, &inst->Dst[0]); 10339 emit_src_register(emit, &addr_src); 10340 10341 if (resourceType == TGSI_FILE_MEMORY) { 10342 emit_memory_register(emit, MEM_LOAD, inst, 0, 0); 10343 } else if (resourceType == TGSI_FILE_HW_ATOMIC) { 10344 emit_uav_register(emit, inst->Src[0].Dimension.Index, 10345 UAV_LOAD, inst->Src[0].Register.File, 0); 10346 } else { 10347 emit_uav_register(emit, resourceIndex, 10348 UAV_LOAD, inst->Src[0].Register.File, 0); 10349 } 10350 10351 end_emit_instruction(emit); 10352 10353 free_temp_indexes(emit); 10354} 10355 10356 10357/** 10358 * Emit uav / memory load instruction 10359 */ 10360static boolean 10361emit_load(struct svga_shader_emitter_v10 *emit, 10362 const struct tgsi_full_instruction *inst) 10363{ 10364 enum tgsi_file_type resourceType = inst->Src[0].Register.File; 10365 unsigned resourceIndex = inst->Src[0].Register.Index; 10366 10367 /* If the resource register has indirect index, we will need 10368 * to expand it since SM5 device does not support indirect indexing 10369 * for uav. 10370 */ 10371 if (inst->Src[0].Register.Indirect && 10372 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { 10373 10374 unsigned indirect_index = inst->Src[0].Indirect.Index; 10375 unsigned num_resources = 10376 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : 10377 emit->num_images; 10378 10379 /* indirect index tmp register */ 10380 unsigned indirect_addr = emit->address_reg_index[indirect_index]; 10381 struct tgsi_full_src_register indirect_addr_src = 10382 make_src_temp_reg(indirect_addr); 10383 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); 10384 10385 /* Add offset to the indirect index */ 10386 if (inst->Src[0].Register.Index != 0) { 10387 struct tgsi_full_src_register offset = 10388 make_immediate_reg_int(emit, inst->Src[0].Register.Index); 10389 struct tgsi_full_dst_register indirect_addr_dst = 10390 make_dst_temp_reg(indirect_addr); 10391 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, 10392 &indirect_addr_src, &offset); 10393 } 10394 10395 /* Loop through the resource array to find which resource to use. 10396 */ 10397 loop_instruction(0, num_resources, &indirect_addr_src, 10398 emit_load_instruction, emit, inst); 10399 } 10400 else { 10401 emit_load_instruction(emit, inst, resourceIndex); 10402 } 10403 10404 free_temp_indexes(emit); 10405 10406 return TRUE; 10407} 10408 10409 10410/** 10411 * A helper function to emit a store instruction. 10412 */ 10413static void 10414emit_store_instruction(struct svga_shader_emitter_v10 *emit, 10415 const struct tgsi_full_instruction *inst, 10416 unsigned resourceIndex) 10417{ 10418 VGPU10OpcodeToken0 token0; 10419 enum tgsi_file_type resourceType = inst->Dst[0].Register.File; 10420 unsigned writemask = inst->Dst[0].Register.WriteMask; 10421 struct tgsi_full_src_register addr_src; 10422 10423 unsigned tmp_index = get_temp_index(emit); 10424 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); 10425 struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index); 10426 struct tgsi_full_dst_register tmp_dst; 10427 10428 struct tgsi_full_src_register src = inst->Src[1]; 10429 struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); 10430 10431 boolean needLoad = FALSE; 10432 boolean needPerComponentStore = FALSE; 10433 unsigned swizzles = 0; 10434 10435 /* Resolve the resource address for this resource first */ 10436 addr_src = emit_uav_addr_offset(emit, resourceType, 10437 inst->Dst[0].Register.Index, 10438 inst->Dst[0].Register.Indirect, 10439 inst->Dst[0].Indirect.Index, 10440 &inst->Src[0]); 10441 10442 /* First check the writemask to see if it can be supported 10443 * by the store instruction. 10444 * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory, 10445 * we can adjust the address offset, and do a per-component store. 10446 * store_uav_typed only allows .xyzw. In this case, we need to 10447 * do a load first, update the temporary and then issue the 10448 * store. This does have a small risk that if different threads 10449 * update different components of the same address, data might not be 10450 * in sync. 10451 */ 10452 if (resourceType == TGSI_FILE_IMAGE) { 10453 needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? FALSE : TRUE; 10454 } 10455 else if (resourceType == TGSI_FILE_BUFFER || 10456 resourceType == TGSI_FILE_MEMORY) { 10457 if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY || 10458 writemask == TGSI_WRITEMASK_XYZ || 10459 writemask == TGSI_WRITEMASK_XYZW)) { 10460 needPerComponentStore = TRUE; 10461 } 10462 } 10463 10464 if (needLoad) { 10465 assert(resourceType == TGSI_FILE_IMAGE); 10466 10467 /* LOAD resource, address, src */ 10468 begin_emit_instruction(emit); 10469 10470 token0.value = 0; 10471 token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; 10472 token0.saturate = inst->Instruction.Saturate, 10473 emit_dword(emit, token0.value); 10474 10475 emit_dst_register(emit, &tmp_dst_xyzw); 10476 emit_src_register(emit, &addr_src); 10477 emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0); 10478 10479 end_emit_instruction(emit); 10480 10481 /* MOV tmp(writemask) src */ 10482 tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask); 10483 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]); 10484 10485 /* Now set the writemask to xyzw for the store_uav_typed instruction */ 10486 writemask = TGSI_WRITEMASK_XYZW; 10487 } 10488 else if (needPerComponentStore) { 10489 /* Save the src swizzles */ 10490 swizzles = src.Register.SwizzleX | 10491 src.Register.SwizzleY << 2 | 10492 src.Register.SwizzleZ << 4 | 10493 src.Register.SwizzleW << 6; 10494 } 10495 10496 boolean storeDone = FALSE; 10497 unsigned perComponentWritemask = writemask; 10498 unsigned shift = 0; 10499 struct tgsi_full_src_register shift_src; 10500 10501 while (!storeDone) { 10502 10503 if (needPerComponentStore) { 10504 assert(perComponentWritemask); 10505 while (!(perComponentWritemask & TGSI_WRITEMASK_X)) { 10506 shift++; 10507 perComponentWritemask >>= 1; 10508 } 10509 10510 /* First adjust the addr_src to the next component */ 10511 if (shift != 0) { 10512 struct tgsi_full_dst_register addr_dst = 10513 make_dst_temp_reg(addr_src.Register.Index); 10514 shift_src = make_immediate_reg_int(emit, shift); 10515 emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four, 10516 &shift_src, &addr_src); 10517 10518 /* Adjust the src swizzle as well */ 10519 swizzles >>= (shift * 2); 10520 } 10521 10522 /* Now the address offset is set to the next component, 10523 * we can set the writemask to .x and make sure to set 10524 * the src swizzle as well. 10525 */ 10526 src.Register.SwizzleX = swizzles & 0x3; 10527 writemask = TGSI_WRITEMASK_X; 10528 10529 /* Shift for the next component check */ 10530 perComponentWritemask >>= 1; 10531 shift = 1; 10532 } 10533 10534 /* STORE resource, address, src */ 10535 begin_emit_instruction(emit); 10536 10537 token0.value = 0; 10538 token0.saturate = inst->Instruction.Saturate; 10539 10540 if (resourceType == TGSI_FILE_MEMORY) { 10541 token0.opcodeType = VGPU10_OPCODE_STORE_RAW; 10542 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); 10543 emit_dword(emit, token0.value); 10544 emit_memory_register(emit, MEM_STORE, inst, 0, writemask); 10545 } 10546 else if (resourceType == TGSI_FILE_BUFFER || 10547 resourceType == TGSI_FILE_HW_ATOMIC) { 10548 token0.opcodeType = VGPU10_OPCODE_STORE_RAW; 10549 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); 10550 emit_dword(emit, token0.value); 10551 emit_uav_register(emit, resourceIndex, UAV_STORE, 10552 resourceType, writemask); 10553 } 10554 else { 10555 token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED; 10556 emit_dword(emit, token0.value); 10557 emit_uav_register(emit, resourceIndex, UAV_STORE, 10558 resourceType, writemask); 10559 } 10560 10561 emit_src_register(emit, &addr_src); 10562 10563 if (needLoad) 10564 emit_src_register(emit, &tmp_src); 10565 else 10566 emit_src_register(emit, &src); 10567 10568 end_emit_instruction(emit); 10569 10570 if (!needPerComponentStore || !perComponentWritemask) 10571 storeDone = TRUE; 10572 } 10573 10574 free_temp_indexes(emit); 10575} 10576 10577 10578/** 10579 * Emit uav / memory store instruction 10580 */ 10581static boolean 10582emit_store(struct svga_shader_emitter_v10 *emit, 10583 const struct tgsi_full_instruction *inst) 10584{ 10585 enum tgsi_file_type resourceType = inst->Dst[0].Register.File; 10586 unsigned resourceIndex = inst->Dst[0].Register.Index; 10587 10588 /* If the resource register has indirect index, we will need 10589 * to expand it since SM5 device does not support indirect indexing 10590 * for uav. 10591 */ 10592 if (inst->Dst[0].Register.Indirect && 10593 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { 10594 10595 unsigned indirect_index = inst->Dst[0].Indirect.Index; 10596 unsigned num_resources = 10597 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : 10598 emit->num_images; 10599 10600 /* Indirect index tmp register */ 10601 unsigned indirect_addr = emit->address_reg_index[indirect_index]; 10602 struct tgsi_full_src_register indirect_addr_src = 10603 make_src_temp_reg(indirect_addr); 10604 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); 10605 10606 /* Add offset to the indirect index */ 10607 if (inst->Dst[0].Register.Index != 0) { 10608 struct tgsi_full_src_register offset = 10609 make_immediate_reg_int(emit, inst->Dst[0].Register.Index); 10610 struct tgsi_full_dst_register indirect_addr_dst = 10611 make_dst_temp_reg(indirect_addr); 10612 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, 10613 &indirect_addr_src, &offset); 10614 } 10615 10616 /* Loop through the resource array to find which resource to use. 10617 */ 10618 loop_instruction(0, num_resources, &indirect_addr_src, 10619 emit_store_instruction, emit, inst); 10620 } 10621 else { 10622 emit_store_instruction(emit, inst, resourceIndex); 10623 } 10624 10625 free_temp_indexes(emit); 10626 10627 return TRUE; 10628} 10629 10630 10631/** 10632 * A helper function to emit an atomic instruction. 10633 */ 10634 10635static void 10636emit_atomic_instruction(struct svga_shader_emitter_v10 *emit, 10637 const struct tgsi_full_instruction *inst, 10638 unsigned resourceIndex) 10639{ 10640 VGPU10OpcodeToken0 token0; 10641 enum tgsi_file_type resourceType = inst->Src[0].Register.File; 10642 struct tgsi_full_src_register addr_src; 10643 VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode; 10644 const struct tgsi_full_src_register *offset; 10645 10646 /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */ 10647 offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1]; 10648 10649 /* Resolve the resource address */ 10650 addr_src = emit_uav_addr_offset(emit, resourceType, 10651 inst->Src[0].Register.Index, 10652 inst->Src[0].Register.Indirect, 10653 inst->Src[0].Indirect.Index, 10654 offset); 10655 10656 /* Emit the atomic operation */ 10657 begin_emit_instruction(emit); 10658 10659 token0.value = 0; 10660 token0.opcodeType = opcode; 10661 token0.saturate = inst->Instruction.Saturate, 10662 emit_dword(emit, token0.value); 10663 10664 emit_dst_register(emit, &inst->Dst[0]); 10665 10666 if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { 10667 emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0); 10668 } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) { 10669 assert(inst->Src[0].Register.Dimension == 1); 10670 emit_uav_register(emit, inst->Src[0].Dimension.Index, 10671 UAV_ATOMIC, inst->Src[0].Register.File, 0); 10672 } else { 10673 emit_uav_register(emit, resourceIndex, 10674 UAV_ATOMIC, inst->Src[0].Register.File, 0); 10675 } 10676 10677 /* resource address offset */ 10678 emit_src_register(emit, &addr_src); 10679 10680 struct tgsi_full_src_register src0_x = 10681 swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 10682 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 10683 emit_src_register(emit, &src0_x); 10684 10685 if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) { 10686 struct tgsi_full_src_register src1_x = 10687 swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 10688 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 10689 10690 emit_src_register(emit, &src1_x); 10691 } 10692 10693 end_emit_instruction(emit); 10694 10695 free_temp_indexes(emit); 10696} 10697 10698 10699/** 10700 * Emit atomic instruction 10701 */ 10702static boolean 10703emit_atomic(struct svga_shader_emitter_v10 *emit, 10704 const struct tgsi_full_instruction *inst, 10705 VGPU10_OPCODE_TYPE opcode) 10706{ 10707 enum tgsi_file_type resourceType = inst->Src[0].Register.File; 10708 unsigned resourceIndex = inst->Src[0].Register.Index; 10709 10710 emit->cur_atomic_opcode = opcode; 10711 10712 /* If the resource register has indirect index, we will need 10713 * to expand it since SM5 device does not support indirect indexing 10714 * for uav. 10715 */ 10716 if (inst->Dst[0].Register.Indirect && 10717 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { 10718 10719 unsigned indirect_index = inst->Dst[0].Indirect.Index; 10720 unsigned num_resources = 10721 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : 10722 emit->num_images; 10723 10724 /* indirect index tmp register */ 10725 unsigned indirect_addr = emit->address_reg_index[indirect_index]; 10726 struct tgsi_full_src_register indirect_addr_src = 10727 make_src_temp_reg(indirect_addr); 10728 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); 10729 10730 /* Loop through the resource array to find which resource to use. 10731 */ 10732 loop_instruction(0, num_resources, &indirect_addr_src, 10733 emit_atomic_instruction, emit, inst); 10734 } 10735 else { 10736 emit_atomic_instruction(emit, inst, resourceIndex); 10737 } 10738 10739 free_temp_indexes(emit); 10740 10741 return TRUE; 10742} 10743 10744 10745/** 10746 * Emit barrier instruction 10747 */ 10748static boolean 10749emit_barrier(struct svga_shader_emitter_v10 *emit, 10750 const struct tgsi_full_instruction *inst) 10751{ 10752 VGPU10OpcodeToken0 token0; 10753 10754 assert(emit->version >= 50); 10755 10756 token0.value = 0; 10757 token0.opcodeType = VGPU10_OPCODE_SYNC; 10758 10759 if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) { 10760 /* SM5 device doesn't support BARRIER in tcs . If barrier is used 10761 * in shader, don't do anything for this opcode and continue rest 10762 * of shader translation 10763 */ 10764 util_debug_message(&emit->svga_debug_callback, INFO, 10765 "barrier instruction is not supported in tessellation control shader\n"); 10766 return TRUE; 10767 } 10768 else if (emit->unit == PIPE_SHADER_COMPUTE) { 10769 if (emit->cs.shared_memory_declared) 10770 token0.syncThreadGroupShared = 1; 10771 10772 if (emit->uav_declared) 10773 token0.syncUAVMemoryGroup = 1; 10774 10775 token0.syncThreadsInGroup = 1; 10776 } else { 10777 token0.syncUAVMemoryGlobal = 1; 10778 } 10779 10780 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || 10781 token0.syncThreadGroupShared); 10782 10783 begin_emit_instruction(emit); 10784 emit_dword(emit, token0.value); 10785 end_emit_instruction(emit); 10786 10787 return TRUE; 10788} 10789 10790/** 10791 * Emit memory barrier instruction 10792 */ 10793static boolean 10794emit_memory_barrier(struct svga_shader_emitter_v10 *emit, 10795 const struct tgsi_full_instruction *inst) 10796{ 10797 unsigned index = inst->Src[0].Register.Index; 10798 unsigned swizzle = inst->Src[0].Register.SwizzleX; 10799 unsigned bartype = emit->immediates[index][swizzle].Int; 10800 VGPU10OpcodeToken0 token0; 10801 10802 token0.value = 0; 10803 token0.opcodeType = VGPU10_OPCODE_SYNC; 10804 10805 if (emit->unit == PIPE_SHADER_COMPUTE) { 10806 10807 /* For compute shader, issue sync opcode with different options 10808 * depending on the memory barrier type. 10809 * 10810 * Bit 0: Shader storage buffers 10811 * Bit 1: Atomic buffers 10812 * Bit 2: Images 10813 * Bit 3: Shared memory 10814 * Bit 4: Thread group 10815 */ 10816 10817 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | 10818 TGSI_MEMBAR_SHADER_IMAGE)) 10819 token0.syncUAVMemoryGlobal = 1; 10820 else if (bartype & TGSI_MEMBAR_THREAD_GROUP) 10821 token0.syncUAVMemoryGroup = 1; 10822 10823 if (bartype & TGSI_MEMBAR_SHARED) 10824 token0.syncThreadGroupShared = 1; 10825 } 10826 else { 10827 /** 10828 * For graphics stages, only sync_uglobal is available. 10829 */ 10830 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | 10831 TGSI_MEMBAR_SHADER_IMAGE)) 10832 token0.syncUAVMemoryGlobal = 1; 10833 } 10834 10835 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || 10836 token0.syncThreadGroupShared); 10837 10838 begin_emit_instruction(emit); 10839 emit_dword(emit, token0.value); 10840 end_emit_instruction(emit); 10841 10842 return TRUE; 10843} 10844 10845 10846/** 10847 * Emit code for TGSI_OPCODE_RESQ (image size) instruction. 10848 */ 10849static boolean 10850emit_resq(struct svga_shader_emitter_v10 *emit, 10851 const struct tgsi_full_instruction *inst) 10852{ 10853 struct tgsi_full_src_register zero = 10854 make_immediate_reg_int(emit, 0); 10855 10856 unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource; 10857 10858 if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) { 10859 struct tgsi_full_src_register image_src; 10860 10861 image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index); 10862 10863 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src); 10864 return TRUE; 10865 } 10866 10867 begin_emit_instruction(emit); 10868 if (uav_resource == TGSI_TEXTURE_BUFFER) { 10869 emit_opcode(emit, VGPU10_OPCODE_BUFINFO, FALSE); 10870 emit_dst_register(emit, &inst->Dst[0]); 10871 } 10872 else { 10873 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 10874 emit_dst_register(emit, &inst->Dst[0]); 10875 emit_src_register(emit, &zero); 10876 } 10877 emit_uav_register(emit, inst->Src[0].Register.Index, 10878 UAV_RESQ, inst->Src[0].Register.File, 0); 10879 end_emit_instruction(emit); 10880 10881 return TRUE; 10882} 10883 10884 10885static boolean 10886emit_instruction(struct svga_shader_emitter_v10 *emit, 10887 unsigned inst_number, 10888 const struct tgsi_full_instruction *inst) 10889{ 10890 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 10891 10892 switch (opcode) { 10893 case TGSI_OPCODE_ADD: 10894 case TGSI_OPCODE_AND: 10895 case TGSI_OPCODE_BGNLOOP: 10896 case TGSI_OPCODE_BRK: 10897 case TGSI_OPCODE_CEIL: 10898 case TGSI_OPCODE_CONT: 10899 case TGSI_OPCODE_DDX: 10900 case TGSI_OPCODE_DDY: 10901 case TGSI_OPCODE_DIV: 10902 case TGSI_OPCODE_DP2: 10903 case TGSI_OPCODE_DP3: 10904 case TGSI_OPCODE_DP4: 10905 case TGSI_OPCODE_ELSE: 10906 case TGSI_OPCODE_ENDIF: 10907 case TGSI_OPCODE_ENDLOOP: 10908 case TGSI_OPCODE_ENDSUB: 10909 case TGSI_OPCODE_F2I: 10910 case TGSI_OPCODE_F2U: 10911 case TGSI_OPCODE_FLR: 10912 case TGSI_OPCODE_FRC: 10913 case TGSI_OPCODE_FSEQ: 10914 case TGSI_OPCODE_FSGE: 10915 case TGSI_OPCODE_FSLT: 10916 case TGSI_OPCODE_FSNE: 10917 case TGSI_OPCODE_I2F: 10918 case TGSI_OPCODE_IMAX: 10919 case TGSI_OPCODE_IMIN: 10920 case TGSI_OPCODE_INEG: 10921 case TGSI_OPCODE_ISGE: 10922 case TGSI_OPCODE_ISHR: 10923 case TGSI_OPCODE_ISLT: 10924 case TGSI_OPCODE_MAD: 10925 case TGSI_OPCODE_MAX: 10926 case TGSI_OPCODE_MIN: 10927 case TGSI_OPCODE_MUL: 10928 case TGSI_OPCODE_NOP: 10929 case TGSI_OPCODE_NOT: 10930 case TGSI_OPCODE_OR: 10931 case TGSI_OPCODE_UADD: 10932 case TGSI_OPCODE_USEQ: 10933 case TGSI_OPCODE_USGE: 10934 case TGSI_OPCODE_USLT: 10935 case TGSI_OPCODE_UMIN: 10936 case TGSI_OPCODE_UMAD: 10937 case TGSI_OPCODE_UMAX: 10938 case TGSI_OPCODE_ROUND: 10939 case TGSI_OPCODE_SQRT: 10940 case TGSI_OPCODE_SHL: 10941 case TGSI_OPCODE_TRUNC: 10942 case TGSI_OPCODE_U2F: 10943 case TGSI_OPCODE_UCMP: 10944 case TGSI_OPCODE_USHR: 10945 case TGSI_OPCODE_USNE: 10946 case TGSI_OPCODE_XOR: 10947 /* Begin SM5 opcodes */ 10948 case TGSI_OPCODE_F2D: 10949 case TGSI_OPCODE_D2F: 10950 case TGSI_OPCODE_DADD: 10951 case TGSI_OPCODE_DMUL: 10952 case TGSI_OPCODE_DMAX: 10953 case TGSI_OPCODE_DMIN: 10954 case TGSI_OPCODE_DSGE: 10955 case TGSI_OPCODE_DSLT: 10956 case TGSI_OPCODE_DSEQ: 10957 case TGSI_OPCODE_DSNE: 10958 case TGSI_OPCODE_BREV: 10959 case TGSI_OPCODE_POPC: 10960 case TGSI_OPCODE_LSB: 10961 case TGSI_OPCODE_INTERP_CENTROID: 10962 case TGSI_OPCODE_INTERP_SAMPLE: 10963 /* simple instructions */ 10964 return emit_simple(emit, inst); 10965 case TGSI_OPCODE_RET: 10966 if (emit->unit == PIPE_SHADER_TESS_CTRL && 10967 !emit->tcs.control_point_phase) { 10968 10969 /* store the tessellation levels in the patch constant phase only */ 10970 store_tesslevels(emit); 10971 } 10972 return emit_simple(emit, inst); 10973 10974 case TGSI_OPCODE_IMSB: 10975 case TGSI_OPCODE_UMSB: 10976 return emit_msb(emit, inst); 10977 case TGSI_OPCODE_IBFE: 10978 case TGSI_OPCODE_UBFE: 10979 return emit_bfe(emit, inst); 10980 case TGSI_OPCODE_BFI: 10981 return emit_bfi(emit, inst); 10982 case TGSI_OPCODE_MOV: 10983 return emit_mov(emit, inst); 10984 case TGSI_OPCODE_EMIT: 10985 return emit_vertex(emit, inst); 10986 case TGSI_OPCODE_ENDPRIM: 10987 return emit_endprim(emit, inst); 10988 case TGSI_OPCODE_IABS: 10989 return emit_iabs(emit, inst); 10990 case TGSI_OPCODE_ARL: 10991 FALLTHROUGH; 10992 case TGSI_OPCODE_UARL: 10993 return emit_arl_uarl(emit, inst); 10994 case TGSI_OPCODE_BGNSUB: 10995 /* no-op */ 10996 return TRUE; 10997 case TGSI_OPCODE_CAL: 10998 return emit_cal(emit, inst); 10999 case TGSI_OPCODE_CMP: 11000 return emit_cmp(emit, inst); 11001 case TGSI_OPCODE_COS: 11002 return emit_sincos(emit, inst); 11003 case TGSI_OPCODE_DST: 11004 return emit_dst(emit, inst); 11005 case TGSI_OPCODE_EX2: 11006 return emit_ex2(emit, inst); 11007 case TGSI_OPCODE_EXP: 11008 return emit_exp(emit, inst); 11009 case TGSI_OPCODE_IF: 11010 return emit_if(emit, &inst->Src[0]); 11011 case TGSI_OPCODE_KILL: 11012 return emit_discard(emit, inst); 11013 case TGSI_OPCODE_KILL_IF: 11014 return emit_cond_discard(emit, inst); 11015 case TGSI_OPCODE_LG2: 11016 return emit_lg2(emit, inst); 11017 case TGSI_OPCODE_LIT: 11018 return emit_lit(emit, inst); 11019 case TGSI_OPCODE_LODQ: 11020 return emit_lodq(emit, inst); 11021 case TGSI_OPCODE_LOG: 11022 return emit_log(emit, inst); 11023 case TGSI_OPCODE_LRP: 11024 return emit_lrp(emit, inst); 11025 case TGSI_OPCODE_POW: 11026 return emit_pow(emit, inst); 11027 case TGSI_OPCODE_RCP: 11028 return emit_rcp(emit, inst); 11029 case TGSI_OPCODE_RSQ: 11030 return emit_rsq(emit, inst); 11031 case TGSI_OPCODE_SAMPLE: 11032 return emit_sample(emit, inst); 11033 case TGSI_OPCODE_SEQ: 11034 return emit_seq(emit, inst); 11035 case TGSI_OPCODE_SGE: 11036 return emit_sge(emit, inst); 11037 case TGSI_OPCODE_SGT: 11038 return emit_sgt(emit, inst); 11039 case TGSI_OPCODE_SIN: 11040 return emit_sincos(emit, inst); 11041 case TGSI_OPCODE_SLE: 11042 return emit_sle(emit, inst); 11043 case TGSI_OPCODE_SLT: 11044 return emit_slt(emit, inst); 11045 case TGSI_OPCODE_SNE: 11046 return emit_sne(emit, inst); 11047 case TGSI_OPCODE_SSG: 11048 return emit_ssg(emit, inst); 11049 case TGSI_OPCODE_ISSG: 11050 return emit_issg(emit, inst); 11051 case TGSI_OPCODE_TEX: 11052 return emit_tex(emit, inst); 11053 case TGSI_OPCODE_TG4: 11054 return emit_tg4(emit, inst); 11055 case TGSI_OPCODE_TEX2: 11056 return emit_tex2(emit, inst); 11057 case TGSI_OPCODE_TXP: 11058 return emit_txp(emit, inst); 11059 case TGSI_OPCODE_TXB: 11060 case TGSI_OPCODE_TXB2: 11061 case TGSI_OPCODE_TXL: 11062 return emit_txl_txb(emit, inst); 11063 case TGSI_OPCODE_TXD: 11064 return emit_txd(emit, inst); 11065 case TGSI_OPCODE_TXF: 11066 return emit_txf(emit, inst); 11067 case TGSI_OPCODE_TXL2: 11068 return emit_txl2(emit, inst); 11069 case TGSI_OPCODE_TXQ: 11070 return emit_txq(emit, inst); 11071 case TGSI_OPCODE_UIF: 11072 return emit_if(emit, &inst->Src[0]); 11073 case TGSI_OPCODE_UMUL_HI: 11074 case TGSI_OPCODE_IMUL_HI: 11075 case TGSI_OPCODE_UDIV: 11076 /* These cases use only the FIRST of two destination registers */ 11077 return emit_simple_1dst(emit, inst, 2, 0); 11078 case TGSI_OPCODE_IDIV: 11079 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV); 11080 case TGSI_OPCODE_UMUL: 11081 case TGSI_OPCODE_UMOD: 11082 case TGSI_OPCODE_MOD: 11083 /* These cases use only the SECOND of two destination registers */ 11084 return emit_simple_1dst(emit, inst, 2, 1); 11085 11086 /* Begin SM5 opcodes */ 11087 case TGSI_OPCODE_DABS: 11088 return emit_dabs(emit, inst); 11089 case TGSI_OPCODE_DNEG: 11090 return emit_dneg(emit, inst); 11091 case TGSI_OPCODE_DRCP: 11092 return emit_simple(emit, inst); 11093 case TGSI_OPCODE_DSQRT: 11094 return emit_dsqrt(emit, inst); 11095 case TGSI_OPCODE_DMAD: 11096 return emit_dmad(emit, inst); 11097 case TGSI_OPCODE_DFRAC: 11098 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC); 11099 case TGSI_OPCODE_D2I: 11100 case TGSI_OPCODE_D2U: 11101 return emit_simple(emit, inst); 11102 case TGSI_OPCODE_I2D: 11103 case TGSI_OPCODE_U2D: 11104 return emit_simple(emit, inst); 11105 case TGSI_OPCODE_DRSQ: 11106 return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]); 11107 case TGSI_OPCODE_DDIV: 11108 return emit_simple(emit, inst); 11109 case TGSI_OPCODE_INTERP_OFFSET: 11110 return emit_interp_offset(emit, inst); 11111 case TGSI_OPCODE_FMA: 11112 case TGSI_OPCODE_DFMA: 11113 return emit_simple(emit, inst); 11114 11115 case TGSI_OPCODE_DTRUNC: 11116 return emit_dtrunc(emit, inst); 11117 11118 /* The following opcodes should never be seen here. We return zero 11119 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, 11120 * LDEXP_SUPPORTED queries. 11121 */ 11122 case TGSI_OPCODE_LDEXP: 11123 case TGSI_OPCODE_DSSG: 11124 case TGSI_OPCODE_DFRACEXP: 11125 case TGSI_OPCODE_DLDEXP: 11126 case TGSI_OPCODE_DCEIL: 11127 case TGSI_OPCODE_DFLR: 11128 debug_printf("Unexpected TGSI opcode %s. " 11129 "Should have been translated away by the GLSL compiler.\n", 11130 tgsi_get_opcode_name(opcode)); 11131 return FALSE; 11132 11133 case TGSI_OPCODE_LOAD: 11134 return emit_load(emit, inst); 11135 11136 case TGSI_OPCODE_STORE: 11137 return emit_store(emit, inst); 11138 11139 case TGSI_OPCODE_ATOMAND: 11140 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND); 11141 11142 case TGSI_OPCODE_ATOMCAS: 11143 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH); 11144 11145 case TGSI_OPCODE_ATOMIMAX: 11146 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX); 11147 11148 case TGSI_OPCODE_ATOMIMIN: 11149 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN); 11150 11151 case TGSI_OPCODE_ATOMOR: 11152 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR); 11153 11154 case TGSI_OPCODE_ATOMUADD: 11155 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD); 11156 11157 case TGSI_OPCODE_ATOMUMAX: 11158 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX); 11159 11160 case TGSI_OPCODE_ATOMUMIN: 11161 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN); 11162 11163 case TGSI_OPCODE_ATOMXCHG: 11164 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH); 11165 11166 case TGSI_OPCODE_ATOMXOR: 11167 return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR); 11168 11169 case TGSI_OPCODE_BARRIER: 11170 return emit_barrier(emit, inst); 11171 11172 case TGSI_OPCODE_MEMBAR: 11173 return emit_memory_barrier(emit, inst); 11174 11175 case TGSI_OPCODE_RESQ: 11176 return emit_resq(emit, inst); 11177 11178 case TGSI_OPCODE_END: 11179 if (!emit_post_helpers(emit)) 11180 return FALSE; 11181 return emit_simple(emit, inst); 11182 11183 default: 11184 debug_printf("Unimplemented tgsi instruction %s\n", 11185 tgsi_get_opcode_name(opcode)); 11186 return FALSE; 11187 } 11188 11189 return TRUE; 11190} 11191 11192 11193/** 11194 * Translate a single TGSI instruction to VGPU10. 11195 */ 11196static boolean 11197emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 11198 unsigned inst_number, 11199 const struct tgsi_full_instruction *inst) 11200{ 11201 if (emit->skip_instruction) 11202 return TRUE; 11203 11204 boolean ret = TRUE; 11205 unsigned start_token = emit_get_num_tokens(emit); 11206 11207 emit->reemit_tgsi_instruction = FALSE; 11208 11209 ret = emit_instruction(emit, inst_number, inst); 11210 11211 if (emit->reemit_tgsi_instruction) { 11212 /** 11213 * Reset emit->ptr to where the translation of this tgsi instruction 11214 * started. 11215 */ 11216 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; 11217 emit->ptr = (char *) (tokens + start_token); 11218 11219 emit->reemit_tgsi_instruction = FALSE; 11220 } 11221 return ret; 11222} 11223 11224 11225/** 11226 * Emit the extra instructions to adjust the vertex position. 11227 * There are two possible adjustments: 11228 * 1. Converting from Gallium to VGPU10 coordinate space by applying the 11229 * "prescale" and "pretranslate" values. 11230 * 2. Undoing the viewport transformation when we use the swtnl/draw path. 11231 * \param vs_pos_tmp_index which temporary register contains the vertex pos. 11232 */ 11233static void 11234emit_vpos_instructions(struct svga_shader_emitter_v10 *emit) 11235{ 11236 struct tgsi_full_src_register tmp_pos_src; 11237 struct tgsi_full_dst_register pos_dst; 11238 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 11239 11240 /* Don't bother to emit any extra vertex instructions if vertex position is 11241 * not written out 11242 */ 11243 if (emit->vposition.out_index == INVALID_INDEX) 11244 return; 11245 11246 /** 11247 * Reset the temporary vertex position register index 11248 * so that emit_dst_register() will use the real vertex position output 11249 */ 11250 emit->vposition.tmp_index = INVALID_INDEX; 11251 11252 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 11253 pos_dst = make_dst_output_reg(emit->vposition.out_index); 11254 11255 /* If non-adjusted vertex position register index 11256 * is valid, copy the vertex position from the temporary 11257 * vertex position register before it is modified by the 11258 * prescale computation. 11259 */ 11260 if (emit->vposition.so_index != INVALID_INDEX) { 11261 struct tgsi_full_dst_register pos_so_dst = 11262 make_dst_output_reg(emit->vposition.so_index); 11263 11264 /* MOV pos_so, tmp_pos */ 11265 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src); 11266 } 11267 11268 if (emit->vposition.need_prescale) { 11269 /* This code adjusts the vertex position to match the VGPU10 convention. 11270 * If p is the position computed by the shader (usually by applying the 11271 * modelview and projection matrices), the new position q is computed by: 11272 * 11273 * q.x = p.w * trans.x + p.x * scale.x 11274 * q.y = p.w * trans.y + p.y * scale.y 11275 * q.z = p.w * trans.z + p.z * scale.z; 11276 * q.w = p.w * trans.w + p.w; 11277 */ 11278 struct tgsi_full_src_register tmp_pos_src_w = 11279 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 11280 struct tgsi_full_dst_register tmp_pos_dst = 11281 make_dst_temp_reg(vs_pos_tmp_index); 11282 struct tgsi_full_dst_register tmp_pos_dst_xyz = 11283 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 11284 11285 struct tgsi_full_src_register prescale_scale = 11286 make_src_temp_reg(emit->vposition.prescale_scale_index); 11287 struct tgsi_full_src_register prescale_trans = 11288 make_src_temp_reg(emit->vposition.prescale_trans_index); 11289 11290 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 11291 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 11292 &tmp_pos_src, &prescale_scale); 11293 11294 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 11295 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 11296 &prescale_trans, &tmp_pos_src); 11297 } 11298 else if (emit->key.vs.undo_viewport) { 11299 /* This code computes the final vertex position from the temporary 11300 * vertex position by undoing the viewport transformation and the 11301 * divide-by-W operation (we convert window coords back to clip coords). 11302 * This is needed when we use the 'draw' module for fallbacks. 11303 * If p is the temp pos in window coords, then the NDC coord q is: 11304 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 11305 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 11306 * q.z = p.z * p.w 11307 * q.w = p.w 11308 * CONST[vs_viewport_index] contains: 11309 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 11310 */ 11311 struct tgsi_full_dst_register tmp_pos_dst = 11312 make_dst_temp_reg(vs_pos_tmp_index); 11313 struct tgsi_full_dst_register tmp_pos_dst_xy = 11314 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 11315 struct tgsi_full_src_register tmp_pos_src_wwww = 11316 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 11317 11318 struct tgsi_full_dst_register pos_dst_xyz = 11319 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 11320 struct tgsi_full_dst_register pos_dst_w = 11321 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 11322 11323 struct tgsi_full_src_register vp_xyzw = 11324 make_src_const_reg(emit->vs.viewport_index); 11325 struct tgsi_full_src_register vp_zwww = 11326 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 11327 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 11328 11329 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 11330 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 11331 &tmp_pos_src, &vp_zwww); 11332 11333 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 11334 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 11335 &tmp_pos_src, &vp_xyzw); 11336 11337 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 11338 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 11339 &tmp_pos_src, &tmp_pos_src_wwww); 11340 11341 /* MOV pos.w, tmp_pos.w */ 11342 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src); 11343 } 11344 else if (vs_pos_tmp_index != INVALID_INDEX) { 11345 /* This code is to handle the case where the temporary vertex 11346 * position register is created when the vertex shader has stream 11347 * output and prescale is disabled because rasterization is to be 11348 * discarded. 11349 */ 11350 struct tgsi_full_dst_register pos_dst = 11351 make_dst_output_reg(emit->vposition.out_index); 11352 11353 /* MOV pos, tmp_pos */ 11354 begin_emit_instruction(emit); 11355 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 11356 emit_dst_register(emit, &pos_dst); 11357 emit_src_register(emit, &tmp_pos_src); 11358 end_emit_instruction(emit); 11359 } 11360 11361 /* Restore original vposition.tmp_index value for the next GS vertex. 11362 * It doesn't matter for VS. 11363 */ 11364 emit->vposition.tmp_index = vs_pos_tmp_index; 11365} 11366 11367static void 11368emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 11369{ 11370 if (emit->clip_mode == CLIP_DISTANCE) { 11371 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 11372 emit_clip_distance_instructions(emit); 11373 11374 } else if (emit->clip_mode == CLIP_VERTEX && 11375 emit->key.last_vertex_stage) { 11376 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 11377 emit_clip_vertex_instructions(emit); 11378 } 11379 11380 /** 11381 * Emit vertex position and take care of legacy user planes only if 11382 * there is a valid vertex position register index. 11383 * This is to take care of the case 11384 * where the shader doesn't output vertex position. Then in 11385 * this case, don't bother to emit more vertex instructions. 11386 */ 11387 if (emit->vposition.out_index == INVALID_INDEX) 11388 return; 11389 11390 /** 11391 * Emit per-vertex clipping instructions for legacy user defined clip planes. 11392 * NOTE: we must emit the clip distance instructions before the 11393 * emit_vpos_instructions() call since the later function will change 11394 * the TEMP[vs_pos_tmp_index] value. 11395 */ 11396 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) { 11397 /* Emit CLIPDIST for legacy user defined clip planes */ 11398 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 11399 } 11400} 11401 11402 11403/** 11404 * Emit extra per-vertex instructions. This includes clip-coordinate 11405 * space conversion and computing clip distances. This is called for 11406 * each GS emit-vertex instruction and at the end of VS translation. 11407 */ 11408static void 11409emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 11410{ 11411 /* Emit clipping instructions based on clipping mode */ 11412 emit_clipping_instructions(emit); 11413 11414 /* Emit vertex position instructions */ 11415 emit_vpos_instructions(emit); 11416} 11417 11418 11419/** 11420 * Translate the TGSI_OPCODE_EMIT GS instruction. 11421 */ 11422static boolean 11423emit_vertex(struct svga_shader_emitter_v10 *emit, 11424 const struct tgsi_full_instruction *inst) 11425{ 11426 unsigned ret = TRUE; 11427 11428 assert(emit->unit == PIPE_SHADER_GEOMETRY); 11429 11430 /** 11431 * Emit the viewport array index for the first vertex. 11432 */ 11433 if (emit->gs.viewport_index_out_index != INVALID_INDEX) { 11434 struct tgsi_full_dst_register viewport_index_out = 11435 make_dst_output_reg(emit->gs.viewport_index_out_index); 11436 struct tgsi_full_dst_register viewport_index_out_x = 11437 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X); 11438 struct tgsi_full_src_register viewport_index_tmp = 11439 make_src_temp_reg(emit->gs.viewport_index_tmp_index); 11440 11441 /* Set the out index to INVALID_INDEX, so it will not 11442 * be assigned to a temp again in emit_dst_register, and 11443 * the viewport index will not be assigned again in the 11444 * subsequent vertices. 11445 */ 11446 emit->gs.viewport_index_out_index = INVALID_INDEX; 11447 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 11448 &viewport_index_out_x, &viewport_index_tmp); 11449 } 11450 11451 /** 11452 * Find the stream index associated with this emit vertex instruction. 11453 */ 11454 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); 11455 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); 11456 11457 /** 11458 * According to the ARB_gpu_shader5 spec, the built-in geometry shader 11459 * outputs are always associated with vertex stream zero. 11460 * So emit the extra vertex instructions for position or clip distance 11461 * for stream zero only. 11462 */ 11463 if (streamIndex == 0) { 11464 /** 11465 * Before emitting vertex instructions, emit the temporaries for 11466 * the prescale constants based on the viewport index if needed. 11467 */ 11468 if (emit->vposition.need_prescale && !emit->vposition.have_prescale) 11469 emit_temp_prescale_instructions(emit); 11470 11471 emit_vertex_instructions(emit); 11472 } 11473 11474 begin_emit_instruction(emit); 11475 if (emit->version >= 50) { 11476 if (emit->info.num_stream_output_components[streamIndex] == 0) { 11477 /** 11478 * If there is no output for this stream, discard this instruction. 11479 */ 11480 emit->discard_instruction = TRUE; 11481 } 11482 else { 11483 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE); 11484 emit_stream_register(emit, streamIndex); 11485 } 11486 } 11487 else { 11488 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 11489 } 11490 end_emit_instruction(emit); 11491 11492 return ret; 11493} 11494 11495 11496/** 11497 * Emit the extra code to convert from VGPU10's boolean front-face 11498 * register to TGSI's signed front-face register. 11499 * 11500 * TODO: Make temporary front-face register a scalar. 11501 */ 11502static void 11503emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 11504{ 11505 assert(emit->unit == PIPE_SHADER_FRAGMENT); 11506 11507 if (emit->fs.face_input_index != INVALID_INDEX) { 11508 /* convert vgpu10 boolean face register to gallium +/-1 value */ 11509 struct tgsi_full_dst_register tmp_dst = 11510 make_dst_temp_reg(emit->fs.face_tmp_index); 11511 struct tgsi_full_src_register one = 11512 make_immediate_reg_float(emit, 1.0f); 11513 struct tgsi_full_src_register neg_one = 11514 make_immediate_reg_float(emit, -1.0f); 11515 11516 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 11517 begin_emit_instruction(emit); 11518 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 11519 emit_dst_register(emit, &tmp_dst); 11520 emit_face_register(emit); 11521 emit_src_register(emit, &one); 11522 emit_src_register(emit, &neg_one); 11523 end_emit_instruction(emit); 11524 } 11525} 11526 11527 11528/** 11529 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 11530 */ 11531static void 11532emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 11533{ 11534 assert(emit->unit == PIPE_SHADER_FRAGMENT); 11535 11536 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 11537 struct tgsi_full_dst_register tmp_dst = 11538 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 11539 struct tgsi_full_dst_register tmp_dst_xyz = 11540 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 11541 struct tgsi_full_dst_register tmp_dst_w = 11542 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 11543 struct tgsi_full_src_register one = 11544 make_immediate_reg_float(emit, 1.0f); 11545 struct tgsi_full_src_register fragcoord = 11546 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 11547 11548 /* save the input index */ 11549 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 11550 /* set to invalid to prevent substitution in emit_src_register() */ 11551 emit->fs.fragcoord_input_index = INVALID_INDEX; 11552 11553 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 11554 begin_emit_instruction(emit); 11555 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 11556 emit_dst_register(emit, &tmp_dst_xyz); 11557 emit_src_register(emit, &fragcoord); 11558 end_emit_instruction(emit); 11559 11560 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 11561 begin_emit_instruction(emit); 11562 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 11563 emit_dst_register(emit, &tmp_dst_w); 11564 emit_src_register(emit, &one); 11565 emit_src_register(emit, &fragcoord); 11566 end_emit_instruction(emit); 11567 11568 /* restore saved value */ 11569 emit->fs.fragcoord_input_index = fragcoord_input_index; 11570 } 11571} 11572 11573 11574/** 11575 * Emit the extra code to get the current sample position value and 11576 * put it into a temp register. 11577 */ 11578static void 11579emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) 11580{ 11581 assert(emit->unit == PIPE_SHADER_FRAGMENT); 11582 11583 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { 11584 assert(emit->version >= 41); 11585 11586 struct tgsi_full_dst_register tmp_dst = 11587 make_dst_temp_reg(emit->fs.sample_pos_tmp_index); 11588 struct tgsi_full_src_register half = 11589 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0); 11590 11591 struct tgsi_full_src_register tmp_src = 11592 make_src_temp_reg(emit->fs.sample_pos_tmp_index); 11593 struct tgsi_full_src_register sample_index_reg = 11594 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE, 11595 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X); 11596 11597 /* The first src register is a shader resource (if we want a 11598 * multisampled resource sample position) or the rasterizer register 11599 * (if we want the current sample position in the color buffer). We 11600 * want the later. 11601 */ 11602 11603 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */ 11604 begin_emit_instruction(emit); 11605 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE); 11606 emit_dst_register(emit, &tmp_dst); 11607 emit_rasterizer_register(emit); 11608 emit_src_register(emit, &sample_index_reg); 11609 end_emit_instruction(emit); 11610 11611 /* Convert from D3D coords to GL coords by adding 0.5 bias */ 11612 /* ADD dst, dst, half */ 11613 begin_emit_instruction(emit); 11614 emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE); 11615 emit_dst_register(emit, &tmp_dst); 11616 emit_src_register(emit, &tmp_src); 11617 emit_src_register(emit, &half); 11618 end_emit_instruction(emit); 11619 } 11620} 11621 11622 11623/** 11624 * Emit extra instructions to adjust VS inputs/attributes. This can 11625 * mean casting a vertex attribute from int to float or setting the 11626 * W component to 1, or both. 11627 */ 11628static void 11629emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 11630{ 11631 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 11632 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 11633 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 11634 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 11635 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 11636 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 11637 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 11638 11639 unsigned adjust_mask = (save_w_1_mask | 11640 save_itof_mask | 11641 save_utof_mask | 11642 save_is_bgra_mask | 11643 save_puint_to_snorm_mask | 11644 save_puint_to_uscaled_mask | 11645 save_puint_to_sscaled_mask); 11646 11647 assert(emit->unit == PIPE_SHADER_VERTEX); 11648 11649 if (adjust_mask) { 11650 struct tgsi_full_src_register one = 11651 make_immediate_reg_float(emit, 1.0f); 11652 11653 struct tgsi_full_src_register one_int = 11654 make_immediate_reg_int(emit, 1); 11655 11656 /* We need to turn off these bitmasks while emitting the 11657 * instructions below, then restore them afterward. 11658 */ 11659 emit->key.vs.adjust_attrib_w_1 = 0; 11660 emit->key.vs.adjust_attrib_itof = 0; 11661 emit->key.vs.adjust_attrib_utof = 0; 11662 emit->key.vs.attrib_is_bgra = 0; 11663 emit->key.vs.attrib_puint_to_snorm = 0; 11664 emit->key.vs.attrib_puint_to_uscaled = 0; 11665 emit->key.vs.attrib_puint_to_sscaled = 0; 11666 11667 while (adjust_mask) { 11668 unsigned index = u_bit_scan(&adjust_mask); 11669 11670 /* skip the instruction if this vertex attribute is not being used */ 11671 if (emit->info.input_usage_mask[index] == 0) 11672 continue; 11673 11674 unsigned tmp = emit->vs.adjusted_input[index]; 11675 struct tgsi_full_src_register input_src = 11676 make_src_reg(TGSI_FILE_INPUT, index); 11677 11678 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 11679 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 11680 struct tgsi_full_dst_register tmp_dst_w = 11681 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 11682 11683 /* ITOF/UTOF/MOV tmp, input[index] */ 11684 if (save_itof_mask & (1 << index)) { 11685 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, 11686 &tmp_dst, &input_src); 11687 } 11688 else if (save_utof_mask & (1 << index)) { 11689 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, 11690 &tmp_dst, &input_src); 11691 } 11692 else if (save_puint_to_snorm_mask & (1 << index)) { 11693 emit_puint_to_snorm(emit, &tmp_dst, &input_src); 11694 } 11695 else if (save_puint_to_uscaled_mask & (1 << index)) { 11696 emit_puint_to_uscaled(emit, &tmp_dst, &input_src); 11697 } 11698 else if (save_puint_to_sscaled_mask & (1 << index)) { 11699 emit_puint_to_sscaled(emit, &tmp_dst, &input_src); 11700 } 11701 else { 11702 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); 11703 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 11704 &tmp_dst, &input_src); 11705 } 11706 11707 if (save_is_bgra_mask & (1 << index)) { 11708 emit_swap_r_b(emit, &tmp_dst, &tmp_src); 11709 } 11710 11711 if (save_w_1_mask & (1 << index)) { 11712 /* MOV tmp.w, 1.0 */ 11713 if (emit->key.vs.attrib_is_pure_int & (1 << index)) { 11714 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 11715 &tmp_dst_w, &one_int); 11716 } 11717 else { 11718 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 11719 &tmp_dst_w, &one); 11720 } 11721 } 11722 } 11723 11724 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; 11725 emit->key.vs.adjust_attrib_itof = save_itof_mask; 11726 emit->key.vs.adjust_attrib_utof = save_utof_mask; 11727 emit->key.vs.attrib_is_bgra = save_is_bgra_mask; 11728 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; 11729 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; 11730 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; 11731 } 11732} 11733 11734 11735/* Find zero-value immedate for default layer index */ 11736static void 11737emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit) 11738{ 11739 assert(emit->unit == PIPE_SHADER_FRAGMENT); 11740 11741 /* immediate for default layer index 0 */ 11742 if (emit->fs.layer_input_index != INVALID_INDEX) { 11743 union tgsi_immediate_data imm; 11744 imm.Int = 0; 11745 emit->fs.layer_imm_index = find_immediate(emit, imm, 0); 11746 } 11747} 11748 11749 11750static void 11751emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, 11752 unsigned cbuf_index, 11753 struct tgsi_full_dst_register *scale, 11754 struct tgsi_full_dst_register *translate) 11755{ 11756 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index); 11757 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1); 11758 11759 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf); 11760 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf); 11761} 11762 11763 11764/** 11765 * A recursive helper function to find the prescale from the constant buffer 11766 */ 11767static void 11768find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, 11769 unsigned index, unsigned num_prescale, 11770 struct tgsi_full_src_register *vp_index, 11771 struct tgsi_full_dst_register *scale, 11772 struct tgsi_full_dst_register *translate, 11773 struct tgsi_full_src_register *tmp_src, 11774 struct tgsi_full_dst_register *tmp_dst) 11775{ 11776 if (num_prescale == 0) 11777 return; 11778 11779 if (index > 0) { 11780 /* ELSE */ 11781 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 11782 } 11783 11784 struct tgsi_full_src_register index_src = 11785 make_immediate_reg_int(emit, index); 11786 11787 if (index == 0) { 11788 /* GE tmp, vp_index, index */ 11789 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst, 11790 vp_index, &index_src); 11791 } else { 11792 /* EQ tmp, vp_index, index */ 11793 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst, 11794 vp_index, &index_src); 11795 } 11796 11797 /* IF tmp */ 11798 emit_if(emit, tmp_src); 11799 emit_temp_prescale_from_cbuf(emit, 11800 emit->vposition.prescale_cbuf_index + 2 * index, 11801 scale, translate); 11802 11803 find_prescale_from_cbuf(emit, index+1, num_prescale-1, 11804 vp_index, scale, translate, 11805 tmp_src, tmp_dst); 11806 11807 /* ENDIF */ 11808 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 11809} 11810 11811 11812/** 11813 * This helper function emits instructions to set the prescale 11814 * and translate temporaries to the correct constants from the 11815 * constant buffer according to the designated viewport. 11816 */ 11817static void 11818emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit) 11819{ 11820 struct tgsi_full_dst_register prescale_scale = 11821 make_dst_temp_reg(emit->vposition.prescale_scale_index); 11822 struct tgsi_full_dst_register prescale_translate = 11823 make_dst_temp_reg(emit->vposition.prescale_trans_index); 11824 11825 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index; 11826 11827 if (emit->vposition.num_prescale == 1) { 11828 emit_temp_prescale_from_cbuf(emit, 11829 prescale_cbuf_index, 11830 &prescale_scale, &prescale_translate); 11831 } else { 11832 /** 11833 * Since SM5 device does not support dynamic indexing, we need 11834 * to do the if-else to find the prescale constants for the 11835 * specified viewport. 11836 */ 11837 struct tgsi_full_src_register vp_index_src = 11838 make_src_temp_reg(emit->gs.viewport_index_tmp_index); 11839 11840 struct tgsi_full_src_register vp_index_src_x = 11841 scalar_src(&vp_index_src, TGSI_SWIZZLE_X); 11842 11843 unsigned tmp = get_temp_index(emit); 11844 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 11845 struct tgsi_full_src_register tmp_src_x = 11846 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 11847 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 11848 11849 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale, 11850 &vp_index_src_x, 11851 &prescale_scale, &prescale_translate, 11852 &tmp_src_x, &tmp_dst); 11853 } 11854 11855 /* Mark prescale temporaries are emitted */ 11856 emit->vposition.have_prescale = 1; 11857} 11858 11859 11860/** 11861 * A helper function to emit an instruction in a vertex shader to add a bias 11862 * to the VertexID system value. This patches the VertexID in the SVGA vertex 11863 * shader to include the base vertex of an indexed primitive or the start index 11864 * of a non-indexed primitive. 11865 */ 11866static void 11867emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit) 11868{ 11869 struct tgsi_full_src_register vertex_id_bias_index = 11870 make_src_const_reg(emit->vs.vertex_id_bias_index); 11871 struct tgsi_full_src_register vertex_id_sys_src = 11872 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index); 11873 struct tgsi_full_src_register vertex_id_sys_src_x = 11874 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X); 11875 struct tgsi_full_dst_register vertex_id_tmp_dst = 11876 make_dst_temp_reg(emit->vs.vertex_id_tmp_index); 11877 11878 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */ 11879 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index; 11880 emit->vs.vertex_id_tmp_index = INVALID_INDEX; 11881 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst, 11882 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE, 11883 FALSE); 11884 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index; 11885} 11886 11887/** 11888 * Hull Shader must have control point outputs. But tessellation 11889 * control shader can return without writing to control point output. 11890 * In this case, the control point output is assumed to be passthrough 11891 * from the control point input. 11892 * This helper function is to write out a control point output first in case 11893 * the tessellation control shader returns before writing a 11894 * control point output. 11895 */ 11896static void 11897emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) 11898{ 11899 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 11900 assert(emit->tcs.control_point_phase); 11901 assert(emit->tcs.control_point_out_index != INVALID_INDEX); 11902 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); 11903 11904 struct tgsi_full_dst_register output_control_point; 11905 output_control_point = 11906 make_dst_output_reg(emit->tcs.control_point_out_index); 11907 11908 if (emit->tcs.control_point_input_index == INVALID_INDEX) { 11909 /* MOV OUTPUT 0.0f */ 11910 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 11911 begin_emit_instruction(emit); 11912 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); 11913 emit_dst_register(emit, &output_control_point); 11914 emit_src_register(emit, &zero); 11915 end_emit_instruction(emit); 11916 } 11917 else { 11918 /* UARL ADDR[INDEX].x INVOCATION.xxxx */ 11919 11920 struct tgsi_full_src_register invocation_src; 11921 struct tgsi_full_dst_register addr_dst; 11922 struct tgsi_full_dst_register addr_dst_x; 11923 unsigned addr_tmp; 11924 11925 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; 11926 addr_dst = make_dst_temp_reg(addr_tmp); 11927 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); 11928 11929 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, 11930 emit->tcs.invocation_id_sys_index); 11931 11932 begin_emit_instruction(emit); 11933 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); 11934 emit_dst_register(emit, &addr_dst_x); 11935 emit_src_register(emit, &invocation_src); 11936 end_emit_instruction(emit); 11937 11938 11939 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ 11940 11941 struct tgsi_full_src_register input_control_point; 11942 input_control_point = make_src_reg(TGSI_FILE_INPUT, 11943 emit->tcs.control_point_input_index); 11944 input_control_point.Register.Dimension = 1; 11945 input_control_point.Dimension.Indirect = 1; 11946 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; 11947 input_control_point.DimIndirect.Index = 11948 emit->tcs.control_point_addr_index; 11949 11950 begin_emit_instruction(emit); 11951 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); 11952 emit_dst_register(emit, &output_control_point); 11953 emit_src_register(emit, &input_control_point); 11954 end_emit_instruction(emit); 11955 } 11956} 11957 11958/** 11959 * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR 11960 * values in domain shader. SM5 has tessfactors as floating point values where 11961 * as tgsi emit them as vector. This function allows to construct temp 11962 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with 11963 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever 11964 * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader. 11965 */ 11966static void 11967emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit) 11968{ 11969 struct tgsi_full_src_register src; 11970 struct tgsi_full_dst_register dst; 11971 11972 if (emit->tes.inner.tgsi_index != INVALID_INDEX) { 11973 dst = make_dst_temp_reg(emit->tes.inner.temp_index); 11974 11975 switch (emit->tes.prim_mode) { 11976 case PIPE_PRIM_QUADS: 11977 src = make_src_scalar_reg(TGSI_FILE_INPUT, 11978 emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X); 11979 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); 11980 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 11981 FALLTHROUGH; 11982 case PIPE_PRIM_TRIANGLES: 11983 src = make_src_scalar_reg(TGSI_FILE_INPUT, 11984 emit->tes.inner.in_index, TGSI_SWIZZLE_X); 11985 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 11986 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 11987 break; 11988 case PIPE_PRIM_LINES: 11989 /** 11990 * As per SM5 spec, InsideTessFactor for isolines are unused. 11991 * In fact glsl tessInnerLevel for isolines doesn't mean anything but if 11992 * any application try to read tessInnerLevel in TES when primitive type 11993 * is isolines, then instead of driver throwing segfault for accesing it, 11994 * return atleast vec(1.0f) 11995 */ 11996 src = make_immediate_reg_float(emit, 1.0f); 11997 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 11998 break; 11999 default: 12000 break; 12001 } 12002 } 12003 12004 if (emit->tes.outer.tgsi_index != INVALID_INDEX) { 12005 dst = make_dst_temp_reg(emit->tes.outer.temp_index); 12006 12007 switch (emit->tes.prim_mode) { 12008 case PIPE_PRIM_QUADS: 12009 src = make_src_scalar_reg(TGSI_FILE_INPUT, 12010 emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X); 12011 dst = writemask_dst(&dst, TGSI_WRITEMASK_W); 12012 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 12013 FALLTHROUGH; 12014 case PIPE_PRIM_TRIANGLES: 12015 src = make_src_scalar_reg(TGSI_FILE_INPUT, 12016 emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X); 12017 dst = writemask_dst(&dst, TGSI_WRITEMASK_Z); 12018 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 12019 FALLTHROUGH; 12020 case PIPE_PRIM_LINES: 12021 src = make_src_scalar_reg(TGSI_FILE_INPUT, 12022 emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X); 12023 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); 12024 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 12025 12026 src = make_src_scalar_reg(TGSI_FILE_INPUT, 12027 emit->tes.outer.in_index , TGSI_SWIZZLE_X); 12028 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 12029 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 12030 12031 break; 12032 default: 12033 break; 12034 } 12035 } 12036} 12037 12038 12039static void 12040emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit) 12041{ 12042 struct tgsi_full_src_register src; 12043 struct tgsi_full_dst_register dst; 12044 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY, 12045 emit->initialize_temp_index); 12046 src = make_immediate_reg_float(emit, 0.0f); 12047 dst = make_dst_temp_reg(vgpu10_temp_index); 12048 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 12049 emit->temp_map[emit->initialize_temp_index].initialized = TRUE; 12050 emit->initialize_temp_index = INVALID_INDEX; 12051} 12052 12053 12054/** 12055 * Emit any extra/helper declarations/code that we might need between 12056 * the declaration section and code section. 12057 */ 12058static boolean 12059emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 12060{ 12061 /* Properties */ 12062 if (emit->unit == PIPE_SHADER_GEOMETRY) 12063 emit_property_instructions(emit); 12064 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 12065 emit_hull_shader_declarations(emit); 12066 12067 /* Save the position of the first instruction token so that we can 12068 * do a second pass of the instructions for the patch constant phase. 12069 */ 12070 emit->tcs.instruction_token_pos = emit->cur_tgsi_token; 12071 emit->tcs.fork_phase_add_signature = FALSE; 12072 12073 if (!emit_hull_shader_control_point_phase(emit)) { 12074 emit->skip_instruction = TRUE; 12075 return TRUE; 12076 } 12077 12078 /* Set the current tcs phase to control point phase */ 12079 emit->tcs.control_point_phase = TRUE; 12080 } 12081 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 12082 emit_domain_shader_declarations(emit); 12083 } 12084 else if (emit->unit == PIPE_SHADER_COMPUTE) { 12085 emit_compute_shader_declarations(emit); 12086 } 12087 12088 /* Declare inputs */ 12089 if (!emit_input_declarations(emit)) 12090 return FALSE; 12091 12092 /* Declare outputs */ 12093 if (!emit_output_declarations(emit)) 12094 return FALSE; 12095 12096 /* Declare temporary registers */ 12097 emit_temporaries_declaration(emit); 12098 12099 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates 12100 * will already be declared in hs_decls (emit_hull_shader_declarations) 12101 */ 12102 if (emit->unit != PIPE_SHADER_TESS_CTRL) { 12103 12104 alloc_common_immediates(emit); 12105 12106 /* Declare constant registers */ 12107 emit_constant_declaration(emit); 12108 12109 /* Declare samplers and resources */ 12110 emit_sampler_declarations(emit); 12111 emit_resource_declarations(emit); 12112 12113 /* Declare images */ 12114 emit_image_declarations(emit); 12115 12116 /* Declare shader buffers */ 12117 emit_shader_buf_declarations(emit); 12118 12119 /* Declare atomic buffers */ 12120 emit_atomic_buf_declarations(emit); 12121 } 12122 12123 if (emit->unit != PIPE_SHADER_FRAGMENT && 12124 emit->unit != PIPE_SHADER_COMPUTE) { 12125 /* 12126 * Declare clip distance output registers for ClipVertex or 12127 * user defined planes 12128 */ 12129 emit_clip_distance_declarations(emit); 12130 } 12131 12132 if (emit->unit == PIPE_SHADER_COMPUTE) { 12133 emit_memory_declarations(emit); 12134 12135 if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) { 12136 emit->cs.grid_size.imm_index = 12137 alloc_immediate_int4(emit, 12138 emit->key.cs.grid_size[0], 12139 emit->key.cs.grid_size[1], 12140 emit->key.cs.grid_size[2], 0); 12141 } 12142 } 12143 12144 if (emit->unit == PIPE_SHADER_FRAGMENT && 12145 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 12146 float alpha = emit->key.fs.alpha_ref; 12147 emit->fs.alpha_ref_index = 12148 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 12149 } 12150 12151 if (emit->unit != PIPE_SHADER_TESS_CTRL) { 12152 /** 12153 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in 12154 * hs_decls 12155 */ 12156 emit_vgpu10_immediates_block(emit); 12157 } 12158 else { 12159 emit_tcs_default_control_point_output(emit); 12160 } 12161 12162 if (emit->unit == PIPE_SHADER_FRAGMENT) { 12163 emit_frontface_instructions(emit); 12164 emit_fragcoord_instructions(emit); 12165 emit_sample_position_instructions(emit); 12166 emit_default_layer_instructions(emit); 12167 } 12168 else if (emit->unit == PIPE_SHADER_VERTEX) { 12169 emit_vertex_attrib_instructions(emit); 12170 12171 if (emit->info.uses_vertexid) 12172 emit_vertex_id_nobase_instruction(emit); 12173 } 12174 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 12175 emit_temp_tessfactor_instructions(emit); 12176 } 12177 12178 /** 12179 * For geometry shader that writes to viewport index, the prescale 12180 * temporaries will be done at the first vertex emission. 12181 */ 12182 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1) 12183 emit_temp_prescale_instructions(emit); 12184 12185 return TRUE; 12186} 12187 12188 12189/** 12190 * The device has no direct support for the pipe_blend_state::alpha_to_one 12191 * option so we implement it here with shader code. 12192 * 12193 * Note that this is kind of pointless, actually. Here we're clobbering 12194 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind 12195 * up with 100% coverage. That's almost certainly not what the user wants. 12196 * The work-around is to add extra shader code to compute coverage from alpha 12197 * and write it to the coverage output register (if the user's shader doesn't 12198 * do so already). We'll probably do that in the future. 12199 */ 12200static void 12201emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit, 12202 unsigned fs_color_tmp_index) 12203{ 12204 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 12205 unsigned i; 12206 12207 /* Note: it's not 100% clear from the spec if we're supposed to clobber 12208 * the alpha for all render targets. But that's what NVIDIA does and 12209 * that's what Piglit tests. 12210 */ 12211 for (i = 0; i < emit->fs.num_color_outputs; i++) { 12212 struct tgsi_full_dst_register color_dst; 12213 12214 if (fs_color_tmp_index != INVALID_INDEX && i == 0) { 12215 /* write to the temp color register */ 12216 color_dst = make_dst_temp_reg(fs_color_tmp_index); 12217 } 12218 else { 12219 /* write directly to the color[i] output */ 12220 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]); 12221 } 12222 12223 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W); 12224 12225 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one); 12226 } 12227} 12228 12229 12230/** 12231 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 12232 * against the alpha reference value and discards the fragment if the 12233 * comparison fails. 12234 */ 12235static void 12236emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, 12237 unsigned fs_color_tmp_index) 12238{ 12239 /* compare output color's alpha to alpha ref and discard if comparison 12240 * fails. 12241 */ 12242 unsigned tmp = get_temp_index(emit); 12243 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 12244 struct tgsi_full_src_register tmp_src_x = 12245 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 12246 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 12247 struct tgsi_full_src_register color_src = 12248 make_src_temp_reg(fs_color_tmp_index); 12249 struct tgsi_full_src_register color_src_w = 12250 scalar_src(&color_src, TGSI_SWIZZLE_W); 12251 struct tgsi_full_src_register ref_src = 12252 make_src_immediate_reg(emit->fs.alpha_ref_index); 12253 struct tgsi_full_dst_register color_dst = 12254 make_dst_output_reg(emit->fs.color_out_index[0]); 12255 12256 assert(emit->unit == PIPE_SHADER_FRAGMENT); 12257 12258 /* dst = src0 'alpha_func' src1 */ 12259 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, 12260 &color_src_w, &ref_src); 12261 12262 /* DISCARD if dst.x == 0 */ 12263 begin_emit_instruction(emit); 12264 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ 12265 emit_src_register(emit, &tmp_src_x); 12266 end_emit_instruction(emit); 12267 12268 /* If we don't need to broadcast the color below, emit the final color here. 12269 */ 12270 if (emit->key.fs.write_color0_to_n_cbufs <= 1) { 12271 /* MOV output.color, tempcolor */ 12272 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); 12273 } 12274 12275 free_temp_indexes(emit); 12276} 12277 12278 12279/** 12280 * Emit instructions for writing a single color output to multiple 12281 * color buffers. 12282 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or 12283 * when key.fs.white_fragments is true). 12284 * property is set and the number of render targets is greater than one. 12285 * \param fs_color_tmp_index index of the temp register that holds the 12286 * color to broadcast. 12287 */ 12288static void 12289emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, 12290 unsigned fs_color_tmp_index) 12291{ 12292 const unsigned n = emit->key.fs.write_color0_to_n_cbufs; 12293 unsigned i; 12294 struct tgsi_full_src_register color_src; 12295 12296 if (emit->key.fs.white_fragments) { 12297 /* set all color outputs to white */ 12298 color_src = make_immediate_reg_float(emit, 1.0f); 12299 } 12300 else { 12301 /* set all color outputs to TEMP[fs_color_tmp_index] */ 12302 assert(fs_color_tmp_index != INVALID_INDEX); 12303 color_src = make_src_temp_reg(fs_color_tmp_index); 12304 } 12305 12306 assert(emit->unit == PIPE_SHADER_FRAGMENT); 12307 12308 for (i = 0; i < n; i++) { 12309 unsigned output_reg = emit->fs.color_out_index[i]; 12310 struct tgsi_full_dst_register color_dst = 12311 make_dst_output_reg(output_reg); 12312 12313 /* Fill in this semantic here since we'll use it later in 12314 * emit_dst_register(). 12315 */ 12316 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; 12317 12318 /* MOV output.color[i], tempcolor */ 12319 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); 12320 } 12321} 12322 12323 12324/** 12325 * Emit extra helper code after the original shader code, but before the 12326 * last END/RET instruction. 12327 * For vertex shaders this means emitting the extra code to apply the 12328 * prescale scale/translation. 12329 */ 12330static boolean 12331emit_post_helpers(struct svga_shader_emitter_v10 *emit) 12332{ 12333 if (emit->unit == PIPE_SHADER_VERTEX) { 12334 emit_vertex_instructions(emit); 12335 } 12336 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 12337 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; 12338 12339 assert(!(emit->key.fs.white_fragments && 12340 emit->key.fs.write_color0_to_n_cbufs == 0)); 12341 12342 /* We no longer want emit_dst_register() to substitute the 12343 * temporary fragment color register for the real color output. 12344 */ 12345 emit->fs.color_tmp_index = INVALID_INDEX; 12346 12347 if (emit->key.fs.alpha_to_one) { 12348 emit_alpha_to_one_instructions(emit, fs_color_tmp_index); 12349 } 12350 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 12351 emit_alpha_test_instructions(emit, fs_color_tmp_index); 12352 } 12353 if (emit->key.fs.write_color0_to_n_cbufs > 1 || 12354 emit->key.fs.white_fragments) { 12355 emit_broadcast_color_instructions(emit, fs_color_tmp_index); 12356 } 12357 } 12358 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 12359 if (!emit->tcs.control_point_phase) { 12360 /* store the tessellation levels in the patch constant phase only */ 12361 store_tesslevels(emit); 12362 } 12363 else { 12364 emit_clipping_instructions(emit); 12365 } 12366 } 12367 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 12368 emit_vertex_instructions(emit); 12369 } 12370 12371 return TRUE; 12372} 12373 12374 12375/** 12376 * Reemit rawbuf instruction 12377 */ 12378static boolean 12379emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, 12380 unsigned inst_number, 12381 const struct tgsi_full_instruction *inst) 12382{ 12383 boolean ret; 12384 12385 /* For all the rawbuf references in this instruction, 12386 * load the rawbuf reference and assign to the designated temporary. 12387 * Then reeemit the instruction. 12388 */ 12389 emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS; 12390 12391 unsigned offset_tmp = get_temp_index(emit); 12392 struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp); 12393 struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp); 12394 struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); 12395 12396 for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) { 12397 struct tgsi_full_src_register element_src; 12398 12399 /* First get the element index register. */ 12400 12401 if (emit->raw_buf_tmp[i].indirect) { 12402 unsigned tmp = get_temp_index(emit); 12403 struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp); 12404 struct tgsi_full_src_register element_index = 12405 make_src_temp_reg(emit->raw_buf_tmp[i].element_index); 12406 struct tgsi_full_src_register element_rel = 12407 make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel); 12408 12409 element_src = make_src_temp_reg(tmp); 12410 element_src = scalar_src(&element_src, TGSI_SWIZZLE_X); 12411 element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X); 12412 12413 /* element index from the indirect register */ 12414 element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index); 12415 element_index = scalar_src(&element_index, TGSI_SWIZZLE_X); 12416 12417 /* IADD element_src element_index element_index_relative */ 12418 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst, 12419 &element_index, &element_rel); 12420 } 12421 else { 12422 element_src = 12423 make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_index); 12424 } 12425 12426 /* byte offset = element index << 4 */ 12427 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst, 12428 &element_src, &four); 12429 12430 struct tgsi_full_dst_register dst_tmp = 12431 make_dst_temp_reg(i + emit->raw_buf_tmp_index); 12432 12433 /* LD_RAW tmp, rawbuf byte offset, rawbuf */ 12434 12435 begin_emit_instruction(emit); 12436 emit_opcode(emit, VGPU10_OPCODE_LD_RAW, FALSE); 12437 emit_dst_register(emit, &dst_tmp); 12438 12439 struct tgsi_full_src_register offset_x = 12440 scalar_src(&offset_src, TGSI_SWIZZLE_X); 12441 emit_src_register(emit, &offset_x); 12442 12443 emit_resource_register(emit, 12444 emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index); 12445 end_emit_instruction(emit); 12446 } 12447 12448 emit->raw_buf_cur_tmp_index = 0; 12449 12450 ret = emit_vgpu10_instruction(emit, inst_number, inst); 12451 12452 /* reset raw buf state */ 12453 emit->raw_buf_cur_tmp_index = 0; 12454 emit->reemit_rawbuf_instruction = REEMIT_FALSE; 12455 12456 free_temp_indexes(emit); 12457 12458 return ret; 12459} 12460 12461 12462/** 12463 * Translate the TGSI tokens into VGPU10 tokens. 12464 */ 12465static boolean 12466emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, 12467 const struct tgsi_token *tokens) 12468{ 12469 struct tgsi_parse_context parse; 12470 boolean ret = TRUE; 12471 boolean pre_helpers_emitted = FALSE; 12472 unsigned inst_number = 0; 12473 12474 tgsi_parse_init(&parse, tokens); 12475 12476 while (!tgsi_parse_end_of_tokens(&parse)) { 12477 12478 /* Save the current tgsi token starting position */ 12479 emit->cur_tgsi_token = parse.Position; 12480 12481 tgsi_parse_token(&parse); 12482 12483 switch (parse.FullToken.Token.Type) { 12484 case TGSI_TOKEN_TYPE_IMMEDIATE: 12485 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); 12486 if (!ret) 12487 goto done; 12488 break; 12489 12490 case TGSI_TOKEN_TYPE_DECLARATION: 12491 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); 12492 if (!ret) 12493 goto done; 12494 break; 12495 12496 case TGSI_TOKEN_TYPE_INSTRUCTION: 12497 if (!pre_helpers_emitted) { 12498 ret = emit_pre_helpers(emit); 12499 if (!ret) 12500 goto done; 12501 pre_helpers_emitted = TRUE; 12502 } 12503 ret = emit_vgpu10_instruction(emit, inst_number++, 12504 &parse.FullToken.FullInstruction); 12505 12506 /* Usually this applies to TCS only. If shader is reading control 12507 * point outputs in control point phase, we should reemit all 12508 * instructions which are writting into control point output in 12509 * control phase to store results into temporaries. 12510 */ 12511 if (emit->reemit_instruction) { 12512 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 12513 ret = emit_vgpu10_instruction(emit, inst_number, 12514 &parse.FullToken.FullInstruction); 12515 } 12516 else if (emit->initialize_temp_index != INVALID_INDEX) { 12517 emit_initialize_temp_instruction(emit); 12518 emit->initialize_temp_index = INVALID_INDEX; 12519 ret = emit_vgpu10_instruction(emit, inst_number - 1, 12520 &parse.FullToken.FullInstruction); 12521 } 12522 else if (emit->reemit_rawbuf_instruction) { 12523 ret = emit_rawbuf_instruction(emit, inst_number - 1, 12524 &parse.FullToken.FullInstruction); 12525 } 12526 12527 if (!ret) 12528 goto done; 12529 break; 12530 12531 case TGSI_TOKEN_TYPE_PROPERTY: 12532 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); 12533 if (!ret) 12534 goto done; 12535 break; 12536 12537 default: 12538 break; 12539 } 12540 } 12541 12542 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 12543 ret = emit_hull_shader_patch_constant_phase(emit, &parse); 12544 } 12545 12546done: 12547 tgsi_parse_free(&parse); 12548 return ret; 12549} 12550 12551 12552/** 12553 * Emit the first VGPU10 shader tokens. 12554 */ 12555static boolean 12556emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) 12557{ 12558 VGPU10ProgramToken ptoken; 12559 12560 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ 12561 12562 /* Maximum supported shader version is 50 */ 12563 unsigned version = MIN2(emit->version, 50); 12564 12565 ptoken.value = 0; /* init whole token to zero */ 12566 ptoken.majorVersion = version / 10; 12567 ptoken.minorVersion = version % 10; 12568 ptoken.programType = translate_shader_type(emit->unit); 12569 if (!emit_dword(emit, ptoken.value)) 12570 return FALSE; 12571 12572 /* Second token: total length of shader, in tokens. We can't fill this 12573 * in until we're all done. Emit zero for now. 12574 */ 12575 if (!emit_dword(emit, 0)) 12576 return FALSE; 12577 12578 if (emit->version >= 50) { 12579 VGPU10OpcodeToken0 token; 12580 12581 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 12582 /* For hull shader, we need to start the declarations phase first before 12583 * emitting any declarations including the global flags. 12584 */ 12585 token.value = 0; 12586 token.opcodeType = VGPU10_OPCODE_HS_DECLS; 12587 begin_emit_instruction(emit); 12588 emit_dword(emit, token.value); 12589 end_emit_instruction(emit); 12590 } 12591 12592 /* Emit global flags */ 12593 token.value = 0; /* init whole token to zero */ 12594 token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; 12595 token.enableDoublePrecisionFloatOps = 1; /* set bit */ 12596 token.instructionLength = 1; 12597 if (!emit_dword(emit, token.value)) 12598 return FALSE; 12599 } 12600 12601 if (emit->version >= 40) { 12602 VGPU10OpcodeToken0 token; 12603 12604 /* Reserved for global flag such as refactoringAllowed. 12605 * If the shader does not use the precise qualifier, we will set the 12606 * refactoringAllowed global flag; otherwise, we will leave the reserved 12607 * token to NOP. 12608 */ 12609 emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0); 12610 token.value = 0; 12611 token.opcodeType = VGPU10_OPCODE_NOP; 12612 token.instructionLength = 1; 12613 if (!emit_dword(emit, token.value)) 12614 return FALSE; 12615 } 12616 12617 return TRUE; 12618} 12619 12620 12621static boolean 12622emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) 12623{ 12624 VGPU10ProgramToken *tokens; 12625 12626 /* Replace the second token with total shader length */ 12627 tokens = (VGPU10ProgramToken *) emit->buf; 12628 tokens[1].value = emit_get_num_tokens(emit); 12629 12630 if (emit->version >= 40 && !emit->uses_precise_qualifier) { 12631 /* Replace the reserved token with the RefactoringAllowed global flag */ 12632 VGPU10OpcodeToken0 *ptoken; 12633 12634 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; 12635 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP); 12636 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; 12637 ptoken->refactoringAllowed = 1; 12638 } 12639 12640 if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) { 12641 /* Replace the reserved token with the forceEarlyDepthStencil global flag */ 12642 VGPU10OpcodeToken0 *ptoken; 12643 12644 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; 12645 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; 12646 ptoken->forceEarlyDepthStencil = 1; 12647 } 12648 12649 return TRUE; 12650} 12651 12652 12653/** 12654 * Modify the FS to read the BCOLORs and use the FACE register 12655 * to choose between the front/back colors. 12656 */ 12657static const struct tgsi_token * 12658transform_fs_twoside(const struct tgsi_token *tokens) 12659{ 12660 if (0) { 12661 debug_printf("Before tgsi_add_two_side ------------------\n"); 12662 tgsi_dump(tokens,0); 12663 } 12664 tokens = tgsi_add_two_side(tokens); 12665 if (0) { 12666 debug_printf("After tgsi_add_two_side ------------------\n"); 12667 tgsi_dump(tokens, 0); 12668 } 12669 return tokens; 12670} 12671 12672 12673/** 12674 * Modify the FS to do polygon stipple. 12675 */ 12676static const struct tgsi_token * 12677transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, 12678 const struct tgsi_token *tokens) 12679{ 12680 const struct tgsi_token *new_tokens; 12681 unsigned unit; 12682 12683 if (0) { 12684 debug_printf("Before pstipple ------------------\n"); 12685 tgsi_dump(tokens,0); 12686 } 12687 12688 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, 12689 TGSI_FILE_INPUT); 12690 12691 emit->fs.pstipple_sampler_unit = unit; 12692 12693 /* The new sampler state is appended to the end of the samplers list */ 12694 emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++; 12695 12696 /* Setup texture state for stipple */ 12697 emit->sampler_target[unit] = TGSI_TEXTURE_2D; 12698 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 12699 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 12700 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 12701 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 12702 emit->key.tex[unit].target = PIPE_TEXTURE_2D; 12703 emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index; 12704 12705 if (0) { 12706 debug_printf("After pstipple ------------------\n"); 12707 tgsi_dump(new_tokens, 0); 12708 } 12709 12710 return new_tokens; 12711} 12712 12713/** 12714 * Modify the FS to support anti-aliasing point. 12715 */ 12716static const struct tgsi_token * 12717transform_fs_aapoint(struct svga_context *svga, 12718 const struct tgsi_token *tokens, 12719 int aa_coord_index) 12720{ 12721 bool need_texcoord_semantic = 12722 svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD); 12723 12724 if (0) { 12725 debug_printf("Before tgsi_add_aa_point ------------------\n"); 12726 tgsi_dump(tokens,0); 12727 } 12728 tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic); 12729 if (0) { 12730 debug_printf("After tgsi_add_aa_point ------------------\n"); 12731 tgsi_dump(tokens, 0); 12732 } 12733 return tokens; 12734} 12735 12736 12737/** 12738 * A helper function to determine the shader in the previous stage and 12739 * then call the linker function to determine the input mapping for this 12740 * shader to match the output indices from the shader in the previous stage. 12741 */ 12742static void 12743compute_input_mapping(struct svga_context *svga, 12744 struct svga_shader_emitter_v10 *emit, 12745 enum pipe_shader_type unit) 12746{ 12747 struct svga_shader *prevShader = NULL; /* shader in the previous stage */ 12748 12749 if (unit == PIPE_SHADER_FRAGMENT) { 12750 prevShader = svga->curr.gs ? 12751 &svga->curr.gs->base : (svga->curr.tes ? 12752 &svga->curr.tes->base : &svga->curr.vs->base); 12753 } else if (unit == PIPE_SHADER_GEOMETRY) { 12754 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base; 12755 } else if (unit == PIPE_SHADER_TESS_EVAL) { 12756 assert(svga->curr.tcs); 12757 prevShader = &svga->curr.tcs->base; 12758 } else if (unit == PIPE_SHADER_TESS_CTRL) { 12759 assert(svga->curr.vs); 12760 prevShader = &svga->curr.vs->base; 12761 } 12762 12763 if (prevShader != NULL) { 12764 svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage); 12765 emit->prevShaderInfo = &prevShader->tgsi_info; 12766 } 12767 else { 12768 /** 12769 * Since vertex shader does not need to go through the linker to 12770 * establish the input map, we need to make sure the highest index 12771 * of input registers is set properly here. 12772 */ 12773 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max, 12774 emit->info.file_max[TGSI_FILE_INPUT]); 12775 } 12776} 12777 12778 12779/** 12780 * Copies the shader signature info to the shader variant 12781 */ 12782static void 12783copy_shader_signature(struct svga_shader_signature *sgn, 12784 struct svga_shader_variant *variant) 12785{ 12786 SVGA3dDXShaderSignatureHeader *header = &sgn->header; 12787 12788 /* Calculate the signature length */ 12789 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) + 12790 (header->numInputSignatures + 12791 header->numOutputSignatures + 12792 header->numPatchConstantSignatures) * 12793 sizeof(SVGA3dDXShaderSignatureEntry); 12794 12795 /* Allocate buffer for the signature info */ 12796 variant->signature = 12797 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen); 12798 12799 char *sgnBuf = (char *)variant->signature; 12800 unsigned sgnLen; 12801 12802 /* Copy the signature info to the shader variant structure */ 12803 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader)); 12804 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader); 12805 12806 if (header->numInputSignatures) { 12807 sgnLen = 12808 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry); 12809 memcpy(sgnBuf, &sgn->inputs[0], sgnLen); 12810 sgnBuf += sgnLen; 12811 } 12812 12813 if (header->numOutputSignatures) { 12814 sgnLen = 12815 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry); 12816 memcpy(sgnBuf, &sgn->outputs[0], sgnLen); 12817 sgnBuf += sgnLen; 12818 } 12819 12820 if (header->numPatchConstantSignatures) { 12821 sgnLen = 12822 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry); 12823 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen); 12824 } 12825} 12826 12827 12828/** 12829 * This is the main entrypoint for the TGSI -> VPGU10 translator. 12830 */ 12831struct svga_shader_variant * 12832svga_tgsi_vgpu10_translate(struct svga_context *svga, 12833 const struct svga_shader *shader, 12834 const struct svga_compile_key *key, 12835 enum pipe_shader_type unit) 12836{ 12837 struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); 12838 struct svga_shader_variant *variant = NULL; 12839 struct svga_shader_emitter_v10 *emit; 12840 const struct tgsi_token *tokens = shader->tokens; 12841 12842 (void) make_immediate_reg_double; /* unused at this time */ 12843 12844 assert(unit == PIPE_SHADER_VERTEX || 12845 unit == PIPE_SHADER_GEOMETRY || 12846 unit == PIPE_SHADER_FRAGMENT || 12847 unit == PIPE_SHADER_TESS_CTRL || 12848 unit == PIPE_SHADER_TESS_EVAL || 12849 unit == PIPE_SHADER_COMPUTE); 12850 12851 /* These two flags cannot be used together */ 12852 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 12853 12854 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE); 12855 /* 12856 * Setup the code emitter 12857 */ 12858 emit = alloc_emitter(); 12859 if (!emit) 12860 goto done; 12861 12862 emit->unit = unit; 12863 if (svga_have_gl43(svga)) { 12864 emit->version = 51; 12865 } else if (svga_have_sm5(svga)) { 12866 emit->version = 50; 12867 } else if (svga_have_sm4_1(svga)) { 12868 emit->version = 41; 12869 } else { 12870 emit->version = 40; 12871 } 12872 12873 emit->use_sampler_state_mapping = emit->key.sampler_state_mapping; 12874 12875 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0; 12876 12877 emit->key = *key; 12878 12879 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 12880 emit->key.gs.need_prescale || 12881 emit->key.tes.need_prescale); 12882 12883 /* Determine how many prescale factors in the constant buffer */ 12884 emit->vposition.num_prescale = 1; 12885 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) { 12886 assert(emit->unit == PIPE_SHADER_GEOMETRY); 12887 emit->vposition.num_prescale = emit->key.gs.num_prescale; 12888 } 12889 12890 emit->vposition.tmp_index = INVALID_INDEX; 12891 emit->vposition.so_index = INVALID_INDEX; 12892 emit->vposition.out_index = INVALID_INDEX; 12893 12894 emit->vs.vertex_id_sys_index = INVALID_INDEX; 12895 emit->vs.vertex_id_tmp_index = INVALID_INDEX; 12896 emit->vs.vertex_id_bias_index = INVALID_INDEX; 12897 12898 emit->fs.color_tmp_index = INVALID_INDEX; 12899 emit->fs.face_input_index = INVALID_INDEX; 12900 emit->fs.fragcoord_input_index = INVALID_INDEX; 12901 emit->fs.sample_id_sys_index = INVALID_INDEX; 12902 emit->fs.sample_pos_sys_index = INVALID_INDEX; 12903 emit->fs.sample_mask_in_sys_index = INVALID_INDEX; 12904 emit->fs.layer_input_index = INVALID_INDEX; 12905 emit->fs.layer_imm_index = INVALID_INDEX; 12906 12907 emit->gs.prim_id_index = INVALID_INDEX; 12908 emit->gs.invocation_id_sys_index = INVALID_INDEX; 12909 emit->gs.viewport_index_out_index = INVALID_INDEX; 12910 emit->gs.viewport_index_tmp_index = INVALID_INDEX; 12911 12912 emit->tcs.vertices_per_patch_index = INVALID_INDEX; 12913 emit->tcs.invocation_id_sys_index = INVALID_INDEX; 12914 emit->tcs.control_point_input_index = INVALID_INDEX; 12915 emit->tcs.control_point_addr_index = INVALID_INDEX; 12916 emit->tcs.control_point_out_index = INVALID_INDEX; 12917 emit->tcs.control_point_tmp_index = INVALID_INDEX; 12918 emit->tcs.control_point_out_count = 0; 12919 emit->tcs.inner.out_index = INVALID_INDEX; 12920 emit->tcs.inner.temp_index = INVALID_INDEX; 12921 emit->tcs.inner.tgsi_index = INVALID_INDEX; 12922 emit->tcs.outer.out_index = INVALID_INDEX; 12923 emit->tcs.outer.temp_index = INVALID_INDEX; 12924 emit->tcs.outer.tgsi_index = INVALID_INDEX; 12925 emit->tcs.patch_generic_out_count = 0; 12926 emit->tcs.patch_generic_out_index = INVALID_INDEX; 12927 emit->tcs.patch_generic_tmp_index = INVALID_INDEX; 12928 emit->tcs.prim_id_index = INVALID_INDEX; 12929 12930 emit->tes.tesscoord_sys_index = INVALID_INDEX; 12931 emit->tes.inner.in_index = INVALID_INDEX; 12932 emit->tes.inner.temp_index = INVALID_INDEX; 12933 emit->tes.inner.tgsi_index = INVALID_INDEX; 12934 emit->tes.outer.in_index = INVALID_INDEX; 12935 emit->tes.outer.temp_index = INVALID_INDEX; 12936 emit->tes.outer.tgsi_index = INVALID_INDEX; 12937 emit->tes.prim_id_index = INVALID_INDEX; 12938 12939 emit->cs.thread_id_index = INVALID_INDEX; 12940 emit->cs.block_id_index = INVALID_INDEX; 12941 emit->cs.grid_size.tgsi_index = INVALID_INDEX; 12942 emit->cs.grid_size.imm_index = INVALID_INDEX; 12943 emit->cs.block_width = 1; 12944 emit->cs.block_height = 1; 12945 emit->cs.block_depth = 1; 12946 12947 emit->clip_dist_out_index = INVALID_INDEX; 12948 emit->clip_dist_tmp_index = INVALID_INDEX; 12949 emit->clip_dist_so_index = INVALID_INDEX; 12950 emit->clip_vertex_out_index = INVALID_INDEX; 12951 emit->clip_vertex_tmp_index = INVALID_INDEX; 12952 emit->svga_debug_callback = svga->debug.callback; 12953 12954 emit->index_range.start_index = INVALID_INDEX; 12955 emit->index_range.count = 0; 12956 emit->index_range.required = FALSE; 12957 emit->index_range.operandType = VGPU10_NUM_OPERANDS; 12958 emit->index_range.dim = 0; 12959 emit->index_range.size = 0; 12960 12961 emit->current_loop_depth = 0; 12962 12963 emit->initialize_temp_index = INVALID_INDEX; 12964 emit->image_size_index = INVALID_INDEX; 12965 12966 emit->max_vs_inputs = svgascreen->max_vs_inputs; 12967 emit->max_vs_outputs = svgascreen->max_vs_outputs; 12968 emit->max_gs_inputs = svgascreen->max_gs_inputs; 12969 12970 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 12971 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 12972 } 12973 12974 if (unit == PIPE_SHADER_FRAGMENT) { 12975 if (key->fs.light_twoside) { 12976 tokens = transform_fs_twoside(tokens); 12977 } 12978 if (key->fs.pstipple) { 12979 const struct tgsi_token *new_tokens = 12980 transform_fs_pstipple(emit, tokens); 12981 if (tokens != shader->tokens) { 12982 /* free the two-sided shader tokens */ 12983 tgsi_free_tokens(tokens); 12984 } 12985 tokens = new_tokens; 12986 } 12987 if (key->fs.aa_point) { 12988 tokens = transform_fs_aapoint(svga, tokens, 12989 key->fs.aa_point_coord_index); 12990 } 12991 } 12992 12993 if (SVGA_DEBUG & DEBUG_TGSI) { 12994 debug_printf("#####################################\n"); 12995 debug_printf("### TGSI Shader %u\n", shader->id); 12996 tgsi_dump(tokens, 0); 12997 } 12998 12999 /** 13000 * Rescan the header if the token string is different from the one 13001 * included in the shader; otherwise, the header info is already up-to-date 13002 */ 13003 if (tokens != shader->tokens) { 13004 tgsi_scan_shader(tokens, &emit->info); 13005 } else { 13006 emit->info = shader->tgsi_info; 13007 } 13008 13009 emit->num_outputs = emit->info.num_outputs; 13010 13011 /** 13012 * Compute input mapping to match the outputs from shader 13013 * in the previous stage 13014 */ 13015 compute_input_mapping(svga, emit, unit); 13016 13017 determine_clipping_mode(emit); 13018 13019 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX || 13020 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) { 13021 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 13022 /* if there is stream output declarations associated 13023 * with this shader or the shader writes to ClipDistance 13024 * then reserve extra registers for the non-adjusted vertex position 13025 * and the ClipDistance shadow copy. 13026 */ 13027 emit->vposition.so_index = emit->num_outputs++; 13028 13029 if (emit->clip_mode == CLIP_DISTANCE) { 13030 emit->clip_dist_so_index = emit->num_outputs++; 13031 if (emit->info.num_written_clipdistance > 4) 13032 emit->num_outputs++; 13033 } 13034 } 13035 } 13036 13037 /* Determine if constbuf to rawbuf translation is needed */ 13038 if (emit->info.const_buffers_declared) { 13039 emit->raw_bufs = emit->key.raw_buffers; 13040 emit->raw_buf_srv_start_index = emit->key.srv_raw_buf_index; 13041 } 13042 13043 /* 13044 * Do actual shader translation. 13045 */ 13046 if (!emit_vgpu10_header(emit)) { 13047 debug_printf("svga: emit VGPU10 header failed\n"); 13048 goto cleanup; 13049 } 13050 13051 if (!emit_vgpu10_instructions(emit, tokens)) { 13052 debug_printf("svga: emit VGPU10 instructions failed\n"); 13053 goto cleanup; 13054 } 13055 13056 if (!emit_vgpu10_tail(emit)) { 13057 debug_printf("svga: emit VGPU10 tail failed\n"); 13058 goto cleanup; 13059 } 13060 13061 if (emit->register_overflow) { 13062 goto cleanup; 13063 } 13064 13065 /* 13066 * Create, initialize the 'variant' object. 13067 */ 13068 variant = svga_new_shader_variant(svga, unit); 13069 if (!variant) 13070 goto cleanup; 13071 13072 variant->shader = shader; 13073 variant->nr_tokens = emit_get_num_tokens(emit); 13074 variant->tokens = (const unsigned *)emit->buf; 13075 13076 /* Copy shader signature info to the shader variant */ 13077 if (svga_have_sm5(svga)) { 13078 copy_shader_signature(&emit->signature, variant); 13079 } 13080 13081 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 13082 memcpy(&variant->key, key, sizeof(*key)); 13083 variant->id = UTIL_BITMASK_INVALID_INDEX; 13084 13085 /* The extra constant starting offset starts with the number of 13086 * shader constants declared in the shader. 13087 */ 13088 variant->extra_const_start = emit->num_shader_consts[0]; 13089 if (key->gs.wide_point) { 13090 /** 13091 * The extra constant added in the transformed shader 13092 * for inverse viewport scale is to be supplied by the driver. 13093 * So the extra constant starting offset needs to be reduced by 1. 13094 */ 13095 assert(variant->extra_const_start > 0); 13096 variant->extra_const_start--; 13097 } 13098 13099 if (unit == PIPE_SHADER_FRAGMENT) { 13100 struct svga_fs_variant *fs_variant = svga_fs_variant(variant); 13101 13102 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 13103 fs_variant->pstipple_sampler_state_index = 13104 emit->fs.pstipple_sampler_state_index; 13105 13106 /* If there was exactly one write to a fragment shader output register 13107 * and it came from a constant buffer, we know all fragments will have 13108 * the same color (except for blending). 13109 */ 13110 fs_variant->constant_color_output = 13111 emit->constant_color_output && emit->num_output_writes == 1; 13112 13113 /** keep track in the variant if flat interpolation is used 13114 * for any of the varyings. 13115 */ 13116 fs_variant->uses_flat_interp = emit->uses_flat_interp; 13117 13118 fs_variant->fs_shadow_compare_units = emit->shadow_compare_units; 13119 } 13120 else if (unit == PIPE_SHADER_TESS_EVAL) { 13121 struct svga_tes_variant *tes_variant = svga_tes_variant(variant); 13122 13123 /* Keep track in the tes variant some of the layout parameters. 13124 * These parameters will be referenced by the tcs to emit 13125 * the necessary declarations for the hull shader. 13126 */ 13127 tes_variant->prim_mode = emit->tes.prim_mode; 13128 tes_variant->spacing = emit->tes.spacing; 13129 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw; 13130 tes_variant->point_mode = emit->tes.point_mode; 13131 } 13132 13133 13134 if (tokens != shader->tokens) { 13135 tgsi_free_tokens(tokens); 13136 } 13137 13138cleanup: 13139 free_emitter(emit); 13140 13141done: 13142 SVGA_STATS_TIME_POP(svga_sws(svga)); 13143 return variant; 13144} 13145