1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "util/u_debug.h" 29#include "pipe/p_shader_tokens.h" 30#include "tgsi_info.h" 31#include "tgsi_parse.h" 32#include "tgsi_util.h" 33#include "tgsi_exec.h" 34#include "util/bitscan.h" 35 36union pointer_hack 37{ 38 void *pointer; 39 uint64_t uint64; 40}; 41 42void * 43tgsi_align_128bit(void *unaligned) 44{ 45 union pointer_hack ph; 46 47 ph.uint64 = 0; 48 ph.pointer = unaligned; 49 ph.uint64 = (ph.uint64 + 15) & ~15; 50 return ph.pointer; 51} 52 53unsigned 54tgsi_util_get_src_register_swizzle(const struct tgsi_src_register *reg, 55 unsigned component) 56{ 57 switch (component) { 58 case TGSI_CHAN_X: 59 return reg->SwizzleX; 60 case TGSI_CHAN_Y: 61 return reg->SwizzleY; 62 case TGSI_CHAN_Z: 63 return reg->SwizzleZ; 64 case TGSI_CHAN_W: 65 return reg->SwizzleW; 66 default: 67 assert(0); 68 } 69 return 0; 70} 71 72 73unsigned 74tgsi_util_get_full_src_register_swizzle( 75 const struct tgsi_full_src_register *reg, 76 unsigned component) 77{ 78 return tgsi_util_get_src_register_swizzle(®->Register, component); 79} 80 81 82void 83tgsi_util_set_src_register_swizzle(struct tgsi_src_register *reg, 84 unsigned swizzle, 85 unsigned component) 86{ 87 switch (component) { 88 case 0: 89 reg->SwizzleX = swizzle; 90 break; 91 case 1: 92 reg->SwizzleY = swizzle; 93 break; 94 case 2: 95 reg->SwizzleZ = swizzle; 96 break; 97 case 3: 98 reg->SwizzleW = swizzle; 99 break; 100 default: 101 assert(0); 102 } 103} 104 105 106/** 107 * Determine which channels of the specificed src register are effectively 108 * used by this instruction. 109 */ 110unsigned 111tgsi_util_get_src_usage_mask(enum tgsi_opcode opcode, 112 unsigned src_idx, 113 uint8_t write_mask, 114 uint8_t swizzle_x, 115 uint8_t swizzle_y, 116 uint8_t swizzle_z, 117 uint8_t swizzle_w, 118 enum tgsi_texture_type tex_target, 119 enum tgsi_texture_type mem_target) 120{ 121 unsigned read_mask; 122 unsigned usage_mask; 123 124 switch (opcode) { 125 case TGSI_OPCODE_IF: 126 case TGSI_OPCODE_UIF: 127 case TGSI_OPCODE_EMIT: 128 case TGSI_OPCODE_ENDPRIM: 129 case TGSI_OPCODE_RCP: 130 case TGSI_OPCODE_RSQ: 131 case TGSI_OPCODE_SQRT: 132 case TGSI_OPCODE_EX2: 133 case TGSI_OPCODE_LG2: 134 case TGSI_OPCODE_SIN: 135 case TGSI_OPCODE_COS: 136 case TGSI_OPCODE_POW: /* reads src0.x and src1.x */ 137 case TGSI_OPCODE_UP2H: 138 case TGSI_OPCODE_UP2US: 139 case TGSI_OPCODE_UP4B: 140 case TGSI_OPCODE_UP4UB: 141 case TGSI_OPCODE_MEMBAR: 142 case TGSI_OPCODE_BALLOT: 143 read_mask = TGSI_WRITEMASK_X; 144 break; 145 146 case TGSI_OPCODE_DP2: 147 case TGSI_OPCODE_PK2H: 148 case TGSI_OPCODE_PK2US: 149 case TGSI_OPCODE_DFRACEXP: 150 case TGSI_OPCODE_F2D: 151 case TGSI_OPCODE_I2D: 152 case TGSI_OPCODE_U2D: 153 case TGSI_OPCODE_F2U64: 154 case TGSI_OPCODE_F2I64: 155 case TGSI_OPCODE_U2I64: 156 case TGSI_OPCODE_I2I64: 157 case TGSI_OPCODE_TXQS: /* bindless handle possible */ 158 case TGSI_OPCODE_RESQ: /* bindless handle possible */ 159 read_mask = TGSI_WRITEMASK_XY; 160 break; 161 162 case TGSI_OPCODE_TXQ: 163 if (src_idx == 0) 164 read_mask = TGSI_WRITEMASK_X; 165 else 166 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 167 break; 168 169 case TGSI_OPCODE_DP3: 170 read_mask = TGSI_WRITEMASK_XYZ; 171 break; 172 173 case TGSI_OPCODE_DSEQ: 174 case TGSI_OPCODE_DSNE: 175 case TGSI_OPCODE_DSLT: 176 case TGSI_OPCODE_DSGE: 177 case TGSI_OPCODE_DP4: 178 case TGSI_OPCODE_PK4B: 179 case TGSI_OPCODE_PK4UB: 180 case TGSI_OPCODE_D2F: 181 case TGSI_OPCODE_D2I: 182 case TGSI_OPCODE_D2U: 183 case TGSI_OPCODE_I2F: 184 case TGSI_OPCODE_U2F: 185 case TGSI_OPCODE_U64SEQ: 186 case TGSI_OPCODE_U64SNE: 187 case TGSI_OPCODE_U64SLT: 188 case TGSI_OPCODE_U64SGE: 189 case TGSI_OPCODE_U642F: 190 case TGSI_OPCODE_I64SLT: 191 case TGSI_OPCODE_I64SGE: 192 case TGSI_OPCODE_I642F: 193 read_mask = TGSI_WRITEMASK_XYZW; 194 break; 195 196 case TGSI_OPCODE_LIT: 197 read_mask = write_mask & TGSI_WRITEMASK_YZ ? 198 TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0; 199 break; 200 201 case TGSI_OPCODE_EXP: 202 case TGSI_OPCODE_LOG: 203 read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; 204 break; 205 206 case TGSI_OPCODE_DST: 207 if (src_idx == 0) 208 read_mask = TGSI_WRITEMASK_YZ; 209 else 210 read_mask = TGSI_WRITEMASK_YW; 211 break; 212 213 case TGSI_OPCODE_DLDEXP: 214 if (src_idx == 0) { 215 read_mask = write_mask; 216 } else { 217 read_mask = 218 (write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) | 219 (write_mask & TGSI_WRITEMASK_ZW ? TGSI_WRITEMASK_Z : 0); 220 } 221 break; 222 223 case TGSI_OPCODE_READ_INVOC: 224 if (src_idx == 0) 225 read_mask = write_mask; 226 else 227 read_mask = TGSI_WRITEMASK_X; 228 break; 229 230 case TGSI_OPCODE_FBFETCH: 231 read_mask = 0; /* not a real register read */ 232 break; 233 234 case TGSI_OPCODE_TEX: 235 case TGSI_OPCODE_TEX_LZ: 236 case TGSI_OPCODE_TXF_LZ: 237 case TGSI_OPCODE_TXF: 238 case TGSI_OPCODE_TXB: 239 case TGSI_OPCODE_TXL: 240 case TGSI_OPCODE_TXP: 241 case TGSI_OPCODE_TXD: 242 case TGSI_OPCODE_TEX2: 243 case TGSI_OPCODE_TXB2: 244 case TGSI_OPCODE_TXL2: 245 case TGSI_OPCODE_LODQ: 246 case TGSI_OPCODE_TG4: { 247 unsigned dim_layer = 248 tgsi_util_get_texture_coord_dim(tex_target); 249 unsigned dim_layer_shadow, dim; 250 251 /* Add shadow. */ 252 if (tgsi_is_shadow_target(tex_target)) { 253 dim_layer_shadow = dim_layer + 1; 254 if (tex_target == TGSI_TEXTURE_SHADOW1D) 255 dim_layer_shadow = 3; 256 } else { 257 dim_layer_shadow = dim_layer; 258 } 259 260 /* Remove layer. */ 261 if (tgsi_is_array_sampler(tex_target)) 262 dim = dim_layer - 1; 263 else 264 dim = dim_layer; 265 266 read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */ 267 268 switch (src_idx) { 269 case 0: 270 if (opcode == TGSI_OPCODE_LODQ) 271 read_mask = u_bit_consecutive(0, dim); 272 else 273 read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf; 274 275 if (tex_target == TGSI_TEXTURE_SHADOW1D) 276 read_mask &= ~TGSI_WRITEMASK_Y; 277 278 if (opcode == TGSI_OPCODE_TXF || 279 opcode == TGSI_OPCODE_TXB || 280 opcode == TGSI_OPCODE_TXL || 281 opcode == TGSI_OPCODE_TXP) 282 read_mask |= TGSI_WRITEMASK_W; 283 break; 284 285 case 1: 286 if (opcode == TGSI_OPCODE_TXD) 287 read_mask = u_bit_consecutive(0, dim); 288 else if (opcode == TGSI_OPCODE_TEX2 || 289 opcode == TGSI_OPCODE_TXB2 || 290 opcode == TGSI_OPCODE_TXL2 || 291 opcode == TGSI_OPCODE_TG4) 292 read_mask = TGSI_WRITEMASK_X; 293 break; 294 295 case 2: 296 if (opcode == TGSI_OPCODE_TXD) 297 read_mask = u_bit_consecutive(0, dim); 298 break; 299 } 300 break; 301 } 302 303 case TGSI_OPCODE_LOAD: 304 if (src_idx == 0) { 305 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 306 } else { 307 unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); 308 read_mask = u_bit_consecutive(0, dim); 309 } 310 break; 311 312 case TGSI_OPCODE_STORE: 313 if (src_idx == 0) { 314 unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); 315 read_mask = u_bit_consecutive(0, dim); 316 } else { 317 read_mask = TGSI_WRITEMASK_XYZW; 318 } 319 break; 320 321 case TGSI_OPCODE_ATOMUADD: 322 case TGSI_OPCODE_ATOMXCHG: 323 case TGSI_OPCODE_ATOMCAS: 324 case TGSI_OPCODE_ATOMAND: 325 case TGSI_OPCODE_ATOMOR: 326 case TGSI_OPCODE_ATOMXOR: 327 case TGSI_OPCODE_ATOMUMIN: 328 case TGSI_OPCODE_ATOMUMAX: 329 case TGSI_OPCODE_ATOMIMIN: 330 case TGSI_OPCODE_ATOMIMAX: 331 case TGSI_OPCODE_ATOMFADD: 332 if (src_idx == 0) { 333 read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ 334 } else if (src_idx == 1) { 335 unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); 336 read_mask = u_bit_consecutive(0, dim); 337 } else { 338 read_mask = TGSI_WRITEMASK_XYZW; 339 } 340 break; 341 342 case TGSI_OPCODE_INTERP_CENTROID: 343 case TGSI_OPCODE_INTERP_SAMPLE: 344 case TGSI_OPCODE_INTERP_OFFSET: 345 if (src_idx == 0) 346 read_mask = write_mask; 347 else if (opcode == TGSI_OPCODE_INTERP_OFFSET) 348 read_mask = TGSI_WRITEMASK_XY; /* offset */ 349 else 350 read_mask = TGSI_WRITEMASK_X; /* sample */ 351 break; 352 353 default: 354 if (tgsi_get_opcode_info(opcode)->output_mode == 355 TGSI_OUTPUT_COMPONENTWISE) 356 read_mask = write_mask; 357 else 358 read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */ 359 break; 360 } 361 362 usage_mask = 0; 363 if (read_mask & TGSI_WRITEMASK_X) 364 usage_mask |= 1 << swizzle_x; 365 if (read_mask & TGSI_WRITEMASK_Y) 366 usage_mask |= 1 << swizzle_y; 367 if (read_mask & TGSI_WRITEMASK_Z) 368 usage_mask |= 1 << swizzle_z; 369 if (read_mask & TGSI_WRITEMASK_W) 370 usage_mask |= 1 << swizzle_w; 371 372 return usage_mask; 373} 374 375unsigned 376tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, 377 unsigned src_idx) 378{ 379 return tgsi_util_get_src_usage_mask(inst->Instruction.Opcode, src_idx, 380 inst->Dst[0].Register.WriteMask, 381 inst->Src[src_idx].Register.SwizzleX, 382 inst->Src[src_idx].Register.SwizzleY, 383 inst->Src[src_idx].Register.SwizzleZ, 384 inst->Src[src_idx].Register.SwizzleW, 385 inst->Texture.Texture, 386 inst->Memory.Texture); 387} 388 389/** 390 * Convert a tgsi_ind_register into a tgsi_src_register 391 */ 392struct tgsi_src_register 393tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg) 394{ 395 struct tgsi_src_register src = { 0 }; 396 397 src.File = reg->File; 398 src.Index = reg->Index; 399 src.SwizzleX = reg->Swizzle; 400 src.SwizzleY = reg->Swizzle; 401 src.SwizzleZ = reg->Swizzle; 402 src.SwizzleW = reg->Swizzle; 403 404 return src; 405} 406 407/** 408 * Return the dimension of the texture coordinates (layer included for array 409 * textures), as well as the location of the shadow reference value or the 410 * sample index. 411 */ 412int 413tgsi_util_get_texture_coord_dim(enum tgsi_texture_type tgsi_tex) 414{ 415 /* 416 * Depending on the texture target, (src0.xyzw, src1.x) is interpreted 417 * differently: 418 * 419 * (s, X, X, X, X), for BUFFER 420 * (s, X, X, X, X), for 1D 421 * (s, t, X, X, X), for 2D, RECT 422 * (s, t, r, X, X), for 3D, CUBE 423 * 424 * (s, layer, X, X, X), for 1D_ARRAY 425 * (s, t, layer, X, X), for 2D_ARRAY 426 * (s, t, r, layer, X), for CUBE_ARRAY 427 * 428 * (s, X, shadow, X, X), for SHADOW1D 429 * (s, t, shadow, X, X), for SHADOW2D, SHADOWRECT 430 * (s, t, r, shadow, X), for SHADOWCUBE 431 * 432 * (s, layer, shadow, X, X), for SHADOW1D_ARRAY 433 * (s, t, layer, shadow, X), for SHADOW2D_ARRAY 434 * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY 435 * 436 * (s, t, sample, X, X), for 2D_MSAA 437 * (s, t, layer, sample, X), for 2D_ARRAY_MSAA 438 */ 439 switch (tgsi_tex) { 440 case TGSI_TEXTURE_BUFFER: 441 case TGSI_TEXTURE_1D: 442 case TGSI_TEXTURE_SHADOW1D: 443 return 1; 444 case TGSI_TEXTURE_2D: 445 case TGSI_TEXTURE_RECT: 446 case TGSI_TEXTURE_1D_ARRAY: 447 case TGSI_TEXTURE_SHADOW2D: 448 case TGSI_TEXTURE_SHADOWRECT: 449 case TGSI_TEXTURE_SHADOW1D_ARRAY: 450 case TGSI_TEXTURE_2D_MSAA: 451 return 2; 452 case TGSI_TEXTURE_3D: 453 case TGSI_TEXTURE_CUBE: 454 case TGSI_TEXTURE_2D_ARRAY: 455 case TGSI_TEXTURE_SHADOWCUBE: 456 case TGSI_TEXTURE_SHADOW2D_ARRAY: 457 case TGSI_TEXTURE_2D_ARRAY_MSAA: 458 return 3; 459 case TGSI_TEXTURE_CUBE_ARRAY: 460 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 461 return 4; 462 default: 463 assert(!"unknown texture target"); 464 return 0; 465 } 466} 467 468 469/** 470 * Given a TGSI_TEXTURE_x target, return register component where the 471 * shadow reference/distance coordinate is found. Typically, components 472 * 0 and 1 are the (s,t) texcoords and component 2 or 3 hold the shadow 473 * reference value. But if we return 4, it means the reference value is 474 * found in the 0th component of the second coordinate argument to the 475 * TEX2 instruction. 476 */ 477int 478tgsi_util_get_shadow_ref_src_index(enum tgsi_texture_type tgsi_tex) 479{ 480 switch (tgsi_tex) { 481 case TGSI_TEXTURE_SHADOW1D: 482 case TGSI_TEXTURE_SHADOW2D: 483 case TGSI_TEXTURE_SHADOWRECT: 484 case TGSI_TEXTURE_SHADOW1D_ARRAY: 485 return 2; 486 case TGSI_TEXTURE_SHADOWCUBE: 487 case TGSI_TEXTURE_SHADOW2D_ARRAY: 488 case TGSI_TEXTURE_2D_MSAA: 489 case TGSI_TEXTURE_2D_ARRAY_MSAA: 490 return 3; 491 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 492 return 4; 493 default: 494 /* no shadow nor sample */ 495 return -1; 496 } 497} 498 499 500bool 501tgsi_is_shadow_target(enum tgsi_texture_type target) 502{ 503 switch (target) { 504 case TGSI_TEXTURE_SHADOW1D: 505 case TGSI_TEXTURE_SHADOW2D: 506 case TGSI_TEXTURE_SHADOWRECT: 507 case TGSI_TEXTURE_SHADOW1D_ARRAY: 508 case TGSI_TEXTURE_SHADOW2D_ARRAY: 509 case TGSI_TEXTURE_SHADOWCUBE: 510 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 511 return TRUE; 512 default: 513 return FALSE; 514 } 515} 516