/**************************************************************************
 *
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/compiler.h" 62#include "util/half_float.h" 63#include "util/u_memory.h" 64#include "util/u_math.h" 65#include "util/rounding.h" 66 67 68#define DEBUG_EXECUTION 0 69 70 71#define TILE_TOP_LEFT 0 72#define TILE_TOP_RIGHT 1 73#define TILE_BOTTOM_LEFT 2 74#define TILE_BOTTOM_RIGHT 3 75 76union tgsi_double_channel { 77 double d[TGSI_QUAD_SIZE]; 78 unsigned u[TGSI_QUAD_SIZE][2]; 79 uint64_t u64[TGSI_QUAD_SIZE]; 80 int64_t i64[TGSI_QUAD_SIZE]; 81} ALIGN16; 82 83struct ALIGN16 tgsi_double_vector { 84 union tgsi_double_channel xy; 85 union tgsi_double_channel zw; 86}; 87 88static void 89micro_abs(union tgsi_exec_channel *dst, 90 const union tgsi_exec_channel *src) 91{ 92 dst->f[0] = fabsf(src->f[0]); 93 dst->f[1] = fabsf(src->f[1]); 94 dst->f[2] = fabsf(src->f[2]); 95 dst->f[3] = fabsf(src->f[3]); 96} 97 98static void 99micro_arl(union tgsi_exec_channel *dst, 100 const union tgsi_exec_channel *src) 101{ 102 dst->i[0] = (int)floorf(src->f[0]); 103 dst->i[1] = (int)floorf(src->f[1]); 104 dst->i[2] = (int)floorf(src->f[2]); 105 dst->i[3] = (int)floorf(src->f[3]); 106} 107 108static void 109micro_arr(union tgsi_exec_channel *dst, 110 const union tgsi_exec_channel *src) 111{ 112 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 113 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 114 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 115 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 116} 117 118static void 119micro_ceil(union tgsi_exec_channel *dst, 120 const union tgsi_exec_channel *src) 121{ 122 dst->f[0] = 
ceilf(src->f[0]); 123 dst->f[1] = ceilf(src->f[1]); 124 dst->f[2] = ceilf(src->f[2]); 125 dst->f[3] = ceilf(src->f[3]); 126} 127 128static void 129micro_cmp(union tgsi_exec_channel *dst, 130 const union tgsi_exec_channel *src0, 131 const union tgsi_exec_channel *src1, 132 const union tgsi_exec_channel *src2) 133{ 134 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 135 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 136 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 137 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 138} 139 140static void 141micro_cos(union tgsi_exec_channel *dst, 142 const union tgsi_exec_channel *src) 143{ 144 dst->f[0] = cosf(src->f[0]); 145 dst->f[1] = cosf(src->f[1]); 146 dst->f[2] = cosf(src->f[2]); 147 dst->f[3] = cosf(src->f[3]); 148} 149 150static void 151micro_d2f(union tgsi_exec_channel *dst, 152 const union tgsi_double_channel *src) 153{ 154 dst->f[0] = (float)src->d[0]; 155 dst->f[1] = (float)src->d[1]; 156 dst->f[2] = (float)src->d[2]; 157 dst->f[3] = (float)src->d[3]; 158} 159 160static void 161micro_d2i(union tgsi_exec_channel *dst, 162 const union tgsi_double_channel *src) 163{ 164 dst->i[0] = (int)src->d[0]; 165 dst->i[1] = (int)src->d[1]; 166 dst->i[2] = (int)src->d[2]; 167 dst->i[3] = (int)src->d[3]; 168} 169 170static void 171micro_d2u(union tgsi_exec_channel *dst, 172 const union tgsi_double_channel *src) 173{ 174 dst->u[0] = (unsigned)src->d[0]; 175 dst->u[1] = (unsigned)src->d[1]; 176 dst->u[2] = (unsigned)src->d[2]; 177 dst->u[3] = (unsigned)src->d[3]; 178} 179static void 180micro_dabs(union tgsi_double_channel *dst, 181 const union tgsi_double_channel *src) 182{ 183 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 184 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 185 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 186 dst->d[3] = src->d[3] >= 0.0 ? 
src->d[3] : -src->d[3]; 187} 188 189static void 190micro_dadd(union tgsi_double_channel *dst, 191 const union tgsi_double_channel *src) 192{ 193 dst->d[0] = src[0].d[0] + src[1].d[0]; 194 dst->d[1] = src[0].d[1] + src[1].d[1]; 195 dst->d[2] = src[0].d[2] + src[1].d[2]; 196 dst->d[3] = src[0].d[3] + src[1].d[3]; 197} 198 199static void 200micro_ddiv(union tgsi_double_channel *dst, 201 const union tgsi_double_channel *src) 202{ 203 dst->d[0] = src[0].d[0] / src[1].d[0]; 204 dst->d[1] = src[0].d[1] / src[1].d[1]; 205 dst->d[2] = src[0].d[2] / src[1].d[2]; 206 dst->d[3] = src[0].d[3] / src[1].d[3]; 207} 208 209static void 210micro_ddx(union tgsi_exec_channel *dst, 211 const union tgsi_exec_channel *src) 212{ 213 dst->f[0] = 214 dst->f[1] = 215 dst->f[2] = 216 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 217} 218 219static void 220micro_ddx_fine(union tgsi_exec_channel *dst, 221 const union tgsi_exec_channel *src) 222{ 223 dst->f[0] = 224 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT]; 225 dst->f[2] = 226 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 227} 228 229 230static void 231micro_ddy(union tgsi_exec_channel *dst, 232 const union tgsi_exec_channel *src) 233{ 234 dst->f[0] = 235 dst->f[1] = 236 dst->f[2] = 237 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 238} 239 240static void 241micro_ddy_fine(union tgsi_exec_channel *dst, 242 const union tgsi_exec_channel *src) 243{ 244 dst->f[0] = 245 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 246 dst->f[1] = 247 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT]; 248} 249 250static void 251micro_dmul(union tgsi_double_channel *dst, 252 const union tgsi_double_channel *src) 253{ 254 dst->d[0] = src[0].d[0] * src[1].d[0]; 255 dst->d[1] = src[0].d[1] * src[1].d[1]; 256 dst->d[2] = src[0].d[2] * src[1].d[2]; 257 dst->d[3] = src[0].d[3] * src[1].d[3]; 258} 259 260static void 261micro_dmax(union tgsi_double_channel *dst, 262 const 
union tgsi_double_channel *src) 263{ 264 dst->d[0] = fmax(src[0].d[0], src[1].d[0]); 265 dst->d[1] = fmax(src[0].d[1], src[1].d[1]); 266 dst->d[2] = fmax(src[0].d[2], src[1].d[2]); 267 dst->d[3] = fmax(src[0].d[3], src[1].d[3]); 268} 269 270static void 271micro_dmin(union tgsi_double_channel *dst, 272 const union tgsi_double_channel *src) 273{ 274 dst->d[0] = fmin(src[0].d[0], src[1].d[0]); 275 dst->d[1] = fmin(src[0].d[1], src[1].d[1]); 276 dst->d[2] = fmin(src[0].d[2], src[1].d[2]); 277 dst->d[3] = fmin(src[0].d[3], src[1].d[3]); 278} 279 280static void 281micro_dneg(union tgsi_double_channel *dst, 282 const union tgsi_double_channel *src) 283{ 284 dst->d[0] = -src->d[0]; 285 dst->d[1] = -src->d[1]; 286 dst->d[2] = -src->d[2]; 287 dst->d[3] = -src->d[3]; 288} 289 290static void 291micro_dslt(union tgsi_double_channel *dst, 292 const union tgsi_double_channel *src) 293{ 294 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 295 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 296 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 297 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 298} 299 300static void 301micro_dsne(union tgsi_double_channel *dst, 302 const union tgsi_double_channel *src) 303{ 304 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 305 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 306 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 307 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 308} 309 310static void 311micro_dsge(union tgsi_double_channel *dst, 312 const union tgsi_double_channel *src) 313{ 314 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 315 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 316 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 317 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 318} 319 320static void 321micro_dseq(union tgsi_double_channel *dst, 322 const union tgsi_double_channel *src) 323{ 324 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? 
~0U : 0U; 325 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 326 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 327 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 328} 329 330static void 331micro_drcp(union tgsi_double_channel *dst, 332 const union tgsi_double_channel *src) 333{ 334 dst->d[0] = 1.0 / src->d[0]; 335 dst->d[1] = 1.0 / src->d[1]; 336 dst->d[2] = 1.0 / src->d[2]; 337 dst->d[3] = 1.0 / src->d[3]; 338} 339 340static void 341micro_dsqrt(union tgsi_double_channel *dst, 342 const union tgsi_double_channel *src) 343{ 344 dst->d[0] = sqrt(src->d[0]); 345 dst->d[1] = sqrt(src->d[1]); 346 dst->d[2] = sqrt(src->d[2]); 347 dst->d[3] = sqrt(src->d[3]); 348} 349 350static void 351micro_drsq(union tgsi_double_channel *dst, 352 const union tgsi_double_channel *src) 353{ 354 dst->d[0] = 1.0 / sqrt(src->d[0]); 355 dst->d[1] = 1.0 / sqrt(src->d[1]); 356 dst->d[2] = 1.0 / sqrt(src->d[2]); 357 dst->d[3] = 1.0 / sqrt(src->d[3]); 358} 359 360static void 361micro_dmad(union tgsi_double_channel *dst, 362 const union tgsi_double_channel *src) 363{ 364 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 365 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 366 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 367 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 368} 369 370static void 371micro_dfrac(union tgsi_double_channel *dst, 372 const union tgsi_double_channel *src) 373{ 374 dst->d[0] = src->d[0] - floor(src->d[0]); 375 dst->d[1] = src->d[1] - floor(src->d[1]); 376 dst->d[2] = src->d[2] - floor(src->d[2]); 377 dst->d[3] = src->d[3] - floor(src->d[3]); 378} 379 380static void 381micro_dflr(union tgsi_double_channel *dst, 382 const union tgsi_double_channel *src) 383{ 384 dst->d[0] = floor(src->d[0]); 385 dst->d[1] = floor(src->d[1]); 386 dst->d[2] = floor(src->d[2]); 387 dst->d[3] = floor(src->d[3]); 388} 389 390static void 391micro_dldexp(union tgsi_double_channel *dst, 392 const union tgsi_double_channel *src0, 393 union tgsi_exec_channel 
*src1) 394{ 395 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 396 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 397 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 398 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 399} 400 401static void 402micro_dfracexp(union tgsi_double_channel *dst, 403 union tgsi_exec_channel *dst_exp, 404 const union tgsi_double_channel *src) 405{ 406 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 407 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 408 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 409 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 410} 411 412static void 413micro_exp2(union tgsi_exec_channel *dst, 414 const union tgsi_exec_channel *src) 415{ 416#if DEBUG 417 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 418 uint i; 419 union tgsi_exec_channel clamped; 420 421 for (i = 0; i < 4; i++) { 422 if (src->f[i] > 127.99999f) { 423 clamped.f[i] = 127.99999f; 424 } else if (src->f[i] < -126.99999f) { 425 clamped.f[i] = -126.99999f; 426 } else { 427 clamped.f[i] = src->f[i]; 428 } 429 } 430 src = &clamped; 431#endif /* DEBUG */ 432 433 dst->f[0] = powf(2.0f, src->f[0]); 434 dst->f[1] = powf(2.0f, src->f[1]); 435 dst->f[2] = powf(2.0f, src->f[2]); 436 dst->f[3] = powf(2.0f, src->f[3]); 437} 438 439static void 440micro_f2d(union tgsi_double_channel *dst, 441 const union tgsi_exec_channel *src) 442{ 443 dst->d[0] = (double)src->f[0]; 444 dst->d[1] = (double)src->f[1]; 445 dst->d[2] = (double)src->f[2]; 446 dst->d[3] = (double)src->f[3]; 447} 448 449static void 450micro_flr(union tgsi_exec_channel *dst, 451 const union tgsi_exec_channel *src) 452{ 453 dst->f[0] = floorf(src->f[0]); 454 dst->f[1] = floorf(src->f[1]); 455 dst->f[2] = floorf(src->f[2]); 456 dst->f[3] = floorf(src->f[3]); 457} 458 459static void 460micro_frc(union tgsi_exec_channel *dst, 461 const union tgsi_exec_channel *src) 462{ 463 dst->f[0] = src->f[0] - floorf(src->f[0]); 464 dst->f[1] = src->f[1] - floorf(src->f[1]); 465 dst->f[2] = src->f[2] - 
floorf(src->f[2]); 466 dst->f[3] = src->f[3] - floorf(src->f[3]); 467} 468 469static void 470micro_i2d(union tgsi_double_channel *dst, 471 const union tgsi_exec_channel *src) 472{ 473 dst->d[0] = (double)src->i[0]; 474 dst->d[1] = (double)src->i[1]; 475 dst->d[2] = (double)src->i[2]; 476 dst->d[3] = (double)src->i[3]; 477} 478 479static void 480micro_iabs(union tgsi_exec_channel *dst, 481 const union tgsi_exec_channel *src) 482{ 483 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 484 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 485 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 486 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 487} 488 489static void 490micro_ineg(union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src) 492{ 493 dst->i[0] = -src->i[0]; 494 dst->i[1] = -src->i[1]; 495 dst->i[2] = -src->i[2]; 496 dst->i[3] = -src->i[3]; 497} 498 499static void 500micro_lg2(union tgsi_exec_channel *dst, 501 const union tgsi_exec_channel *src) 502{ 503 dst->f[0] = logf(src->f[0]) * 1.442695f; 504 dst->f[1] = logf(src->f[1]) * 1.442695f; 505 dst->f[2] = logf(src->f[2]) * 1.442695f; 506 dst->f[3] = logf(src->f[3]) * 1.442695f; 507} 508 509static void 510micro_lrp(union tgsi_exec_channel *dst, 511 const union tgsi_exec_channel *src0, 512 const union tgsi_exec_channel *src1, 513 const union tgsi_exec_channel *src2) 514{ 515 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 516 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 517 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 518 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 519} 520 521static void 522micro_mad(union tgsi_exec_channel *dst, 523 const union tgsi_exec_channel *src0, 524 const union tgsi_exec_channel *src1, 525 const union tgsi_exec_channel *src2) 526{ 527 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 528 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 529 dst->f[2] = src0->f[2] * src1->f[2] + 
src2->f[2]; 530 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 531} 532 533static void 534micro_mov(union tgsi_exec_channel *dst, 535 const union tgsi_exec_channel *src) 536{ 537 dst->u[0] = src->u[0]; 538 dst->u[1] = src->u[1]; 539 dst->u[2] = src->u[2]; 540 dst->u[3] = src->u[3]; 541} 542 543static void 544micro_rcp(union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src) 546{ 547#if 0 /* for debugging */ 548 assert(src->f[0] != 0.0f); 549 assert(src->f[1] != 0.0f); 550 assert(src->f[2] != 0.0f); 551 assert(src->f[3] != 0.0f); 552#endif 553 dst->f[0] = 1.0f / src->f[0]; 554 dst->f[1] = 1.0f / src->f[1]; 555 dst->f[2] = 1.0f / src->f[2]; 556 dst->f[3] = 1.0f / src->f[3]; 557} 558 559static void 560micro_rnd(union tgsi_exec_channel *dst, 561 const union tgsi_exec_channel *src) 562{ 563 dst->f[0] = _mesa_roundevenf(src->f[0]); 564 dst->f[1] = _mesa_roundevenf(src->f[1]); 565 dst->f[2] = _mesa_roundevenf(src->f[2]); 566 dst->f[3] = _mesa_roundevenf(src->f[3]); 567} 568 569static void 570micro_rsq(union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src) 572{ 573#if 0 /* for debugging */ 574 assert(src->f[0] != 0.0f); 575 assert(src->f[1] != 0.0f); 576 assert(src->f[2] != 0.0f); 577 assert(src->f[3] != 0.0f); 578#endif 579 dst->f[0] = 1.0f / sqrtf(src->f[0]); 580 dst->f[1] = 1.0f / sqrtf(src->f[1]); 581 dst->f[2] = 1.0f / sqrtf(src->f[2]); 582 dst->f[3] = 1.0f / sqrtf(src->f[3]); 583} 584 585static void 586micro_sqrt(union tgsi_exec_channel *dst, 587 const union tgsi_exec_channel *src) 588{ 589 dst->f[0] = sqrtf(src->f[0]); 590 dst->f[1] = sqrtf(src->f[1]); 591 dst->f[2] = sqrtf(src->f[2]); 592 dst->f[3] = sqrtf(src->f[3]); 593} 594 595static void 596micro_seq(union tgsi_exec_channel *dst, 597 const union tgsi_exec_channel *src0, 598 const union tgsi_exec_channel *src1) 599{ 600 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 601 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 602 dst->f[2] = src0->f[2] == src1->f[2] ? 
1.0f : 0.0f; 603 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 604} 605 606static void 607micro_sge(union tgsi_exec_channel *dst, 608 const union tgsi_exec_channel *src0, 609 const union tgsi_exec_channel *src1) 610{ 611 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 612 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 613 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 614 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 615} 616 617static void 618micro_sgn(union tgsi_exec_channel *dst, 619 const union tgsi_exec_channel *src) 620{ 621 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 622 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 623 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 624 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 625} 626 627static void 628micro_isgn(union tgsi_exec_channel *dst, 629 const union tgsi_exec_channel *src) 630{ 631 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 632 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 633 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 634 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 635} 636 637static void 638micro_sgt(union tgsi_exec_channel *dst, 639 const union tgsi_exec_channel *src0, 640 const union tgsi_exec_channel *src1) 641{ 642 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 643 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 644 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 645 dst->f[3] = src0->f[3] > src1->f[3] ? 
1.0f : 0.0f; 646} 647 648static void 649micro_sin(union tgsi_exec_channel *dst, 650 const union tgsi_exec_channel *src) 651{ 652 dst->f[0] = sinf(src->f[0]); 653 dst->f[1] = sinf(src->f[1]); 654 dst->f[2] = sinf(src->f[2]); 655 dst->f[3] = sinf(src->f[3]); 656} 657 658static void 659micro_sle(union tgsi_exec_channel *dst, 660 const union tgsi_exec_channel *src0, 661 const union tgsi_exec_channel *src1) 662{ 663 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 664 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 665 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 666 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 667} 668 669static void 670micro_slt(union tgsi_exec_channel *dst, 671 const union tgsi_exec_channel *src0, 672 const union tgsi_exec_channel *src1) 673{ 674 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 675 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 676 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 677 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 678} 679 680static void 681micro_sne(union tgsi_exec_channel *dst, 682 const union tgsi_exec_channel *src0, 683 const union tgsi_exec_channel *src1) 684{ 685 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 686 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 687 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 688 dst->f[3] = src0->f[3] != src1->f[3] ? 
1.0f : 0.0f; 689} 690 691static void 692micro_trunc(union tgsi_exec_channel *dst, 693 const union tgsi_exec_channel *src) 694{ 695 dst->f[0] = truncf(src->f[0]); 696 dst->f[1] = truncf(src->f[1]); 697 dst->f[2] = truncf(src->f[2]); 698 dst->f[3] = truncf(src->f[3]); 699} 700 701static void 702micro_u2d(union tgsi_double_channel *dst, 703 const union tgsi_exec_channel *src) 704{ 705 dst->d[0] = (double)src->u[0]; 706 dst->d[1] = (double)src->u[1]; 707 dst->d[2] = (double)src->u[2]; 708 dst->d[3] = (double)src->u[3]; 709} 710 711static void 712micro_i64abs(union tgsi_double_channel *dst, 713 const union tgsi_double_channel *src) 714{ 715 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 716 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 717 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 718 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 719} 720 721static void 722micro_i64sgn(union tgsi_double_channel *dst, 723 const union tgsi_double_channel *src) 724{ 725 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 726 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 727 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 728 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 729} 730 731static void 732micro_i64neg(union tgsi_double_channel *dst, 733 const union tgsi_double_channel *src) 734{ 735 dst->i64[0] = -src->i64[0]; 736 dst->i64[1] = -src->i64[1]; 737 dst->i64[2] = -src->i64[2]; 738 dst->i64[3] = -src->i64[3]; 739} 740 741static void 742micro_u64seq(union tgsi_double_channel *dst, 743 const union tgsi_double_channel *src) 744{ 745 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 746 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 747 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 748 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? 
~0U : 0U; 749} 750 751static void 752micro_u64sne(union tgsi_double_channel *dst, 753 const union tgsi_double_channel *src) 754{ 755 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 756 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 757 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 758 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 759} 760 761static void 762micro_i64slt(union tgsi_double_channel *dst, 763 const union tgsi_double_channel *src) 764{ 765 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 766 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 767 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 768 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 769} 770 771static void 772micro_u64slt(union tgsi_double_channel *dst, 773 const union tgsi_double_channel *src) 774{ 775 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 776 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 777 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 778 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 779} 780 781static void 782micro_i64sge(union tgsi_double_channel *dst, 783 const union tgsi_double_channel *src) 784{ 785 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 786 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 787 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 788 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 789} 790 791static void 792micro_u64sge(union tgsi_double_channel *dst, 793 const union tgsi_double_channel *src) 794{ 795 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 796 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 797 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 798 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? 
~0U : 0U; 799} 800 801static void 802micro_u64max(union tgsi_double_channel *dst, 803 const union tgsi_double_channel *src) 804{ 805 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 806 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 807 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 808 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 809} 810 811static void 812micro_i64max(union tgsi_double_channel *dst, 813 const union tgsi_double_channel *src) 814{ 815 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 816 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 817 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 818 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 819} 820 821static void 822micro_u64min(union tgsi_double_channel *dst, 823 const union tgsi_double_channel *src) 824{ 825 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 826 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 827 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 828 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 829} 830 831static void 832micro_i64min(union tgsi_double_channel *dst, 833 const union tgsi_double_channel *src) 834{ 835 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 836 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 837 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 838 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? 
src[0].i64[3] : src[1].i64[3]; 839} 840 841static void 842micro_u64add(union tgsi_double_channel *dst, 843 const union tgsi_double_channel *src) 844{ 845 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 846 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 847 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 848 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 849} 850 851static void 852micro_u64mul(union tgsi_double_channel *dst, 853 const union tgsi_double_channel *src) 854{ 855 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 856 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 857 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 858 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 859} 860 861static void 862micro_u64div(union tgsi_double_channel *dst, 863 const union tgsi_double_channel *src) 864{ 865 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 866 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 867 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 868 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 869} 870 871static void 872micro_i64div(union tgsi_double_channel *dst, 873 const union tgsi_double_channel *src) 874{ 875 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 876 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 877 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 878 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 879} 880 881static void 882micro_u64mod(union tgsi_double_channel *dst, 883 const union tgsi_double_channel *src) 884{ 885 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 886 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 887 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 888 dst->u64[3] = src[1].u64[3] ? 
src[0].u64[3] % src[1].u64[3] : ~0ull; 889} 890 891static void 892micro_i64mod(union tgsi_double_channel *dst, 893 const union tgsi_double_channel *src) 894{ 895 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 896 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 897 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 898 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 899} 900 901static void 902micro_u64shl(union tgsi_double_channel *dst, 903 const union tgsi_double_channel *src0, 904 union tgsi_exec_channel *src1) 905{ 906 unsigned masked_count; 907 masked_count = src1->u[0] & 0x3f; 908 dst->u64[0] = src0->u64[0] << masked_count; 909 masked_count = src1->u[1] & 0x3f; 910 dst->u64[1] = src0->u64[1] << masked_count; 911 masked_count = src1->u[2] & 0x3f; 912 dst->u64[2] = src0->u64[2] << masked_count; 913 masked_count = src1->u[3] & 0x3f; 914 dst->u64[3] = src0->u64[3] << masked_count; 915} 916 917static void 918micro_i64shr(union tgsi_double_channel *dst, 919 const union tgsi_double_channel *src0, 920 union tgsi_exec_channel *src1) 921{ 922 unsigned masked_count; 923 masked_count = src1->u[0] & 0x3f; 924 dst->i64[0] = src0->i64[0] >> masked_count; 925 masked_count = src1->u[1] & 0x3f; 926 dst->i64[1] = src0->i64[1] >> masked_count; 927 masked_count = src1->u[2] & 0x3f; 928 dst->i64[2] = src0->i64[2] >> masked_count; 929 masked_count = src1->u[3] & 0x3f; 930 dst->i64[3] = src0->i64[3] >> masked_count; 931} 932 933static void 934micro_u64shr(union tgsi_double_channel *dst, 935 const union tgsi_double_channel *src0, 936 union tgsi_exec_channel *src1) 937{ 938 unsigned masked_count; 939 masked_count = src1->u[0] & 0x3f; 940 dst->u64[0] = src0->u64[0] >> masked_count; 941 masked_count = src1->u[1] & 0x3f; 942 dst->u64[1] = src0->u64[1] >> masked_count; 943 masked_count = src1->u[2] & 0x3f; 944 dst->u64[2] = src0->u64[2] >> masked_count; 945 masked_count = src1->u[3] & 0x3f; 946 dst->u64[3] = 
src0->u64[3] >> masked_count; 947} 948 949enum tgsi_exec_datatype { 950 TGSI_EXEC_DATA_FLOAT, 951 TGSI_EXEC_DATA_INT, 952 TGSI_EXEC_DATA_UINT, 953 TGSI_EXEC_DATA_DOUBLE, 954 TGSI_EXEC_DATA_INT64, 955 TGSI_EXEC_DATA_UINT64, 956}; 957 958/** The execution mask depends on the conditional mask and the loop mask */ 959#define UPDATE_EXEC_MASK(MACH) \ 960 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 961 962 963static const union tgsi_exec_channel ZeroVec = 964 { { 0.0, 0.0, 0.0, 0.0 } }; 965 966static const union tgsi_exec_channel OneVec = { 967 {1.0f, 1.0f, 1.0f, 1.0f} 968}; 969 970static const union tgsi_exec_channel P128Vec = { 971 {128.0f, 128.0f, 128.0f, 128.0f} 972}; 973 974static const union tgsi_exec_channel M128Vec = { 975 {-128.0f, -128.0f, -128.0f, -128.0f} 976}; 977 978#ifdef DEBUG 979static void 980print_chan(const char *msg, const union tgsi_exec_channel *chan) 981{ 982 debug_printf("%s = {%f, %f, %f, %f}\n", 983 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 984} 985#endif 986 987 988#ifdef DEBUG 989static void 990print_temp(const struct tgsi_exec_machine *mach, uint index) 991{ 992 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 993 int i; 994 debug_printf("Temp[%u] =\n", index); 995 for (i = 0; i < 4; i++) { 996 debug_printf(" %c: { %f, %f, %f, %f }\n", 997 "XYZW"[i], 998 tmp->xyzw[i].f[0], 999 tmp->xyzw[i].f[1], 1000 tmp->xyzw[i].f[2], 1001 tmp->xyzw[i].f[3]); 1002 } 1003} 1004#endif 1005 1006 1007void 1008tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1009 unsigned num_bufs, 1010 const void **bufs, 1011 const unsigned *buf_sizes) 1012{ 1013 unsigned i; 1014 1015 for (i = 0; i < num_bufs; i++) { 1016 mach->Consts[i] = bufs[i]; 1017 mach->ConstsSize[i] = buf_sizes[i]; 1018 } 1019} 1020 1021/** 1022 * Initialize machine state by expanding tokens to full instructions, 1023 * allocating temporary storage, setting up constants, etc. 
1024 * After this, we can call tgsi_exec_machine_run() many times. 1025 */ 1026void 1027tgsi_exec_machine_bind_shader( 1028 struct tgsi_exec_machine *mach, 1029 const struct tgsi_token *tokens, 1030 struct tgsi_sampler *sampler, 1031 struct tgsi_image *image, 1032 struct tgsi_buffer *buffer) 1033{ 1034 uint k; 1035 struct tgsi_parse_context parse; 1036 struct tgsi_full_instruction *instructions; 1037 struct tgsi_full_declaration *declarations; 1038 uint maxInstructions = 10, numInstructions = 0; 1039 uint maxDeclarations = 10, numDeclarations = 0; 1040 1041#if 0 1042 tgsi_dump(tokens, 0); 1043#endif 1044 1045 mach->Tokens = tokens; 1046 mach->Sampler = sampler; 1047 mach->Image = image; 1048 mach->Buffer = buffer; 1049 1050 if (!tokens) { 1051 /* unbind and free all */ 1052 FREE(mach->Declarations); 1053 mach->Declarations = NULL; 1054 mach->NumDeclarations = 0; 1055 1056 FREE(mach->Instructions); 1057 mach->Instructions = NULL; 1058 mach->NumInstructions = 0; 1059 1060 return; 1061 } 1062 1063 k = tgsi_parse_init (&parse, mach->Tokens); 1064 if (k != TGSI_PARSE_OK) { 1065 debug_printf( "Problem parsing!\n" ); 1066 return; 1067 } 1068 1069 mach->ImmLimit = 0; 1070 mach->NumOutputs = 0; 1071 1072 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1073 mach->SysSemanticToIndex[k] = -1; 1074 1075 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1076 !mach->UsedGeometryShader) { 1077 struct tgsi_exec_vector *inputs; 1078 struct tgsi_exec_vector *outputs; 1079 1080 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1081 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1082 16); 1083 1084 if (!inputs) 1085 return; 1086 1087 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1088 TGSI_MAX_TOTAL_VERTICES, 16); 1089 1090 if (!outputs) { 1091 align_free(inputs); 1092 return; 1093 } 1094 1095 align_free(mach->Inputs); 1096 align_free(mach->Outputs); 1097 1098 mach->Inputs = inputs; 1099 mach->Outputs = outputs; 1100 mach->UsedGeometryShader = TRUE; 1101 } 1102 1103 
declarations = (struct tgsi_full_declaration *) 1104 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1105 1106 if (!declarations) { 1107 return; 1108 } 1109 1110 instructions = (struct tgsi_full_instruction *) 1111 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1112 1113 if (!instructions) { 1114 FREE( declarations ); 1115 return; 1116 } 1117 1118 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1119 uint i; 1120 1121 tgsi_parse_token( &parse ); 1122 switch( parse.FullToken.Token.Type ) { 1123 case TGSI_TOKEN_TYPE_DECLARATION: 1124 /* save expanded declaration */ 1125 if (numDeclarations == maxDeclarations) { 1126 declarations = REALLOC(declarations, 1127 maxDeclarations 1128 * sizeof(struct tgsi_full_declaration), 1129 (maxDeclarations + 10) 1130 * sizeof(struct tgsi_full_declaration)); 1131 maxDeclarations += 10; 1132 } 1133 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) 1134 mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1); 1135 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1136 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1137 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1138 } 1139 1140 memcpy(declarations + numDeclarations, 1141 &parse.FullToken.FullDeclaration, 1142 sizeof(declarations[0])); 1143 numDeclarations++; 1144 break; 1145 1146 case TGSI_TOKEN_TYPE_IMMEDIATE: 1147 { 1148 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1149 assert( size <= 4 ); 1150 if (mach->ImmLimit >= mach->ImmsReserved) { 1151 unsigned newReserved = mach->ImmsReserved ? 
2 * mach->ImmsReserved : 128; 1152 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4)); 1153 if (imms) { 1154 mach->ImmsReserved = newReserved; 1155 mach->Imms = imms; 1156 } else { 1157 debug_printf("Unable to (re)allocate space for immidiate constants\n"); 1158 break; 1159 } 1160 } 1161 1162 for( i = 0; i < size; i++ ) { 1163 mach->Imms[mach->ImmLimit][i] = 1164 parse.FullToken.FullImmediate.u[i].Float; 1165 } 1166 mach->ImmLimit += 1; 1167 } 1168 break; 1169 1170 case TGSI_TOKEN_TYPE_INSTRUCTION: 1171 1172 /* save expanded instruction */ 1173 if (numInstructions == maxInstructions) { 1174 instructions = REALLOC(instructions, 1175 maxInstructions 1176 * sizeof(struct tgsi_full_instruction), 1177 (maxInstructions + 10) 1178 * sizeof(struct tgsi_full_instruction)); 1179 maxInstructions += 10; 1180 } 1181 1182 memcpy(instructions + numInstructions, 1183 &parse.FullToken.FullInstruction, 1184 sizeof(instructions[0])); 1185 1186 numInstructions++; 1187 break; 1188 1189 case TGSI_TOKEN_TYPE_PROPERTY: 1190 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1191 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1192 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1193 } 1194 } 1195 break; 1196 1197 default: 1198 assert( 0 ); 1199 } 1200 } 1201 tgsi_parse_free (&parse); 1202 1203 FREE(mach->Declarations); 1204 mach->Declarations = declarations; 1205 mach->NumDeclarations = numDeclarations; 1206 1207 FREE(mach->Instructions); 1208 mach->Instructions = instructions; 1209 mach->NumInstructions = numInstructions; 1210} 1211 1212 1213struct tgsi_exec_machine * 1214tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1215{ 1216 struct tgsi_exec_machine *mach; 1217 1218 mach = align_malloc( sizeof *mach, 16 ); 1219 if (!mach) 1220 goto fail; 1221 1222 memset(mach, 0, sizeof(*mach)); 1223 1224 mach->ShaderType = shader_type; 1225 1226 if (shader_type != PIPE_SHADER_COMPUTE) { 
1227 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1228 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1229 if (!mach->Inputs || !mach->Outputs) 1230 goto fail; 1231 } 1232 1233 if (shader_type == PIPE_SHADER_FRAGMENT) { 1234 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16); 1235 if (!mach->InputSampleOffsetApply) 1236 goto fail; 1237 } 1238 1239#ifdef DEBUG 1240 /* silence warnings */ 1241 (void) print_chan; 1242 (void) print_temp; 1243#endif 1244 1245 return mach; 1246 1247fail: 1248 if (mach) { 1249 align_free(mach->InputSampleOffsetApply); 1250 align_free(mach->Inputs); 1251 align_free(mach->Outputs); 1252 align_free(mach); 1253 } 1254 return NULL; 1255} 1256 1257 1258void 1259tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1260{ 1261 if (mach) { 1262 FREE(mach->Instructions); 1263 FREE(mach->Declarations); 1264 FREE(mach->Imms); 1265 1266 align_free(mach->InputSampleOffsetApply); 1267 align_free(mach->Inputs); 1268 align_free(mach->Outputs); 1269 1270 align_free(mach); 1271 } 1272} 1273 1274static void 1275micro_add(union tgsi_exec_channel *dst, 1276 const union tgsi_exec_channel *src0, 1277 const union tgsi_exec_channel *src1) 1278{ 1279 dst->f[0] = src0->f[0] + src1->f[0]; 1280 dst->f[1] = src0->f[1] + src1->f[1]; 1281 dst->f[2] = src0->f[2] + src1->f[2]; 1282 dst->f[3] = src0->f[3] + src1->f[3]; 1283} 1284 1285static void 1286micro_div( 1287 union tgsi_exec_channel *dst, 1288 const union tgsi_exec_channel *src0, 1289 const union tgsi_exec_channel *src1 ) 1290{ 1291 dst->f[0] = src0->f[0] / src1->f[0]; 1292 dst->f[1] = src0->f[1] / src1->f[1]; 1293 dst->f[2] = src0->f[2] / src1->f[2]; 1294 dst->f[3] = src0->f[3] / src1->f[3]; 1295} 1296 1297static void 1298micro_lt( 1299 union tgsi_exec_channel *dst, 1300 const union tgsi_exec_channel *src0, 1301 const union tgsi_exec_channel *src1, 1302 const union 
tgsi_exec_channel *src2, 1303 const union tgsi_exec_channel *src3 ) 1304{ 1305 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1306 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1307 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1308 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 1309} 1310 1311static void 1312micro_max(union tgsi_exec_channel *dst, 1313 const union tgsi_exec_channel *src0, 1314 const union tgsi_exec_channel *src1) 1315{ 1316 dst->f[0] = fmaxf(src0->f[0], src1->f[0]); 1317 dst->f[1] = fmaxf(src0->f[1], src1->f[1]); 1318 dst->f[2] = fmaxf(src0->f[2], src1->f[2]); 1319 dst->f[3] = fmaxf(src0->f[3], src1->f[3]); 1320} 1321 1322static void 1323micro_min(union tgsi_exec_channel *dst, 1324 const union tgsi_exec_channel *src0, 1325 const union tgsi_exec_channel *src1) 1326{ 1327 dst->f[0] = fminf(src0->f[0], src1->f[0]); 1328 dst->f[1] = fminf(src0->f[1], src1->f[1]); 1329 dst->f[2] = fminf(src0->f[2], src1->f[2]); 1330 dst->f[3] = fminf(src0->f[3], src1->f[3]); 1331} 1332 1333static void 1334micro_mul(union tgsi_exec_channel *dst, 1335 const union tgsi_exec_channel *src0, 1336 const union tgsi_exec_channel *src1) 1337{ 1338 dst->f[0] = src0->f[0] * src1->f[0]; 1339 dst->f[1] = src0->f[1] * src1->f[1]; 1340 dst->f[2] = src0->f[2] * src1->f[2]; 1341 dst->f[3] = src0->f[3] * src1->f[3]; 1342} 1343 1344static void 1345micro_neg( 1346 union tgsi_exec_channel *dst, 1347 const union tgsi_exec_channel *src ) 1348{ 1349 dst->f[0] = -src->f[0]; 1350 dst->f[1] = -src->f[1]; 1351 dst->f[2] = -src->f[2]; 1352 dst->f[3] = -src->f[3]; 1353} 1354 1355static void 1356micro_pow( 1357 union tgsi_exec_channel *dst, 1358 const union tgsi_exec_channel *src0, 1359 const union tgsi_exec_channel *src1 ) 1360{ 1361 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1362 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1363 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1364 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1365} 1366 
1367static void 1368micro_ldexp(union tgsi_exec_channel *dst, 1369 const union tgsi_exec_channel *src0, 1370 const union tgsi_exec_channel *src1) 1371{ 1372 dst->f[0] = ldexpf(src0->f[0], src1->i[0]); 1373 dst->f[1] = ldexpf(src0->f[1], src1->i[1]); 1374 dst->f[2] = ldexpf(src0->f[2], src1->i[2]); 1375 dst->f[3] = ldexpf(src0->f[3], src1->i[3]); 1376} 1377 1378static void 1379micro_sub(union tgsi_exec_channel *dst, 1380 const union tgsi_exec_channel *src0, 1381 const union tgsi_exec_channel *src1) 1382{ 1383 dst->f[0] = src0->f[0] - src1->f[0]; 1384 dst->f[1] = src0->f[1] - src1->f[1]; 1385 dst->f[2] = src0->f[2] - src1->f[2]; 1386 dst->f[3] = src0->f[3] - src1->f[3]; 1387} 1388 1389static void 1390fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1391 const uint file, 1392 const uint swizzle, 1393 const union tgsi_exec_channel *index, 1394 const union tgsi_exec_channel *index2D, 1395 union tgsi_exec_channel *chan) 1396{ 1397 uint i; 1398 1399 assert(swizzle < 4); 1400 1401 switch (file) { 1402 case TGSI_FILE_CONSTANT: 1403 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1404 /* NOTE: copying the const value as a uint instead of float */ 1405 const uint constbuf = index2D->i[i]; 1406 const unsigned pos = index->i[i] * 4 + swizzle; 1407 /* const buffer bounds check */ 1408 if (pos >= mach->ConstsSize[constbuf] / 4) { 1409 if (0) { 1410 /* Debug: print warning */ 1411 static int count = 0; 1412 if (count++ < 100) 1413 debug_printf("TGSI Exec: const buffer index %d" 1414 " out of bounds\n", pos); 1415 } 1416 chan->u[i] = 0; 1417 } else { 1418 const uint *buf = (const uint *)mach->Consts[constbuf]; 1419 chan->u[i] = buf[pos]; 1420 } 1421 } 1422 break; 1423 1424 case TGSI_FILE_INPUT: 1425 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1426 /* 1427 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1428 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1429 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1430 index2D->i[i], index->i[i]); 1431 }*/ 1432 int pos = 
index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1433 assert(pos >= 0); 1434 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1435 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1436 } 1437 break; 1438 1439 case TGSI_FILE_SYSTEM_VALUE: 1440 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1441 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1442 } 1443 break; 1444 1445 case TGSI_FILE_TEMPORARY: 1446 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1447 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1448 assert(index2D->i[i] == 0); 1449 1450 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1451 } 1452 break; 1453 1454 case TGSI_FILE_IMMEDIATE: 1455 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1456 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1457 assert(index2D->i[i] == 0); 1458 1459 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1460 } 1461 break; 1462 1463 case TGSI_FILE_ADDRESS: 1464 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1465 assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs)); 1466 assert(index2D->i[i] == 0); 1467 1468 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1469 } 1470 break; 1471 1472 case TGSI_FILE_OUTPUT: 1473 /* vertex/fragment output vars can be read too */ 1474 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1475 assert(index->i[i] >= 0); 1476 assert(index2D->i[i] == 0); 1477 1478 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1479 } 1480 break; 1481 1482 default: 1483 assert(0); 1484 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1485 chan->u[i] = 0; 1486 } 1487 } 1488} 1489 1490static void 1491get_index_registers(const struct tgsi_exec_machine *mach, 1492 const struct tgsi_full_src_register *reg, 1493 union tgsi_exec_channel *index, 1494 union tgsi_exec_channel *index2D) 1495{ 1496 /* We start with a direct index into a register file. 
1497 * 1498 * file[1], 1499 * where: 1500 * file = Register.File 1501 * [1] = Register.Index 1502 */ 1503 index->i[0] = 1504 index->i[1] = 1505 index->i[2] = 1506 index->i[3] = reg->Register.Index; 1507 1508 /* There is an extra source register that indirectly subscripts 1509 * a register file. The direct index now becomes an offset 1510 * that is being added to the indirect register. 1511 * 1512 * file[ind[2].x+1], 1513 * where: 1514 * ind = Indirect.File 1515 * [2] = Indirect.Index 1516 * .x = Indirect.SwizzleX 1517 */ 1518 if (reg->Register.Indirect) { 1519 const uint execmask = mach->ExecMask; 1520 1521 assert(reg->Indirect.File == TGSI_FILE_ADDRESS); 1522 const union tgsi_exec_channel *addr = &mach->Addrs[reg->Indirect.Index].xyzw[reg->Indirect.Swizzle]; 1523 for (int i = 0; i < TGSI_QUAD_SIZE; i++) 1524 index->i[i] += addr->u[i]; 1525 1526 /* for disabled execution channels, zero-out the index to 1527 * avoid using a potential garbage value. 1528 */ 1529 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 1530 if ((execmask & (1 << i)) == 0) 1531 index->i[i] = 0; 1532 } 1533 } 1534 1535 /* There is an extra source register that is a second 1536 * subscript to a register file. Effectively it means that 1537 * the register file is actually a 2D array of registers. 1538 * 1539 * file[3][1], 1540 * where: 1541 * [3] = Dimension.Index 1542 */ 1543 if (reg->Register.Dimension) { 1544 index2D->i[0] = 1545 index2D->i[1] = 1546 index2D->i[2] = 1547 index2D->i[3] = reg->Dimension.Index; 1548 1549 /* Again, the second subscript index can be addressed indirectly 1550 * identically to the first one. 1551 * Nothing stops us from indirectly addressing the indirect register, 1552 * but there is no need for that, so we won't exercise it. 
1553 * 1554 * file[ind[4].y+3][1], 1555 * where: 1556 * ind = DimIndirect.File 1557 * [4] = DimIndirect.Index 1558 * .y = DimIndirect.SwizzleX 1559 */ 1560 if (reg->Dimension.Indirect) { 1561 const uint execmask = mach->ExecMask; 1562 1563 assert(reg->DimIndirect.File == TGSI_FILE_ADDRESS); 1564 const union tgsi_exec_channel *addr = &mach->Addrs[reg->DimIndirect.Index].xyzw[reg->DimIndirect.Swizzle]; 1565 for (int i = 0; i < TGSI_QUAD_SIZE; i++) 1566 index2D->i[i] += addr->u[i]; 1567 1568 /* for disabled execution channels, zero-out the index to 1569 * avoid using a potential garbage value. 1570 */ 1571 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 1572 if ((execmask & (1 << i)) == 0) { 1573 index2D->i[i] = 0; 1574 } 1575 } 1576 } 1577 1578 /* If by any chance there was a need for a 3D array of register 1579 * files, we would have to check whether Dimension is followed 1580 * by a dimension register and continue the saga. 1581 */ 1582 } else { 1583 index2D->i[0] = 1584 index2D->i[1] = 1585 index2D->i[2] = 1586 index2D->i[3] = 0; 1587 } 1588} 1589 1590 1591static void 1592fetch_source_d(const struct tgsi_exec_machine *mach, 1593 union tgsi_exec_channel *chan, 1594 const struct tgsi_full_src_register *reg, 1595 const uint chan_index) 1596{ 1597 union tgsi_exec_channel index; 1598 union tgsi_exec_channel index2D; 1599 uint swizzle; 1600 1601 get_index_registers(mach, reg, &index, &index2D); 1602 1603 1604 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1605 fetch_src_file_channel(mach, 1606 reg->Register.File, 1607 swizzle, 1608 &index, 1609 &index2D, 1610 chan); 1611} 1612 1613static void 1614fetch_source(const struct tgsi_exec_machine *mach, 1615 union tgsi_exec_channel *chan, 1616 const struct tgsi_full_src_register *reg, 1617 const uint chan_index, 1618 enum tgsi_exec_datatype src_datatype) 1619{ 1620 fetch_source_d(mach, chan, reg, chan_index); 1621 1622 if (reg->Register.Absolute) { 1623 assert(src_datatype == TGSI_EXEC_DATA_FLOAT); 1624 
micro_abs(chan, chan); 1625 } 1626 1627 if (reg->Register.Negate) { 1628 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1629 micro_neg(chan, chan); 1630 } else { 1631 micro_ineg(chan, chan); 1632 } 1633 } 1634} 1635 1636static union tgsi_exec_channel * 1637store_dest_dstret(struct tgsi_exec_machine *mach, 1638 const union tgsi_exec_channel *chan, 1639 const struct tgsi_full_dst_register *reg, 1640 uint chan_index) 1641{ 1642 static union tgsi_exec_channel null; 1643 union tgsi_exec_channel *dst; 1644 int offset = 0; /* indirection offset */ 1645 int index; 1646 1647 1648 /* There is an extra source register that indirectly subscripts 1649 * a register file. The direct index now becomes an offset 1650 * that is being added to the indirect register. 1651 * 1652 * file[ind[2].x+1], 1653 * where: 1654 * ind = Indirect.File 1655 * [2] = Indirect.Index 1656 * .x = Indirect.SwizzleX 1657 */ 1658 if (reg->Register.Indirect) { 1659 union tgsi_exec_channel index; 1660 union tgsi_exec_channel indir_index; 1661 uint swizzle; 1662 1663 /* which address register (always zero for now) */ 1664 index.i[0] = 1665 index.i[1] = 1666 index.i[2] = 1667 index.i[3] = reg->Indirect.Index; 1668 1669 /* get current value of address register[swizzle] */ 1670 swizzle = reg->Indirect.Swizzle; 1671 1672 /* fetch values from the address/indirection register */ 1673 fetch_src_file_channel(mach, 1674 reg->Indirect.File, 1675 swizzle, 1676 &index, 1677 &ZeroVec, 1678 &indir_index); 1679 1680 /* save indirection offset */ 1681 offset = indir_index.i[0]; 1682 } 1683 1684 switch (reg->Register.File) { 1685 case TGSI_FILE_NULL: 1686 dst = &null; 1687 break; 1688 1689 case TGSI_FILE_OUTPUT: 1690 index = mach->OutputVertexOffset + reg->Register.Index; 1691 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1692#if 0 1693 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1694 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1695 reg->Register.Index); 1696 if 
(PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1697 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1698 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1699 if (execmask & (1 << i)) 1700 debug_printf("%f, ", chan->f[i]); 1701 debug_printf(")\n"); 1702 } 1703#endif 1704 break; 1705 1706 case TGSI_FILE_TEMPORARY: 1707 index = reg->Register.Index; 1708 assert( index < TGSI_EXEC_NUM_TEMPS ); 1709 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1710 break; 1711 1712 case TGSI_FILE_ADDRESS: 1713 index = reg->Register.Index; 1714 assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs)); 1715 dst = &mach->Addrs[index].xyzw[chan_index]; 1716 break; 1717 1718 default: 1719 unreachable("Bad destination file"); 1720 } 1721 1722 return dst; 1723} 1724 1725static void 1726store_dest_double(struct tgsi_exec_machine *mach, 1727 const union tgsi_exec_channel *chan, 1728 const struct tgsi_full_dst_register *reg, 1729 uint chan_index) 1730{ 1731 union tgsi_exec_channel *dst; 1732 const uint execmask = mach->ExecMask; 1733 int i; 1734 1735 dst = store_dest_dstret(mach, chan, reg, chan_index); 1736 if (!dst) 1737 return; 1738 1739 /* doubles path */ 1740 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1741 if (execmask & (1 << i)) 1742 dst->i[i] = chan->i[i]; 1743} 1744 1745static void 1746store_dest(struct tgsi_exec_machine *mach, 1747 const union tgsi_exec_channel *chan, 1748 const struct tgsi_full_dst_register *reg, 1749 const struct tgsi_full_instruction *inst, 1750 uint chan_index) 1751{ 1752 union tgsi_exec_channel *dst; 1753 const uint execmask = mach->ExecMask; 1754 int i; 1755 1756 dst = store_dest_dstret(mach, chan, reg, chan_index); 1757 if (!dst) 1758 return; 1759 1760 if (!inst->Instruction.Saturate) { 1761 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1762 if (execmask & (1 << i)) 1763 dst->i[i] = chan->i[i]; 1764 } 1765 else { 1766 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1767 if (execmask & (1 << i)) 1768 dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f); 1769 } 1770} 1771 
1772#define FETCH(VAL,INDEX,CHAN)\ 1773 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1774 1775#define IFETCH(VAL,INDEX,CHAN)\ 1776 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1777 1778 1779/** 1780 * Execute ARB-style KIL which is predicated by a src register. 1781 * Kill fragment if any of the four values is less than zero. 1782 */ 1783static void 1784exec_kill_if(struct tgsi_exec_machine *mach, 1785 const struct tgsi_full_instruction *inst) 1786{ 1787 uint uniquemask; 1788 uint chan_index; 1789 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1790 union tgsi_exec_channel r[1]; 1791 1792 /* This mask stores component bits that were already tested. */ 1793 uniquemask = 0; 1794 1795 for (chan_index = 0; chan_index < 4; chan_index++) 1796 { 1797 uint swizzle; 1798 uint i; 1799 1800 /* unswizzle channel */ 1801 swizzle = tgsi_util_get_full_src_register_swizzle ( 1802 &inst->Src[0], 1803 chan_index); 1804 1805 /* check if the component has not been already tested */ 1806 if (uniquemask & (1 << swizzle)) 1807 continue; 1808 uniquemask |= 1 << swizzle; 1809 1810 FETCH(&r[0], 0, chan_index); 1811 for (i = 0; i < 4; i++) 1812 if (r[0].f[i] < 0.0f) 1813 kilmask |= 1 << i; 1814 } 1815 1816 /* restrict to fragments currently executing */ 1817 kilmask &= mach->ExecMask; 1818 1819 mach->KillMask |= kilmask; 1820} 1821 1822/** 1823 * Unconditional fragment kill/discard. 1824 */ 1825static void 1826exec_kill(struct tgsi_exec_machine *mach) 1827{ 1828 /* kill fragment for all fragments currently executing. 1829 * bit 0 = pixel 0, bit 1 = pixel 1, etc. 
1830 */ 1831 mach->KillMask |= mach->ExecMask; 1832} 1833 1834static void 1835emit_vertex(struct tgsi_exec_machine *mach, 1836 const struct tgsi_full_instruction *inst) 1837{ 1838 union tgsi_exec_channel r[1]; 1839 unsigned stream_id; 1840 unsigned prim_count; 1841 /* FIXME: check for exec mask correctly 1842 unsigned i; 1843 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1844 if ((mach->ExecMask & (1 << i))) 1845 */ 1846 IFETCH(&r[0], 0, TGSI_CHAN_X); 1847 stream_id = r[0].u[0]; 1848 prim_count = mach->OutputPrimCount[stream_id]; 1849 if (mach->ExecMask) { 1850 if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices) 1851 return; 1852 1853 if (mach->Primitives[stream_id][prim_count] == 0) 1854 mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset; 1855 mach->OutputVertexOffset += mach->NumOutputs; 1856 mach->Primitives[stream_id][prim_count]++; 1857 } 1858} 1859 1860static void 1861emit_primitive(struct tgsi_exec_machine *mach, 1862 const struct tgsi_full_instruction *inst) 1863{ 1864 unsigned *prim_count; 1865 union tgsi_exec_channel r[1]; 1866 unsigned stream_id = 0; 1867 /* FIXME: check for exec mask correctly 1868 unsigned i; 1869 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1870 if ((mach->ExecMask & (1 << i))) 1871 */ 1872 if (inst) { 1873 IFETCH(&r[0], 0, TGSI_CHAN_X); 1874 stream_id = r[0].u[0]; 1875 } 1876 prim_count = &mach->OutputPrimCount[stream_id]; 1877 if (mach->ExecMask) { 1878 ++(*prim_count); 1879 assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES); 1880 mach->Primitives[stream_id][*prim_count] = 0; 1881 } 1882} 1883 1884static void 1885conditional_emit_primitive(struct tgsi_exec_machine *mach) 1886{ 1887 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1888 int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]]; 1889 if (emitted_verts) { 1890 emit_primitive(mach, NULL); 1891 } 1892 } 1893} 1894 1895 1896/* 1897 * Fetch four texture samples using STR texture coordinates. 
1898 */ 1899static void 1900fetch_texel( struct tgsi_sampler *sampler, 1901 const unsigned sview_idx, 1902 const unsigned sampler_idx, 1903 const union tgsi_exec_channel *s, 1904 const union tgsi_exec_channel *t, 1905 const union tgsi_exec_channel *p, 1906 const union tgsi_exec_channel *c0, 1907 const union tgsi_exec_channel *c1, 1908 float derivs[3][2][TGSI_QUAD_SIZE], 1909 const int8_t offset[3], 1910 enum tgsi_sampler_control control, 1911 union tgsi_exec_channel *r, 1912 union tgsi_exec_channel *g, 1913 union tgsi_exec_channel *b, 1914 union tgsi_exec_channel *a ) 1915{ 1916 uint j; 1917 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 1918 1919 /* FIXME: handle explicit derivs, offsets */ 1920 sampler->get_samples(sampler, sview_idx, sampler_idx, 1921 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 1922 1923 for (j = 0; j < 4; j++) { 1924 r->f[j] = rgba[0][j]; 1925 g->f[j] = rgba[1][j]; 1926 b->f[j] = rgba[2][j]; 1927 a->f[j] = rgba[3][j]; 1928 } 1929} 1930 1931 1932#define TEX_MODIFIER_NONE 0 1933#define TEX_MODIFIER_PROJECTED 1 1934#define TEX_MODIFIER_LOD_BIAS 2 1935#define TEX_MODIFIER_EXPLICIT_LOD 3 1936#define TEX_MODIFIER_LEVEL_ZERO 4 1937#define TEX_MODIFIER_GATHER 5 1938 1939/* 1940 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 
1941 */ 1942static void 1943fetch_texel_offsets(struct tgsi_exec_machine *mach, 1944 const struct tgsi_full_instruction *inst, 1945 int8_t offsets[3]) 1946{ 1947 if (inst->Texture.NumOffsets == 1) { 1948 union tgsi_exec_channel index; 1949 union tgsi_exec_channel offset[3]; 1950 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 1951 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 1952 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 1953 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 1954 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 1955 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 1956 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 1957 offsets[0] = offset[0].i[0]; 1958 offsets[1] = offset[1].i[0]; 1959 offsets[2] = offset[2].i[0]; 1960 } else { 1961 assert(inst->Texture.NumOffsets == 0); 1962 offsets[0] = offsets[1] = offsets[2] = 0; 1963 } 1964} 1965 1966 1967/* 1968 * Fetch dx and dy values for one channel (s, t or r). 1969 * Put dx values into one float array, dy values into another. 
1970 */ 1971static void 1972fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 1973 const struct tgsi_full_instruction *inst, 1974 unsigned regdsrcx, 1975 unsigned chan, 1976 float derivs[2][TGSI_QUAD_SIZE]) 1977{ 1978 union tgsi_exec_channel d; 1979 FETCH(&d, regdsrcx, chan); 1980 derivs[0][0] = d.f[0]; 1981 derivs[0][1] = d.f[1]; 1982 derivs[0][2] = d.f[2]; 1983 derivs[0][3] = d.f[3]; 1984 FETCH(&d, regdsrcx + 1, chan); 1985 derivs[1][0] = d.f[0]; 1986 derivs[1][1] = d.f[1]; 1987 derivs[1][2] = d.f[2]; 1988 derivs[1][3] = d.f[3]; 1989} 1990 1991static uint 1992fetch_sampler_unit(struct tgsi_exec_machine *mach, 1993 const struct tgsi_full_instruction *inst, 1994 uint sampler) 1995{ 1996 uint unit = 0; 1997 int i; 1998 if (inst->Src[sampler].Register.Indirect) { 1999 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2000 union tgsi_exec_channel indir_index, index2; 2001 const uint execmask = mach->ExecMask; 2002 index2.i[0] = 2003 index2.i[1] = 2004 index2.i[2] = 2005 index2.i[3] = reg->Indirect.Index; 2006 2007 fetch_src_file_channel(mach, 2008 reg->Indirect.File, 2009 reg->Indirect.Swizzle, 2010 &index2, 2011 &ZeroVec, 2012 &indir_index); 2013 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2014 if (execmask & (1 << i)) { 2015 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2016 break; 2017 } 2018 } 2019 2020 } else { 2021 unit = inst->Src[sampler].Register.Index; 2022 } 2023 return unit; 2024} 2025 2026/* 2027 * execute a texture instruction. 2028 * 2029 * modifier is used to control the channel routing for the 2030 * instruction variants like proj, lod, and texture with lod bias. 2031 * sampler indicates which src register the sampler is contained in. 
2032 */ 2033static void 2034exec_tex(struct tgsi_exec_machine *mach, 2035 const struct tgsi_full_instruction *inst, 2036 uint modifier, uint sampler) 2037{ 2038 const union tgsi_exec_channel *args[5], *proj = NULL; 2039 union tgsi_exec_channel r[5]; 2040 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2041 uint chan; 2042 uint unit; 2043 int8_t offsets[3]; 2044 int dim, shadow_ref, i; 2045 2046 unit = fetch_sampler_unit(mach, inst, sampler); 2047 /* always fetch all 3 offsets, overkill but keeps code simple */ 2048 fetch_texel_offsets(mach, inst, offsets); 2049 2050 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2051 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2052 2053 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2054 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2055 2056 assert(dim <= 4); 2057 if (shadow_ref >= 0) 2058 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args)); 2059 2060 /* fetch modifier to the last argument */ 2061 if (modifier != TEX_MODIFIER_NONE) { 2062 const int last = ARRAY_SIZE(args) - 1; 2063 2064 /* fetch modifier from src0.w or src1.x */ 2065 if (sampler == 1) { 2066 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2067 FETCH(&r[last], 0, TGSI_CHAN_W); 2068 } 2069 else { 2070 FETCH(&r[last], 1, TGSI_CHAN_X); 2071 } 2072 2073 if (modifier != TEX_MODIFIER_PROJECTED) { 2074 args[last] = &r[last]; 2075 } 2076 else { 2077 proj = &r[last]; 2078 args[last] = &ZeroVec; 2079 } 2080 2081 /* point unused arguments to zero vector */ 2082 for (i = dim; i < last; i++) 2083 args[i] = &ZeroVec; 2084 2085 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2086 control = TGSI_SAMPLER_LOD_EXPLICIT; 2087 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2088 control = TGSI_SAMPLER_LOD_BIAS; 2089 else if (modifier == TEX_MODIFIER_GATHER) 2090 control = TGSI_SAMPLER_GATHER; 2091 } 2092 else { 2093 for (i = dim; i < (int)ARRAY_SIZE(args); i++) 2094 args[i] = &ZeroVec; 2095 } 2096 2097 /* fetch 
coordinates */ 2098 for (i = 0; i < dim; i++) { 2099 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2100 2101 if (proj) 2102 micro_div(&r[i], &r[i], proj); 2103 2104 args[i] = &r[i]; 2105 } 2106 2107 /* fetch reference value */ 2108 if (shadow_ref >= 0) { 2109 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2110 2111 if (proj) 2112 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2113 2114 args[shadow_ref] = &r[shadow_ref]; 2115 } 2116 2117 fetch_texel(mach->Sampler, unit, unit, 2118 args[0], args[1], args[2], args[3], args[4], 2119 NULL, offsets, control, 2120 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2121 2122#if 0 2123 debug_printf("fetch r: %g %g %g %g\n", 2124 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2125 debug_printf("fetch g: %g %g %g %g\n", 2126 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2127 debug_printf("fetch b: %g %g %g %g\n", 2128 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2129 debug_printf("fetch a: %g %g %g %g\n", 2130 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2131#endif 2132 2133 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2134 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2135 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2136 } 2137 } 2138} 2139 2140static void 2141exec_lodq(struct tgsi_exec_machine *mach, 2142 const struct tgsi_full_instruction *inst) 2143{ 2144 uint resource_unit, sampler_unit; 2145 unsigned dim; 2146 unsigned i; 2147 union tgsi_exec_channel coords[4]; 2148 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2149 union tgsi_exec_channel r[2]; 2150 2151 resource_unit = fetch_sampler_unit(mach, inst, 1); 2152 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2153 uint target = mach->SamplerViews[resource_unit].Resource; 2154 dim = tgsi_util_get_texture_coord_dim(target); 2155 sampler_unit = fetch_sampler_unit(mach, inst, 2); 2156 } else { 2157 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2158 sampler_unit = resource_unit; 2159 } 2160 assert(dim <= 
ARRAY_SIZE(coords)); 2161 /* fetch coordinates */ 2162 for (i = 0; i < dim; i++) { 2163 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2164 args[i] = &coords[i]; 2165 } 2166 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2167 args[i] = &ZeroVec; 2168 } 2169 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 2170 args[0]->f, 2171 args[1]->f, 2172 args[2]->f, 2173 args[3]->f, 2174 TGSI_SAMPLER_LOD_NONE, 2175 r[0].f, 2176 r[1].f); 2177 2178 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2179 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 2180 } 2181 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2182 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 2183 } 2184 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2185 unsigned char swizzles[4]; 2186 unsigned chan; 2187 swizzles[0] = inst->Src[1].Register.SwizzleX; 2188 swizzles[1] = inst->Src[1].Register.SwizzleY; 2189 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2190 swizzles[3] = inst->Src[1].Register.SwizzleW; 2191 2192 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2193 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2194 if (swizzles[chan] >= 2) { 2195 store_dest(mach, &ZeroVec, 2196 &inst->Dst[0], inst, chan); 2197 } else { 2198 store_dest(mach, &r[swizzles[chan]], 2199 &inst->Dst[0], inst, chan); 2200 } 2201 } 2202 } 2203 } else { 2204 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2205 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 2206 } 2207 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2208 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 2209 } 2210 } 2211} 2212 2213static void 2214exec_txd(struct tgsi_exec_machine *mach, 2215 const struct tgsi_full_instruction *inst) 2216{ 2217 union tgsi_exec_channel r[4]; 2218 float derivs[3][2][TGSI_QUAD_SIZE]; 2219 uint chan; 2220 uint unit; 2221 int8_t offsets[3]; 2222 2223 unit = fetch_sampler_unit(mach, inst, 3); 2224 /* always fetch all 3 offsets, overkill but 
keeps code simple */ 2225 fetch_texel_offsets(mach, inst, offsets); 2226 2227 switch (inst->Texture.Texture) { 2228 case TGSI_TEXTURE_1D: 2229 FETCH(&r[0], 0, TGSI_CHAN_X); 2230 2231 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2232 2233 fetch_texel(mach->Sampler, unit, unit, 2234 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2235 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2236 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2237 break; 2238 2239 case TGSI_TEXTURE_SHADOW1D: 2240 case TGSI_TEXTURE_1D_ARRAY: 2241 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2242 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2243 FETCH(&r[0], 0, TGSI_CHAN_X); 2244 FETCH(&r[1], 0, TGSI_CHAN_Y); 2245 FETCH(&r[2], 0, TGSI_CHAN_Z); 2246 2247 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2248 2249 fetch_texel(mach->Sampler, unit, unit, 2250 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2251 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2252 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2253 break; 2254 2255 case TGSI_TEXTURE_2D: 2256 case TGSI_TEXTURE_RECT: 2257 FETCH(&r[0], 0, TGSI_CHAN_X); 2258 FETCH(&r[1], 0, TGSI_CHAN_Y); 2259 2260 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2261 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2262 2263 fetch_texel(mach->Sampler, unit, unit, 2264 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2265 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2266 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2267 break; 2268 2269 2270 case TGSI_TEXTURE_SHADOW2D: 2271 case TGSI_TEXTURE_SHADOWRECT: 2272 case TGSI_TEXTURE_2D_ARRAY: 2273 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2274 /* only SHADOW2D_ARRAY actually needs W */ 2275 FETCH(&r[0], 0, TGSI_CHAN_X); 2276 FETCH(&r[1], 0, TGSI_CHAN_Y); 2277 FETCH(&r[2], 0, TGSI_CHAN_Z); 2278 FETCH(&r[3], 0, TGSI_CHAN_W); 2279 2280 fetch_assign_deriv_channel(mach, inst, 1, 
TGSI_CHAN_X, derivs[0]); 2281 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2282 2283 fetch_texel(mach->Sampler, unit, unit, 2284 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2285 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2286 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2287 break; 2288 2289 case TGSI_TEXTURE_3D: 2290 case TGSI_TEXTURE_CUBE: 2291 case TGSI_TEXTURE_CUBE_ARRAY: 2292 case TGSI_TEXTURE_SHADOWCUBE: 2293 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2294 FETCH(&r[0], 0, TGSI_CHAN_X); 2295 FETCH(&r[1], 0, TGSI_CHAN_Y); 2296 FETCH(&r[2], 0, TGSI_CHAN_Z); 2297 FETCH(&r[3], 0, TGSI_CHAN_W); 2298 2299 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2300 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2301 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2302 2303 fetch_texel(mach->Sampler, unit, unit, 2304 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2305 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2306 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2307 break; 2308 2309 default: 2310 assert(0); 2311 } 2312 2313 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2314 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2315 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2316 } 2317 } 2318} 2319 2320 2321static void 2322exec_txf(struct tgsi_exec_machine *mach, 2323 const struct tgsi_full_instruction *inst) 2324{ 2325 union tgsi_exec_channel r[4]; 2326 uint chan; 2327 uint unit; 2328 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2329 int j; 2330 int8_t offsets[3]; 2331 unsigned target; 2332 2333 unit = fetch_sampler_unit(mach, inst, 1); 2334 /* always fetch all 3 offsets, overkill but keeps code simple */ 2335 fetch_texel_offsets(mach, inst, offsets); 2336 2337 IFETCH(&r[3], 0, TGSI_CHAN_W); 2338 2339 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2340 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2341 target = 
mach->SamplerViews[unit].Resource; 2342 } 2343 else { 2344 target = inst->Texture.Texture; 2345 } 2346 switch(target) { 2347 case TGSI_TEXTURE_3D: 2348 case TGSI_TEXTURE_2D_ARRAY: 2349 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2350 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2351 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2352 FALLTHROUGH; 2353 case TGSI_TEXTURE_2D: 2354 case TGSI_TEXTURE_RECT: 2355 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2356 case TGSI_TEXTURE_SHADOW2D: 2357 case TGSI_TEXTURE_SHADOWRECT: 2358 case TGSI_TEXTURE_1D_ARRAY: 2359 case TGSI_TEXTURE_2D_MSAA: 2360 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2361 FALLTHROUGH; 2362 case TGSI_TEXTURE_BUFFER: 2363 case TGSI_TEXTURE_1D: 2364 case TGSI_TEXTURE_SHADOW1D: 2365 IFETCH(&r[0], 0, TGSI_CHAN_X); 2366 break; 2367 default: 2368 assert(0); 2369 break; 2370 } 2371 2372 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2373 offsets, rgba); 2374 2375 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2376 r[0].f[j] = rgba[0][j]; 2377 r[1].f[j] = rgba[1][j]; 2378 r[2].f[j] = rgba[2][j]; 2379 r[3].f[j] = rgba[3][j]; 2380 } 2381 2382 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2383 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2384 unsigned char swizzles[4]; 2385 swizzles[0] = inst->Src[1].Register.SwizzleX; 2386 swizzles[1] = inst->Src[1].Register.SwizzleY; 2387 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2388 swizzles[3] = inst->Src[1].Register.SwizzleW; 2389 2390 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2391 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2392 store_dest(mach, &r[swizzles[chan]], 2393 &inst->Dst[0], inst, chan); 2394 } 2395 } 2396 } 2397 else { 2398 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2399 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2400 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2401 } 2402 } 2403 } 2404} 2405 2406static void 2407exec_txq(struct tgsi_exec_machine *mach, 2408 const struct tgsi_full_instruction *inst) 2409{ 2410 int result[4]; 2411 
union tgsi_exec_channel r[4], src; 2412 uint chan; 2413 uint unit; 2414 int i,j; 2415 2416 unit = fetch_sampler_unit(mach, inst, 1); 2417 2418 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2419 2420 /* XXX: This interface can't return per-pixel values */ 2421 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2422 2423 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2424 for (j = 0; j < 4; j++) { 2425 r[j].i[i] = result[j]; 2426 } 2427 } 2428 2429 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2430 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2431 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2432 } 2433 } 2434} 2435 2436static void 2437exec_sample(struct tgsi_exec_machine *mach, 2438 const struct tgsi_full_instruction *inst, 2439 uint modifier, boolean compare) 2440{ 2441 const uint resource_unit = inst->Src[1].Register.Index; 2442 const uint sampler_unit = inst->Src[2].Register.Index; 2443 union tgsi_exec_channel r[5], c1; 2444 const union tgsi_exec_channel *lod = &ZeroVec; 2445 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2446 uint chan; 2447 unsigned char swizzles[4]; 2448 int8_t offsets[3]; 2449 2450 /* always fetch all 3 offsets, overkill but keeps code simple */ 2451 fetch_texel_offsets(mach, inst, offsets); 2452 2453 assert(modifier != TEX_MODIFIER_PROJECTED); 2454 2455 if (modifier != TEX_MODIFIER_NONE) { 2456 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2457 FETCH(&c1, 3, TGSI_CHAN_X); 2458 lod = &c1; 2459 control = TGSI_SAMPLER_LOD_BIAS; 2460 } 2461 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2462 FETCH(&c1, 3, TGSI_CHAN_X); 2463 lod = &c1; 2464 control = TGSI_SAMPLER_LOD_EXPLICIT; 2465 } 2466 else if (modifier == TEX_MODIFIER_GATHER) { 2467 control = TGSI_SAMPLER_GATHER; 2468 } 2469 else { 2470 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2471 control = TGSI_SAMPLER_LOD_ZERO; 2472 } 2473 } 2474 2475 FETCH(&r[0], 0, TGSI_CHAN_X); 2476 2477 switch 
(mach->SamplerViews[resource_unit].Resource) { 2478 case TGSI_TEXTURE_1D: 2479 if (compare) { 2480 FETCH(&r[2], 3, TGSI_CHAN_X); 2481 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2482 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2483 NULL, offsets, control, 2484 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2485 } 2486 else { 2487 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2488 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2489 NULL, offsets, control, 2490 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2491 } 2492 break; 2493 2494 case TGSI_TEXTURE_1D_ARRAY: 2495 case TGSI_TEXTURE_2D: 2496 case TGSI_TEXTURE_RECT: 2497 FETCH(&r[1], 0, TGSI_CHAN_Y); 2498 if (compare) { 2499 FETCH(&r[2], 3, TGSI_CHAN_X); 2500 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2501 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2502 NULL, offsets, control, 2503 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2504 } 2505 else { 2506 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2507 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2508 NULL, offsets, control, 2509 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2510 } 2511 break; 2512 2513 case TGSI_TEXTURE_2D_ARRAY: 2514 case TGSI_TEXTURE_3D: 2515 case TGSI_TEXTURE_CUBE: 2516 FETCH(&r[1], 0, TGSI_CHAN_Y); 2517 FETCH(&r[2], 0, TGSI_CHAN_Z); 2518 if(compare) { 2519 FETCH(&r[3], 3, TGSI_CHAN_X); 2520 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2521 &r[0], &r[1], &r[2], &r[3], lod, 2522 NULL, offsets, control, 2523 &r[0], &r[1], &r[2], &r[3]); 2524 } 2525 else { 2526 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2527 &r[0], &r[1], &r[2], &ZeroVec, lod, 2528 NULL, offsets, control, 2529 &r[0], &r[1], &r[2], &r[3]); 2530 } 2531 break; 2532 2533 case TGSI_TEXTURE_CUBE_ARRAY: 2534 FETCH(&r[1], 0, TGSI_CHAN_Y); 2535 FETCH(&r[2], 0, TGSI_CHAN_Z); 2536 FETCH(&r[3], 0, TGSI_CHAN_W); 2537 if(compare) { 2538 FETCH(&r[4], 3, TGSI_CHAN_X); 2539 
fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2540 &r[0], &r[1], &r[2], &r[3], &r[4], 2541 NULL, offsets, control, 2542 &r[0], &r[1], &r[2], &r[3]); 2543 } 2544 else { 2545 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2546 &r[0], &r[1], &r[2], &r[3], lod, 2547 NULL, offsets, control, 2548 &r[0], &r[1], &r[2], &r[3]); 2549 } 2550 break; 2551 2552 2553 default: 2554 assert(0); 2555 } 2556 2557 swizzles[0] = inst->Src[1].Register.SwizzleX; 2558 swizzles[1] = inst->Src[1].Register.SwizzleY; 2559 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2560 swizzles[3] = inst->Src[1].Register.SwizzleW; 2561 2562 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2563 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2564 store_dest(mach, &r[swizzles[chan]], 2565 &inst->Dst[0], inst, chan); 2566 } 2567 } 2568} 2569 2570static void 2571exec_sample_d(struct tgsi_exec_machine *mach, 2572 const struct tgsi_full_instruction *inst) 2573{ 2574 const uint resource_unit = inst->Src[1].Register.Index; 2575 const uint sampler_unit = inst->Src[2].Register.Index; 2576 union tgsi_exec_channel r[4]; 2577 float derivs[3][2][TGSI_QUAD_SIZE]; 2578 uint chan; 2579 unsigned char swizzles[4]; 2580 int8_t offsets[3]; 2581 2582 /* always fetch all 3 offsets, overkill but keeps code simple */ 2583 fetch_texel_offsets(mach, inst, offsets); 2584 2585 FETCH(&r[0], 0, TGSI_CHAN_X); 2586 2587 switch (mach->SamplerViews[resource_unit].Resource) { 2588 case TGSI_TEXTURE_1D: 2589 case TGSI_TEXTURE_1D_ARRAY: 2590 /* only 1D array actually needs Y */ 2591 FETCH(&r[1], 0, TGSI_CHAN_Y); 2592 2593 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2594 2595 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2596 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2597 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2598 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2599 break; 2600 2601 case TGSI_TEXTURE_2D: 2602 case TGSI_TEXTURE_RECT: 2603 case 
TGSI_TEXTURE_2D_ARRAY: 2604 /* only 2D array actually needs Z */ 2605 FETCH(&r[1], 0, TGSI_CHAN_Y); 2606 FETCH(&r[2], 0, TGSI_CHAN_Z); 2607 2608 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2609 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2610 2611 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2612 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2613 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2614 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2615 break; 2616 2617 case TGSI_TEXTURE_3D: 2618 case TGSI_TEXTURE_CUBE: 2619 case TGSI_TEXTURE_CUBE_ARRAY: 2620 /* only cube array actually needs W */ 2621 FETCH(&r[1], 0, TGSI_CHAN_Y); 2622 FETCH(&r[2], 0, TGSI_CHAN_Z); 2623 FETCH(&r[3], 0, TGSI_CHAN_W); 2624 2625 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2626 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2627 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2628 2629 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2630 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2631 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2632 &r[0], &r[1], &r[2], &r[3]); 2633 break; 2634 2635 default: 2636 assert(0); 2637 } 2638 2639 swizzles[0] = inst->Src[1].Register.SwizzleX; 2640 swizzles[1] = inst->Src[1].Register.SwizzleY; 2641 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2642 swizzles[3] = inst->Src[1].Register.SwizzleW; 2643 2644 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2645 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2646 store_dest(mach, &r[swizzles[chan]], 2647 &inst->Dst[0], inst, chan); 2648 } 2649 } 2650} 2651 2652 2653/** 2654 * Evaluate a constant-valued coefficient at the position of the 2655 * current quad. 
2656 */ 2657static void 2658eval_constant_coef( 2659 struct tgsi_exec_machine *mach, 2660 unsigned attrib, 2661 unsigned chan ) 2662{ 2663 unsigned i; 2664 2665 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2666 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2667 } 2668} 2669 2670static void 2671interp_constant_offset( 2672 UNUSED const struct tgsi_exec_machine *mach, 2673 UNUSED unsigned attrib, 2674 UNUSED unsigned chan, 2675 UNUSED float ofs_x, 2676 UNUSED float ofs_y, 2677 UNUSED union tgsi_exec_channel *out_chan) 2678{ 2679} 2680 2681/** 2682 * Evaluate a linear-valued coefficient at the position of the 2683 * current quad. 2684 */ 2685static void 2686interp_linear_offset( 2687 const struct tgsi_exec_machine *mach, 2688 unsigned attrib, 2689 unsigned chan, 2690 float ofs_x, 2691 float ofs_y, 2692 union tgsi_exec_channel *out_chan) 2693{ 2694 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2695 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2696 const float delta = ofs_x * dadx + ofs_y * dady; 2697 out_chan->f[0] += delta; 2698 out_chan->f[1] += delta; 2699 out_chan->f[2] += delta; 2700 out_chan->f[3] += delta; 2701} 2702 2703static void 2704eval_linear_coef(struct tgsi_exec_machine *mach, 2705 unsigned attrib, 2706 unsigned chan) 2707{ 2708 const float x = mach->QuadPos.xyzw[0].f[0]; 2709 const float y = mach->QuadPos.xyzw[1].f[0]; 2710 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2711 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2712 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2713 2714 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2715 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2716 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2717 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2718} 2719 2720/** 2721 * Evaluate a perspective-valued coefficient at the position of the 2722 * current quad. 
2723 */ 2724 2725static void 2726interp_perspective_offset( 2727 const struct tgsi_exec_machine *mach, 2728 unsigned attrib, 2729 unsigned chan, 2730 float ofs_x, 2731 float ofs_y, 2732 union tgsi_exec_channel *out_chan) 2733{ 2734 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2735 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2736 const float *w = mach->QuadPos.xyzw[3].f; 2737 const float delta = ofs_x * dadx + ofs_y * dady; 2738 out_chan->f[0] += delta / w[0]; 2739 out_chan->f[1] += delta / w[1]; 2740 out_chan->f[2] += delta / w[2]; 2741 out_chan->f[3] += delta / w[3]; 2742} 2743 2744static void 2745eval_perspective_coef( 2746 struct tgsi_exec_machine *mach, 2747 unsigned attrib, 2748 unsigned chan ) 2749{ 2750 const float x = mach->QuadPos.xyzw[0].f[0]; 2751 const float y = mach->QuadPos.xyzw[1].f[0]; 2752 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2753 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2754 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2755 const float *w = mach->QuadPos.xyzw[3].f; 2756 /* divide by W here */ 2757 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2758 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2759 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2760 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2761} 2762 2763 2764typedef void (* eval_coef_func)( 2765 struct tgsi_exec_machine *mach, 2766 unsigned attrib, 2767 unsigned chan ); 2768 2769static void 2770exec_declaration(struct tgsi_exec_machine *mach, 2771 const struct tgsi_full_declaration *decl) 2772{ 2773 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2774 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2775 return; 2776 } 2777 2778 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 2779 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2780 uint first, last, mask; 2781 2782 first = decl->Range.First; 2783 last = decl->Range.Last; 2784 mask = 
decl->Declaration.UsageMask; 2785 2786 /* XXX we could remove this special-case code since 2787 * mach->InterpCoefs[first].a0 should already have the 2788 * front/back-face value. But we should first update the 2789 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2790 * Then, we could remove the tgsi_exec_machine::Face field. 2791 */ 2792 /* XXX make FACE a system value */ 2793 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2794 uint i; 2795 2796 assert(decl->Semantic.Index == 0); 2797 assert(first == last); 2798 2799 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2800 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2801 } 2802 } else { 2803 eval_coef_func eval; 2804 apply_sample_offset_func interp; 2805 uint i, j; 2806 2807 switch (decl->Interp.Interpolate) { 2808 case TGSI_INTERPOLATE_CONSTANT: 2809 eval = eval_constant_coef; 2810 interp = interp_constant_offset; 2811 break; 2812 2813 case TGSI_INTERPOLATE_LINEAR: 2814 eval = eval_linear_coef; 2815 interp = interp_linear_offset; 2816 break; 2817 2818 case TGSI_INTERPOLATE_PERSPECTIVE: 2819 eval = eval_perspective_coef; 2820 interp = interp_perspective_offset; 2821 break; 2822 2823 case TGSI_INTERPOLATE_COLOR: 2824 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 2825 interp = mach->flatshade_color ? 
interp_constant_offset : interp_perspective_offset; 2826 break; 2827 2828 default: 2829 assert(0); 2830 return; 2831 } 2832 2833 for (i = first; i <= last; i++) 2834 mach->InputSampleOffsetApply[i] = interp; 2835 2836 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2837 if (mask & (1 << j)) { 2838 for (i = first; i <= last; i++) { 2839 eval(mach, i, j); 2840 } 2841 } 2842 } 2843 } 2844 2845 if (DEBUG_EXECUTION) { 2846 uint i, j; 2847 for (i = first; i <= last; ++i) { 2848 debug_printf("IN[%2u] = ", i); 2849 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2850 if (j > 0) { 2851 debug_printf(" "); 2852 } 2853 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 2854 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 2855 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 2856 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 2857 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 2858 } 2859 } 2860 } 2861 } 2862 } 2863 2864} 2865 2866typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 2867 const union tgsi_exec_channel *src); 2868 2869static void 2870exec_scalar_unary(struct tgsi_exec_machine *mach, 2871 const struct tgsi_full_instruction *inst, 2872 micro_unary_op op, 2873 enum tgsi_exec_datatype src_datatype) 2874{ 2875 unsigned int chan; 2876 union tgsi_exec_channel src; 2877 union tgsi_exec_channel dst; 2878 2879 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 2880 op(&dst, &src); 2881 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2882 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2883 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 2884 } 2885 } 2886} 2887 2888static void 2889exec_vector_unary(struct tgsi_exec_machine *mach, 2890 const struct tgsi_full_instruction *inst, 2891 micro_unary_op op, 2892 enum tgsi_exec_datatype src_datatype) 2893{ 2894 unsigned int chan; 2895 struct tgsi_exec_vector dst; 2896 2897 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2898 if 
(inst->Dst[0].Register.WriteMask & (1 << chan)) { 2899 union tgsi_exec_channel src; 2900 2901 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2902 op(&dst.xyzw[chan], &src); 2903 } 2904 } 2905 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2906 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2907 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 2908 } 2909 } 2910} 2911 2912typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2913 const union tgsi_exec_channel *src0, 2914 const union tgsi_exec_channel *src1); 2915 2916static void 2917exec_scalar_binary(struct tgsi_exec_machine *mach, 2918 const struct tgsi_full_instruction *inst, 2919 micro_binary_op op, 2920 enum tgsi_exec_datatype src_datatype) 2921{ 2922 unsigned int chan; 2923 union tgsi_exec_channel src[2]; 2924 union tgsi_exec_channel dst; 2925 2926 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 2927 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 2928 op(&dst, &src[0], &src[1]); 2929 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2930 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2931 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 2932 } 2933 } 2934} 2935 2936static void 2937exec_vector_binary(struct tgsi_exec_machine *mach, 2938 const struct tgsi_full_instruction *inst, 2939 micro_binary_op op, 2940 enum tgsi_exec_datatype src_datatype) 2941{ 2942 unsigned int chan; 2943 struct tgsi_exec_vector dst; 2944 2945 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2946 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2947 union tgsi_exec_channel src[2]; 2948 2949 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2950 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2951 op(&dst.xyzw[chan], &src[0], &src[1]); 2952 } 2953 } 2954 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2955 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2956 store_dest(mach, &dst.xyzw[chan], 
&inst->Dst[0], inst, chan); 2957 } 2958 } 2959} 2960 2961typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 2962 const union tgsi_exec_channel *src0, 2963 const union tgsi_exec_channel *src1, 2964 const union tgsi_exec_channel *src2); 2965 2966static void 2967exec_vector_trinary(struct tgsi_exec_machine *mach, 2968 const struct tgsi_full_instruction *inst, 2969 micro_trinary_op op, 2970 enum tgsi_exec_datatype src_datatype) 2971{ 2972 unsigned int chan; 2973 struct tgsi_exec_vector dst; 2974 2975 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2976 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2977 union tgsi_exec_channel src[3]; 2978 2979 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2980 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2981 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 2982 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 2983 } 2984 } 2985 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2986 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2987 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 2988 } 2989 } 2990} 2991 2992typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 2993 const union tgsi_exec_channel *src0, 2994 const union tgsi_exec_channel *src1, 2995 const union tgsi_exec_channel *src2, 2996 const union tgsi_exec_channel *src3); 2997 2998static void 2999exec_vector_quaternary(struct tgsi_exec_machine *mach, 3000 const struct tgsi_full_instruction *inst, 3001 micro_quaternary_op op, 3002 enum tgsi_exec_datatype src_datatype) 3003{ 3004 unsigned int chan; 3005 struct tgsi_exec_vector dst; 3006 3007 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3008 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3009 union tgsi_exec_channel src[4]; 3010 3011 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3012 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3013 fetch_source(mach, &src[2], &inst->Src[2], 
chan, src_datatype); 3014 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3015 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3016 } 3017 } 3018 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3019 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3020 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3021 } 3022 } 3023} 3024 3025static void 3026exec_dp3(struct tgsi_exec_machine *mach, 3027 const struct tgsi_full_instruction *inst) 3028{ 3029 unsigned int chan; 3030 union tgsi_exec_channel arg[3]; 3031 3032 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3033 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3034 micro_mul(&arg[2], &arg[0], &arg[1]); 3035 3036 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3037 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3038 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3039 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3040 } 3041 3042 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3043 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3044 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3045 } 3046 } 3047} 3048 3049static void 3050exec_dp4(struct tgsi_exec_machine *mach, 3051 const struct tgsi_full_instruction *inst) 3052{ 3053 unsigned int chan; 3054 union tgsi_exec_channel arg[3]; 3055 3056 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3057 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3058 micro_mul(&arg[2], &arg[0], &arg[1]); 3059 3060 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3061 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3062 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3063 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3064 } 3065 3066 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3067 if 
(inst->Dst[0].Register.WriteMask & (1 << chan)) { 3068 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3069 } 3070 } 3071} 3072 3073static void 3074exec_dp2(struct tgsi_exec_machine *mach, 3075 const struct tgsi_full_instruction *inst) 3076{ 3077 unsigned int chan; 3078 union tgsi_exec_channel arg[3]; 3079 3080 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3081 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3082 micro_mul(&arg[2], &arg[0], &arg[1]); 3083 3084 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3085 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3086 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3087 3088 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3089 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3090 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3091 } 3092 } 3093} 3094 3095static void 3096exec_pk2h(struct tgsi_exec_machine *mach, 3097 const struct tgsi_full_instruction *inst) 3098{ 3099 unsigned chan; 3100 union tgsi_exec_channel arg[2], dst; 3101 3102 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3103 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3104 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3105 dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) | 3106 (_mesa_float_to_half(arg[1].f[chan]) << 16); 3107 } 3108 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3109 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3110 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 3111 } 3112 } 3113} 3114 3115static void 3116exec_up2h(struct tgsi_exec_machine *mach, 3117 const struct tgsi_full_instruction *inst) 3118{ 3119 unsigned chan; 3120 union tgsi_exec_channel arg, dst[2]; 3121 3122 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3123 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3124 dst[0].f[chan] 
= _mesa_half_to_float(arg.u[chan] & 0xffff); 3125 dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16); 3126 } 3127 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3128 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3129 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan); 3130 } 3131 } 3132} 3133 3134static void 3135micro_ucmp(union tgsi_exec_channel *dst, 3136 const union tgsi_exec_channel *src0, 3137 const union tgsi_exec_channel *src1, 3138 const union tgsi_exec_channel *src2) 3139{ 3140 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 3141 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 3142 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 3143 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; 3144} 3145 3146static void 3147exec_ucmp(struct tgsi_exec_machine *mach, 3148 const struct tgsi_full_instruction *inst) 3149{ 3150 unsigned int chan; 3151 struct tgsi_exec_vector dst; 3152 3153 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3154 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3155 union tgsi_exec_channel src[3]; 3156 3157 fetch_source(mach, &src[0], &inst->Src[0], chan, 3158 TGSI_EXEC_DATA_UINT); 3159 fetch_source(mach, &src[1], &inst->Src[1], chan, 3160 TGSI_EXEC_DATA_FLOAT); 3161 fetch_source(mach, &src[2], &inst->Src[2], chan, 3162 TGSI_EXEC_DATA_FLOAT); 3163 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3164 } 3165 } 3166 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3167 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3168 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3169 } 3170 } 3171} 3172 3173static void 3174exec_dst(struct tgsi_exec_machine *mach, 3175 const struct tgsi_full_instruction *inst) 3176{ 3177 union tgsi_exec_channel r[2]; 3178 union tgsi_exec_channel d[4]; 3179 3180 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3181 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3182 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT); 3183 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3184 } 3185 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3186 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3187 } 3188 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3189 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3190 } 3191 3192 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3193 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 3194 } 3195 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3196 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 3197 } 3198 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3199 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 3200 } 3201 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3202 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W); 3203 } 3204} 3205 3206static void 3207exec_log(struct tgsi_exec_machine *mach, 3208 const struct tgsi_full_instruction *inst) 3209{ 3210 union tgsi_exec_channel r[3]; 3211 3212 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3213 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3214 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3215 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3216 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3217 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 3218 } 3219 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3220 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3221 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3222 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y); 3223 } 3224 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3225 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z); 3226 } 3227 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3228 store_dest(mach, &OneVec, &inst->Dst[0], inst, 
TGSI_CHAN_W); 3229 } 3230} 3231 3232static void 3233exec_exp(struct tgsi_exec_machine *mach, 3234 const struct tgsi_full_instruction *inst) 3235{ 3236 union tgsi_exec_channel r[3]; 3237 3238 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3239 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3240 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3241 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3242 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X); 3243 } 3244 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3245 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3246 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y); 3247 } 3248 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3249 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3250 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z); 3251 } 3252 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3253 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 3254 } 3255} 3256 3257static void 3258exec_lit(struct tgsi_exec_machine *mach, 3259 const struct tgsi_full_instruction *inst) 3260{ 3261 union tgsi_exec_channel r[3]; 3262 union tgsi_exec_channel d[3]; 3263 3264 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3265 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3266 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3267 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3268 micro_max(&r[1], &r[1], &ZeroVec); 3269 3270 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3271 micro_min(&r[2], &r[2], &P128Vec); 3272 micro_max(&r[2], &r[2], &M128Vec); 3273 micro_pow(&r[1], &r[1], &r[2]); 3274 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3275 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 3276 } 3277 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3278 micro_max(&d[TGSI_CHAN_Y], &r[0], 
&ZeroVec); 3279 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 3280 } 3281 } 3282 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3283 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 3284 } 3285 3286 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3287 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 3288 } 3289} 3290 3291static void 3292exec_break(struct tgsi_exec_machine *mach) 3293{ 3294 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3295 /* turn off loop channels for each enabled exec channel */ 3296 mach->LoopMask &= ~mach->ExecMask; 3297 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3298 UPDATE_EXEC_MASK(mach); 3299 } else { 3300 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3301 3302 mach->Switch.mask = 0x0; 3303 3304 UPDATE_EXEC_MASK(mach); 3305 } 3306} 3307 3308static void 3309exec_switch(struct tgsi_exec_machine *mach, 3310 const struct tgsi_full_instruction *inst) 3311{ 3312 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3313 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3314 3315 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3316 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3317 mach->Switch.mask = 0x0; 3318 mach->Switch.defaultMask = 0x0; 3319 3320 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3321 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3322 3323 UPDATE_EXEC_MASK(mach); 3324} 3325 3326static void 3327exec_case(struct tgsi_exec_machine *mach, 3328 const struct tgsi_full_instruction *inst) 3329{ 3330 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3331 union tgsi_exec_channel src; 3332 uint mask = 0; 3333 3334 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3335 3336 if (mach->Switch.selector.u[0] == src.u[0]) { 3337 mask |= 0x1; 3338 } 3339 if (mach->Switch.selector.u[1] == src.u[1]) { 3340 mask |= 0x2; 3341 
} 3342 if (mach->Switch.selector.u[2] == src.u[2]) { 3343 mask |= 0x4; 3344 } 3345 if (mach->Switch.selector.u[3] == src.u[3]) { 3346 mask |= 0x8; 3347 } 3348 3349 mach->Switch.defaultMask |= mask; 3350 3351 mach->Switch.mask |= mask & prevMask; 3352 3353 UPDATE_EXEC_MASK(mach); 3354} 3355 3356/* FIXME: this will only work if default is last */ 3357static void 3358exec_default(struct tgsi_exec_machine *mach) 3359{ 3360 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3361 3362 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3363 3364 UPDATE_EXEC_MASK(mach); 3365} 3366 3367static void 3368exec_endswitch(struct tgsi_exec_machine *mach) 3369{ 3370 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3371 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3372 3373 UPDATE_EXEC_MASK(mach); 3374} 3375 3376typedef void (* micro_dop)(union tgsi_double_channel *dst, 3377 const union tgsi_double_channel *src); 3378 3379typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3380 const union tgsi_double_channel *src0, 3381 union tgsi_exec_channel *src1); 3382 3383typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3384 const union tgsi_exec_channel *src); 3385 3386typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3387 const union tgsi_double_channel *src); 3388 3389static void 3390fetch_double_channel(struct tgsi_exec_machine *mach, 3391 union tgsi_double_channel *chan, 3392 const struct tgsi_full_src_register *reg, 3393 uint chan_0, 3394 uint chan_1) 3395{ 3396 union tgsi_exec_channel src[2]; 3397 uint i; 3398 3399 fetch_source_d(mach, &src[0], reg, chan_0); 3400 fetch_source_d(mach, &src[1], reg, chan_1); 3401 3402 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3403 chan->u[i][0] = src[0].u[i]; 3404 chan->u[i][1] = src[1].u[i]; 3405 } 3406 assert(!reg->Register.Absolute); 3407 assert(!reg->Register.Negate); 3408} 3409 3410static void 3411store_double_channel(struct tgsi_exec_machine *mach, 3412 const union 
tgsi_double_channel *chan, 3413 const struct tgsi_full_dst_register *reg, 3414 const struct tgsi_full_instruction *inst, 3415 uint chan_0, 3416 uint chan_1) 3417{ 3418 union tgsi_exec_channel dst[2]; 3419 uint i; 3420 union tgsi_double_channel temp; 3421 const uint execmask = mach->ExecMask; 3422 3423 if (!inst->Instruction.Saturate) { 3424 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3425 if (execmask & (1 << i)) { 3426 dst[0].u[i] = chan->u[i][0]; 3427 dst[1].u[i] = chan->u[i][1]; 3428 } 3429 } 3430 else { 3431 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3432 if (execmask & (1 << i)) { 3433 if (chan->d[i] < 0.0 || isnan(chan->d[i])) 3434 temp.d[i] = 0.0; 3435 else if (chan->d[i] > 1.0) 3436 temp.d[i] = 1.0; 3437 else 3438 temp.d[i] = chan->d[i]; 3439 3440 dst[0].u[i] = temp.u[i][0]; 3441 dst[1].u[i] = temp.u[i][1]; 3442 } 3443 } 3444 3445 store_dest_double(mach, &dst[0], reg, chan_0); 3446 if (chan_1 != (unsigned)-1) 3447 store_dest_double(mach, &dst[1], reg, chan_1); 3448} 3449 3450static void 3451exec_double_unary(struct tgsi_exec_machine *mach, 3452 const struct tgsi_full_instruction *inst, 3453 micro_dop op) 3454{ 3455 union tgsi_double_channel src; 3456 union tgsi_double_channel dst; 3457 3458 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3459 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3460 op(&dst, &src); 3461 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3462 } 3463 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3464 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3465 op(&dst, &src); 3466 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3467 } 3468} 3469 3470static void 3471exec_double_binary(struct tgsi_exec_machine *mach, 3472 const struct tgsi_full_instruction *inst, 3473 micro_dop op, 3474 enum tgsi_exec_datatype dst_datatype) 3475{ 3476 union tgsi_double_channel src[2]; 
3477 union tgsi_double_channel dst; 3478 int first_dest_chan, second_dest_chan; 3479 int wmask; 3480 3481 wmask = inst->Dst[0].Register.WriteMask; 3482 /* these are & because of the way DSLT etc store their destinations */ 3483 if (wmask & TGSI_WRITEMASK_XY) { 3484 first_dest_chan = TGSI_CHAN_X; 3485 second_dest_chan = TGSI_CHAN_Y; 3486 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3487 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3488 second_dest_chan = -1; 3489 } 3490 3491 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3492 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3493 op(&dst, src); 3494 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3495 } 3496 3497 if (wmask & TGSI_WRITEMASK_ZW) { 3498 first_dest_chan = TGSI_CHAN_Z; 3499 second_dest_chan = TGSI_CHAN_W; 3500 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3501 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? 
TGSI_CHAN_Z : TGSI_CHAN_W; 3502 second_dest_chan = -1; 3503 } 3504 3505 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3506 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3507 op(&dst, src); 3508 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3509 } 3510} 3511 3512static void 3513exec_double_trinary(struct tgsi_exec_machine *mach, 3514 const struct tgsi_full_instruction *inst, 3515 micro_dop op) 3516{ 3517 union tgsi_double_channel src[3]; 3518 union tgsi_double_channel dst; 3519 3520 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3521 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3522 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3523 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3524 op(&dst, src); 3525 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3526 } 3527 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3528 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3529 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3530 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3531 op(&dst, src); 3532 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3533 } 3534} 3535 3536static void 3537exec_dldexp(struct tgsi_exec_machine *mach, 3538 const struct tgsi_full_instruction *inst) 3539{ 3540 union tgsi_double_channel src0; 3541 union tgsi_exec_channel src1; 3542 union tgsi_double_channel dst; 3543 int wmask; 3544 3545 wmask = inst->Dst[0].Register.WriteMask; 3546 if (wmask & TGSI_WRITEMASK_XY) { 3547 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3548 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3549 
micro_dldexp(&dst, &src0, &src1); 3550 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3551 } 3552 3553 if (wmask & TGSI_WRITEMASK_ZW) { 3554 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3555 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3556 micro_dldexp(&dst, &src0, &src1); 3557 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3558 } 3559} 3560 3561static void 3562exec_dfracexp(struct tgsi_exec_machine *mach, 3563 const struct tgsi_full_instruction *inst) 3564{ 3565 union tgsi_double_channel src; 3566 union tgsi_double_channel dst; 3567 union tgsi_exec_channel dst_exp; 3568 3569 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3570 micro_dfracexp(&dst, &dst_exp, &src); 3571 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 3572 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3573 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 3574 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3575 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3576 if (inst->Dst[1].Register.WriteMask & (1 << chan)) 3577 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan); 3578 } 3579} 3580 3581static void 3582exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3583 const struct tgsi_full_instruction *inst, 3584 micro_dop_sop op) 3585{ 3586 union tgsi_double_channel src0; 3587 union tgsi_exec_channel src1; 3588 union tgsi_double_channel dst; 3589 int wmask; 3590 3591 wmask = inst->Dst[0].Register.WriteMask; 3592 if (wmask & TGSI_WRITEMASK_XY) { 3593 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3594 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3595 op(&dst, &src0, &src1); 3596 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, 
TGSI_CHAN_Y); 3597 } 3598 3599 if (wmask & TGSI_WRITEMASK_ZW) { 3600 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3601 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3602 op(&dst, &src0, &src1); 3603 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3604 } 3605} 3606 3607static int 3608get_image_coord_dim(unsigned tgsi_tex) 3609{ 3610 int dim; 3611 switch (tgsi_tex) { 3612 case TGSI_TEXTURE_BUFFER: 3613 case TGSI_TEXTURE_1D: 3614 dim = 1; 3615 break; 3616 case TGSI_TEXTURE_2D: 3617 case TGSI_TEXTURE_RECT: 3618 case TGSI_TEXTURE_1D_ARRAY: 3619 case TGSI_TEXTURE_2D_MSAA: 3620 dim = 2; 3621 break; 3622 case TGSI_TEXTURE_3D: 3623 case TGSI_TEXTURE_CUBE: 3624 case TGSI_TEXTURE_2D_ARRAY: 3625 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3626 case TGSI_TEXTURE_CUBE_ARRAY: 3627 dim = 3; 3628 break; 3629 default: 3630 assert(!"unknown texture target"); 3631 dim = 0; 3632 break; 3633 } 3634 3635 return dim; 3636} 3637 3638static int 3639get_image_coord_sample(unsigned tgsi_tex) 3640{ 3641 int sample = 0; 3642 switch (tgsi_tex) { 3643 case TGSI_TEXTURE_2D_MSAA: 3644 sample = 3; 3645 break; 3646 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3647 sample = 4; 3648 break; 3649 default: 3650 break; 3651 } 3652 return sample; 3653} 3654 3655static void 3656exec_load_img(struct tgsi_exec_machine *mach, 3657 const struct tgsi_full_instruction *inst) 3658{ 3659 union tgsi_exec_channel r[4], sample_r; 3660 uint unit; 3661 int sample; 3662 int i, j; 3663 int dim; 3664 uint chan; 3665 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3666 struct tgsi_image_params params; 3667 3668 unit = fetch_sampler_unit(mach, inst, 0); 3669 dim = get_image_coord_dim(inst->Memory.Texture); 3670 sample = get_image_coord_sample(inst->Memory.Texture); 3671 assert(dim <= 3); 3672 3673 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3674 params.unit = unit; 3675 params.tgsi_tex_instr = inst->Memory.Texture; 3676 
params.format = inst->Memory.Format; 3677 3678 for (i = 0; i < dim; i++) { 3679 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3680 } 3681 3682 if (sample) 3683 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3684 3685 mach->Image->load(mach->Image, ¶ms, 3686 r[0].i, r[1].i, r[2].i, sample_r.i, 3687 rgba); 3688 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3689 r[0].f[j] = rgba[0][j]; 3690 r[1].f[j] = rgba[1][j]; 3691 r[2].f[j] = rgba[2][j]; 3692 r[3].f[j] = rgba[3][j]; 3693 } 3694 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3695 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3696 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 3697 } 3698 } 3699} 3700 3701static void 3702exec_load_membuf(struct tgsi_exec_machine *mach, 3703 const struct tgsi_full_instruction *inst) 3704{ 3705 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 3706 3707 uint32_t size; 3708 const char *ptr; 3709 switch (inst->Src[0].Register.File) { 3710 case TGSI_FILE_MEMORY: 3711 ptr = mach->LocalMem; 3712 size = mach->LocalMemSize; 3713 break; 3714 3715 case TGSI_FILE_BUFFER: 3716 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 3717 break; 3718 3719 case TGSI_FILE_CONSTANT: 3720 if (unit < ARRAY_SIZE(mach->Consts)) { 3721 ptr = mach->Consts[unit]; 3722 size = mach->ConstsSize[unit]; 3723 } else { 3724 ptr = NULL; 3725 size = 0; 3726 } 3727 break; 3728 3729 default: 3730 unreachable("unsupported TGSI_OPCODE_LOAD file"); 3731 } 3732 3733 union tgsi_exec_channel offset; 3734 IFETCH(&offset, 1, TGSI_CHAN_X); 3735 3736 assert(inst->Dst[0].Register.WriteMask); 3737 uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4; 3738 3739 union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS]; 3740 memset(&rgba, 0, sizeof(rgba)); 3741 for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 3742 if (size >= load_size && offset.u[j] <= (size - load_size)) { 3743 for (int chan = 0; chan < load_size / 4; chan++) 3744 rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4); 3745 } 3746 } 3747 3748 for (int 
chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3749 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3750 store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan); 3751 } 3752 } 3753} 3754 3755static void 3756exec_load(struct tgsi_exec_machine *mach, 3757 const struct tgsi_full_instruction *inst) 3758{ 3759 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 3760 exec_load_img(mach, inst); 3761 else 3762 exec_load_membuf(mach, inst); 3763} 3764 3765static uint 3766fetch_store_img_unit(struct tgsi_exec_machine *mach, 3767 const struct tgsi_full_dst_register *dst) 3768{ 3769 uint unit = 0; 3770 int i; 3771 if (dst->Register.Indirect) { 3772 union tgsi_exec_channel indir_index, index2; 3773 const uint execmask = mach->ExecMask; 3774 index2.i[0] = 3775 index2.i[1] = 3776 index2.i[2] = 3777 index2.i[3] = dst->Indirect.Index; 3778 3779 fetch_src_file_channel(mach, 3780 dst->Indirect.File, 3781 dst->Indirect.Swizzle, 3782 &index2, 3783 &ZeroVec, 3784 &indir_index); 3785 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3786 if (execmask & (1 << i)) { 3787 unit = dst->Register.Index + indir_index.i[i]; 3788 break; 3789 } 3790 } 3791 } else { 3792 unit = dst->Register.Index; 3793 } 3794 return unit; 3795} 3796 3797static void 3798exec_store_img(struct tgsi_exec_machine *mach, 3799 const struct tgsi_full_instruction *inst) 3800{ 3801 union tgsi_exec_channel r[3], sample_r; 3802 union tgsi_exec_channel value[4]; 3803 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3804 struct tgsi_image_params params; 3805 int dim; 3806 int sample; 3807 int i, j; 3808 uint unit; 3809 unit = fetch_store_img_unit(mach, &inst->Dst[0]); 3810 dim = get_image_coord_dim(inst->Memory.Texture); 3811 sample = get_image_coord_sample(inst->Memory.Texture); 3812 assert(dim <= 3); 3813 3814 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3815 params.unit = unit; 3816 params.tgsi_tex_instr = inst->Memory.Texture; 3817 params.format = inst->Memory.Format; 3818 3819 for (i = 0; i < dim; i++) { 
3820 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 3821 } 3822 3823 for (i = 0; i < 4; i++) { 3824 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3825 } 3826 if (sample) 3827 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 3828 3829 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3830 rgba[0][j] = value[0].f[j]; 3831 rgba[1][j] = value[1].f[j]; 3832 rgba[2][j] = value[2].f[j]; 3833 rgba[3][j] = value[3].f[j]; 3834 } 3835 3836 mach->Image->store(mach->Image, ¶ms, 3837 r[0].i, r[1].i, r[2].i, sample_r.i, 3838 rgba); 3839} 3840 3841 3842static void 3843exec_store_membuf(struct tgsi_exec_machine *mach, 3844 const struct tgsi_full_instruction *inst) 3845{ 3846 uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]); 3847 uint32_t size; 3848 3849 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3850 3851 const char *ptr; 3852 switch (inst->Dst[0].Register.File) { 3853 case TGSI_FILE_MEMORY: 3854 ptr = mach->LocalMem; 3855 size = mach->LocalMemSize; 3856 break; 3857 3858 case TGSI_FILE_BUFFER: 3859 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 3860 break; 3861 3862 default: 3863 unreachable("unsupported TGSI_OPCODE_STORE file"); 3864 } 3865 3866 union tgsi_exec_channel offset; 3867 IFETCH(&offset, 0, TGSI_CHAN_X); 3868 3869 union tgsi_exec_channel value[4]; 3870 for (int i = 0; i < 4; i++) 3871 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3872 3873 for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 3874 if (!(execmask & (1 << j))) 3875 continue; 3876 if (size < offset.u[j]) 3877 continue; 3878 3879 uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]); 3880 uint32_t size_avail = size - offset.u[j]; 3881 3882 for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) { 3883 if (inst->Dst[0].Register.WriteMask & (1 << chan)) 3884 memcpy(&invocation_ptr[chan], &value[chan].u[j], 4); 3885 } 3886 } 3887} 3888 3889static void 3890exec_store(struct tgsi_exec_machine *mach, 3891 const struct tgsi_full_instruction *inst) 3892{ 3893 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 3894 
exec_store_img(mach, inst); 3895 else 3896 exec_store_membuf(mach, inst); 3897} 3898 3899static void 3900exec_atomop_img(struct tgsi_exec_machine *mach, 3901 const struct tgsi_full_instruction *inst) 3902{ 3903 union tgsi_exec_channel r[4], sample_r; 3904 union tgsi_exec_channel value[4], value2[4]; 3905 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3906 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3907 struct tgsi_image_params params; 3908 int dim; 3909 int sample; 3910 int i, j; 3911 uint unit, chan; 3912 unit = fetch_sampler_unit(mach, inst, 0); 3913 dim = get_image_coord_dim(inst->Memory.Texture); 3914 sample = get_image_coord_sample(inst->Memory.Texture); 3915 assert(dim <= 3); 3916 3917 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3918 params.unit = unit; 3919 params.tgsi_tex_instr = inst->Memory.Texture; 3920 params.format = inst->Memory.Format; 3921 3922 for (i = 0; i < dim; i++) { 3923 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3924 } 3925 3926 for (i = 0; i < 4; i++) { 3927 FETCH(&value[i], 2, TGSI_CHAN_X + i); 3928 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 3929 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 3930 } 3931 if (sample) 3932 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3933 3934 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3935 rgba[0][j] = value[0].f[j]; 3936 rgba[1][j] = value[1].f[j]; 3937 rgba[2][j] = value[2].f[j]; 3938 rgba[3][j] = value[3].f[j]; 3939 } 3940 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 3941 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3942 rgba2[0][j] = value2[0].f[j]; 3943 rgba2[1][j] = value2[1].f[j]; 3944 rgba2[2][j] = value2[2].f[j]; 3945 rgba2[3][j] = value2[3].f[j]; 3946 } 3947 } 3948 3949 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 3950 r[0].i, r[1].i, r[2].i, sample_r.i, 3951 rgba, rgba2); 3952 3953 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3954 r[0].f[j] = rgba[0][j]; 3955 r[1].f[j] = rgba[1][j]; 3956 r[2].f[j] = rgba[2][j]; 3957 r[3].f[j] = rgba[3][j]; 3958 } 3959 for (chan 
= 0; chan < TGSI_NUM_CHANNELS; chan++) { 3960 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3961 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 3962 } 3963 } 3964} 3965 3966static void 3967exec_atomop_membuf(struct tgsi_exec_machine *mach, 3968 const struct tgsi_full_instruction *inst) 3969{ 3970 union tgsi_exec_channel offset, r0, r1; 3971 uint chan, i; 3972 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3973 IFETCH(&offset, 1, TGSI_CHAN_X); 3974 3975 if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X)) 3976 return; 3977 3978 void *ptr[TGSI_QUAD_SIZE]; 3979 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 3980 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 3981 uint32_t size; 3982 char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size); 3983 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 3984 if (likely(size >= 4 && offset.u[i] <= size - 4)) 3985 ptr[i] = buffer + offset.u[i]; 3986 else 3987 ptr[i] = NULL; 3988 } 3989 } else { 3990 assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY); 3991 3992 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3993 if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4)) 3994 ptr[i] = (char *)mach->LocalMem + offset.u[i]; 3995 else 3996 ptr[i] = NULL; 3997 } 3998 } 3999 4000 FETCH(&r0, 2, TGSI_CHAN_X); 4001 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4002 FETCH(&r1, 3, TGSI_CHAN_X); 4003 4004 /* The load/op/store sequence has to happen inside the loop since ptr 4005 * may have the same ptr in some of the invocations. 
4006 */ 4007 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 4008 if (!(execmask & (1 << i))) 4009 continue; 4010 4011 uint32_t val = 0; 4012 if (ptr[i]) { 4013 memcpy(&val, ptr[i], sizeof(val)); 4014 4015 uint32_t result; 4016 switch (inst->Instruction.Opcode) { 4017 case TGSI_OPCODE_ATOMUADD: 4018 result = val + r0.u[i]; 4019 break; 4020 case TGSI_OPCODE_ATOMXOR: 4021 result = val ^ r0.u[i]; 4022 break; 4023 case TGSI_OPCODE_ATOMOR: 4024 result = val | r0.u[i]; 4025 break; 4026 case TGSI_OPCODE_ATOMAND: 4027 result = val & r0.u[i]; 4028 break; 4029 case TGSI_OPCODE_ATOMUMIN: 4030 result = MIN2(val, r0.u[i]); 4031 break; 4032 case TGSI_OPCODE_ATOMUMAX: 4033 result = MAX2(val, r0.u[i]); 4034 break; 4035 case TGSI_OPCODE_ATOMIMIN: 4036 result = MIN2((int32_t)val, r0.i[i]); 4037 break; 4038 case TGSI_OPCODE_ATOMIMAX: 4039 result = MAX2((int32_t)val, r0.i[i]); 4040 break; 4041 case TGSI_OPCODE_ATOMXCHG: 4042 result = r0.u[i]; 4043 break; 4044 case TGSI_OPCODE_ATOMCAS: 4045 if (val == r0.u[i]) 4046 result = r1.u[i]; 4047 else 4048 result = val; 4049 break; 4050 case TGSI_OPCODE_ATOMFADD: 4051 result = fui(uif(val) + r0.f[i]); 4052 break; 4053 default: 4054 unreachable("bad atomic op"); 4055 } 4056 memcpy(ptr[i], &result, sizeof(result)); 4057 } 4058 4059 r0.u[i] = val; 4060 } 4061 4062 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) 4063 store_dest(mach, &r0, &inst->Dst[0], inst, chan); 4064} 4065 4066static void 4067exec_atomop(struct tgsi_exec_machine *mach, 4068 const struct tgsi_full_instruction *inst) 4069{ 4070 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4071 exec_atomop_img(mach, inst); 4072 else 4073 exec_atomop_membuf(mach, inst); 4074} 4075 4076static void 4077exec_resq_img(struct tgsi_exec_machine *mach, 4078 const struct tgsi_full_instruction *inst) 4079{ 4080 int result[4]; 4081 union tgsi_exec_channel r[4]; 4082 uint unit; 4083 int i, chan, j; 4084 struct tgsi_image_params params; 4085 4086 unit = fetch_sampler_unit(mach, inst, 0); 4087 4088 
params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 4089 params.unit = unit; 4090 params.tgsi_tex_instr = inst->Memory.Texture; 4091 params.format = inst->Memory.Format; 4092 4093 mach->Image->get_dims(mach->Image, ¶ms, result); 4094 4095 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4096 for (j = 0; j < 4; j++) { 4097 r[j].i[i] = result[j]; 4098 } 4099 } 4100 4101 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4102 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4103 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 4104 } 4105 } 4106} 4107 4108static void 4109exec_resq_buf(struct tgsi_exec_machine *mach, 4110 const struct tgsi_full_instruction *inst) 4111{ 4112 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 4113 uint32_t size; 4114 (void)mach->Buffer->lookup(mach->Buffer, unit, &size); 4115 4116 union tgsi_exec_channel r; 4117 for (int i = 0; i < TGSI_QUAD_SIZE; i++) 4118 r.i[i] = size; 4119 4120 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4121 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4122 store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X); 4123 } 4124 } 4125} 4126 4127static void 4128exec_resq(struct tgsi_exec_machine *mach, 4129 const struct tgsi_full_instruction *inst) 4130{ 4131 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4132 exec_resq_img(mach, inst); 4133 else 4134 exec_resq_buf(mach, inst); 4135} 4136 4137static void 4138micro_f2u64(union tgsi_double_channel *dst, 4139 const union tgsi_exec_channel *src) 4140{ 4141 dst->u64[0] = (uint64_t)src->f[0]; 4142 dst->u64[1] = (uint64_t)src->f[1]; 4143 dst->u64[2] = (uint64_t)src->f[2]; 4144 dst->u64[3] = (uint64_t)src->f[3]; 4145} 4146 4147static void 4148micro_f2i64(union tgsi_double_channel *dst, 4149 const union tgsi_exec_channel *src) 4150{ 4151 dst->i64[0] = (int64_t)src->f[0]; 4152 dst->i64[1] = (int64_t)src->f[1]; 4153 dst->i64[2] = (int64_t)src->f[2]; 4154 dst->i64[3] = (int64_t)src->f[3]; 4155} 4156 4157static void 
4158micro_u2i64(union tgsi_double_channel *dst, 4159 const union tgsi_exec_channel *src) 4160{ 4161 dst->u64[0] = (uint64_t)src->u[0]; 4162 dst->u64[1] = (uint64_t)src->u[1]; 4163 dst->u64[2] = (uint64_t)src->u[2]; 4164 dst->u64[3] = (uint64_t)src->u[3]; 4165} 4166 4167static void 4168micro_i2i64(union tgsi_double_channel *dst, 4169 const union tgsi_exec_channel *src) 4170{ 4171 dst->i64[0] = (int64_t)src->i[0]; 4172 dst->i64[1] = (int64_t)src->i[1]; 4173 dst->i64[2] = (int64_t)src->i[2]; 4174 dst->i64[3] = (int64_t)src->i[3]; 4175} 4176 4177static void 4178micro_d2u64(union tgsi_double_channel *dst, 4179 const union tgsi_double_channel *src) 4180{ 4181 dst->u64[0] = (uint64_t)src->d[0]; 4182 dst->u64[1] = (uint64_t)src->d[1]; 4183 dst->u64[2] = (uint64_t)src->d[2]; 4184 dst->u64[3] = (uint64_t)src->d[3]; 4185} 4186 4187static void 4188micro_d2i64(union tgsi_double_channel *dst, 4189 const union tgsi_double_channel *src) 4190{ 4191 dst->i64[0] = (int64_t)src->d[0]; 4192 dst->i64[1] = (int64_t)src->d[1]; 4193 dst->i64[2] = (int64_t)src->d[2]; 4194 dst->i64[3] = (int64_t)src->d[3]; 4195} 4196 4197static void 4198micro_u642d(union tgsi_double_channel *dst, 4199 const union tgsi_double_channel *src) 4200{ 4201 dst->d[0] = (double)src->u64[0]; 4202 dst->d[1] = (double)src->u64[1]; 4203 dst->d[2] = (double)src->u64[2]; 4204 dst->d[3] = (double)src->u64[3]; 4205} 4206 4207static void 4208micro_i642d(union tgsi_double_channel *dst, 4209 const union tgsi_double_channel *src) 4210{ 4211 dst->d[0] = (double)src->i64[0]; 4212 dst->d[1] = (double)src->i64[1]; 4213 dst->d[2] = (double)src->i64[2]; 4214 dst->d[3] = (double)src->i64[3]; 4215} 4216 4217static void 4218micro_u642f(union tgsi_exec_channel *dst, 4219 const union tgsi_double_channel *src) 4220{ 4221 dst->f[0] = (float)src->u64[0]; 4222 dst->f[1] = (float)src->u64[1]; 4223 dst->f[2] = (float)src->u64[2]; 4224 dst->f[3] = (float)src->u64[3]; 4225} 4226 4227static void 4228micro_i642f(union tgsi_exec_channel *dst, 4229 
const union tgsi_double_channel *src) 4230{ 4231 dst->f[0] = (float)src->i64[0]; 4232 dst->f[1] = (float)src->i64[1]; 4233 dst->f[2] = (float)src->i64[2]; 4234 dst->f[3] = (float)src->i64[3]; 4235} 4236 4237static void 4238exec_t_2_64(struct tgsi_exec_machine *mach, 4239 const struct tgsi_full_instruction *inst, 4240 micro_dop_s op, 4241 enum tgsi_exec_datatype src_datatype) 4242{ 4243 union tgsi_exec_channel src; 4244 union tgsi_double_channel dst; 4245 4246 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4247 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4248 op(&dst, &src); 4249 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4250 } 4251 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4252 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4253 op(&dst, &src); 4254 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4255 } 4256} 4257 4258static void 4259exec_64_2_t(struct tgsi_exec_machine *mach, 4260 const struct tgsi_full_instruction *inst, 4261 micro_sop_d op) 4262{ 4263 union tgsi_double_channel src; 4264 union tgsi_exec_channel dst; 4265 int wm = inst->Dst[0].Register.WriteMask; 4266 int i; 4267 int bit; 4268 for (i = 0; i < 2; i++) { 4269 bit = ffs(wm); 4270 if (bit) { 4271 wm &= ~(1 << (bit - 1)); 4272 if (i == 0) 4273 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4274 else 4275 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4276 op(&dst, &src); 4277 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1); 4278 } 4279 } 4280} 4281 4282static void 4283micro_i2f(union tgsi_exec_channel *dst, 4284 const union tgsi_exec_channel *src) 4285{ 4286 dst->f[0] = (float)src->i[0]; 4287 dst->f[1] = (float)src->i[1]; 4288 dst->f[2] = (float)src->i[2]; 4289 dst->f[3] = (float)src->i[3]; 4290} 4291 4292static void 4293micro_not(union tgsi_exec_channel 
*dst, 4294 const union tgsi_exec_channel *src) 4295{ 4296 dst->u[0] = ~src->u[0]; 4297 dst->u[1] = ~src->u[1]; 4298 dst->u[2] = ~src->u[2]; 4299 dst->u[3] = ~src->u[3]; 4300} 4301 4302static void 4303micro_shl(union tgsi_exec_channel *dst, 4304 const union tgsi_exec_channel *src0, 4305 const union tgsi_exec_channel *src1) 4306{ 4307 unsigned masked_count; 4308 masked_count = src1->u[0] & 0x1f; 4309 dst->u[0] = src0->u[0] << masked_count; 4310 masked_count = src1->u[1] & 0x1f; 4311 dst->u[1] = src0->u[1] << masked_count; 4312 masked_count = src1->u[2] & 0x1f; 4313 dst->u[2] = src0->u[2] << masked_count; 4314 masked_count = src1->u[3] & 0x1f; 4315 dst->u[3] = src0->u[3] << masked_count; 4316} 4317 4318static void 4319micro_and(union tgsi_exec_channel *dst, 4320 const union tgsi_exec_channel *src0, 4321 const union tgsi_exec_channel *src1) 4322{ 4323 dst->u[0] = src0->u[0] & src1->u[0]; 4324 dst->u[1] = src0->u[1] & src1->u[1]; 4325 dst->u[2] = src0->u[2] & src1->u[2]; 4326 dst->u[3] = src0->u[3] & src1->u[3]; 4327} 4328 4329static void 4330micro_or(union tgsi_exec_channel *dst, 4331 const union tgsi_exec_channel *src0, 4332 const union tgsi_exec_channel *src1) 4333{ 4334 dst->u[0] = src0->u[0] | src1->u[0]; 4335 dst->u[1] = src0->u[1] | src1->u[1]; 4336 dst->u[2] = src0->u[2] | src1->u[2]; 4337 dst->u[3] = src0->u[3] | src1->u[3]; 4338} 4339 4340static void 4341micro_xor(union tgsi_exec_channel *dst, 4342 const union tgsi_exec_channel *src0, 4343 const union tgsi_exec_channel *src1) 4344{ 4345 dst->u[0] = src0->u[0] ^ src1->u[0]; 4346 dst->u[1] = src0->u[1] ^ src1->u[1]; 4347 dst->u[2] = src0->u[2] ^ src1->u[2]; 4348 dst->u[3] = src0->u[3] ^ src1->u[3]; 4349} 4350 4351static void 4352micro_mod(union tgsi_exec_channel *dst, 4353 const union tgsi_exec_channel *src0, 4354 const union tgsi_exec_channel *src1) 4355{ 4356 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 4357 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 4358 dst->i[2] = src1->i[2] ? 
src0->i[2] % src1->i[2] : ~0; 4359 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4360} 4361 4362static void 4363micro_f2i(union tgsi_exec_channel *dst, 4364 const union tgsi_exec_channel *src) 4365{ 4366 dst->i[0] = (int)src->f[0]; 4367 dst->i[1] = (int)src->f[1]; 4368 dst->i[2] = (int)src->f[2]; 4369 dst->i[3] = (int)src->f[3]; 4370} 4371 4372static void 4373micro_fseq(union tgsi_exec_channel *dst, 4374 const union tgsi_exec_channel *src0, 4375 const union tgsi_exec_channel *src1) 4376{ 4377 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4378 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4379 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4380 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4381} 4382 4383static void 4384micro_fsge(union tgsi_exec_channel *dst, 4385 const union tgsi_exec_channel *src0, 4386 const union tgsi_exec_channel *src1) 4387{ 4388 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4389 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4390 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4391 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4392} 4393 4394static void 4395micro_fslt(union tgsi_exec_channel *dst, 4396 const union tgsi_exec_channel *src0, 4397 const union tgsi_exec_channel *src1) 4398{ 4399 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4400 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4401 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4402 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4403} 4404 4405static void 4406micro_fsne(union tgsi_exec_channel *dst, 4407 const union tgsi_exec_channel *src0, 4408 const union tgsi_exec_channel *src1) 4409{ 4410 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4411 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4412 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4413 dst->u[3] = src0->f[3] != src1->f[3] ? 
~0 : 0; 4414} 4415 4416static void 4417micro_idiv(union tgsi_exec_channel *dst, 4418 const union tgsi_exec_channel *src0, 4419 const union tgsi_exec_channel *src1) 4420{ 4421 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4422 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4423 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4424 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4425} 4426 4427static void 4428micro_imax(union tgsi_exec_channel *dst, 4429 const union tgsi_exec_channel *src0, 4430 const union tgsi_exec_channel *src1) 4431{ 4432 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4433 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4434 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4435 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4436} 4437 4438static void 4439micro_imin(union tgsi_exec_channel *dst, 4440 const union tgsi_exec_channel *src0, 4441 const union tgsi_exec_channel *src1) 4442{ 4443 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4444 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4445 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4446 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4447} 4448 4449static void 4450micro_isge(union tgsi_exec_channel *dst, 4451 const union tgsi_exec_channel *src0, 4452 const union tgsi_exec_channel *src1) 4453{ 4454 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4455 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4456 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4457 dst->i[3] = src0->i[3] >= src1->i[3] ? 
-1 : 0; 4458} 4459 4460static void 4461micro_ishr(union tgsi_exec_channel *dst, 4462 const union tgsi_exec_channel *src0, 4463 const union tgsi_exec_channel *src1) 4464{ 4465 unsigned masked_count; 4466 masked_count = src1->i[0] & 0x1f; 4467 dst->i[0] = src0->i[0] >> masked_count; 4468 masked_count = src1->i[1] & 0x1f; 4469 dst->i[1] = src0->i[1] >> masked_count; 4470 masked_count = src1->i[2] & 0x1f; 4471 dst->i[2] = src0->i[2] >> masked_count; 4472 masked_count = src1->i[3] & 0x1f; 4473 dst->i[3] = src0->i[3] >> masked_count; 4474} 4475 4476static void 4477micro_islt(union tgsi_exec_channel *dst, 4478 const union tgsi_exec_channel *src0, 4479 const union tgsi_exec_channel *src1) 4480{ 4481 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4482 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4483 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4484 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4485} 4486 4487static void 4488micro_f2u(union tgsi_exec_channel *dst, 4489 const union tgsi_exec_channel *src) 4490{ 4491 dst->u[0] = (uint)src->f[0]; 4492 dst->u[1] = (uint)src->f[1]; 4493 dst->u[2] = (uint)src->f[2]; 4494 dst->u[3] = (uint)src->f[3]; 4495} 4496 4497static void 4498micro_u2f(union tgsi_exec_channel *dst, 4499 const union tgsi_exec_channel *src) 4500{ 4501 dst->f[0] = (float)src->u[0]; 4502 dst->f[1] = (float)src->u[1]; 4503 dst->f[2] = (float)src->u[2]; 4504 dst->f[3] = (float)src->u[3]; 4505} 4506 4507static void 4508micro_uadd(union tgsi_exec_channel *dst, 4509 const union tgsi_exec_channel *src0, 4510 const union tgsi_exec_channel *src1) 4511{ 4512 dst->u[0] = src0->u[0] + src1->u[0]; 4513 dst->u[1] = src0->u[1] + src1->u[1]; 4514 dst->u[2] = src0->u[2] + src1->u[2]; 4515 dst->u[3] = src0->u[3] + src1->u[3]; 4516} 4517 4518static void 4519micro_udiv(union tgsi_exec_channel *dst, 4520 const union tgsi_exec_channel *src0, 4521 const union tgsi_exec_channel *src1) 4522{ 4523 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4524 dst->u[1] = src1->u[1] ? 
src0->u[1] / src1->u[1] : ~0u; 4525 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4526 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4527} 4528 4529static void 4530micro_umad(union tgsi_exec_channel *dst, 4531 const union tgsi_exec_channel *src0, 4532 const union tgsi_exec_channel *src1, 4533 const union tgsi_exec_channel *src2) 4534{ 4535 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4536 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4537 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4538 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4539} 4540 4541static void 4542micro_umax(union tgsi_exec_channel *dst, 4543 const union tgsi_exec_channel *src0, 4544 const union tgsi_exec_channel *src1) 4545{ 4546 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4547 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4548 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4549 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4550} 4551 4552static void 4553micro_umin(union tgsi_exec_channel *dst, 4554 const union tgsi_exec_channel *src0, 4555 const union tgsi_exec_channel *src1) 4556{ 4557 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4558 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4559 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4560 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4561} 4562 4563static void 4564micro_umod(union tgsi_exec_channel *dst, 4565 const union tgsi_exec_channel *src0, 4566 const union tgsi_exec_channel *src1) 4567{ 4568 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4569 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4570 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4571 dst->u[3] = src1->u[3] ? 
src0->u[3] % src1->u[3] : ~0u; 4572} 4573 4574static void 4575micro_umul(union tgsi_exec_channel *dst, 4576 const union tgsi_exec_channel *src0, 4577 const union tgsi_exec_channel *src1) 4578{ 4579 dst->u[0] = src0->u[0] * src1->u[0]; 4580 dst->u[1] = src0->u[1] * src1->u[1]; 4581 dst->u[2] = src0->u[2] * src1->u[2]; 4582 dst->u[3] = src0->u[3] * src1->u[3]; 4583} 4584 4585static void 4586micro_imul_hi(union tgsi_exec_channel *dst, 4587 const union tgsi_exec_channel *src0, 4588 const union tgsi_exec_channel *src1) 4589{ 4590#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4591 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4592 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4593 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4594 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4595#undef I64M 4596} 4597 4598static void 4599micro_umul_hi(union tgsi_exec_channel *dst, 4600 const union tgsi_exec_channel *src0, 4601 const union tgsi_exec_channel *src1) 4602{ 4603#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4604 dst->u[0] = U64M(src0->u[0], src1->u[0]); 4605 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4606 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4607 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4608#undef U64M 4609} 4610 4611static void 4612micro_useq(union tgsi_exec_channel *dst, 4613 const union tgsi_exec_channel *src0, 4614 const union tgsi_exec_channel *src1) 4615{ 4616 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4617 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4618 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4619 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4620} 4621 4622static void 4623micro_usge(union tgsi_exec_channel *dst, 4624 const union tgsi_exec_channel *src0, 4625 const union tgsi_exec_channel *src1) 4626{ 4627 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4628 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4629 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4630 dst->u[3] = src0->u[3] >= src1->u[3] ? 
~0 : 0; 4631} 4632 4633static void 4634micro_ushr(union tgsi_exec_channel *dst, 4635 const union tgsi_exec_channel *src0, 4636 const union tgsi_exec_channel *src1) 4637{ 4638 unsigned masked_count; 4639 masked_count = src1->u[0] & 0x1f; 4640 dst->u[0] = src0->u[0] >> masked_count; 4641 masked_count = src1->u[1] & 0x1f; 4642 dst->u[1] = src0->u[1] >> masked_count; 4643 masked_count = src1->u[2] & 0x1f; 4644 dst->u[2] = src0->u[2] >> masked_count; 4645 masked_count = src1->u[3] & 0x1f; 4646 dst->u[3] = src0->u[3] >> masked_count; 4647} 4648 4649static void 4650micro_uslt(union tgsi_exec_channel *dst, 4651 const union tgsi_exec_channel *src0, 4652 const union tgsi_exec_channel *src1) 4653{ 4654 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4655 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4656 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4657 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 4658} 4659 4660static void 4661micro_usne(union tgsi_exec_channel *dst, 4662 const union tgsi_exec_channel *src0, 4663 const union tgsi_exec_channel *src1) 4664{ 4665 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4666 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4667 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4668 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 4669} 4670 4671static void 4672micro_uarl(union tgsi_exec_channel *dst, 4673 const union tgsi_exec_channel *src) 4674{ 4675 dst->i[0] = src->u[0]; 4676 dst->i[1] = src->u[1]; 4677 dst->i[2] = src->u[2]; 4678 dst->i[3] = src->u[3]; 4679} 4680 4681/** 4682 * Signed bitfield extract (i.e. 
sign-extend the extracted bits) 4683 */ 4684static void 4685micro_ibfe(union tgsi_exec_channel *dst, 4686 const union tgsi_exec_channel *src0, 4687 const union tgsi_exec_channel *src1, 4688 const union tgsi_exec_channel *src2) 4689{ 4690 int i; 4691 for (i = 0; i < 4; i++) { 4692 int width = src2->i[i]; 4693 int offset = src1->i[i] & 0x1f; 4694 if (width == 32 && offset == 0) { 4695 dst->i[i] = src0->i[i]; 4696 continue; 4697 } 4698 width &= 0x1f; 4699 if (width == 0) 4700 dst->i[i] = 0; 4701 else if (width + offset < 32) 4702 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 4703 else 4704 dst->i[i] = src0->i[i] >> offset; 4705 } 4706} 4707 4708/** 4709 * Unsigned bitfield extract 4710 */ 4711static void 4712micro_ubfe(union tgsi_exec_channel *dst, 4713 const union tgsi_exec_channel *src0, 4714 const union tgsi_exec_channel *src1, 4715 const union tgsi_exec_channel *src2) 4716{ 4717 int i; 4718 for (i = 0; i < 4; i++) { 4719 int width = src2->u[i]; 4720 int offset = src1->u[i] & 0x1f; 4721 if (width == 32 && offset == 0) { 4722 dst->u[i] = src0->u[i]; 4723 continue; 4724 } 4725 width &= 0x1f; 4726 if (width == 0) 4727 dst->u[i] = 0; 4728 else if (width + offset < 32) 4729 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 4730 else 4731 dst->u[i] = src0->u[i] >> offset; 4732 } 4733} 4734 4735/** 4736 * Bitfield insert: copy low bits from src1 into a region of src0. 
4737 */ 4738static void 4739micro_bfi(union tgsi_exec_channel *dst, 4740 const union tgsi_exec_channel *src0, 4741 const union tgsi_exec_channel *src1, 4742 const union tgsi_exec_channel *src2, 4743 const union tgsi_exec_channel *src3) 4744{ 4745 int i; 4746 for (i = 0; i < 4; i++) { 4747 int width = src3->u[i]; 4748 int offset = src2->u[i] & 0x1f; 4749 if (width == 32) { 4750 dst->u[i] = src1->u[i]; 4751 } else { 4752 int bitmask = ((1 << width) - 1) << offset; 4753 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 4754 } 4755 } 4756} 4757 4758static void 4759micro_brev(union tgsi_exec_channel *dst, 4760 const union tgsi_exec_channel *src) 4761{ 4762 dst->u[0] = util_bitreverse(src->u[0]); 4763 dst->u[1] = util_bitreverse(src->u[1]); 4764 dst->u[2] = util_bitreverse(src->u[2]); 4765 dst->u[3] = util_bitreverse(src->u[3]); 4766} 4767 4768static void 4769micro_popc(union tgsi_exec_channel *dst, 4770 const union tgsi_exec_channel *src) 4771{ 4772 dst->u[0] = util_bitcount(src->u[0]); 4773 dst->u[1] = util_bitcount(src->u[1]); 4774 dst->u[2] = util_bitcount(src->u[2]); 4775 dst->u[3] = util_bitcount(src->u[3]); 4776} 4777 4778static void 4779micro_lsb(union tgsi_exec_channel *dst, 4780 const union tgsi_exec_channel *src) 4781{ 4782 dst->i[0] = ffs(src->u[0]) - 1; 4783 dst->i[1] = ffs(src->u[1]) - 1; 4784 dst->i[2] = ffs(src->u[2]) - 1; 4785 dst->i[3] = ffs(src->u[3]) - 1; 4786} 4787 4788static void 4789micro_imsb(union tgsi_exec_channel *dst, 4790 const union tgsi_exec_channel *src) 4791{ 4792 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 4793 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 4794 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 4795 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 4796} 4797 4798static void 4799micro_umsb(union tgsi_exec_channel *dst, 4800 const union tgsi_exec_channel *src) 4801{ 4802 dst->i[0] = util_last_bit(src->u[0]) - 1; 4803 dst->i[1] = util_last_bit(src->u[1]) - 1; 4804 dst->i[2] = 
util_last_bit(src->u[2]) - 1; 4805 dst->i[3] = util_last_bit(src->u[3]) - 1; 4806} 4807 4808 4809static void 4810exec_interp_at_sample(struct tgsi_exec_machine *mach, 4811 const struct tgsi_full_instruction *inst) 4812{ 4813 union tgsi_exec_channel index; 4814 union tgsi_exec_channel index2D; 4815 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4816 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4817 4818 assert(reg->Register.File == TGSI_FILE_INPUT); 4819 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE); 4820 4821 get_index_registers(mach, reg, &index, &index2D); 4822 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX]; 4823 4824 /* Short cut: sample 0 is like a normal fetch */ 4825 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4826 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4827 continue; 4828 4829 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4830 &result[chan]); 4831 if (sample != 0.0f) { 4832 4833 /* TODO: define the samples > 0, but so far we only do fake MSAA */ 4834 float x = 0; 4835 float y = 0; 4836 4837 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan]; 4838 assert(pos >= 0); 4839 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 4840 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]); 4841 } 4842 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4843 } 4844} 4845 4846 4847static void 4848exec_interp_at_offset(struct tgsi_exec_machine *mach, 4849 const struct tgsi_full_instruction *inst) 4850{ 4851 union tgsi_exec_channel index; 4852 union tgsi_exec_channel index2D; 4853 union tgsi_exec_channel ofsx; 4854 union tgsi_exec_channel ofsy; 4855 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4856 4857 assert(reg->Register.File == TGSI_FILE_INPUT); 4858 4859 get_index_registers(mach, reg, &index, &index2D); 4860 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + 
index.i[0]; 4861 4862 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 4863 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 4864 4865 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4866 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4867 continue; 4868 union tgsi_exec_channel result; 4869 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result); 4870 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result); 4871 store_dest(mach, &result, &inst->Dst[0], inst, chan); 4872 } 4873} 4874 4875 4876static void 4877exec_interp_at_centroid(struct tgsi_exec_machine *mach, 4878 const struct tgsi_full_instruction *inst) 4879{ 4880 union tgsi_exec_channel index; 4881 union tgsi_exec_channel index2D; 4882 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4883 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4884 4885 assert(reg->Register.File == TGSI_FILE_INPUT); 4886 get_index_registers(mach, reg, &index, &index2D); 4887 4888 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4889 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4890 continue; 4891 4892 /* Here we should add the change to use a sample that lies within the 4893 * primitive (Section 15.2): 4894 * 4895 * "When interpolating variables declared using centroid in , 4896 * the variable is sampled at a location within the pixel covered 4897 * by the primitive generating the fragment. 4898 * ... 4899 * The built-in functions interpolateAtCentroid ... will sample 4900 * variables as though they were declared with the centroid ... 4901 * qualifier[s]." 4902 * 4903 * Since we only support 1 sample currently, this is just a pass-through. 4904 */ 4905 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4906 &result[chan]); 4907 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4908 } 4909 4910} 4911 4912 4913/** 4914 * Execute a TGSI instruction. 
4915 * Returns TRUE if a barrier instruction is hit, 4916 * otherwise FALSE. 4917 */ 4918static boolean 4919exec_instruction( 4920 struct tgsi_exec_machine *mach, 4921 const struct tgsi_full_instruction *inst, 4922 int *pc ) 4923{ 4924 union tgsi_exec_channel r[10]; 4925 4926 (*pc)++; 4927 4928 switch (inst->Instruction.Opcode) { 4929 case TGSI_OPCODE_ARL: 4930 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT); 4931 break; 4932 4933 case TGSI_OPCODE_MOV: 4934 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT); 4935 break; 4936 4937 case TGSI_OPCODE_LIT: 4938 exec_lit(mach, inst); 4939 break; 4940 4941 case TGSI_OPCODE_RCP: 4942 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT); 4943 break; 4944 4945 case TGSI_OPCODE_RSQ: 4946 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT); 4947 break; 4948 4949 case TGSI_OPCODE_EXP: 4950 exec_exp(mach, inst); 4951 break; 4952 4953 case TGSI_OPCODE_LOG: 4954 exec_log(mach, inst); 4955 break; 4956 4957 case TGSI_OPCODE_MUL: 4958 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT); 4959 break; 4960 4961 case TGSI_OPCODE_ADD: 4962 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT); 4963 break; 4964 4965 case TGSI_OPCODE_DP3: 4966 exec_dp3(mach, inst); 4967 break; 4968 4969 case TGSI_OPCODE_DP4: 4970 exec_dp4(mach, inst); 4971 break; 4972 4973 case TGSI_OPCODE_DST: 4974 exec_dst(mach, inst); 4975 break; 4976 4977 case TGSI_OPCODE_MIN: 4978 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT); 4979 break; 4980 4981 case TGSI_OPCODE_MAX: 4982 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT); 4983 break; 4984 4985 case TGSI_OPCODE_SLT: 4986 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT); 4987 break; 4988 4989 case TGSI_OPCODE_SGE: 4990 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT); 4991 break; 4992 4993 case TGSI_OPCODE_MAD: 4994 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT); 4995 
break; 4996 4997 case TGSI_OPCODE_LRP: 4998 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT); 4999 break; 5000 5001 case TGSI_OPCODE_SQRT: 5002 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT); 5003 break; 5004 5005 case TGSI_OPCODE_FRC: 5006 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT); 5007 break; 5008 5009 case TGSI_OPCODE_FLR: 5010 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT); 5011 break; 5012 5013 case TGSI_OPCODE_ROUND: 5014 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT); 5015 break; 5016 5017 case TGSI_OPCODE_EX2: 5018 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT); 5019 break; 5020 5021 case TGSI_OPCODE_LG2: 5022 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT); 5023 break; 5024 5025 case TGSI_OPCODE_POW: 5026 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT); 5027 break; 5028 5029 case TGSI_OPCODE_LDEXP: 5030 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT); 5031 break; 5032 5033 case TGSI_OPCODE_COS: 5034 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT); 5035 break; 5036 5037 case TGSI_OPCODE_DDX_FINE: 5038 exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT); 5039 break; 5040 5041 case TGSI_OPCODE_DDX: 5042 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT); 5043 break; 5044 5045 case TGSI_OPCODE_DDY_FINE: 5046 exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT); 5047 break; 5048 5049 case TGSI_OPCODE_DDY: 5050 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT); 5051 break; 5052 5053 case TGSI_OPCODE_KILL: 5054 exec_kill (mach); 5055 break; 5056 5057 case TGSI_OPCODE_KILL_IF: 5058 exec_kill_if (mach, inst); 5059 break; 5060 5061 case TGSI_OPCODE_PK2H: 5062 exec_pk2h(mach, inst); 5063 break; 5064 5065 case TGSI_OPCODE_PK2US: 5066 assert (0); 5067 break; 5068 5069 case TGSI_OPCODE_PK4B: 5070 assert (0); 5071 break; 5072 5073 case 
TGSI_OPCODE_PK4UB: 5074 assert (0); 5075 break; 5076 5077 case TGSI_OPCODE_SEQ: 5078 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT); 5079 break; 5080 5081 case TGSI_OPCODE_SGT: 5082 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT); 5083 break; 5084 5085 case TGSI_OPCODE_SIN: 5086 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT); 5087 break; 5088 5089 case TGSI_OPCODE_SLE: 5090 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT); 5091 break; 5092 5093 case TGSI_OPCODE_SNE: 5094 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT); 5095 break; 5096 5097 case TGSI_OPCODE_TEX: 5098 /* simple texture lookup */ 5099 /* src[0] = texcoord */ 5100 /* src[1] = sampler unit */ 5101 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5102 break; 5103 5104 case TGSI_OPCODE_TXB: 5105 /* Texture lookup with lod bias */ 5106 /* src[0] = texcoord (src[0].w = LOD bias) */ 5107 /* src[1] = sampler unit */ 5108 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5109 break; 5110 5111 case TGSI_OPCODE_TXD: 5112 /* Texture lookup with explict partial derivatives */ 5113 /* src[0] = texcoord */ 5114 /* src[1] = d[strq]/dx */ 5115 /* src[2] = d[strq]/dy */ 5116 /* src[3] = sampler unit */ 5117 exec_txd(mach, inst); 5118 break; 5119 5120 case TGSI_OPCODE_TXL: 5121 /* Texture lookup with explit LOD */ 5122 /* src[0] = texcoord (src[0].w = LOD) */ 5123 /* src[1] = sampler unit */ 5124 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5125 break; 5126 5127 case TGSI_OPCODE_TXP: 5128 /* Texture lookup with projection */ 5129 /* src[0] = texcoord (src[0].w = projection) */ 5130 /* src[1] = sampler unit */ 5131 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5132 break; 5133 5134 case TGSI_OPCODE_TG4: 5135 /* src[0] = texcoord */ 5136 /* src[1] = component */ 5137 /* src[2] = sampler unit */ 5138 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5139 break; 5140 5141 case TGSI_OPCODE_LODQ: 5142 /* src[0] = texcoord */ 5143 /* src[1] = 
sampler unit */ 5144 exec_lodq(mach, inst); 5145 break; 5146 5147 case TGSI_OPCODE_UP2H: 5148 exec_up2h(mach, inst); 5149 break; 5150 5151 case TGSI_OPCODE_UP2US: 5152 assert (0); 5153 break; 5154 5155 case TGSI_OPCODE_UP4B: 5156 assert (0); 5157 break; 5158 5159 case TGSI_OPCODE_UP4UB: 5160 assert (0); 5161 break; 5162 5163 case TGSI_OPCODE_ARR: 5164 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT); 5165 break; 5166 5167 case TGSI_OPCODE_CAL: 5168 /* skip the call if no execution channels are enabled */ 5169 if (mach->ExecMask) { 5170 /* do the call */ 5171 5172 /* First, record the depths of the execution stacks. 5173 * This is important for deeply nested/looped return statements. 5174 * We have to unwind the stacks by the correct amount. For a 5175 * real code generator, we could determine the number of entries 5176 * to pop off each stack with simple static analysis and avoid 5177 * implementing this data structure at run time. 5178 */ 5179 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5180 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5181 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5182 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5183 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5184 /* note that PC was already incremented above */ 5185 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5186 5187 mach->CallStackTop++; 5188 5189 /* Second, push the Cond, Loop, Cont, Func stacks */ 5190 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5191 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5192 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5193 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5194 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5195 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5196 5197 mach->CondStack[mach->CondStackTop++] = 
mach->CondMask; 5198 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5199 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5200 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5201 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5202 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5203 5204 /* Finally, jump to the subroutine. The label is a pointer 5205 * (an instruction number) to the BGNSUB instruction. 5206 */ 5207 *pc = inst->Label.Label; 5208 assert(mach->Instructions[*pc].Instruction.Opcode 5209 == TGSI_OPCODE_BGNSUB); 5210 } 5211 break; 5212 5213 case TGSI_OPCODE_RET: 5214 mach->FuncMask &= ~mach->ExecMask; 5215 UPDATE_EXEC_MASK(mach); 5216 5217 if (mach->FuncMask == 0x0) { 5218 /* really return now (otherwise, keep executing */ 5219 5220 if (mach->CallStackTop == 0) { 5221 /* returning from main() */ 5222 mach->CondStackTop = 0; 5223 mach->LoopStackTop = 0; 5224 mach->ContStackTop = 0; 5225 mach->LoopLabelStackTop = 0; 5226 mach->SwitchStackTop = 0; 5227 mach->BreakStackTop = 0; 5228 *pc = -1; 5229 return FALSE; 5230 } 5231 5232 assert(mach->CallStackTop > 0); 5233 mach->CallStackTop--; 5234 5235 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5236 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5237 5238 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5239 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5240 5241 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5242 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5243 5244 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5245 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5246 5247 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5248 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5249 5250 assert(mach->FuncStackTop > 0); 5251 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5252 5253 *pc = 
mach->CallStack[mach->CallStackTop].ReturnAddr; 5254 5255 UPDATE_EXEC_MASK(mach); 5256 } 5257 break; 5258 5259 case TGSI_OPCODE_SSG: 5260 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT); 5261 break; 5262 5263 case TGSI_OPCODE_CMP: 5264 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT); 5265 break; 5266 5267 case TGSI_OPCODE_DIV: 5268 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT); 5269 break; 5270 5271 case TGSI_OPCODE_DP2: 5272 exec_dp2(mach, inst); 5273 break; 5274 5275 case TGSI_OPCODE_IF: 5276 /* push CondMask */ 5277 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5278 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5279 FETCH( &r[0], 0, TGSI_CHAN_X ); 5280 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 5281 if (!r[0].f[i]) 5282 mach->CondMask &= ~(1 << i); 5283 } 5284 UPDATE_EXEC_MASK(mach); 5285 /* If no channels are taking the then branch, jump to ELSE. */ 5286 if (!mach->CondMask) 5287 *pc = inst->Label.Label; 5288 break; 5289 5290 case TGSI_OPCODE_UIF: 5291 /* push CondMask */ 5292 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5293 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5294 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5295 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 5296 if (!r[0].u[i]) 5297 mach->CondMask &= ~(1 << i); 5298 } 5299 UPDATE_EXEC_MASK(mach); 5300 /* If no channels are taking the then branch, jump to ELSE. 
*/ 5301 if (!mach->CondMask) 5302 *pc = inst->Label.Label; 5303 break; 5304 5305 case TGSI_OPCODE_ELSE: 5306 /* invert CondMask wrt previous mask */ 5307 { 5308 uint prevMask; 5309 assert(mach->CondStackTop > 0); 5310 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5311 mach->CondMask = ~mach->CondMask & prevMask; 5312 UPDATE_EXEC_MASK(mach); 5313 5314 /* If no channels are taking ELSE, jump to ENDIF */ 5315 if (!mach->CondMask) 5316 *pc = inst->Label.Label; 5317 } 5318 break; 5319 5320 case TGSI_OPCODE_ENDIF: 5321 /* pop CondMask */ 5322 assert(mach->CondStackTop > 0); 5323 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5324 UPDATE_EXEC_MASK(mach); 5325 break; 5326 5327 case TGSI_OPCODE_END: 5328 /* make sure we end primitives which haven't 5329 * been explicitly emitted */ 5330 conditional_emit_primitive(mach); 5331 /* halt execution */ 5332 *pc = -1; 5333 break; 5334 5335 case TGSI_OPCODE_CEIL: 5336 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT); 5337 break; 5338 5339 case TGSI_OPCODE_I2F: 5340 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT); 5341 break; 5342 5343 case TGSI_OPCODE_NOT: 5344 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT); 5345 break; 5346 5347 case TGSI_OPCODE_TRUNC: 5348 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT); 5349 break; 5350 5351 case TGSI_OPCODE_SHL: 5352 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT); 5353 break; 5354 5355 case TGSI_OPCODE_AND: 5356 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT); 5357 break; 5358 5359 case TGSI_OPCODE_OR: 5360 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT); 5361 break; 5362 5363 case TGSI_OPCODE_MOD: 5364 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT); 5365 break; 5366 5367 case TGSI_OPCODE_XOR: 5368 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT); 5369 break; 5370 5371 case TGSI_OPCODE_TXF: 5372 exec_txf(mach, inst); 5373 break; 5374 5375 case 
TGSI_OPCODE_TXQ: 5376 exec_txq(mach, inst); 5377 break; 5378 5379 case TGSI_OPCODE_EMIT: 5380 emit_vertex(mach, inst); 5381 break; 5382 5383 case TGSI_OPCODE_ENDPRIM: 5384 emit_primitive(mach, inst); 5385 break; 5386 5387 case TGSI_OPCODE_BGNLOOP: 5388 /* push LoopMask and ContMasks */ 5389 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5390 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5391 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5392 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5393 5394 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5395 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5396 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5397 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5398 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5399 break; 5400 5401 case TGSI_OPCODE_ENDLOOP: 5402 /* Restore ContMask, but don't pop */ 5403 assert(mach->ContStackTop > 0); 5404 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5405 UPDATE_EXEC_MASK(mach); 5406 if (mach->ExecMask) { 5407 /* repeat loop: jump to instruction just past BGNLOOP */ 5408 assert(mach->LoopLabelStackTop > 0); 5409 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5410 } 5411 else { 5412 /* exit loop: pop LoopMask */ 5413 assert(mach->LoopStackTop > 0); 5414 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5415 /* pop ContMask */ 5416 assert(mach->ContStackTop > 0); 5417 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5418 assert(mach->LoopLabelStackTop > 0); 5419 --mach->LoopLabelStackTop; 5420 5421 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5422 } 5423 UPDATE_EXEC_MASK(mach); 5424 break; 5425 5426 case TGSI_OPCODE_BRK: 5427 exec_break(mach); 5428 break; 5429 5430 case TGSI_OPCODE_CONT: 5431 /* turn off cont channels for each enabled exec channel */ 5432 mach->ContMask &= ~mach->ExecMask; 5433 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5434 
UPDATE_EXEC_MASK(mach); 5435 break; 5436 5437 case TGSI_OPCODE_BGNSUB: 5438 /* no-op */ 5439 break; 5440 5441 case TGSI_OPCODE_ENDSUB: 5442 /* 5443 * XXX: This really should be a no-op. We should never reach this opcode. 5444 */ 5445 5446 assert(mach->CallStackTop > 0); 5447 mach->CallStackTop--; 5448 5449 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5450 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5451 5452 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5453 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5454 5455 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5456 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5457 5458 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5459 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5460 5461 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5462 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5463 5464 assert(mach->FuncStackTop > 0); 5465 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5466 5467 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5468 5469 UPDATE_EXEC_MASK(mach); 5470 break; 5471 5472 case TGSI_OPCODE_NOP: 5473 break; 5474 5475 case TGSI_OPCODE_F2I: 5476 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT); 5477 break; 5478 5479 case TGSI_OPCODE_FSEQ: 5480 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT); 5481 break; 5482 5483 case TGSI_OPCODE_FSGE: 5484 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT); 5485 break; 5486 5487 case TGSI_OPCODE_FSLT: 5488 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT); 5489 break; 5490 5491 case TGSI_OPCODE_FSNE: 5492 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT); 5493 break; 5494 5495 case TGSI_OPCODE_IDIV: 5496 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT); 5497 break; 5498 5499 case TGSI_OPCODE_IMAX: 5500 
exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT); 5501 break; 5502 5503 case TGSI_OPCODE_IMIN: 5504 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT); 5505 break; 5506 5507 case TGSI_OPCODE_INEG: 5508 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT); 5509 break; 5510 5511 case TGSI_OPCODE_ISGE: 5512 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT); 5513 break; 5514 5515 case TGSI_OPCODE_ISHR: 5516 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT); 5517 break; 5518 5519 case TGSI_OPCODE_ISLT: 5520 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT); 5521 break; 5522 5523 case TGSI_OPCODE_F2U: 5524 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT); 5525 break; 5526 5527 case TGSI_OPCODE_U2F: 5528 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT); 5529 break; 5530 5531 case TGSI_OPCODE_UADD: 5532 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT); 5533 break; 5534 5535 case TGSI_OPCODE_UDIV: 5536 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT); 5537 break; 5538 5539 case TGSI_OPCODE_UMAD: 5540 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT); 5541 break; 5542 5543 case TGSI_OPCODE_UMAX: 5544 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT); 5545 break; 5546 5547 case TGSI_OPCODE_UMIN: 5548 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT); 5549 break; 5550 5551 case TGSI_OPCODE_UMOD: 5552 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT); 5553 break; 5554 5555 case TGSI_OPCODE_UMUL: 5556 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT); 5557 break; 5558 5559 case TGSI_OPCODE_IMUL_HI: 5560 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT); 5561 break; 5562 5563 case TGSI_OPCODE_UMUL_HI: 5564 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT); 5565 break; 5566 5567 case TGSI_OPCODE_USEQ: 5568 exec_vector_binary(mach, inst, micro_useq, 
TGSI_EXEC_DATA_UINT); 5569 break; 5570 5571 case TGSI_OPCODE_USGE: 5572 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT); 5573 break; 5574 5575 case TGSI_OPCODE_USHR: 5576 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT); 5577 break; 5578 5579 case TGSI_OPCODE_USLT: 5580 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT); 5581 break; 5582 5583 case TGSI_OPCODE_USNE: 5584 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT); 5585 break; 5586 5587 case TGSI_OPCODE_SWITCH: 5588 exec_switch(mach, inst); 5589 break; 5590 5591 case TGSI_OPCODE_CASE: 5592 exec_case(mach, inst); 5593 break; 5594 5595 case TGSI_OPCODE_DEFAULT: 5596 exec_default(mach); 5597 break; 5598 5599 case TGSI_OPCODE_ENDSWITCH: 5600 exec_endswitch(mach); 5601 break; 5602 5603 case TGSI_OPCODE_SAMPLE_I: 5604 exec_txf(mach, inst); 5605 break; 5606 5607 case TGSI_OPCODE_SAMPLE_I_MS: 5608 exec_txf(mach, inst); 5609 break; 5610 5611 case TGSI_OPCODE_SAMPLE: 5612 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5613 break; 5614 5615 case TGSI_OPCODE_SAMPLE_B: 5616 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5617 break; 5618 5619 case TGSI_OPCODE_SAMPLE_C: 5620 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 5621 break; 5622 5623 case TGSI_OPCODE_SAMPLE_C_LZ: 5624 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5625 break; 5626 5627 case TGSI_OPCODE_SAMPLE_D: 5628 exec_sample_d(mach, inst); 5629 break; 5630 5631 case TGSI_OPCODE_SAMPLE_L: 5632 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5633 break; 5634 5635 case TGSI_OPCODE_GATHER4: 5636 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 5637 break; 5638 5639 case TGSI_OPCODE_SVIEWINFO: 5640 exec_txq(mach, inst); 5641 break; 5642 5643 case TGSI_OPCODE_SAMPLE_POS: 5644 assert(0); 5645 break; 5646 5647 case TGSI_OPCODE_SAMPLE_INFO: 5648 assert(0); 5649 break; 5650 5651 case TGSI_OPCODE_LOD: 5652 exec_lodq(mach, inst); 5653 break; 5654 5655 case TGSI_OPCODE_UARL: 5656 
exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT); 5657 break; 5658 5659 case TGSI_OPCODE_UCMP: 5660 exec_ucmp(mach, inst); 5661 break; 5662 5663 case TGSI_OPCODE_IABS: 5664 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT); 5665 break; 5666 5667 case TGSI_OPCODE_ISSG: 5668 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT); 5669 break; 5670 5671 case TGSI_OPCODE_TEX2: 5672 /* simple texture lookup */ 5673 /* src[0] = texcoord */ 5674 /* src[1] = compare */ 5675 /* src[2] = sampler unit */ 5676 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5677 break; 5678 case TGSI_OPCODE_TXB2: 5679 /* simple texture lookup */ 5680 /* src[0] = texcoord */ 5681 /* src[1] = bias */ 5682 /* src[2] = sampler unit */ 5683 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5684 break; 5685 case TGSI_OPCODE_TXL2: 5686 /* simple texture lookup */ 5687 /* src[0] = texcoord */ 5688 /* src[1] = lod */ 5689 /* src[2] = sampler unit */ 5690 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5691 break; 5692 5693 case TGSI_OPCODE_IBFE: 5694 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT); 5695 break; 5696 case TGSI_OPCODE_UBFE: 5697 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT); 5698 break; 5699 case TGSI_OPCODE_BFI: 5700 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT); 5701 break; 5702 case TGSI_OPCODE_BREV: 5703 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT); 5704 break; 5705 case TGSI_OPCODE_POPC: 5706 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT); 5707 break; 5708 case TGSI_OPCODE_LSB: 5709 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT); 5710 break; 5711 case TGSI_OPCODE_IMSB: 5712 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT); 5713 break; 5714 case TGSI_OPCODE_UMSB: 5715 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT); 5716 break; 5717 5718 case TGSI_OPCODE_F2D: 5719 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 
5720 break; 5721 5722 case TGSI_OPCODE_D2F: 5723 exec_64_2_t(mach, inst, micro_d2f); 5724 break; 5725 5726 case TGSI_OPCODE_DABS: 5727 exec_double_unary(mach, inst, micro_dabs); 5728 break; 5729 5730 case TGSI_OPCODE_DNEG: 5731 exec_double_unary(mach, inst, micro_dneg); 5732 break; 5733 5734 case TGSI_OPCODE_DADD: 5735 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 5736 break; 5737 5738 case TGSI_OPCODE_DDIV: 5739 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 5740 break; 5741 5742 case TGSI_OPCODE_DMUL: 5743 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 5744 break; 5745 5746 case TGSI_OPCODE_DMAX: 5747 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 5748 break; 5749 5750 case TGSI_OPCODE_DMIN: 5751 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 5752 break; 5753 5754 case TGSI_OPCODE_DSLT: 5755 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 5756 break; 5757 5758 case TGSI_OPCODE_DSGE: 5759 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 5760 break; 5761 5762 case TGSI_OPCODE_DSEQ: 5763 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 5764 break; 5765 5766 case TGSI_OPCODE_DSNE: 5767 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 5768 break; 5769 5770 case TGSI_OPCODE_DRCP: 5771 exec_double_unary(mach, inst, micro_drcp); 5772 break; 5773 5774 case TGSI_OPCODE_DSQRT: 5775 exec_double_unary(mach, inst, micro_dsqrt); 5776 break; 5777 5778 case TGSI_OPCODE_DRSQ: 5779 exec_double_unary(mach, inst, micro_drsq); 5780 break; 5781 5782 case TGSI_OPCODE_DMAD: 5783 exec_double_trinary(mach, inst, micro_dmad); 5784 break; 5785 5786 case TGSI_OPCODE_DFRAC: 5787 exec_double_unary(mach, inst, micro_dfrac); 5788 break; 5789 5790 case TGSI_OPCODE_DFLR: 5791 exec_double_unary(mach, inst, micro_dflr); 5792 break; 5793 5794 case TGSI_OPCODE_DLDEXP: 5795 exec_dldexp(mach, inst); 5796 break; 5797 5798 case 
TGSI_OPCODE_DFRACEXP: 5799 exec_dfracexp(mach, inst); 5800 break; 5801 5802 case TGSI_OPCODE_I2D: 5803 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT); 5804 break; 5805 5806 case TGSI_OPCODE_D2I: 5807 exec_64_2_t(mach, inst, micro_d2i); 5808 break; 5809 5810 case TGSI_OPCODE_U2D: 5811 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT); 5812 break; 5813 5814 case TGSI_OPCODE_D2U: 5815 exec_64_2_t(mach, inst, micro_d2u); 5816 break; 5817 5818 case TGSI_OPCODE_LOAD: 5819 exec_load(mach, inst); 5820 break; 5821 5822 case TGSI_OPCODE_STORE: 5823 exec_store(mach, inst); 5824 break; 5825 5826 case TGSI_OPCODE_ATOMUADD: 5827 case TGSI_OPCODE_ATOMXCHG: 5828 case TGSI_OPCODE_ATOMCAS: 5829 case TGSI_OPCODE_ATOMAND: 5830 case TGSI_OPCODE_ATOMOR: 5831 case TGSI_OPCODE_ATOMXOR: 5832 case TGSI_OPCODE_ATOMUMIN: 5833 case TGSI_OPCODE_ATOMUMAX: 5834 case TGSI_OPCODE_ATOMIMIN: 5835 case TGSI_OPCODE_ATOMIMAX: 5836 case TGSI_OPCODE_ATOMFADD: 5837 exec_atomop(mach, inst); 5838 break; 5839 5840 case TGSI_OPCODE_RESQ: 5841 exec_resq(mach, inst); 5842 break; 5843 case TGSI_OPCODE_BARRIER: 5844 case TGSI_OPCODE_MEMBAR: 5845 return TRUE; 5846 break; 5847 5848 case TGSI_OPCODE_I64ABS: 5849 exec_double_unary(mach, inst, micro_i64abs); 5850 break; 5851 5852 case TGSI_OPCODE_I64SSG: 5853 exec_double_unary(mach, inst, micro_i64sgn); 5854 break; 5855 5856 case TGSI_OPCODE_I64NEG: 5857 exec_double_unary(mach, inst, micro_i64neg); 5858 break; 5859 5860 case TGSI_OPCODE_U64SEQ: 5861 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 5862 break; 5863 5864 case TGSI_OPCODE_U64SNE: 5865 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 5866 break; 5867 5868 case TGSI_OPCODE_I64SLT: 5869 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 5870 break; 5871 case TGSI_OPCODE_U64SLT: 5872 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 5873 break; 5874 5875 case TGSI_OPCODE_I64SGE: 5876 exec_double_binary(mach, inst, 
micro_i64sge, TGSI_EXEC_DATA_UINT); 5877 break; 5878 case TGSI_OPCODE_U64SGE: 5879 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 5880 break; 5881 5882 case TGSI_OPCODE_I64MIN: 5883 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 5884 break; 5885 case TGSI_OPCODE_U64MIN: 5886 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 5887 break; 5888 case TGSI_OPCODE_I64MAX: 5889 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 5890 break; 5891 case TGSI_OPCODE_U64MAX: 5892 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 5893 break; 5894 case TGSI_OPCODE_U64ADD: 5895 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 5896 break; 5897 case TGSI_OPCODE_U64MUL: 5898 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 5899 break; 5900 case TGSI_OPCODE_U64SHL: 5901 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 5902 break; 5903 case TGSI_OPCODE_I64SHR: 5904 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 5905 break; 5906 case TGSI_OPCODE_U64SHR: 5907 exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 5908 break; 5909 case TGSI_OPCODE_U64DIV: 5910 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 5911 break; 5912 case TGSI_OPCODE_I64DIV: 5913 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 5914 break; 5915 case TGSI_OPCODE_U64MOD: 5916 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 5917 break; 5918 case TGSI_OPCODE_I64MOD: 5919 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 5920 break; 5921 5922 case TGSI_OPCODE_F2U64: 5923 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 5924 break; 5925 5926 case TGSI_OPCODE_F2I64: 5927 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 5928 break; 5929 5930 case TGSI_OPCODE_U2I64: 5931 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 5932 break; 5933 case TGSI_OPCODE_I2I64: 5934 exec_t_2_64(mach, inst, 
micro_i2i64, TGSI_EXEC_DATA_INT); 5935 break; 5936 5937 case TGSI_OPCODE_D2U64: 5938 exec_double_unary(mach, inst, micro_d2u64); 5939 break; 5940 5941 case TGSI_OPCODE_D2I64: 5942 exec_double_unary(mach, inst, micro_d2i64); 5943 break; 5944 5945 case TGSI_OPCODE_U642F: 5946 exec_64_2_t(mach, inst, micro_u642f); 5947 break; 5948 case TGSI_OPCODE_I642F: 5949 exec_64_2_t(mach, inst, micro_i642f); 5950 break; 5951 5952 case TGSI_OPCODE_U642D: 5953 exec_double_unary(mach, inst, micro_u642d); 5954 break; 5955 case TGSI_OPCODE_I642D: 5956 exec_double_unary(mach, inst, micro_i642d); 5957 break; 5958 case TGSI_OPCODE_INTERP_SAMPLE: 5959 exec_interp_at_sample(mach, inst); 5960 break; 5961 case TGSI_OPCODE_INTERP_OFFSET: 5962 exec_interp_at_offset(mach, inst); 5963 break; 5964 case TGSI_OPCODE_INTERP_CENTROID: 5965 exec_interp_at_centroid(mach, inst); 5966 break; 5967 default: 5968 assert( 0 ); 5969 } 5970 return FALSE; 5971} 5972 /* tgsi_exec_machine_setup_masks: put the machine's mask state into its start-of-run configuration. Clears KillMask and OutputVertexOffset, resets the per-stream geometry-shader counters when ShaderType is PIPE_SHADER_GEOMETRY, and loads CondMask, LoopMask, ContMask, FuncMask, ExecMask and Switch.mask with the default channel mask. It does not reset the control-flow stacks; it only asserts that they are already empty. */ 5973static void 5974tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 5975{ 5976 uint default_mask = 0xf; /* low four bits set: one bit per quad channel, all enabled */ 5977 5978 mach->KillMask = 0; 5979 mach->OutputVertexOffset = 0; 5980 5981 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 5982 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 5983 mach->OutputPrimCount[i] = 0; 5984 mach->Primitives[i][0] = 0; 5985 } 5986 /* GS runs on a single primitive for now */ 5987 default_mask = 0x1; 5988 } 5989 /* a non-zero NonHelperMask is left untouched; only an unset (0) value receives the default */ 5990 if (mach->NonHelperMask == 0) 5991 mach->NonHelperMask = default_mask; 5992 mach->CondMask = default_mask; 5993 mach->LoopMask = default_mask; 5994 mach->ContMask = default_mask; 5995 mach->FuncMask = default_mask; 5996 mach->ExecMask = default_mask; 5997 5998 mach->Switch.mask = default_mask; 5999 /* sanity checks only: every control-flow stack must already be empty at this point */ 6000 assert(mach->CondStackTop == 0); 6001 assert(mach->LoopStackTop == 0); 6002 assert(mach->ContStackTop == 0); 6003 assert(mach->SwitchStackTop == 0); 6004 assert(mach->BreakStackTop == 0); 6005 assert(mach->CallStackTop == 0); 6006} 6007 6008/** 6009 * Run TGSI interpreter.
6010 * \return bitmask of "alive" quad components 6011 */ 6012uint 6013tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6014{ 6015 uint i; 6016 6017 mach->pc = start_pc; 6018 6019 if (!start_pc) { 6020 tgsi_exec_machine_setup_masks(mach); 6021 6022 /* execute declarations (interpolants) */ 6023 for (i = 0; i < mach->NumDeclarations; i++) { 6024 exec_declaration( mach, mach->Declarations+i ); 6025 } 6026 } 6027 6028 { 6029#if DEBUG_EXECUTION 6030 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS]; 6031 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6032 uint inst = 1; 6033 6034 if (!start_pc) { 6035 memset(mach->Temps, 0, sizeof(temps)); 6036 if (mach->Outputs) 6037 memset(mach->Outputs, 0, sizeof(outputs)); 6038 memset(temps, 0, sizeof(temps)); 6039 memset(outputs, 0, sizeof(outputs)); 6040 } 6041#endif 6042 6043 /* execute instructions, until pc is set to -1 */ 6044 while (mach->pc != -1) { 6045 boolean barrier_hit; 6046#if DEBUG_EXECUTION 6047 uint i; 6048 6049 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6050#endif 6051 6052 assert(mach->pc < (int) mach->NumInstructions); 6053 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6054 6055 /* for compute shaders if we hit a barrier return now for later rescheduling */ 6056 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6057 return 0; 6058 6059#if DEBUG_EXECUTION 6060 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) { 6061 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6062 uint j; 6063 6064 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6065 debug_printf("TEMP[%2u] = ", i); 6066 for (j = 0; j < 4; j++) { 6067 if (j > 0) { 6068 debug_printf(" "); 6069 } 6070 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6071 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6072 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6073 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6074 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6075 } 6076 
} 6077 } 6078 if (mach->Outputs) { 6079 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6080 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6081 uint j; 6082 6083 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6084 debug_printf("OUT[%2u] = ", i); 6085 for (j = 0; j < 4; j++) { 6086 if (j > 0) { 6087 debug_printf(" "); 6088 } 6089 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6090 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6091 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6092 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6093 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6094 } 6095 } 6096 } 6097 } 6098#endif 6099 } 6100 } 6101 6102#if 0 6103 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6104 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6105 /* 6106 * Scale back depth component. 6107 */ 6108 for (i = 0; i < 4; i++) 6109 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6110 } 6111#endif 6112 6113 /* Strictly speaking, these assertions aren't really needed but they 6114 * can potentially catch some bugs in the control flow code. 6115 */ 6116 assert(mach->CondStackTop == 0); 6117 assert(mach->LoopStackTop == 0); 6118 assert(mach->ContStackTop == 0); 6119 assert(mach->SwitchStackTop == 0); 6120 assert(mach->BreakStackTop == 0); 6121 assert(mach->CallStackTop == 0); 6122 6123 return ~mach->KillMask; 6124} 6125