/*
 * Copyright © 2015 Intel Corporation
 * Copyright © 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/glsl/list.h"

#include "main/mtypes.h"
#include "main/shader_types.h"
#include "util/ralloc.h"

#include "prog_to_nir.h"
#include "prog_instruction.h"
#include "prog_parameter.h"
#include "prog_print.h"
#include "program.h"

/**
 * \file prog_to_nir.c
 *
 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing.  Full GLSL support should use glsl_to_nir
 * instead.
 */

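/* Per-translation state.  Holds the NIR builder plus the NIR variables and
 * registers that back each Mesa IR register file while the program is being
 * converted.
 */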
struct ptn_compile {
   const struct gl_context *ctx;
   const struct gl_program *prog;
   nir_builder build;
   bool error;

   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   nir_register **output_regs;
   nir_register **temp_regs;

   nir_register *addr_reg;
};

#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)

static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_mov_alu(b, src, 4);
}

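/* Translates a Mesa IR destination register into a nir_alu_dest pointing at
 * the matching NIR register (temporary, output, or address register).
 */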
static nir_alu_dest
ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
{
   nir_alu_dest dest;

   memset(&dest, 0, sizeof(dest));

   switch (prog_dst->File) {
   case PROGRAM_TEMPORARY:
      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
      break;
   case PROGRAM_OUTPUT:
      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
      break;
   case PROGRAM_ADDRESS:
      assert(prog_dst->Index == 0);
      dest.dest.reg.reg = c->addr_reg;
      break;
   case PROGRAM_UNDEFINED:
      break;
   }

   dest.write_mask = prog_dst->WriteMask;
   dest.saturate = false;

   assert(!prog_dst->RelAddr);

   return dest;
}

static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_variable *var = c->input_vars[prog_src->Index];
      src.src = nir_src_for_ssa(nir_load_var(b, var));
      break;
   }
   case PROGRAM_SYSTEM_VALUE: {
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);

      nir_variable *var = c->sysval_vars[prog_src->Index];
      src.src = nir_src_for_ssa(nir_load_var(b, var));
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
            float *v = (float *) plist->ParameterValues + pvo;
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         FALLTHROUGH;
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);

         nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
         if (prog_src->RelAddr)
            index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
         deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));

         src.src = nir_src_for_ssa(nir_load_deref(b, deref));
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_mov_alu(b, src, 4);

      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}

static void
ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
   unsigned i;

   for (i = 0; i < num_srcs; i++)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
   nir_builder_instr_insert(b, &instr->instr);
}

static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}

static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}

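/* ARL - Address Register Load
 *  addr.x = \lfloor src.x\rfloor
 * The floored value is what relative parameter addressing indexes with.
 */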
static void
ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}

/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 */
static void
ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *srcx = ptn_channel(b, src[0], X);

   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static void
ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);

   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
   ptn_move_dest_masked(b, dest,
                        nir_fmul(b, abs_srcx,
                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* DST - Distance Vector
 *  dst.x = 1.0
 *  dst.y = src0.y \times src1.y
 *  dst.z = src0.z
 *  dst.w = src1.w
 */
static void
ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_mov(b, src[0]), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_mov(b, src[1]), WRITEMASK_W);
}

/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0.0
 *  dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      nir_ssa_def *z = nir_bcsel(b,
                                 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                                 nir_imm_float(b, 0.0),
                                 pow);

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}

/* SCS - Sine Cosine
 *  dst.x = \cos{src.x}
 *  dst.y = \sin{src.x}
 *  dst.z = 0.0
 *  dst.w = 1.0
 */
static void
ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

static void
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
}

static void
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
}

static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
                                 nir_fmul(b,
                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

static void
ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
}

static void
ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
}

static void
ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
}

static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
}

static void
ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_bcsel(b,
                                    nir_flt(b, src[0], nir_imm_float(b, 0.0)),
                                    src[1], src[2]));
}

static void
ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
}

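/* KIL - Conditional Discard
 * Discards the fragment if any component of the source is less than zero.
 */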
static void
ptn_kil(nir_builder *b, nir_ssa_def **src)
{
   /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
   b->exact = true;
   nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
   b->exact = false;

   nir_discard_if(b, cmp);
}

enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
{
   *is_array = false;

   switch (index) {
   case TEXTURE_2D_MULTISAMPLE_INDEX:
      return GLSL_SAMPLER_DIM_MS;
   case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
      *is_array = true;
      return GLSL_SAMPLER_DIM_MS;
   case TEXTURE_BUFFER_INDEX:
      return GLSL_SAMPLER_DIM_BUF;
   case TEXTURE_1D_INDEX:
      return GLSL_SAMPLER_DIM_1D;
   case TEXTURE_2D_INDEX:
      return GLSL_SAMPLER_DIM_2D;
   case TEXTURE_3D_INDEX:
      return GLSL_SAMPLER_DIM_3D;
   case TEXTURE_CUBE_INDEX:
      return GLSL_SAMPLER_DIM_CUBE;
   case TEXTURE_CUBE_ARRAY_INDEX:
      *is_array = true;
      return GLSL_SAMPLER_DIM_CUBE;
   case TEXTURE_RECT_INDEX:
      return GLSL_SAMPLER_DIM_RECT;
   case TEXTURE_1D_ARRAY_INDEX:
      *is_array = true;
      return GLSL_SAMPLER_DIM_1D;
   case TEXTURE_2D_ARRAY_INDEX:
      *is_array = true;
      return GLSL_SAMPLER_DIM_2D;
   case TEXTURE_EXTERNAL_INDEX:
      return GLSL_SAMPLER_DIM_EXTERNAL;
   case NUM_TEXTURE_TARGETS:
      break;
   }
   unreachable("unknown texture target");
}

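/* Translates TEX/TXB/TXD/TXL/TXP into a nir_tex_instr.  The sampler uniform
 * for each texture unit is created lazily on first use, and the instruction's
 * writemask is resolved with a final masked move.
 */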
static void
ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Deref sources */
   num_srcs += 2;

   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float32;
   instr->is_shadow = prog_inst->TexShadow;

   bool is_array;
   instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);

   instr->coord_components =
      glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);

   nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
   if (!var) {
      const struct glsl_type *type =
         glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
      char samplerName[20];
      snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
      var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
      var->data.binding = prog_inst->TexSrcUnit;
      var->data.explicit_binding = true;
      c->sampler_vars[prog_inst->TexSrcUnit] = var;
   }

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_texture_deref;
   src_number++;
   instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
   instr->src[src_number].src_type = nir_tex_src_sampler_deref;
   src_number++;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}

static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_mov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,
   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};

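/* Emits the NIR equivalent of a single Mesa IR instruction.  Opcodes with a
 * one-to-one NIR mapping go through the op_trans table; everything else is
 * handled by the ptn_* helpers above.
 */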
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
   if (c->error)
      return;

   switch (op) {
   case OPCODE_RSQ:
      ptn_move_dest(b, dest,
                    nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
      break;

   case OPCODE_RCP:
      ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_EX2:
      ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_LG2:
      ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_POW:
      ptn_move_dest(b, dest, nir_fpow(b,
                                      ptn_channel(b, src[0], X),
                                      ptn_channel(b, src[1], X)));
      break;

   case OPCODE_COS:
      ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_SIN:
      ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
      break;

   case OPCODE_ARL:
      ptn_arl(b, dest, src);
      break;

   case OPCODE_EXP:
      ptn_exp(b, dest, src);
      break;

   case OPCODE_LOG:
      ptn_log(b, dest, src);
      break;

   case OPCODE_LRP:
      ptn_lrp(b, dest, src);
      break;

   case OPCODE_MAD:
      ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
      break;

   case OPCODE_DST:
      ptn_dst(b, dest, src);
      break;

   case OPCODE_LIT:
      ptn_lit(b, dest, src);
      break;

   case OPCODE_XPD:
      ptn_xpd(b, dest, src);
      break;

   case OPCODE_DP2:
      ptn_dp2(b, dest, src);
      break;

   case OPCODE_DP3:
      ptn_dp3(b, dest, src);
      break;

   case OPCODE_DP4:
      ptn_dp4(b, dest, src);
      break;

   case OPCODE_DPH:
      ptn_dph(b, dest, src);
      break;

   case OPCODE_KIL:
      ptn_kil(b, src);
      break;

   case OPCODE_CMP:
      ptn_cmp(b, dest, src);
      break;

   case OPCODE_SCS:
      ptn_scs(b, dest, src);
      break;

   case OPCODE_SLT:
      ptn_slt(b, dest, src);
      break;

   case OPCODE_SGE:
      ptn_sge(b, dest, src);
      break;

   case OPCODE_TEX:
   case OPCODE_TXB:
   case OPCODE_TXD:
   case OPCODE_TXL:
   case OPCODE_TXP:
      ptn_tex(c, dest, src, prog_inst);
      break;

   case OPCODE_SWZ:
      /* Extended swizzles were already handled in ptn_get_src(). */
      ptn_alu(b, nir_op_mov, dest, src);
      break;

   case OPCODE_NOP:
      break;

   default:
      if (op_trans[op] != 0) {
         ptn_alu(b, op_trans[op], dest, src);
      } else {
         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
         abort();
      }
      break;
   }

   if (prog_inst->Saturate) {
      assert(prog_inst->Saturate);
      assert(!dest.dest.is_ssa);
      ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
   }
}

/**
 * Emits a NIR store intrinsic for each PROGRAM_OUTPUT value to the output
 * variables at the end of the shader.
 *
 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
 * written, because there's no output load intrinsic, so we couldn't handle
 * writemasks.
 */
static void
ptn_add_output_stores(struct ptn_compile *c)
{
   nir_builder *b = &c->build;

   nir_foreach_shader_out_variable(var, b->shader) {
      nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          var->data.location == FRAG_RESULT_DEPTH) {
         /* result.depth has this strange convention of being the .z component
          * of a vec4 with undefined .xyw components.  We resolve it to a
          * scalar to match GLSL's gl_FragDepth and the expectations of most
          * backends.
          */
         src = nir_channel(b, src, 2);
      }
      if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
          (var->data.location == VARYING_SLOT_FOGC ||
           var->data.location == VARYING_SLOT_PSIZ)) {
         /* result.{fogcoord,psiz} is a single component value */
         src = nir_channel(b, src, 0);
      }
      unsigned num_components = glsl_get_vector_elements(var->type);
      nir_store_var(b, var, src, (1 << num_components) - 1);
   }
}

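/* Creates the NIR variables and registers that back each Mesa IR register
 * file: shader inputs, system values, output registers plus their matching
 * output variables, temporaries, and the address register.
 */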
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   uint64_t inputs_read = c->prog->info.inputs_read;
   while (inputs_read) {
      const int i = u_bit_scan64(&inputs_read);

      if (c->ctx->Const.GLSLFragCoordIsSysVal &&
          shader->info.stage == MESA_SHADER_FRAGMENT &&
          i == VARYING_SLOT_POS) {
         nir_variable *var =
            nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
                                "frag_coord");
         var->data.location = SYSTEM_VALUE_FRAG_COORD;
         c->input_vars[i] = var;
         continue;
      }

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
                             ralloc_asprintf(shader, "in_%d", i));
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_variable *fullvar =
               nir_local_variable_create(b->impl, glsl_vec4_type(),
                                         "fogcoord_tmp");

            nir_store_var(b, fullvar,
                          nir_vec4(b, nir_load_var(b, var),
                                   nir_imm_float(b, 0.0),
                                   nir_imm_float(b, 0.0),
                                   nir_imm_float(b, 1.0)),
                          WRITEMASK_XYZW);

            /* We inserted the real input into the list so the driver has real
             * inputs, but we set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      c->input_vars[i] = var;
   }

   /* Create system value variables */
   int i;
   BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
      nir_variable *var =
         nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
                             ralloc_asprintf(shader, "sv_%d", i));
      var->data.location = i;
      var->data.index = 0;

      c->sysval_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = util_last_bit64(c->prog->info.outputs_written);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   uint64_t outputs_written = c->prog->info.outputs_written;
   while (outputs_written) {
      const int i = u_bit_scan64(&outputs_written);

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      const struct glsl_type *type;
      if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
          (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
         type = glsl_float_type();
      else
         type = glsl_vec4_type();

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_out, type,
                             ralloc_asprintf(shader, "out_%d", i));
      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *,
                                c->prog->arb.NumTemporaries);

   nir_register *reg;
   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program). */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;
}

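/* Entry point: translates the given ARB assembly program into a new
 * nir_shader.  Returns NULL on allocation failure or if a translation error
 * was recorded.
 */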
struct nir_shader *
prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
            const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;
   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   c->prog = prog;
   c->ctx = ctx;

   c->build = nir_builder_init_simple_shader(stage, options, NULL);

   /* Copy the shader_info from the gl_program */
   c->build.shader->info = prog->info;

   s = c->build.shader;

   if (prog->Parameters->NumParameters > 0) {
      const struct glsl_type *type =
         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
      c->parameters =
         nir_variable_create(s, nir_var_uniform, type,
                             prog->Parameters->Parameters[0].Name);
   }

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->arb.Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   ptn_add_output_stores(c);

   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
   s->info.num_textures = util_last_bit(prog->SamplersUsed);
   s->info.num_ubos = 0;
   s->info.num_abos = 0;
   s->info.num_ssbos = 0;
   s->info.num_images = 0;
   s->info.uses_texture_gather = false;
   s->info.clip_distance_array_size = 0;
   s->info.cull_distance_array_size = 0;
   s->info.separate_shader = false;
   s->info.io_lowered = false;
   s->info.internal = false;

fail:
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}