1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36#include "main/errors.h" 37#include "main/glheader.h" 38#include "main/mtypes.h" 39#include "main/macros.h" 40#include "main/enums.h" 41#include "main/context.h" 42#include "main/ffvertex_prog.h" 43#include "program/program.h" 44#include "program/prog_cache.h" 45#include "program/prog_instruction.h" 46#include "program/prog_parameter.h" 47#include "program/prog_print.h" 48#include "program/prog_statevars.h" 49#include "util/bitscan.h" 50 51#include "state_tracker/st_program.h" 52 53/** Max of number of lights and texture coord units */ 54#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 55 56struct state_key { 57 GLbitfield varying_vp_inputs; 58 59 unsigned fragprog_inputs_read:12; 60 61 unsigned light_color_material_mask:12; 62 unsigned light_global_enabled:1; 63 unsigned light_local_viewer:1; 64 unsigned light_twoside:1; 65 unsigned material_shininess_is_zero:1; 66 unsigned need_eye_coords:1; 67 unsigned normalize:1; 68 unsigned rescale_normals:1; 69 70 unsigned fog_distance_mode:2; 71 unsigned separate_specular:1; 72 unsigned point_attenuated:1; 73 74 struct { 75 unsigned char light_enabled:1; 76 unsigned char light_eyepos3_is_zero:1; 77 unsigned char light_spotcutoff_is_180:1; 78 unsigned char light_attenuated:1; 79 unsigned char texmat_enabled:1; 80 unsigned char coord_replace:1; 81 unsigned char texgen_enabled:1; 82 unsigned char texgen_mode0:4; 83 unsigned char texgen_mode1:4; 84 unsigned char texgen_mode2:4; 85 unsigned char texgen_mode3:4; 86 } unit[NUM_UNITS]; 87}; 88 89 90#define TXG_NONE 0 91#define TXG_OBJ_LINEAR 1 92#define TXG_EYE_LINEAR 2 93#define TXG_SPHERE_MAP 3 94#define TXG_REFLECTION_MAP 4 95#define TXG_NORMAL_MAP 5 96 97static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 98{ 99 if (!enabled) 100 return TXG_NONE; 101 102 switch (mode) { 103 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 104 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 105 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 106 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 107 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 108 default: return TXG_NONE; 109 } 110} 111 112#define FDM_EYE_RADIAL 0 113#define FDM_EYE_PLANE 1 114#define FDM_EYE_PLANE_ABS 2 115#define FDM_FROM_ARRAY 3 116 117static GLuint translate_fog_distance_mode(GLenum source, GLenum mode) 118{ 119 if (source == GL_FRAGMENT_DEPTH_EXT) { 120 switch (mode) { 121 case GL_EYE_RADIAL_NV: 122 return FDM_EYE_RADIAL; 123 case GL_EYE_PLANE: 124 return FDM_EYE_PLANE; 125 default: /* shouldn't happen; fall through to a sensible default */ 126 case GL_EYE_PLANE_ABSOLUTE_NV: 127 return FDM_EYE_PLANE_ABS; 128 } 129 } else { 130 return FDM_FROM_ARRAY; 131 } 132} 133 134static GLboolean check_active_shininess( struct gl_context *ctx, 135 const struct state_key *key, 136 GLuint side ) 137{ 138 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 139 140 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 141 (key->light_color_material_mask & (1 << attr))) 142 return GL_TRUE; 143 144 if (key->varying_vp_inputs & VERT_BIT_MAT(attr)) 145 return GL_TRUE; 146 147 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 148 return GL_TRUE; 149 150 return GL_FALSE; 151} 152 153 154static void make_state_key( struct gl_context *ctx, struct state_key *key ) 155{ 156 const struct gl_program *fp = ctx->FragmentProgram._Current; 157 GLbitfield mask; 158 159 memset(key, 0, sizeof(struct state_key)); 160 161 if (_mesa_hw_select_enabled(ctx)) { 162 /* GL_SELECT mode only need position calculation. 163 * glBegin/End use VERT_BIT_SELECT_RESULT_OFFSET for multi name stack in one draw. 164 * glDrawArrays may also be called without user shader, fallback to FF one. 165 */ 166 key->varying_vp_inputs = ctx->VertexProgram._VaryingInputs & 167 (VERT_BIT_POS | VERT_BIT_SELECT_RESULT_OFFSET); 168 return; 169 } 170 171 /* This now relies on texenvprogram.c being active: 172 */ 173 assert(fp); 174 175 key->need_eye_coords = ctx->_NeedEyeCoords; 176 177 key->fragprog_inputs_read = fp->info.inputs_read; 178 key->varying_vp_inputs = ctx->VertexProgram._VaryingInputs; 179 180 if (ctx->RenderMode == GL_FEEDBACK) { 181 /* make sure the vertprog emits color and tex0 */ 182 key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0); 183 } 184 185 if (ctx->Light.Enabled) { 186 key->light_global_enabled = 1; 187 188 if (ctx->Light.Model.LocalViewer) 189 key->light_local_viewer = 1; 190 191 if (ctx->Light.Model.TwoSide) 192 key->light_twoside = 1; 193 194 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 195 key->separate_specular = 1; 196 197 if (ctx->Light.ColorMaterialEnabled) { 198 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 199 } 200 201 mask = ctx->Light._EnabledLights; 202 while (mask) { 203 const int i = u_bit_scan(&mask); 204 struct gl_light_uniforms *lu = &ctx->Light.LightSource[i]; 205 206 key->unit[i].light_enabled = 1; 207 208 if (lu->EyePosition[3] == 0.0F) 209 key->unit[i].light_eyepos3_is_zero = 1; 210 211 if (lu->SpotCutoff == 180.0F) 212 key->unit[i].light_spotcutoff_is_180 = 1; 213 214 if (lu->ConstantAttenuation != 1.0F || 215 lu->LinearAttenuation != 0.0F || 216 lu->QuadraticAttenuation != 0.0F) 217 key->unit[i].light_attenuated = 1; 218 } 219 220 if (check_active_shininess(ctx, key, 0)) { 221 key->material_shininess_is_zero = 0; 222 } 223 else if (key->light_twoside && 224 check_active_shininess(ctx, key, 1)) { 225 key->material_shininess_is_zero = 0; 226 } 227 else { 228 key->material_shininess_is_zero = 1; 229 } 230 } 231 232 if (ctx->Transform.Normalize) 233 key->normalize = 1; 234 235 if (ctx->Transform.RescaleNormals) 236 key->rescale_normals = 1; 237 238 /* Only distinguish fog parameters if we actually need */ 239 if (key->fragprog_inputs_read & VARYING_BIT_FOGC) 240 key->fog_distance_mode = 241 translate_fog_distance_mode(ctx->Fog.FogCoordinateSource, 242 ctx->Fog.FogDistanceMode); 243 244 if (ctx->Point._Attenuated) 245 key->point_attenuated = 1; 246 247 mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled 248 | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace; 249 while (mask) { 250 const int i = u_bit_scan(&mask); 251 struct gl_fixedfunc_texture_unit *texUnit = 252 &ctx->Texture.FixedFuncUnit[i]; 253 254 if (ctx->Point.PointSprite) 255 if (ctx->Point.CoordReplace & (1u << i)) 256 key->unit[i].coord_replace = 1; 257 258 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 259 key->unit[i].texmat_enabled = 1; 260 261 if (texUnit->TexGenEnabled) { 262 key->unit[i].texgen_enabled = 1; 263 264 key->unit[i].texgen_mode0 = 265 translate_texgen( texUnit->TexGenEnabled & (1<<0), 266 texUnit->GenS.Mode ); 267 key->unit[i].texgen_mode1 = 268 translate_texgen( texUnit->TexGenEnabled & (1<<1), 269 texUnit->GenT.Mode ); 270 key->unit[i].texgen_mode2 = 271 translate_texgen( texUnit->TexGenEnabled & (1<<2), 272 texUnit->GenR.Mode ); 273 key->unit[i].texgen_mode3 = 274 translate_texgen( texUnit->TexGenEnabled & (1<<3), 275 texUnit->GenQ.Mode ); 276 } 277 } 278} 279 280 281 282/* Very useful debugging tool - produces annotated listing of 283 * generated program with line/function references for each 284 * instruction back into this file: 285 */ 286#define DISASSEM 0 287 288 289/* Use uregs to represent registers internally, translate to Mesa's 290 * expected formats on emit. 291 * 292 * NOTE: These are passed by value extensively in this file rather 293 * than as usual by pointer reference. If this disturbs you, try 294 * remembering they are just 32bits in size. 295 * 296 * GCC is smart enough to deal with these dword-sized structures in 297 * much the same way as if I had defined them as dwords and was using 298 * macros to access and set the fields. This is much nicer and easier 299 * to evolve. 300 */ 301struct ureg { 302 GLuint file:4; 303 GLint idx:9; /* relative addressing may be negative */ 304 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 305 GLuint negate:1; 306 GLuint swz:12; 307 GLuint pad:6; 308}; 309 310 311struct tnl_program { 312 const struct state_key *state; 313 struct gl_program *program; 314 struct gl_program_parameter_list *state_params; 315 GLuint max_inst; /** number of instructions allocated for program */ 316 GLboolean mvp_with_dp4; 317 318 GLuint temp_in_use; 319 GLuint temp_reserved; 320 321 struct ureg eye_position; 322 struct ureg eye_position_z; 323 struct ureg eye_position_normalized; 324 struct ureg transformed_normal; 325 struct ureg identity; 326 327 GLuint materials; 328 GLuint color_materials; 329}; 330 331 332static const struct ureg undef = { 333 PROGRAM_UNDEFINED, 334 0, 335 0, 336 0, 337 0 338}; 339 340/* Local shorthand: 341 */ 342#define X SWIZZLE_X 343#define Y SWIZZLE_Y 344#define Z SWIZZLE_Z 345#define W SWIZZLE_W 346 347 348/* Construct a ureg: 349 */ 350static struct ureg make_ureg(GLuint file, GLint idx) 351{ 352 struct ureg reg; 353 reg.file = file; 354 reg.idx = idx; 355 reg.negate = 0; 356 reg.swz = SWIZZLE_NOOP; 357 reg.pad = 0; 358 return reg; 359} 360 361 362static struct ureg negate( struct ureg reg ) 363{ 364 reg.negate ^= 1; 365 return reg; 366} 367 368 369static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 370{ 371 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 372 GET_SWZ(reg.swz, y), 373 GET_SWZ(reg.swz, z), 374 GET_SWZ(reg.swz, w)); 375 return reg; 376} 377 378 379static struct ureg swizzle1( struct ureg reg, int x ) 380{ 381 return swizzle(reg, x, x, x, x); 382} 383 384 385static struct ureg get_temp( struct tnl_program *p ) 386{ 387 int bit = ffs( ~p->temp_in_use ); 388 if (!bit) { 389 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 390 exit(1); 391 } 392 393 if ((GLuint) bit > p->program->arb.NumTemporaries) 394 p->program->arb.NumTemporaries = bit; 395 396 p->temp_in_use |= 1<<(bit-1); 397 return make_ureg(PROGRAM_TEMPORARY, bit-1); 398} 399 400 401static struct ureg reserve_temp( struct tnl_program *p ) 402{ 403 struct ureg temp = get_temp( p ); 404 p->temp_reserved |= 1<<temp.idx; 405 return temp; 406} 407 408 409static void release_temp( struct tnl_program *p, struct ureg reg ) 410{ 411 if (reg.file == PROGRAM_TEMPORARY) { 412 p->temp_in_use &= ~(1<<reg.idx); 413 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 414 } 415} 416 417static void release_temps( struct tnl_program *p ) 418{ 419 p->temp_in_use = p->temp_reserved; 420} 421 422 423static struct ureg register_param4(struct tnl_program *p, 424 GLint s0, 425 GLint s1, 426 GLint s2, 427 GLint s3) 428{ 429 gl_state_index16 tokens[STATE_LENGTH]; 430 GLint idx; 431 tokens[0] = s0; 432 tokens[1] = s1; 433 tokens[2] = s2; 434 tokens[3] = s3; 435 idx = _mesa_add_state_reference(p->state_params, tokens); 436 return make_ureg(PROGRAM_STATE_VAR, idx); 437} 438 439 440#define register_param1(p,s0) register_param4(p,s0,0,0,0) 441#define register_param2(p,s0,s1) register_param4(p,s0,s1,0,0) 442#define register_param3(p,s0,s1,s2) register_param4(p,s0,s1,s2,0) 443 444 445 446/** 447 * \param input one of VERT_ATTRIB_x tokens. 448 */ 449static struct ureg register_input( struct tnl_program *p, GLuint input ) 450{ 451 assert(input < VERT_ATTRIB_MAX); 452 453 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 454 p->program->info.inputs_read |= (uint64_t)VERT_BIT(input); 455 return make_ureg(PROGRAM_INPUT, input); 456 } 457 else { 458 return register_param2(p, STATE_CURRENT_ATTRIB, input); 459 } 460} 461 462 463/** 464 * \param input one of VARYING_SLOT_x tokens. 465 */ 466static struct ureg register_output( struct tnl_program *p, GLuint output ) 467{ 468 p->program->info.outputs_written |= BITFIELD64_BIT(output); 469 return make_ureg(PROGRAM_OUTPUT, output); 470} 471 472 473static struct ureg register_const4f( struct tnl_program *p, 474 GLfloat s0, 475 GLfloat s1, 476 GLfloat s2, 477 GLfloat s3) 478{ 479 gl_constant_value values[4]; 480 GLint idx; 481 GLuint swizzle; 482 values[0].f = s0; 483 values[1].f = s1; 484 values[2].f = s2; 485 values[3].f = s3; 486 idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4, 487 &swizzle ); 488 assert(swizzle == SWIZZLE_NOOP); 489 return make_ureg(PROGRAM_CONSTANT, idx); 490} 491 492#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 493#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 494#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 495#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 496 497static GLboolean is_undef( struct ureg reg ) 498{ 499 return reg.file == PROGRAM_UNDEFINED; 500} 501 502 503static struct ureg get_identity_param( struct tnl_program *p ) 504{ 505 if (is_undef(p->identity)) 506 p->identity = register_const4f(p, 0,0,0,1); 507 508 return p->identity; 509} 510 511static void register_matrix_param5( struct tnl_program *p, 512 GLint s0, /* modelview, projection, etc */ 513 GLint s1, /* texture matrix number */ 514 GLint s2, /* first row */ 515 GLint s3, /* last row */ 516 struct ureg *matrix ) 517{ 518 GLint i; 519 520 /* This is a bit sad as the support is there to pull the whole 521 * matrix out in one go: 522 */ 523 for (i = 0; i <= s3 - s2; i++) 524 matrix[i] = register_param4(p, s0, s1, i, i); 525} 526 527 528static void emit_arg( struct prog_src_register *src, 529 struct ureg reg ) 530{ 531 src->File = reg.file; 532 src->Index = reg.idx; 533 src->Swizzle = reg.swz; 534 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 535 src->RelAddr = 0; 536 /* Check that bitfield sizes aren't exceeded */ 537 assert(src->Index == reg.idx); 538} 539 540 541static void emit_dst( struct prog_dst_register *dst, 542 struct ureg reg, GLuint mask ) 543{ 544 dst->File = reg.file; 545 dst->Index = reg.idx; 546 /* allow zero as a shorthand for xyzw */ 547 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 548 /* Check that bitfield sizes aren't exceeded */ 549 assert(dst->Index == reg.idx); 550} 551 552 553static void debug_insn( struct prog_instruction *inst, const char *fn, 554 GLuint line ) 555{ 556 if (DISASSEM) { 557 static const char *last_fn; 558 559 if (fn != last_fn) { 560 last_fn = fn; 561 printf("%s:\n", fn); 562 } 563 564 printf("%d:\t", line); 565 _mesa_print_instruction(inst); 566 } 567} 568 569 570static void emit_op3fn(struct tnl_program *p, 571 enum prog_opcode op, 572 struct ureg dest, 573 GLuint mask, 574 struct ureg src0, 575 struct ureg src1, 576 struct ureg src2, 577 const char *fn, 578 GLuint line) 579{ 580 GLuint nr; 581 struct prog_instruction *inst; 582 583 assert(p->program->arb.NumInstructions <= p->max_inst); 584 585 if (p->program->arb.NumInstructions == p->max_inst) { 586 /* need to extend the program's instruction array */ 587 struct prog_instruction *newInst; 588 589 /* double the size */ 590 p->max_inst *= 2; 591 592 newInst = 593 rzalloc_array(p->program, struct prog_instruction, p->max_inst); 594 if (!newInst) { 595 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 596 return; 597 } 598 599 _mesa_copy_instructions(newInst, p->program->arb.Instructions, 600 p->program->arb.NumInstructions); 601 602 ralloc_free(p->program->arb.Instructions); 603 604 p->program->arb.Instructions = newInst; 605 } 606 607 nr = p->program->arb.NumInstructions++; 608 609 inst = &p->program->arb.Instructions[nr]; 610 inst->Opcode = (enum prog_opcode) op; 611 612 emit_arg( &inst->SrcReg[0], src0 ); 613 emit_arg( &inst->SrcReg[1], src1 ); 614 emit_arg( &inst->SrcReg[2], src2 ); 615 616 emit_dst( &inst->DstReg, dest, mask ); 617 618 debug_insn(inst, fn, line); 619} 620 621 622#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 623 emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) 624 625#define emit_op2(p, op, dst, mask, src0, src1) \ 626 emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) 627 628#define emit_op1(p, op, dst, mask, src0) \ 629 emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) 630 631 632static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 633{ 634 if (reg.file == PROGRAM_TEMPORARY && 635 !(p->temp_reserved & (1<<reg.idx))) 636 return reg; 637 else { 638 struct ureg temp = get_temp(p); 639 emit_op1(p, OPCODE_MOV, temp, 0, reg); 640 return temp; 641 } 642} 643 644 645/* Currently no tracking performed of input/output/register size or 646 * active elements. Could be used to reduce these operations, as 647 * could the matrix type. 648 */ 649static void emit_matrix_transform_vec4( struct tnl_program *p, 650 struct ureg dest, 651 const struct ureg *mat, 652 struct ureg src) 653{ 654 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 655 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 656 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 657 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 658} 659 660 661/* This version is much easier to implement if writemasks are not 662 * supported natively on the target or (like SSE), the target doesn't 663 * have a clean/obvious dotproduct implementation. 664 */ 665static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 666 struct ureg dest, 667 const struct ureg *mat, 668 struct ureg src) 669{ 670 struct ureg tmp; 671 672 if (dest.file != PROGRAM_TEMPORARY) 673 tmp = get_temp(p); 674 else 675 tmp = dest; 676 677 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 678 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 679 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 680 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 681 682 if (dest.file != PROGRAM_TEMPORARY) 683 release_temp(p, tmp); 684} 685 686 687static void emit_matrix_transform_vec3( struct tnl_program *p, 688 struct ureg dest, 689 const struct ureg *mat, 690 struct ureg src) 691{ 692 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 693 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 694 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 695} 696 697 698static void emit_normalize_vec3( struct tnl_program *p, 699 struct ureg dest, 700 struct ureg src ) 701{ 702 struct ureg tmp = get_temp(p); 703 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 704 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 705 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 706 release_temp(p, tmp); 707} 708 709 710static void emit_passthrough( struct tnl_program *p, 711 GLuint input, 712 GLuint output ) 713{ 714 struct ureg out = register_output(p, output); 715 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 716} 717 718 719static struct ureg get_eye_position( struct tnl_program *p ) 720{ 721 if (is_undef(p->eye_position)) { 722 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 723 struct ureg modelview[4]; 724 725 p->eye_position = reserve_temp(p); 726 727 if (p->mvp_with_dp4) { 728 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 729 modelview ); 730 731 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 732 } 733 else { 734 register_matrix_param5( p, STATE_MODELVIEW_MATRIX_TRANSPOSE, 0, 0, 3, 735 modelview ); 736 737 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 738 } 739 } 740 741 return p->eye_position; 742} 743 744 745static struct ureg get_eye_position_z( struct tnl_program *p ) 746{ 747 if (!is_undef(p->eye_position)) 748 return swizzle1(p->eye_position, Z); 749 750 if (is_undef(p->eye_position_z)) { 751 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 752 struct ureg modelview[4]; 753 754 p->eye_position_z = reserve_temp(p); 755 756 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 757 modelview ); 758 759 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 760 } 761 762 return p->eye_position_z; 763} 764 765 766static struct ureg get_eye_position_normalized( struct tnl_program *p ) 767{ 768 if (is_undef(p->eye_position_normalized)) { 769 struct ureg eye = get_eye_position(p); 770 p->eye_position_normalized = reserve_temp(p); 771 emit_normalize_vec3(p, p->eye_position_normalized, eye); 772 } 773 774 return p->eye_position_normalized; 775} 776 777 778static struct ureg get_transformed_normal( struct tnl_program *p ) 779{ 780 if (is_undef(p->transformed_normal) && 781 !p->state->need_eye_coords && 782 !p->state->normalize && 783 !(p->state->need_eye_coords == p->state->rescale_normals)) 784 { 785 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 786 } 787 else if (is_undef(p->transformed_normal)) 788 { 789 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 790 struct ureg mvinv[3]; 791 struct ureg transformed_normal = reserve_temp(p); 792 793 if (p->state->need_eye_coords) { 794 register_matrix_param5( p, STATE_MODELVIEW_MATRIX_INVTRANS, 0, 0, 2, 795 mvinv ); 796 797 /* Transform to eye space: 798 */ 799 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 800 normal = transformed_normal; 801 } 802 803 /* Normalize/Rescale: 804 */ 805 if (p->state->normalize) { 806 emit_normalize_vec3( p, transformed_normal, normal ); 807 normal = transformed_normal; 808 } 809 else if (p->state->need_eye_coords == p->state->rescale_normals) { 810 /* This is already adjusted for eye/non-eye rendering: 811 */ 812 struct ureg rescale = register_param1(p, STATE_NORMAL_SCALE); 813 814 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 815 normal = transformed_normal; 816 } 817 818 assert(normal.file == PROGRAM_TEMPORARY); 819 p->transformed_normal = normal; 820 } 821 822 return p->transformed_normal; 823} 824 825 826static void build_hpos( struct tnl_program *p ) 827{ 828 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 829 struct ureg hpos = register_output( p, VARYING_SLOT_POS ); 830 struct ureg mvp[4]; 831 832 if (p->mvp_with_dp4) { 833 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 834 mvp ); 835 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 836 } 837 else { 838 register_matrix_param5( p, STATE_MVP_MATRIX_TRANSPOSE, 0, 0, 3, 839 mvp ); 840 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 841 } 842} 843 844 845static GLuint material_attrib( GLuint side, GLuint property ) 846{ 847 switch (property) { 848 case STATE_AMBIENT: 849 return MAT_ATTRIB_FRONT_AMBIENT + side; 850 case STATE_DIFFUSE: 851 return MAT_ATTRIB_FRONT_DIFFUSE + side; 852 case STATE_SPECULAR: 853 return MAT_ATTRIB_FRONT_SPECULAR + side; 854 case STATE_EMISSION: 855 return MAT_ATTRIB_FRONT_EMISSION + side; 856 case STATE_SHININESS: 857 return MAT_ATTRIB_FRONT_SHININESS + side; 858 default: 859 unreachable("invalid value"); 860 } 861} 862 863 864/** 865 * Get a bitmask of which material values vary on a per-vertex basis. 866 */ 867static void set_material_flags( struct tnl_program *p ) 868{ 869 p->color_materials = 0; 870 p->materials = 0; 871 872 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 873 p->materials = 874 p->color_materials = p->state->light_color_material_mask; 875 } 876 877 p->materials |= ((p->state->varying_vp_inputs & VERT_BIT_MAT_ALL) 878 >> VERT_ATTRIB_MAT(0)); 879} 880 881 882static struct ureg get_material( struct tnl_program *p, GLuint side, 883 GLuint property ) 884{ 885 GLuint attrib = material_attrib(side, property); 886 887 if (p->color_materials & (1<<attrib)) 888 return register_input(p, VERT_ATTRIB_COLOR0); 889 else if (p->materials & (1<<attrib)) { 890 /* Put material values in the GENERIC slots -- they are not used 891 * for anything in fixed function mode. 892 */ 893 return register_input( p, VERT_ATTRIB_MAT(attrib) ); 894 } 895 else 896 return register_param2(p, STATE_MATERIAL, attrib); 897} 898 899#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 900 MAT_BIT_FRONT_AMBIENT | \ 901 MAT_BIT_FRONT_DIFFUSE) << (side)) 902 903 904/** 905 * Either return a precalculated constant value or emit code to 906 * calculate these values dynamically in the case where material calls 907 * are present between begin/end pairs. 908 * 909 * Probably want to shift this to the program compilation phase - if 910 * we always emitted the calculation here, a smart compiler could 911 * detect that it was constant (given a certain set of inputs), and 912 * lift it out of the main loop. That way the programs created here 913 * would be independent of the vertex_buffer details. 914 */ 915static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 916{ 917 if (p->materials & SCENE_COLOR_BITS(side)) { 918 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 919 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 920 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 921 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 922 struct ureg tmp = make_temp(p, material_diffuse); 923 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 924 material_ambient, material_emission); 925 return tmp; 926 } 927 else 928 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 929} 930 931 932static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 933 GLuint side, GLuint property, bool *is_state_light ) 934{ 935 GLuint attrib = material_attrib(side, property); 936 if (p->materials & (1<<attrib)) { 937 struct ureg light_value = 938 register_param3(p, STATE_LIGHT, light, property); 939 *is_state_light = true; 940 return light_value; 941 } 942 else { 943 *is_state_light = false; 944 return register_param3(p, STATE_LIGHTPROD, light, attrib); 945 } 946} 947 948 949static struct ureg calculate_light_attenuation( struct tnl_program *p, 950 GLuint i, 951 struct ureg VPpli, 952 struct ureg dist ) 953{ 954 struct ureg attenuation = undef; 955 struct ureg att = undef; 956 957 /* Calculate spot attenuation: 958 */ 959 if (!p->state->unit[i].light_spotcutoff_is_180) { 960 struct ureg spot_dir_norm = register_param2(p, STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 961 struct ureg spot = get_temp(p); 962 struct ureg slt = get_temp(p); 963 964 attenuation = register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); 965 att = get_temp(p); 966 967 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 968 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 969 emit_op1(p, OPCODE_ABS, spot, 0, spot); 970 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 971 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 972 973 release_temp(p, spot); 974 release_temp(p, slt); 975 } 976 977 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 978 * 979 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 980 */ 981 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 982 if (is_undef(att)) 983 att = get_temp(p); 984 985 if (is_undef(attenuation)) 986 attenuation = register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); 987 988 /* 1/d,d,d,1/d */ 989 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 990 /* 1,d,d*d,1/d */ 991 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 992 /* 1/dist-atten */ 993 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 994 995 if (!p->state->unit[i].light_spotcutoff_is_180) { 996 /* dist-atten */ 997 emit_op1(p, OPCODE_RCP, dist, 0, dist); 998 /* spot-atten * dist-atten */ 999 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 1000 } 1001 else { 1002 /* dist-atten */ 1003 emit_op1(p, OPCODE_RCP, att, 0, dist); 1004 } 1005 } 1006 1007 return att; 1008} 1009 1010 1011/** 1012 * Compute: 1013 * lit.y = MAX(0, dots.x) 1014 * lit.z = SLT(0, dots.x) 1015 */ 1016static void emit_degenerate_lit( struct tnl_program *p, 1017 struct ureg lit, 1018 struct ureg dots ) 1019{ 1020 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1021 1022 /* Note that lit.x & lit.w will not be examined. Note also that 1023 * dots.xyzw == dots.xxxx. 1024 */ 1025 1026 /* MAX lit, id, dots; 1027 */ 1028 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1029 1030 /* result[2] = (in > 0 ? 1 : 0) 1031 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1032 */ 1033 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1034} 1035 1036 1037/* Need to add some addtional parameters to allow lighting in object 1038 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1039 * space lighting. 1040 */ 1041static void build_lighting( struct tnl_program *p ) 1042{ 1043 const GLboolean twoside = p->state->light_twoside; 1044 const GLboolean separate = p->state->separate_specular; 1045 GLuint nr_lights = 0, count = 0; 1046 struct ureg normal = get_transformed_normal(p); 1047 struct ureg lit = get_temp(p); 1048 struct ureg dots = get_temp(p); 1049 struct ureg _col0 = undef, _col1 = undef; 1050 struct ureg _bfc0 = undef, _bfc1 = undef; 1051 GLuint i; 1052 1053 /* 1054 * NOTE: 1055 * dots.x = dot(normal, VPpli) 1056 * dots.y = dot(normal, halfAngle) 1057 * dots.z = back.shininess 1058 * dots.w = front.shininess 1059 */ 1060 1061 for (i = 0; i < MAX_LIGHTS; i++) 1062 if (p->state->unit[i].light_enabled) 1063 nr_lights++; 1064 1065 set_material_flags(p); 1066 1067 { 1068 if (!p->state->material_shininess_is_zero) { 1069 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1070 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1071 release_temp(p, shininess); 1072 } 1073 1074 _col0 = make_temp(p, get_scenecolor(p, 0)); 1075 if (separate) 1076 _col1 = make_temp(p, get_identity_param(p)); 1077 else 1078 _col1 = _col0; 1079 } 1080 1081 if (twoside) { 1082 if (!p->state->material_shininess_is_zero) { 1083 /* Note that we negate the back-face specular exponent here. 1084 * The negation will be un-done later in the back-face code below. 1085 */ 1086 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1087 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1088 negate(swizzle1(shininess,X))); 1089 release_temp(p, shininess); 1090 } 1091 1092 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1093 if (separate) 1094 _bfc1 = make_temp(p, get_identity_param(p)); 1095 else 1096 _bfc1 = _bfc0; 1097 } 1098 1099 /* If no lights, still need to emit the scenecolor. 1100 */ 1101 { 1102 struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); 1103 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1104 } 1105 1106 if (separate) { 1107 struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); 1108 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1109 } 1110 1111 if (twoside) { 1112 struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); 1113 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1114 } 1115 1116 if (twoside && separate) { 1117 struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); 1118 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1119 } 1120 1121 if (nr_lights == 0) { 1122 release_temps(p); 1123 return; 1124 } 1125 1126 /* Declare light products first to place them sequentially next to each 1127 * other for optimal constant uploads. 1128 */ 1129 struct ureg lightprod_front[MAX_LIGHTS][3]; 1130 struct ureg lightprod_back[MAX_LIGHTS][3]; 1131 bool lightprod_front_is_state_light[MAX_LIGHTS][3]; 1132 bool lightprod_back_is_state_light[MAX_LIGHTS][3]; 1133 1134 for (i = 0; i < MAX_LIGHTS; i++) { 1135 if (p->state->unit[i].light_enabled) { 1136 lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT, 1137 &lightprod_front_is_state_light[i][0]); 1138 if (twoside) 1139 lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT, 1140 &lightprod_back_is_state_light[i][0]); 1141 1142 lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE, 1143 &lightprod_front_is_state_light[i][1]); 1144 if (twoside) 1145 lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE, 1146 &lightprod_back_is_state_light[i][1]); 1147 1148 lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR, 1149 &lightprod_front_is_state_light[i][2]); 1150 if (twoside) 1151 lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR, 1152 &lightprod_back_is_state_light[i][2]); 1153 } 1154 } 1155 1156 /* Add more variables now that we'll use later, so that they are nicely 1157 * sorted in the parameter list. 1158 */ 1159 for (i = 0; i < MAX_LIGHTS; i++) { 1160 if (p->state->unit[i].light_enabled) { 1161 if (p->state->unit[i].light_eyepos3_is_zero) 1162 register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); 1163 else 1164 register_param2(p, STATE_LIGHT_POSITION, i); 1165 } 1166 } 1167 for (i = 0; i < MAX_LIGHTS; i++) { 1168 if (p->state->unit[i].light_enabled && 1169 (!p->state->unit[i].light_spotcutoff_is_180 || 1170 (p->state->unit[i].light_attenuated && 1171 !p->state->unit[i].light_eyepos3_is_zero))) 1172 register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); 1173 } 1174 1175 for (i = 0; i < MAX_LIGHTS; i++) { 1176 if (p->state->unit[i].light_enabled) { 1177 struct ureg half = undef; 1178 struct ureg att = undef, VPpli = undef; 1179 struct ureg dist = undef; 1180 1181 count++; 1182 if (p->state->unit[i].light_eyepos3_is_zero) { 1183 VPpli = register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); 1184 } else { 1185 struct ureg Ppli = register_param2(p, STATE_LIGHT_POSITION, i); 1186 struct ureg V = get_eye_position(p); 1187 1188 VPpli = get_temp(p); 1189 dist = get_temp(p); 1190 1191 /* Calculate VPpli vector 1192 */ 1193 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1194 1195 /* Normalize VPpli. The dist value also used in 1196 * attenuation below. 1197 */ 1198 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1199 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1200 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1201 } 1202 1203 /* Calculate attenuation: 1204 */ 1205 att = calculate_light_attenuation(p, i, VPpli, dist); 1206 release_temp(p, dist); 1207 1208 /* Calculate viewer direction, or use infinite viewer: 1209 */ 1210 if (!p->state->material_shininess_is_zero) { 1211 if (p->state->light_local_viewer) { 1212 struct ureg eye_hat = get_eye_position_normalized(p); 1213 half = get_temp(p); 1214 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1215 emit_normalize_vec3(p, half, half); 1216 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1217 half = register_param2(p, STATE_LIGHT_HALF_VECTOR, i); 1218 } else { 1219 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1220 half = get_temp(p); 1221 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1222 emit_normalize_vec3(p, half, half); 1223 } 1224 } 1225 1226 /* Calculate dot products: 1227 */ 1228 if (p->state->material_shininess_is_zero) { 1229 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1230 } 1231 else { 1232 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1233 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1234 } 1235 1236 /* Front face lighting: 1237 */ 1238 { 1239 /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in 1240 * get_lightprod to avoid using too many temps. 1241 */ 1242 for (int j = 0; j < 3; j++) { 1243 if (lightprod_front_is_state_light[i][j]) { 1244 struct ureg material_value = get_material(p, 0, STATE_AMBIENT + j); 1245 struct ureg tmp = get_temp(p); 1246 emit_op2(p, OPCODE_MUL, tmp, 0, lightprod_front[i][j], material_value); 1247 lightprod_front[i][j] = tmp; 1248 } 1249 } 1250 1251 struct ureg ambient = lightprod_front[i][0]; 1252 struct ureg diffuse = lightprod_front[i][1]; 1253 struct ureg specular = lightprod_front[i][2]; 1254 struct ureg res0, res1; 1255 GLuint mask0, mask1; 1256 1257 if (count == nr_lights) { 1258 if (separate) { 1259 mask0 = WRITEMASK_XYZ; 1260 mask1 = WRITEMASK_XYZ; 1261 res0 = register_output( p, VARYING_SLOT_COL0 ); 1262 res1 = register_output( p, VARYING_SLOT_COL1 ); 1263 } 1264 else { 1265 mask0 = 0; 1266 mask1 = WRITEMASK_XYZ; 1267 res0 = _col0; 1268 res1 = register_output( p, VARYING_SLOT_COL0 ); 1269 } 1270 } 1271 else { 1272 mask0 = 0; 1273 mask1 = 0; 1274 res0 = _col0; 1275 res1 = _col1; 1276 } 1277 1278 if (!is_undef(att)) { 1279 /* light is attenuated by distance */ 1280 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1281 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1282 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1283 } 1284 else if (!p->state->material_shininess_is_zero) { 1285 /* there's a non-zero specular term */ 1286 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1287 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1288 } 1289 else { 1290 /* no attenutation, no specular */ 1291 emit_degenerate_lit(p, lit, dots); 1292 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1293 } 1294 1295 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1296 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1297 1298 release_temp(p, ambient); 1299 release_temp(p, diffuse); 1300 release_temp(p, specular); 1301 } 1302 1303 /* Back face lighting: 1304 */ 1305 if (twoside) { 1306 /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in 1307 * get_lightprod to avoid using too many temps. 1308 */ 1309 for (int j = 0; j < 3; j++) { 1310 if (lightprod_back_is_state_light[i][j]) { 1311 struct ureg material_value = get_material(p, 1, STATE_AMBIENT + j); 1312 struct ureg tmp = get_temp(p); 1313 emit_op2(p, OPCODE_MUL, tmp, 1, lightprod_back[i][j], material_value); 1314 lightprod_back[i][j] = tmp; 1315 } 1316 } 1317 1318 struct ureg ambient = lightprod_back[i][0]; 1319 struct ureg diffuse = lightprod_back[i][1]; 1320 struct ureg specular = lightprod_back[i][2]; 1321 struct ureg res0, res1; 1322 GLuint mask0, mask1; 1323 1324 if (count == nr_lights) { 1325 if (separate) { 1326 mask0 = WRITEMASK_XYZ; 1327 mask1 = WRITEMASK_XYZ; 1328 res0 = register_output( p, VARYING_SLOT_BFC0 ); 1329 res1 = register_output( p, VARYING_SLOT_BFC1 ); 1330 } 1331 else { 1332 mask0 = 0; 1333 mask1 = WRITEMASK_XYZ; 1334 res0 = _bfc0; 1335 res1 = register_output( p, VARYING_SLOT_BFC0 ); 1336 } 1337 } 1338 else { 1339 res0 = _bfc0; 1340 res1 = _bfc1; 1341 mask0 = 0; 1342 mask1 = 0; 1343 } 1344 1345 /* For the back face we need to negate the X and Y component 1346 * dot products. dots.Z has the negated back-face specular 1347 * exponent. We swizzle that into the W position. This 1348 * negation makes the back-face specular term positive again. 1349 */ 1350 dots = negate(swizzle(dots,X,Y,W,Z)); 1351 1352 if (!is_undef(att)) { 1353 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1354 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1355 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1356 } 1357 else if (!p->state->material_shininess_is_zero) { 1358 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1359 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1360 } 1361 else { 1362 emit_degenerate_lit(p, lit, dots); 1363 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1364 } 1365 1366 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1367 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1368 /* restore dots to its original state for subsequent lights 1369 * by negating and swizzling again. 1370 */ 1371 dots = negate(swizzle(dots,X,Y,W,Z)); 1372 1373 release_temp(p, ambient); 1374 release_temp(p, diffuse); 1375 release_temp(p, specular); 1376 } 1377 1378 release_temp(p, half); 1379 release_temp(p, VPpli); 1380 release_temp(p, att); 1381 } 1382 } 1383 1384 release_temps( p ); 1385} 1386 1387 1388static void build_fog( struct tnl_program *p ) 1389{ 1390 struct ureg fog = register_output(p, VARYING_SLOT_FOGC); 1391 struct ureg input; 1392 1393 switch (p->state->fog_distance_mode) { 1394 case FDM_EYE_RADIAL: { /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1395 struct ureg tmp = get_temp(p); 1396 input = get_eye_position(p); 1397 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, input, input); 1398 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 1399 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, tmp); 1400 break; 1401 } 1402 case FDM_EYE_PLANE: /* Z = Ze */ 1403 input = get_eye_position_z(p); 1404 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1405 break; 1406 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1407 input = get_eye_position_z(p); 1408 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1409 break; 1410 case FDM_FROM_ARRAY: 1411 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1412 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1413 break; 1414 default: 1415 assert(!"Bad fog mode in build_fog()"); 1416 break; 1417 } 1418 1419 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1420} 1421 1422 1423static void build_reflect_texgen( struct tnl_program *p, 1424 struct ureg dest, 1425 GLuint writemask ) 1426{ 1427 struct ureg normal = get_transformed_normal(p); 1428 struct ureg eye_hat = get_eye_position_normalized(p); 1429 struct ureg tmp = get_temp(p); 1430 1431 /* n.u */ 1432 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1433 /* 2n.u */ 1434 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1435 /* (-2n.u)n + u */ 1436 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1437 1438 release_temp(p, tmp); 1439} 1440 1441 1442static void build_sphere_texgen( struct tnl_program *p, 1443 struct ureg dest, 1444 GLuint writemask ) 1445{ 1446 struct ureg normal = get_transformed_normal(p); 1447 struct ureg eye_hat = get_eye_position_normalized(p); 1448 struct ureg tmp = get_temp(p); 1449 struct ureg half = register_scalar_const(p, .5); 1450 struct ureg r = get_temp(p); 1451 struct ureg inv_m = get_temp(p); 1452 struct ureg id = get_identity_param(p); 1453 1454 /* Could share the above calculations, but it would be 1455 * a fairly odd state for someone to set (both sphere and 1456 * reflection active for different texture coordinate 1457 * components. Of course - if two texture units enable 1458 * reflect and/or sphere, things start to tilt in favour 1459 * of seperating this out: 1460 */ 1461 1462 /* n.u */ 1463 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1464 /* 2n.u */ 1465 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1466 /* (-2n.u)n + u */ 1467 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1468 /* r + 0,0,1 */ 1469 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1470 /* rx^2 + ry^2 + (rz+1)^2 */ 1471 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1472 /* 2/m */ 1473 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1474 /* 1/m */ 1475 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1476 /* r/m + 1/2 */ 1477 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1478 1479 release_temp(p, tmp); 1480 release_temp(p, r); 1481 release_temp(p, inv_m); 1482} 1483 1484 1485static void build_texture_transform( struct tnl_program *p ) 1486{ 1487 GLuint i, j; 1488 1489 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1490 1491 if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i))) 1492 continue; 1493 1494 if (p->state->unit[i].coord_replace) 1495 continue; 1496 1497 if (p->state->unit[i].texgen_enabled || 1498 p->state->unit[i].texmat_enabled) { 1499 1500 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1501 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); 1502 struct ureg out_texgen = undef; 1503 1504 if (p->state->unit[i].texgen_enabled) { 1505 GLuint copy_mask = 0; 1506 GLuint sphere_mask = 0; 1507 GLuint reflect_mask = 0; 1508 GLuint normal_mask = 0; 1509 GLuint modes[4]; 1510 1511 if (texmat_enabled) 1512 out_texgen = get_temp(p); 1513 else 1514 out_texgen = out; 1515 1516 modes[0] = p->state->unit[i].texgen_mode0; 1517 modes[1] = p->state->unit[i].texgen_mode1; 1518 modes[2] = p->state->unit[i].texgen_mode2; 1519 modes[3] = p->state->unit[i].texgen_mode3; 1520 1521 for (j = 0; j < 4; j++) { 1522 switch (modes[j]) { 1523 case TXG_OBJ_LINEAR: { 1524 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1525 struct ureg plane = 1526 register_param3(p, STATE_TEXGEN, i, 1527 STATE_TEXGEN_OBJECT_S + j); 1528 1529 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1530 obj, plane ); 1531 break; 1532 } 1533 case TXG_EYE_LINEAR: { 1534 struct ureg eye = get_eye_position(p); 1535 struct ureg plane = 1536 register_param3(p, STATE_TEXGEN, i, 1537 STATE_TEXGEN_EYE_S + j); 1538 1539 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1540 eye, plane ); 1541 break; 1542 } 1543 case TXG_SPHERE_MAP: 1544 sphere_mask |= WRITEMASK_X << j; 1545 break; 1546 case TXG_REFLECTION_MAP: 1547 reflect_mask |= WRITEMASK_X << j; 1548 break; 1549 case TXG_NORMAL_MAP: 1550 normal_mask |= WRITEMASK_X << j; 1551 break; 1552 case TXG_NONE: 1553 copy_mask |= WRITEMASK_X << j; 1554 } 1555 } 1556 1557 if (sphere_mask) { 1558 build_sphere_texgen(p, out_texgen, sphere_mask); 1559 } 1560 1561 if (reflect_mask) { 1562 build_reflect_texgen(p, out_texgen, reflect_mask); 1563 } 1564 1565 if (normal_mask) { 1566 struct ureg normal = get_transformed_normal(p); 1567 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1568 } 1569 1570 if (copy_mask) { 1571 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1572 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1573 } 1574 } 1575 1576 if (texmat_enabled) { 1577 struct ureg texmat[4]; 1578 struct ureg in = (!is_undef(out_texgen) ? 1579 out_texgen : 1580 register_input(p, VERT_ATTRIB_TEX0+i)); 1581 if (p->mvp_with_dp4) { 1582 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1583 texmat ); 1584 emit_matrix_transform_vec4( p, out, texmat, in ); 1585 } 1586 else { 1587 register_matrix_param5( p, STATE_TEXTURE_MATRIX_TRANSPOSE, i, 0, 3, 1588 texmat ); 1589 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1590 } 1591 } 1592 1593 release_temps(p); 1594 } 1595 else { 1596 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); 1597 } 1598 } 1599} 1600 1601 1602/** 1603 * Point size attenuation computation. 1604 */ 1605static void build_atten_pointsize( struct tnl_program *p ) 1606{ 1607 struct ureg eye = get_eye_position_z(p); 1608 struct ureg state_size = register_param1(p, STATE_POINT_SIZE_CLAMPED); 1609 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1610 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1611 struct ureg ut = get_temp(p); 1612 1613 /* dist = |eyez| */ 1614 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1615 /* p1 + dist * (p2 + dist * p3); */ 1616 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1617 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1618 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1619 ut, swizzle1(state_attenuation, X)); 1620 1621 /* 1 / sqrt(factor) */ 1622 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1623 1624#if 0 1625 /* out = pointSize / sqrt(factor) */ 1626 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1627#else 1628 /* this is a good place to clamp the point size since there's likely 1629 * no hardware registers to clamp point size at rasterization time. 1630 */ 1631 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1632 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1633 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1634#endif 1635 1636 release_temp(p, ut); 1637} 1638 1639 1640/** 1641 * Pass-though per-vertex point size, from user's point size array. 1642 */ 1643static void build_array_pointsize( struct tnl_program *p ) 1644{ 1645 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1646 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1647 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1648} 1649 1650 1651static void build_tnl_program( struct tnl_program *p ) 1652{ 1653 /* Emit the program, starting with the modelview, projection transforms: 1654 */ 1655 build_hpos(p); 1656 1657 /* Lighting calculations: 1658 */ 1659 if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { 1660 if (p->state->light_global_enabled) 1661 build_lighting(p); 1662 else { 1663 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0) 1664 emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0); 1665 1666 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1) 1667 emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1); 1668 } 1669 } 1670 1671 if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC) 1672 build_fog(p); 1673 1674 if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY) 1675 build_texture_transform(p); 1676 1677 if (p->state->point_attenuated) 1678 build_atten_pointsize(p); 1679 else if (p->state->varying_vp_inputs & VERT_BIT_POINT_SIZE) 1680 build_array_pointsize(p); 1681 1682 if (p->state->varying_vp_inputs & VERT_BIT_SELECT_RESULT_OFFSET) 1683 emit_passthrough(p, VERT_ATTRIB_SELECT_RESULT_OFFSET, VARYING_SLOT_VAR0); 1684 1685 /* Finish up: 1686 */ 1687 emit_op1(p, OPCODE_END, undef, 0, undef); 1688 1689 /* Disassemble: 1690 */ 1691 if (DISASSEM) { 1692 printf ("\n"); 1693 } 1694} 1695 1696 1697static void 1698create_new_program( const struct state_key *key, 1699 struct gl_program *program, 1700 GLboolean mvp_with_dp4, 1701 GLuint max_temps) 1702{ 1703 struct tnl_program p; 1704 1705 memset(&p, 0, sizeof(p)); 1706 p.state = key; 1707 p.program = program; 1708 p.eye_position = undef; 1709 p.eye_position_z = undef; 1710 p.eye_position_normalized = undef; 1711 p.transformed_normal = undef; 1712 p.identity = undef; 1713 p.temp_in_use = 0; 1714 p.mvp_with_dp4 = mvp_with_dp4; 1715 1716 if (max_temps >= sizeof(int) * 8) 1717 p.temp_reserved = 0; 1718 else 1719 p.temp_reserved = ~((1<<max_temps)-1); 1720 1721 /* Start by allocating 32 instructions. 1722 * If we need more, we'll grow the instruction array as needed. 1723 */ 1724 p.max_inst = 32; 1725 p.program->arb.Instructions = 1726 rzalloc_array(program, struct prog_instruction, p.max_inst); 1727 p.program->String = NULL; 1728 p.program->arb.NumInstructions = 1729 p.program->arb.NumTemporaries = 1730 p.program->arb.NumParameters = 1731 p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0; 1732 p.program->Parameters = _mesa_new_parameter_list(); 1733 p.program->info.inputs_read = 0; 1734 p.program->info.outputs_written = 0; 1735 p.state_params = _mesa_new_parameter_list(); 1736 1737 build_tnl_program( &p ); 1738 1739 _mesa_add_separate_state_parameters(p.program, p.state_params); 1740 _mesa_free_parameter_list(p.state_params); 1741} 1742 1743 1744/** 1745 * Return a vertex program which implements the current fixed-function 1746 * transform/lighting/texgen operations. 1747 */ 1748struct gl_program * 1749_mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1750{ 1751 struct gl_program *prog; 1752 struct state_key key; 1753 1754 /* We only update ctx->VertexProgram._VaryingInputs when in VP_MODE_FF _VPMode */ 1755 assert(VP_MODE_FF == ctx->VertexProgram._VPMode); 1756 1757 /* Grab all the relevant state and put it in a single structure: 1758 */ 1759 make_state_key(ctx, &key); 1760 1761 /* Look for an already-prepared program for this state: 1762 */ 1763 prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, 1764 sizeof(key)); 1765 1766 if (!prog) { 1767 /* OK, we'll have to build a new one */ 1768 if (0) 1769 printf("Build new TNL program\n"); 1770 1771 prog = ctx->Driver.NewProgram(ctx, MESA_SHADER_VERTEX, 0, true); 1772 if (!prog) 1773 return NULL; 1774 1775 create_new_program( &key, prog, 1776 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, 1777 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); 1778 1779 st_program_string_notify(ctx, GL_VERTEX_PROGRAM_ARB, prog); 1780 1781 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key, 1782 sizeof(key), prog); 1783 } 1784 1785 return prog; 1786} 1787