1/* 2 * Copyright © 2006 - 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_compiler.h" 25#include "brw_eu.h" 26#include "brw_prim.h" 27 28#include "dev/intel_debug.h" 29 30struct brw_sf_compile { 31 struct brw_codegen func; 32 struct brw_sf_prog_key key; 33 struct brw_sf_prog_data prog_data; 34 35 struct brw_reg pv; 36 struct brw_reg det; 37 struct brw_reg dx0; 38 struct brw_reg dx2; 39 struct brw_reg dy0; 40 struct brw_reg dy2; 41 42 /* z and 1/w passed in separately: 43 */ 44 struct brw_reg z[3]; 45 struct brw_reg inv_w[3]; 46 47 /* The vertices: 48 */ 49 struct brw_reg vert[3]; 50 51 /* Temporaries, allocated after last vertex reg. 52 */ 53 struct brw_reg inv_det; 54 struct brw_reg a1_sub_a0; 55 struct brw_reg a2_sub_a0; 56 struct brw_reg tmp; 57 58 struct brw_reg m1Cx; 59 struct brw_reg m2Cy; 60 struct brw_reg m3C0; 61 62 GLuint nr_verts; 63 GLuint nr_attr_regs; 64 GLuint nr_setup_regs; 65 int urb_entry_read_offset; 66 67 /** The last known value of the f0.0 flag register. */ 68 unsigned flag_value; 69 70 struct brw_vue_map vue_map; 71}; 72 73/** 74 * Determine the vue slot corresponding to the given half of the given register. 75 */ 76static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg, 77 int half) 78{ 79 return (reg + c->urb_entry_read_offset) * 2 + half; 80} 81 82/** 83 * Determine the varying corresponding to the given half of the given 84 * register. half=0 means the first half of a register, half=1 means the 85 * second half. 86 */ 87static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg, 88 int half) 89{ 90 int vue_slot = vert_reg_to_vue_slot(c, reg, half); 91 return c->vue_map.slot_to_varying[vue_slot]; 92} 93 94/** 95 * Determine the register corresponding to the given vue slot 96 */ 97static struct brw_reg get_vue_slot(struct brw_sf_compile *c, 98 struct brw_reg vert, 99 int vue_slot) 100{ 101 GLuint off = vue_slot / 2 - c->urb_entry_read_offset; 102 GLuint sub = vue_slot % 2; 103 104 return brw_vec4_grf(vert.nr + off, sub * 4); 105} 106 107/** 108 * Determine the register corresponding to the given varying. 109 */ 110static struct brw_reg get_varying(struct brw_sf_compile *c, 111 struct brw_reg vert, 112 GLuint varying) 113{ 114 int vue_slot = c->vue_map.varying_to_slot[varying]; 115 assert (vue_slot >= c->urb_entry_read_offset); 116 return get_vue_slot(c, vert, vue_slot); 117} 118 119static bool 120have_attr(struct brw_sf_compile *c, GLuint attr) 121{ 122 return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; 123} 124 125/*********************************************************************** 126 * Twoside lighting 127 */ 128static void copy_bfc( struct brw_sf_compile *c, 129 struct brw_reg vert ) 130{ 131 struct brw_codegen *p = &c->func; 132 GLuint i; 133 134 for (i = 0; i < 2; i++) { 135 if (have_attr(c, VARYING_SLOT_COL0+i) && 136 have_attr(c, VARYING_SLOT_BFC0+i)) 137 brw_MOV(p, 138 get_varying(c, vert, VARYING_SLOT_COL0+i), 139 get_varying(c, vert, VARYING_SLOT_BFC0+i)); 140 } 141} 142 143 144static void do_twoside_color( struct brw_sf_compile *c ) 145{ 146 struct brw_codegen *p = &c->func; 147 GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; 148 149 /* Already done in clip program: 150 */ 151 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 152 return; 153 154 /* If the vertex shader provides backface color, do the selection. The VS 155 * promises to set up the front color if the backface color is provided, but 156 * it may contain junk if never written to. 157 */ 158 if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) && 159 !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1))) 160 return; 161 162 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order 163 * to get all channels active inside the IF. In the clipping code 164 * we run with NoMask, so it's not an option and we can use 165 * BRW_EXECUTE_1 for all comparisons. 166 */ 167 brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); 168 brw_IF(p, BRW_EXECUTE_4); 169 { 170 switch (c->nr_verts) { 171 case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH; 172 case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH; 173 case 1: copy_bfc(c, c->vert[0]); 174 } 175 } 176 brw_ENDIF(p); 177} 178 179 180 181/*********************************************************************** 182 * Flat shading 183 */ 184 185static void copy_flatshaded_attributes(struct brw_sf_compile *c, 186 struct brw_reg dst, 187 struct brw_reg src) 188{ 189 struct brw_codegen *p = &c->func; 190 int i; 191 192 for (i = 0; i < c->vue_map.num_slots; i++) { 193 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { 194 brw_MOV(p, 195 get_vue_slot(c, dst, i), 196 get_vue_slot(c, src, i)); 197 } 198 } 199} 200 201static int count_flatshaded_attributes(struct brw_sf_compile *c) 202{ 203 int i; 204 int count = 0; 205 206 for (i = 0; i < c->vue_map.num_slots; i++) 207 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) 208 count++; 209 210 return count; 211} 212 213 214 215/* Need to use a computed jump to copy flatshaded attributes as the 216 * vertices are ordered according to y-coordinate before reaching this 217 * point, so the PV could be anywhere. 218 */ 219static void do_flatshade_triangle( struct brw_sf_compile *c ) 220{ 221 struct brw_codegen *p = &c->func; 222 GLuint nr; 223 GLuint jmpi = 1; 224 225 /* Already done in clip program: 226 */ 227 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 228 return; 229 230 if (p->devinfo->ver == 5) 231 jmpi = 2; 232 233 nr = count_flatshaded_attributes(c); 234 235 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); 236 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); 237 238 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); 239 copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); 240 brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE); 241 242 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); 243 copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); 244 brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE); 245 246 copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); 247 copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); 248} 249 250 251static void do_flatshade_line( struct brw_sf_compile *c ) 252{ 253 struct brw_codegen *p = &c->func; 254 GLuint nr; 255 GLuint jmpi = 1; 256 257 /* Already done in clip program: 258 */ 259 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 260 return; 261 262 if (p->devinfo->ver == 5) 263 jmpi = 2; 264 265 nr = count_flatshaded_attributes(c); 266 267 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); 268 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); 269 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); 270 271 brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE); 272 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); 273} 274 275 276/*********************************************************************** 277 * Triangle setup. 278 */ 279 280 281static void alloc_regs( struct brw_sf_compile *c ) 282{ 283 GLuint reg, i; 284 285 /* Values computed by fixed function unit: 286 */ 287 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); 288 c->det = brw_vec1_grf(1, 2); 289 c->dx0 = brw_vec1_grf(1, 3); 290 c->dx2 = brw_vec1_grf(1, 4); 291 c->dy0 = brw_vec1_grf(1, 5); 292 c->dy2 = brw_vec1_grf(1, 6); 293 294 /* z and 1/w passed in separately: 295 */ 296 c->z[0] = brw_vec1_grf(2, 0); 297 c->inv_w[0] = brw_vec1_grf(2, 1); 298 c->z[1] = brw_vec1_grf(2, 2); 299 c->inv_w[1] = brw_vec1_grf(2, 3); 300 c->z[2] = brw_vec1_grf(2, 4); 301 c->inv_w[2] = brw_vec1_grf(2, 5); 302 303 /* The vertices: 304 */ 305 reg = 3; 306 for (i = 0; i < c->nr_verts; i++) { 307 c->vert[i] = brw_vec8_grf(reg, 0); 308 reg += c->nr_attr_regs; 309 } 310 311 /* Temporaries, allocated after last vertex reg. 312 */ 313 c->inv_det = brw_vec1_grf(reg, 0); reg++; 314 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; 315 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; 316 c->tmp = brw_vec8_grf(reg, 0); reg++; 317 318 /* Note grf allocation: 319 */ 320 c->prog_data.total_grf = reg; 321 322 323 /* Outputs of this program - interpolation coefficients for 324 * rasterization: 325 */ 326 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); 327 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); 328 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); 329} 330 331 332static void copy_z_inv_w( struct brw_sf_compile *c ) 333{ 334 struct brw_codegen *p = &c->func; 335 GLuint i; 336 337 /* Copy both scalars with a single MOV: 338 */ 339 for (i = 0; i < c->nr_verts; i++) 340 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); 341} 342 343 344static void invert_det( struct brw_sf_compile *c) 345{ 346 /* Looks like we invert all 8 elements just to get 1/det in 347 * position 2 !?! 348 */ 349 gfx4_math(&c->func, 350 c->inv_det, 351 BRW_MATH_FUNCTION_INV, 352 0, 353 c->det, 354 BRW_MATH_PRECISION_FULL); 355 356} 357 358 359static bool 360calculate_masks(struct brw_sf_compile *c, 361 GLuint reg, 362 GLushort *pc, 363 GLushort *pc_persp, 364 GLushort *pc_linear) 365{ 366 bool is_last_attr = (reg == c->nr_setup_regs - 1); 367 enum glsl_interp_mode interp; 368 369 *pc_persp = 0; 370 *pc_linear = 0; 371 *pc = 0xf; 372 373 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)]; 374 if (interp == INTERP_MODE_SMOOTH) { 375 *pc_linear = 0xf; 376 *pc_persp = 0xf; 377 } else if (interp == INTERP_MODE_NOPERSPECTIVE) 378 *pc_linear = 0xf; 379 380 /* Maybe only process one attribute on the final round: 381 */ 382 if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) { 383 *pc |= 0xf0; 384 385 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)]; 386 if (interp == INTERP_MODE_SMOOTH) { 387 *pc_linear |= 0xf0; 388 *pc_persp |= 0xf0; 389 } else if (interp == INTERP_MODE_NOPERSPECTIVE) 390 *pc_linear |= 0xf0; 391 } 392 393 return is_last_attr; 394} 395 396/* Calculates the predicate control for which channels of a reg 397 * (containing 2 attrs) to do point sprite coordinate replacement on. 398 */ 399static uint16_t 400calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) 401{ 402 int varying1, varying2; 403 uint16_t pc = 0; 404 405 varying1 = vert_reg_to_varying(c, reg, 0); 406 if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) { 407 if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0))) 408 pc |= 0x0f; 409 } 410 if (varying1 == BRW_VARYING_SLOT_PNTC) 411 pc |= 0x0f; 412 413 varying2 = vert_reg_to_varying(c, reg, 1); 414 if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) { 415 if (c->key.point_sprite_coord_replace & (1 << (varying2 - 416 VARYING_SLOT_TEX0))) 417 pc |= 0xf0; 418 } 419 if (varying2 == BRW_VARYING_SLOT_PNTC) 420 pc |= 0xf0; 421 422 return pc; 423} 424 425static void 426set_predicate_control_flag_value(struct brw_codegen *p, 427 struct brw_sf_compile *c, 428 unsigned value) 429{ 430 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 431 432 if (value != 0xff) { 433 if (value != c->flag_value) { 434 brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); 435 c->flag_value = value; 436 } 437 438 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); 439 } 440} 441 442static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) 443{ 444 struct brw_codegen *p = &c->func; 445 GLuint i; 446 447 c->flag_value = 0xff; 448 c->nr_verts = 3; 449 450 if (allocate) 451 alloc_regs(c); 452 453 invert_det(c); 454 copy_z_inv_w(c); 455 456 if (c->key.do_twoside_color) 457 do_twoside_color(c); 458 459 if (c->key.contains_flat_varying) 460 do_flatshade_triangle(c); 461 462 463 for (i = 0; i < c->nr_setup_regs; i++) 464 { 465 /* Pair of incoming attributes: 466 */ 467 struct brw_reg a0 = offset(c->vert[0], i); 468 struct brw_reg a1 = offset(c->vert[1], i); 469 struct brw_reg a2 = offset(c->vert[2], i); 470 GLushort pc, pc_persp, pc_linear; 471 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 472 473 if (pc_persp) 474 { 475 set_predicate_control_flag_value(p, c, pc_persp); 476 brw_MUL(p, a0, a0, c->inv_w[0]); 477 brw_MUL(p, a1, a1, c->inv_w[1]); 478 brw_MUL(p, a2, a2, c->inv_w[2]); 479 } 480 481 482 /* Calculate coefficients for interpolated values: 483 */ 484 if (pc_linear) 485 { 486 set_predicate_control_flag_value(p, c, pc_linear); 487 488 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); 489 brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); 490 491 /* calculate dA/dx 492 */ 493 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); 494 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); 495 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); 496 497 /* calculate dA/dy 498 */ 499 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); 500 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); 501 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); 502 } 503 504 { 505 set_predicate_control_flag_value(p, c, pc); 506 /* start point for interpolation 507 */ 508 brw_MOV(p, c->m3C0, a0); 509 510 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in 511 * the send instruction: 512 */ 513 brw_urb_WRITE(p, 514 brw_null_reg(), 515 0, 516 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ 517 last ? BRW_URB_WRITE_EOT_COMPLETE 518 : BRW_URB_WRITE_NO_FLAGS, 519 4, /* msg len */ 520 0, /* response len */ 521 i*4, /* offset */ 522 BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ 523 } 524 } 525 526 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 527} 528 529 530 531static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) 532{ 533 struct brw_codegen *p = &c->func; 534 GLuint i; 535 536 c->flag_value = 0xff; 537 c->nr_verts = 2; 538 539 if (allocate) 540 alloc_regs(c); 541 542 invert_det(c); 543 copy_z_inv_w(c); 544 545 if (c->key.contains_flat_varying) 546 do_flatshade_line(c); 547 548 for (i = 0; i < c->nr_setup_regs; i++) 549 { 550 /* Pair of incoming attributes: 551 */ 552 struct brw_reg a0 = offset(c->vert[0], i); 553 struct brw_reg a1 = offset(c->vert[1], i); 554 GLushort pc, pc_persp, pc_linear; 555 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 556 557 if (pc_persp) 558 { 559 set_predicate_control_flag_value(p, c, pc_persp); 560 brw_MUL(p, a0, a0, c->inv_w[0]); 561 brw_MUL(p, a1, a1, c->inv_w[1]); 562 } 563 564 /* Calculate coefficients for position, color: 565 */ 566 if (pc_linear) { 567 set_predicate_control_flag_value(p, c, pc_linear); 568 569 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); 570 571 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); 572 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); 573 574 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); 575 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); 576 } 577 578 { 579 set_predicate_control_flag_value(p, c, pc); 580 581 /* start point for interpolation 582 */ 583 brw_MOV(p, c->m3C0, a0); 584 585 /* Copy m0..m3 to URB. 586 */ 587 brw_urb_WRITE(p, 588 brw_null_reg(), 589 0, 590 brw_vec8_grf(0, 0), 591 last ? BRW_URB_WRITE_EOT_COMPLETE 592 : BRW_URB_WRITE_NO_FLAGS, 593 4, /* msg len */ 594 0, /* response len */ 595 i*4, /* urb destination offset */ 596 BRW_URB_SWIZZLE_TRANSPOSE); 597 } 598 } 599 600 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 601} 602 603static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) 604{ 605 struct brw_codegen *p = &c->func; 606 GLuint i; 607 608 c->flag_value = 0xff; 609 c->nr_verts = 1; 610 611 if (allocate) 612 alloc_regs(c); 613 614 copy_z_inv_w(c); 615 for (i = 0; i < c->nr_setup_regs; i++) 616 { 617 struct brw_reg a0 = offset(c->vert[0], i); 618 GLushort pc, pc_persp, pc_linear, pc_coord_replace; 619 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 620 621 pc_coord_replace = calculate_point_sprite_mask(c, i); 622 pc_persp &= ~pc_coord_replace; 623 624 if (pc_persp) { 625 set_predicate_control_flag_value(p, c, pc_persp); 626 brw_MUL(p, a0, a0, c->inv_w[0]); 627 } 628 629 /* Point sprite coordinate replacement: A texcoord with this 630 * enabled gets replaced with the value (x, y, 0, 1) where x and 631 * y vary from 0 to 1 across the horizontal and vertical of the 632 * point. 633 */ 634 if (pc_coord_replace) { 635 set_predicate_control_flag_value(p, c, pc_coord_replace); 636 /* Calculate 1.0/PointWidth */ 637 gfx4_math(&c->func, 638 c->tmp, 639 BRW_MATH_FUNCTION_INV, 640 0, 641 c->dx0, 642 BRW_MATH_PRECISION_FULL); 643 644 brw_set_default_access_mode(p, BRW_ALIGN_16); 645 646 /* dA/dx, dA/dy */ 647 brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); 648 brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); 649 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); 650 if (c->key.sprite_origin_lower_left) { 651 brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); 652 } else { 653 brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); 654 } 655 656 /* attribute constant offset */ 657 brw_MOV(p, c->m3C0, brw_imm_f(0.0)); 658 if (c->key.sprite_origin_lower_left) { 659 brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); 660 } else { 661 brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); 662 } 663 664 brw_set_default_access_mode(p, BRW_ALIGN_1); 665 } 666 667 if (pc & ~pc_coord_replace) { 668 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); 669 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); 670 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); 671 brw_MOV(p, c->m3C0, a0); /* constant value */ 672 } 673 674 675 set_predicate_control_flag_value(p, c, pc); 676 /* Copy m0..m3 to URB. */ 677 brw_urb_WRITE(p, 678 brw_null_reg(), 679 0, 680 brw_vec8_grf(0, 0), 681 last ? BRW_URB_WRITE_EOT_COMPLETE 682 : BRW_URB_WRITE_NO_FLAGS, 683 4, /* msg len */ 684 0, /* response len */ 685 i*4, /* urb destination offset */ 686 BRW_URB_SWIZZLE_TRANSPOSE); 687 } 688 689 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 690} 691 692/* Points setup - several simplifications as all attributes are 693 * constant across the face of the point (point sprites excluded!) 694 */ 695static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) 696{ 697 struct brw_codegen *p = &c->func; 698 GLuint i; 699 700 c->flag_value = 0xff; 701 c->nr_verts = 1; 702 703 if (allocate) 704 alloc_regs(c); 705 706 copy_z_inv_w(c); 707 708 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ 709 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ 710 711 for (i = 0; i < c->nr_setup_regs; i++) 712 { 713 struct brw_reg a0 = offset(c->vert[0], i); 714 GLushort pc, pc_persp, pc_linear; 715 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 716 717 if (pc_persp) 718 { 719 /* This seems odd as the values are all constant, but the 720 * fragment shader will be expecting it: 721 */ 722 set_predicate_control_flag_value(p, c, pc_persp); 723 brw_MUL(p, a0, a0, c->inv_w[0]); 724 } 725 726 727 /* The delta values are always zero, just send the starting 728 * coordinate. Again, this is to fit in with the interpolation 729 * code in the fragment shader. 730 */ 731 { 732 set_predicate_control_flag_value(p, c, pc); 733 734 brw_MOV(p, c->m3C0, a0); /* constant value */ 735 736 /* Copy m0..m3 to URB. 737 */ 738 brw_urb_WRITE(p, 739 brw_null_reg(), 740 0, 741 brw_vec8_grf(0, 0), 742 last ? BRW_URB_WRITE_EOT_COMPLETE 743 : BRW_URB_WRITE_NO_FLAGS, 744 4, /* msg len */ 745 0, /* response len */ 746 i*4, /* urb destination offset */ 747 BRW_URB_SWIZZLE_TRANSPOSE); 748 } 749 } 750 751 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 752} 753 754static void brw_emit_anyprim_setup( struct brw_sf_compile *c ) 755{ 756 struct brw_codegen *p = &c->func; 757 struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); 758 struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 759 struct brw_reg primmask; 760 int jmp; 761 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); 762 763 c->nr_verts = 3; 764 alloc_regs(c); 765 766 primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); 767 768 brw_MOV(p, primmask, brw_imm_ud(1)); 769 brw_SHL(p, primmask, primmask, payload_prim); 770 771 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | 772 (1<<_3DPRIM_TRISTRIP) | 773 (1<<_3DPRIM_TRIFAN) | 774 (1<<_3DPRIM_TRISTRIP_REVERSE) | 775 (1<<_3DPRIM_POLYGON) | 776 (1<<_3DPRIM_RECTLIST) | 777 (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); 778 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 779 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 780 brw_emit_tri_setup(c, false); 781 brw_land_fwd_jump(p, jmp); 782 783 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | 784 (1<<_3DPRIM_LINESTRIP) | 785 (1<<_3DPRIM_LINELOOP) | 786 (1<<_3DPRIM_LINESTRIP_CONT) | 787 (1<<_3DPRIM_LINESTRIP_BF) | 788 (1<<_3DPRIM_LINESTRIP_CONT_BF))); 789 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 790 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 791 brw_emit_line_setup(c, false); 792 brw_land_fwd_jump(p, jmp); 793 794 brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); 795 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 796 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 797 brw_emit_point_sprite_setup(c, false); 798 brw_land_fwd_jump(p, jmp); 799 800 brw_emit_point_setup( c, false ); 801} 802 803const unsigned * 804brw_compile_sf(const struct brw_compiler *compiler, 805 void *mem_ctx, 806 const struct brw_sf_prog_key *key, 807 struct brw_sf_prog_data *prog_data, 808 struct brw_vue_map *vue_map, 809 unsigned *final_assembly_size) 810{ 811 struct brw_sf_compile c; 812 memset(&c, 0, sizeof(c)); 813 814 /* Begin the compilation: 815 */ 816 brw_init_codegen(&compiler->isa, &c.func, mem_ctx); 817 818 c.key = *key; 819 c.vue_map = *vue_map; 820 if (c.key.do_point_coord) { 821 /* 822 * gl_PointCoord is a FS instead of VS builtin variable, thus it's 823 * not included in c.vue_map generated in VS stage. Here we add 824 * it manually to let SF shader generate the needed interpolation 825 * coefficient for FS shader. 826 */ 827 c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; 828 c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; 829 } 830 c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; 831 c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; 832 c.nr_setup_regs = c.nr_attr_regs; 833 834 c.prog_data.urb_read_length = c.nr_attr_regs; 835 c.prog_data.urb_entry_size = c.nr_setup_regs * 2; 836 837 /* Which primitive? Or all three? 838 */ 839 switch (key->primitive) { 840 case BRW_SF_PRIM_TRIANGLES: 841 c.nr_verts = 3; 842 brw_emit_tri_setup( &c, true ); 843 break; 844 case BRW_SF_PRIM_LINES: 845 c.nr_verts = 2; 846 brw_emit_line_setup( &c, true ); 847 break; 848 case BRW_SF_PRIM_POINTS: 849 c.nr_verts = 1; 850 if (key->do_point_sprite) 851 brw_emit_point_sprite_setup( &c, true ); 852 else 853 brw_emit_point_setup( &c, true ); 854 break; 855 case BRW_SF_PRIM_UNFILLED_TRIS: 856 c.nr_verts = 3; 857 brw_emit_anyprim_setup( &c ); 858 break; 859 default: 860 unreachable("not reached"); 861 } 862 863 /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register 864 * source). Compacting would be difficult. 865 */ 866 /* brw_compact_instructions(&c.func, 0, 0, NULL); */ 867 868 *prog_data = c.prog_data; 869 870 const unsigned *program = brw_get_program(&c.func, final_assembly_size); 871 872 if (INTEL_DEBUG(DEBUG_SF)) { 873 fprintf(stderr, "sf:\n"); 874 brw_disassemble_with_labels(&compiler->isa, 875 program, 0, *final_assembly_size, stderr); 876 fprintf(stderr, "\n"); 877 } 878 879 return program; 880} 881