1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * Binning code for triangles 30 */ 31 32#include "util/u_math.h" 33#include "util/u_memory.h" 34#include "util/u_rect.h" 35#include "util/u_sse.h" 36#include "lp_perf.h" 37#include "lp_setup_context.h" 38#include "lp_rast.h" 39#include "lp_state_fs.h" 40#include "lp_state_setup.h" 41#include "lp_context.h" 42 43#include <inttypes.h> 44 45 46#if defined(PIPE_ARCH_SSE) 47#include <emmintrin.h> 48#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 49#include <altivec.h> 50#include "util/u_pwr8.h" 51#endif 52 53#if !defined(PIPE_ARCH_SSE) 54 55static inline int 56subpixel_snap(float a) 57{ 58 return util_iround(FIXED_ONE * a); 59} 60 61#endif 62 63/* Position and area in fixed point coordinates */ 64struct fixed_position { 65 int32_t x[4]; 66 int32_t y[4]; 67 int32_t dx01; 68 int32_t dy01; 69 int32_t dx20; 70 int32_t dy20; 71}; 72 73 74/** 75 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays 76 * immediately after it. 77 * The memory is allocated from the per-scene pool, not per-tile. 78 * \param tri_size returns number of bytes allocated 79 * \param num_inputs number of fragment shader inputs 80 * \return pointer to triangle space 81 */ 82struct lp_rast_triangle * 83lp_setup_alloc_triangle(struct lp_scene *scene, 84 unsigned nr_inputs, 85 unsigned nr_planes, 86 unsigned *tri_size) 87{ 88 // add 1 for XYZW position 89 unsigned input_array_sz = (nr_inputs + 1) * sizeof(float[4]); 90 unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); 91 92 STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0); 93 94 *tri_size = (sizeof(struct lp_rast_triangle) + 95 3 * input_array_sz + // 3 = da + dadx + dady 96 plane_sz); 97 98 struct lp_rast_triangle *tri = lp_scene_alloc_aligned(scene, *tri_size, 16); 99 if (!tri) 100 return NULL; 101 102 tri->inputs.stride = input_array_sz; 103 104 { 105 ASSERTED char *a = (char *)tri; 106 ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes]; 107 108 assert(b - a == *tri_size); 109 } 110 111 return tri; 112} 113 114void 115lp_setup_print_vertex(struct lp_setup_context *setup, 116 const char *name, 117 const float (*v)[4]) 118{ 119 const struct lp_setup_variant_key *key = &setup->setup.variant->key; 120 121 debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", 122 name, 123 v[0][0], v[0][1], v[0][2], v[0][3]); 124 125 for (int i = 0; i < key->num_inputs; i++) { 126 const float *in = v[key->inputs[i].src_index]; 127 128 debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", 129 i, 130 name, key->inputs[i].src_index, 131 (key->inputs[i].usage_mask & 0x1) ? "x" : " ", 132 (key->inputs[i].usage_mask & 0x2) ? "y" : " ", 133 (key->inputs[i].usage_mask & 0x4) ? "z" : " ", 134 (key->inputs[i].usage_mask & 0x8) ? "w" : " "); 135 136 for (int j = 0; j < 4; j++) 137 if (key->inputs[i].usage_mask & (1<<j)) 138 debug_printf("%.5f ", in[j]); 139 140 debug_printf("\n"); 141 } 142} 143 144 145/** 146 * Print triangle vertex attribs (for debug). 147 */ 148void 149lp_setup_print_triangle(struct lp_setup_context *setup, 150 const float (*v0)[4], 151 const float (*v1)[4], 152 const float (*v2)[4]) 153{ 154 debug_printf("triangle\n"); 155 156 { 157 const float ex = v0[0][0] - v2[0][0]; 158 const float ey = v0[0][1] - v2[0][1]; 159 const float fx = v1[0][0] - v2[0][0]; 160 const float fy = v1[0][1] - v2[0][1]; 161 162 /* det = cross(e,f).z */ 163 const float det = ex * fy - ey * fx; 164 if (det < 0.0f) 165 debug_printf(" - ccw\n"); 166 else if (det > 0.0f) 167 debug_printf(" - cw\n"); 168 else 169 debug_printf(" - zero area\n"); 170 } 171 172 lp_setup_print_vertex(setup, "v0", v0); 173 lp_setup_print_vertex(setup, "v1", v1); 174 lp_setup_print_vertex(setup, "v2", v2); 175} 176 177 178#define MAX_PLANES 8 179static unsigned 180lp_rast_tri_tab[MAX_PLANES+1] = { 181 0, /* should be impossible */ 182 LP_RAST_OP_TRIANGLE_1, 183 LP_RAST_OP_TRIANGLE_2, 184 LP_RAST_OP_TRIANGLE_3, 185 LP_RAST_OP_TRIANGLE_4, 186 LP_RAST_OP_TRIANGLE_5, 187 LP_RAST_OP_TRIANGLE_6, 188 LP_RAST_OP_TRIANGLE_7, 189 LP_RAST_OP_TRIANGLE_8 190}; 191 192static unsigned 193lp_rast_32_tri_tab[MAX_PLANES+1] = { 194 0, /* should be impossible */ 195 LP_RAST_OP_TRIANGLE_32_1, 196 LP_RAST_OP_TRIANGLE_32_2, 197 LP_RAST_OP_TRIANGLE_32_3, 198 LP_RAST_OP_TRIANGLE_32_4, 199 LP_RAST_OP_TRIANGLE_32_5, 200 LP_RAST_OP_TRIANGLE_32_6, 201 LP_RAST_OP_TRIANGLE_32_7, 202 LP_RAST_OP_TRIANGLE_32_8 203}; 204 205 206static unsigned 207lp_rast_ms_tri_tab[MAX_PLANES+1] = { 208 0, /* should be impossible */ 209 LP_RAST_OP_MS_TRIANGLE_1, 210 LP_RAST_OP_MS_TRIANGLE_2, 211 LP_RAST_OP_MS_TRIANGLE_3, 212 LP_RAST_OP_MS_TRIANGLE_4, 213 LP_RAST_OP_MS_TRIANGLE_5, 214 LP_RAST_OP_MS_TRIANGLE_6, 215 LP_RAST_OP_MS_TRIANGLE_7, 216 LP_RAST_OP_MS_TRIANGLE_8 217}; 218 219 220/* 221 * Detect big primitives drawn with an alpha == 1.0. 222 * 223 * This is used when simulating anti-aliasing primitives in shaders, e.g., 224 * when drawing the windows client area in Aero's flip-3d effect. 225 */ 226static boolean 227check_opaque(const struct lp_setup_context *setup, 228 const float (*v1)[4], 229 const float (*v2)[4], 230 const float (*v3)[4]) 231{ 232 const struct lp_fragment_shader_variant *variant = 233 setup->fs.current.variant; 234 235 if (variant->opaque) 236 return TRUE; 237 238 if (!variant->potentially_opaque) 239 return FALSE; 240 241 const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3]; 242 if (alpha_info->file == TGSI_FILE_CONSTANT) { 243 const float *constants = setup->fs.current.jit_context.constants[0]; 244 float alpha = constants[alpha_info->u.index*4 + 245 alpha_info->swizzle]; 246 return alpha == 1.0f; 247 } 248 249 if (alpha_info->file == TGSI_FILE_INPUT) { 250 return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && 251 v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f && 252 v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f); 253 } 254 255 return FALSE; 256} 257 258 259/** 260 * Do basic setup for triangle rasterization and determine which 261 * framebuffer tiles are touched. Put the triangle in the scene's 262 * bins for the tiles which we overlap. 263 */ 264static boolean 265do_triangle_ccw(struct lp_setup_context *setup, 266 struct fixed_position *position, 267 const float (*v0)[4], 268 const float (*v1)[4], 269 const float (*v2)[4], 270 boolean frontfacing) 271{ 272 struct lp_scene *scene = setup->scene; 273 274 if (0) 275 lp_setup_print_triangle(setup, v0, v1, v2); 276 277 const float (*pv)[4]; 278 if (setup->flatshade_first) { 279 pv = v0; 280 } else { 281 pv = v2; 282 } 283 284 unsigned viewport_index = 0; 285 if (setup->viewport_index_slot > 0) { 286 unsigned *udata = (unsigned*)pv[setup->viewport_index_slot]; 287 viewport_index = lp_clamp_viewport_idx(*udata); 288 } 289 290 unsigned layer = 0; 291 if (setup->layer_slot > 0) { 292 layer = *(unsigned*)pv[setup->layer_slot]; 293 layer = MIN2(layer, scene->fb_max_layer); 294 } 295 296 /* Bounding rectangle (in pixels) */ 297 struct u_rect bbox; 298 { 299 /* Yes this is necessary to accurately calculate bounding boxes 300 * with the two fill-conventions we support. GL (normally) ends 301 * up needing a bottom-left fill convention, which requires 302 * slightly different rounding. 303 */ 304 int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; 305 306 /* Inclusive x0, exclusive x1 */ 307 bbox.x0 = MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER; 308 bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER; 309 310 /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */ 311 bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER; 312 bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER; 313 } 314 315 if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { 316 if (0) debug_printf("no intersection\n"); 317 LP_COUNT(nr_culled_tris); 318 return TRUE; 319 } 320 321 int max_szorig = ((bbox.x1 - (bbox.x0 & ~3)) | 322 (bbox.y1 - (bbox.y0 & ~3))); 323 boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32; 324#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 325 boolean pwr8_limit_check = (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 && 326 (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32; 327#endif 328 329 /* Can safely discard negative regions, but need to keep hold of 330 * information about when the triangle extends past screen 331 * boundaries. See trimmed_box in lp_setup_bin_triangle(). 332 */ 333 bbox.x0 = MAX2(bbox.x0, 0); 334 bbox.y0 = MAX2(bbox.y0, 0); 335 336 int nr_planes = 3; 337 338 /* 339 * Determine how many scissor planes we need, that is drop scissor 340 * edges if the bounding box of the tri is fully inside that edge. 341 */ 342 const struct u_rect *scissor = &setup->draw_regions[viewport_index]; 343 boolean s_planes[4]; 344 scissor_planes_needed(s_planes, &bbox, scissor); 345 nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; 346 347 unsigned tri_bytes; 348 const struct lp_setup_variant_key *key = &setup->setup.variant->key; 349 struct lp_rast_triangle *tri = 350 lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes); 351 if (!tri) 352 return FALSE; 353 354#ifdef DEBUG 355 tri->v[0][0] = v0[0][0]; 356 tri->v[1][0] = v1[0][0]; 357 tri->v[2][0] = v2[0][0]; 358 tri->v[0][1] = v0[0][1]; 359 tri->v[1][1] = v1[0][1]; 360 tri->v[2][1] = v2[0][1]; 361#endif 362 363 LP_COUNT(nr_tris); 364 365 /* 366 * Rotate the tri such that v0 is closest to the fb origin. 367 * This can give more accurate a0 value (which is at fb origin) 368 * when calculating the interpolants. 369 * It can't work when there's flat shading for instance in one 370 * of the attributes, hence restrict this to just a single attribute 371 * which is what causes some test failures. 372 * (This does not address the problem that interpolation may be 373 * inaccurate if gradients are relatively steep in small tris far 374 * away from the origin. It does however fix the (silly) wgf11rasterizer 375 * Interpolator test.) 376 * XXX This causes problems with mipgen -EmuTexture for not yet really 377 * understood reasons (if the vertices would be submitted in a different 378 * order, we'd also generate the same "wrong" results here without 379 * rotation). In any case, that we generate different values if a prim 380 * has the vertices rotated but is otherwise the same (which is due to 381 * numerical issues) is not a nice property. An additional problem by 382 * swapping the vertices here (which is possibly worse) is that 383 * the same primitive coming in twice might generate different values 384 * (in particular for z) due to the swapping potentially not happening 385 * both times, if the attributes to be interpolated are different. For now, 386 * just restrict this to not get used with dx9 (by checking pixel offset), 387 * could also restrict it further to only trigger with wgf11Interpolator 388 * Rasterizer test (the only place which needs it, with always the same 389 * vertices even). 390 */ 391 if ((LP_DEBUG & DEBUG_ACCURATE_A0) && 392 setup->pixel_offset == 0.5f && 393 key->num_inputs == 1 && 394 (key->inputs[0].interp == LP_INTERP_LINEAR || 395 key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) { 396 float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1]; 397 float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1]; 398 float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1]; 399 if (dist0 > dist1 && dist1 < dist2) { 400 const float (*vt)[4]; 401 int x, y; 402 vt = v0; 403 v0 = v1; 404 v1 = v2; 405 v2 = vt; 406 x = position->x[0]; 407 y = position->y[0]; 408 position->x[0] = position->x[1]; 409 position->y[0] = position->y[1]; 410 position->x[1] = position->x[2]; 411 position->y[1] = position->y[2]; 412 position->x[2] = x; 413 position->y[2] = y; 414 415 position->dx20 = position->dx01; 416 position->dy20 = position->dy01; 417 position->dx01 = position->x[0] - position->x[1]; 418 position->dy01 = position->y[0] - position->y[1]; 419 } else if (dist0 > dist2) { 420 const float (*vt)[4]; 421 int x, y; 422 vt = v0; 423 v0 = v2; 424 v2 = v1; 425 v1 = vt; 426 x = position->x[0]; 427 y = position->y[0]; 428 position->x[0] = position->x[2]; 429 position->y[0] = position->y[2]; 430 position->x[2] = position->x[1]; 431 position->y[2] = position->y[1]; 432 position->x[1] = x; 433 position->y[1] = y; 434 435 position->dx01 = position->dx20; 436 position->dy01 = position->dy20; 437 position->dx20 = position->x[2] - position->x[0]; 438 position->dy20 = position->y[2] - position->y[0]; 439 } 440 } 441 442 /* Setup parameter interpolants: 443 */ 444 setup->setup.variant->jit_function(v0, v1, v2, 445 frontfacing, 446 GET_A0(&tri->inputs), 447 GET_DADX(&tri->inputs), 448 GET_DADY(&tri->inputs), 449 &setup->setup.variant->key); 450 451 tri->inputs.frontfacing = frontfacing; 452 tri->inputs.disable = FALSE; 453 tri->inputs.is_blit = FALSE; 454 tri->inputs.layer = layer; 455 tri->inputs.viewport_index = viewport_index; 456 tri->inputs.view_index = setup->view_index; 457 458 if (0) 459 lp_dump_setup_coef(&setup->setup.variant->key, 460 GET_A0(&tri->inputs), 461 GET_DADX(&tri->inputs), 462 GET_DADY(&tri->inputs)); 463 464 struct lp_rast_plane *plane = GET_PLANES(tri); 465 466#if defined(PIPE_ARCH_SSE) 467 if (1) { 468 __m128i vertx, verty; 469 __m128i shufx, shufy; 470 __m128i dcdx, dcdy; 471 __m128i cdx02, cdx13, cdy02, cdy13, c02, c13; 472 __m128i c01, c23, unused; 473 __m128i dcdx_neg_mask; 474 __m128i dcdy_neg_mask; 475 __m128i dcdx_zero_mask; 476 __m128i top_left_flag, c_dec; 477 __m128i eo, p0, p1, p2; 478 __m128i zero = _mm_setzero_si128(); 479 480 vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */ 481 verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */ 482 483 shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); 484 shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); 485 486 dcdx = _mm_sub_epi32(verty, shufy); 487 dcdy = _mm_sub_epi32(vertx, shufx); 488 489 dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); 490 dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); 491 dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); 492 493 top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0); 494 495 c_dec = _mm_or_si128(dcdx_neg_mask, 496 _mm_and_si128(dcdx_zero_mask, 497 _mm_xor_si128(dcdy_neg_mask, 498 top_left_flag))); 499 500 /* 501 * 64 bit arithmetic. 502 * Note we need _signed_ mul (_mm_mul_epi32) which we emulate. 503 */ 504 cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13); 505 cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13); 506 c02 = _mm_sub_epi64(cdx02, cdy02); 507 c13 = _mm_sub_epi64(cdx13, cdy13); 508 c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec, 509 _MM_SHUFFLE(2,2,0,0))); 510 c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec, 511 _MM_SHUFFLE(3,3,1,1))); 512 513 /* 514 * Useful for very small fbs/tris (or fewer subpixel bits) only: 515 * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), 516 * mm_mullo_epi32(dcdy, verty)); 517 * 518 * c = _mm_sub_epi32(c, c_dec); 519 */ 520 521 /* Scale up to match c: 522 */ 523 dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); 524 dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); 525 526 /* 527 * Calculate trivial reject values: 528 * Note eo cannot overflow even if dcdx/dcdy would already have 529 * 31 bits (which they shouldn't have). This is because eo 530 * is never negative (albeit if we rely on that need to be careful...) 531 */ 532 eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), 533 _mm_and_si128(dcdx_neg_mask, dcdx)); 534 535 /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ 536 537 /* 538 * Pointless transpose which gets undone immediately in 539 * rasterization. 540 * It is actually difficult to do away with it - would essentially 541 * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations 542 * for this then would need to depend on the number of planes. 543 * The transpose is quite special here due to c being 64bit... 544 * The store has to be unaligned (unless we'd make the plane size 545 * a multiple of 128), and of course storing eo separately... 546 */ 547 c01 = _mm_unpacklo_epi64(c02, c13); 548 c23 = _mm_unpackhi_epi64(c02, c13); 549 transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy, 550 &p0, &p1, &p2, &unused); 551 _mm_storeu_si128((__m128i *)&plane[0], p0); 552 plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo); 553 _mm_storeu_si128((__m128i *)&plane[1], p1); 554 eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1)); 555 plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo); 556 _mm_storeu_si128((__m128i *)&plane[2], p2); 557 eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2)); 558 plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo); 559 } else 560#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN 561 /* 562 * XXX this code is effectively disabled for all practical purposes, 563 * as the allowed fb size is tiny if FIXED_ORDER is 8. 564 */ 565 if (setup->fb.width <= MAX_FIXED_LENGTH32 && 566 setup->fb.height <= MAX_FIXED_LENGTH32 && 567 pwr8_limit_check) { 568 unsigned int bottom_edge; 569 __m128i vertx, verty; 570 __m128i shufx, shufy; 571 __m128i dcdx, dcdy, c; 572 __m128i unused; 573 __m128i dcdx_neg_mask; 574 __m128i dcdy_neg_mask; 575 __m128i dcdx_zero_mask; 576 __m128i top_left_flag; 577 __m128i c_inc_mask, c_inc; 578 __m128i eo, p0, p1, p2; 579 __m128i_union vshuf_mask; 580 __m128i zero = vec_splats((unsigned char) 0); 581 alignas(16) int32_t temp_vec[4]; 582 583#if UTIL_ARCH_LITTLE_ENDIAN 584 vshuf_mask.i[0] = 0x07060504; 585 vshuf_mask.i[1] = 0x0B0A0908; 586 vshuf_mask.i[2] = 0x03020100; 587 vshuf_mask.i[3] = 0x0F0E0D0C; 588#else 589 vshuf_mask.i[0] = 0x00010203; 590 vshuf_mask.i[1] = 0x0C0D0E0F; 591 vshuf_mask.i[2] = 0x04050607; 592 vshuf_mask.i[3] = 0x08090A0B; 593#endif 594 595 /* vertex x coords */ 596 vertx = vec_load_si128((const uint32_t *) position->x); 597 /* vertex y coords */ 598 verty = vec_load_si128((const uint32_t *) position->y); 599 600 shufx = vec_perm (vertx, vertx, vshuf_mask.m128i); 601 shufy = vec_perm (verty, verty, vshuf_mask.m128i); 602 603 dcdx = vec_sub_epi32(verty, shufy); 604 dcdy = vec_sub_epi32(vertx, shufx); 605 606 dcdx_neg_mask = vec_srai_epi32(dcdx, 31); 607 dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero); 608 dcdy_neg_mask = vec_srai_epi32(dcdy, 31); 609 610 bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0; 611 top_left_flag = (__m128i) vec_splats(bottom_edge); 612 613 c_inc_mask = vec_or(dcdx_neg_mask, 614 vec_and(dcdx_zero_mask, 615 vec_xor(dcdy_neg_mask, 616 top_left_flag))); 617 618 c_inc = vec_srli_epi32(c_inc_mask, 31); 619 620 c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx), 621 vec_mullo_epi32(dcdy, verty)); 622 623 c = vec_add_epi32(c, c_inc); 624 625 /* Scale up to match c: 626 */ 627 dcdx = vec_slli_epi32(dcdx, FIXED_ORDER); 628 dcdy = vec_slli_epi32(dcdy, FIXED_ORDER); 629 630 /* Calculate trivial reject values: 631 */ 632 eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy), 633 vec_and(dcdx_neg_mask, dcdx)); 634 635 /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ 636 637 /* Pointless transpose which gets undone immediately in 638 * rasterization: 639 */ 640 transpose4_epi32(&c, &dcdx, &dcdy, &eo, 641 &p0, &p1, &p2, &unused); 642 643#define STORE_PLANE(plane, vec) do { \ 644 vec_store_si128((uint32_t *)&temp_vec, vec); \ 645 plane.c = (int64_t)temp_vec[0]; \ 646 plane.dcdx = temp_vec[1]; \ 647 plane.dcdy = temp_vec[2]; \ 648 plane.eo = temp_vec[3]; \ 649 } while(0) 650 651 STORE_PLANE(plane[0], p0); 652 STORE_PLANE(plane[1], p1); 653 STORE_PLANE(plane[2], p2); 654#undef STORE_PLANE 655 } else 656#endif 657 { 658 plane[0].dcdy = position->dx01; 659 plane[1].dcdy = position->x[1] - position->x[2]; 660 plane[2].dcdy = position->dx20; 661 plane[0].dcdx = position->dy01; 662 plane[1].dcdx = position->y[1] - position->y[2]; 663 plane[2].dcdx = position->dy20; 664 665 for (int i = 0; i < 3; i++) { 666 /* half-edge constants, will be iterated over the whole render 667 * target. 668 */ 669 plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) - 670 IMUL64(plane[i].dcdy, position->y[i]); 671 672 /* correct for top-left vs. bottom-left fill convention. 673 */ 674 if (plane[i].dcdx < 0) { 675 /* both fill conventions want this - adjust for left edges */ 676 plane[i].c++; 677 } 678 else if (plane[i].dcdx == 0) { 679 if (setup->bottom_edge_rule == 0) { 680 /* correct for top-left fill convention: 681 */ 682 if (plane[i].dcdy > 0) 683 plane[i].c++; 684 } else { 685 /* correct for bottom-left fill convention: 686 */ 687 if (plane[i].dcdy < 0) 688 plane[i].c++; 689 } 690 } 691 692 /* Scale up to match c: 693 */ 694 assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx); 695 assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy); 696 plane[i].dcdx <<= FIXED_ORDER; 697 plane[i].dcdy <<= FIXED_ORDER; 698 699 /* find trivial reject offsets for each edge for a single-pixel 700 * sized block. These will be scaled up at each recursive level to 701 * match the active blocksize. Scaling in this way works best if 702 * the blocks are square. 703 */ 704 plane[i].eo = 0; 705 if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; 706 if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; 707 } 708 } 709 710 if (0) { 711 debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n", 712 plane[0].c, 713 plane[0].dcdx, 714 plane[0].dcdy, 715 plane[0].eo); 716 717 debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n", 718 plane[1].c, 719 plane[1].dcdx, 720 plane[1].dcdy, 721 plane[1].eo); 722 723 debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n", 724 plane[2].c, 725 plane[2].dcdx, 726 plane[2].dcdy, 727 plane[2].eo); 728 } 729 730 if (nr_planes > 3) { 731 lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample); 732 } 733 734 return lp_setup_bin_triangle(setup, tri, use_32bits, 735 check_opaque(setup, v0, v1, v2), 736 &bbox, nr_planes, viewport_index); 737} 738 739/* 740 * Round to nearest less or equal power of two of the input. 741 * 742 * Undefined if no bit set exists, so code should check against 0 first. 743 */ 744static inline uint32_t 745floor_pot(uint32_t n) 746{ 747#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) 748 if (n == 0) 749 return 0; 750 751 __asm__("bsr %1,%0" 752 : "=r" (n) 753 : "rm" (n) 754 : "cc"); 755 return 1 << n; 756#else 757 n |= (n >> 1); 758 n |= (n >> 2); 759 n |= (n >> 4); 760 n |= (n >> 8); 761 n |= (n >> 16); 762 return n - (n >> 1); 763#endif 764} 765 766 767boolean 768lp_setup_bin_triangle(struct lp_setup_context *setup, 769 struct lp_rast_triangle *tri, 770 boolean use_32bits, 771 boolean opaque, 772 const struct u_rect *bbox, 773 int nr_planes, 774 unsigned viewport_index) 775{ 776 struct lp_scene *scene = setup->scene; 777 unsigned cmd; 778 779 /* What is the largest power-of-two boundary this triangle crosses: 780 */ 781 const int dx = floor_pot((bbox->x0 ^ bbox->x1) | 782 (bbox->y0 ^ bbox->y1)); 783 784 /* The largest dimension of the rasterized area of the triangle 785 * (aligned to a 4x4 grid), rounded down to the nearest power of two: 786 */ 787 const int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) | 788 (bbox->y1 - (bbox->y0 & ~3))); 789 const int sz = floor_pot(max_sz); 790 791 /* 792 * NOTE: It is important to use the original bounding box 793 * which might contain negative values here, because if the 794 * plane math may overflow or not with the 32bit rasterization 795 * functions depends on the original extent of the triangle. 796 */ 797 798 /* Now apply scissor, etc to the bounding box. Could do this 799 * earlier, but it confuses the logic for tri-16 and would force 800 * the rasterizer to also respect scissor, etc, just for the rare 801 * cases where a small triangle extends beyond the scissor. 802 */ 803 struct u_rect trimmed_box = *bbox; 804 u_rect_find_intersection(&setup->draw_regions[viewport_index], 805 &trimmed_box); 806 807 /* Determine which tile(s) intersect the triangle's bounding box 808 */ 809 if (dx < TILE_SIZE) { 810 const int ix0 = bbox->x0 / TILE_SIZE; 811 const int iy0 = bbox->y0 / TILE_SIZE; 812 unsigned px = bbox->x0 & 63 & ~3; 813 unsigned py = bbox->y0 & 63 & ~3; 814 815 assert(iy0 == bbox->y1 / TILE_SIZE && 816 ix0 == bbox->x1 / TILE_SIZE); 817 818 if (nr_planes == 3) { 819 if (sz < 4) { 820 /* Triangle is contained in a single 4x4 stamp: 821 */ 822 assert(px + 4 <= TILE_SIZE); 823 assert(py + 4 <= TILE_SIZE); 824 if (setup->multisample) 825 cmd = LP_RAST_OP_MS_TRIANGLE_3_4; 826 else 827 cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4; 828 return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 829 setup->fs.stored, cmd, 830 lp_rast_arg_triangle_contained(tri, px, py)); 831 } 832 833 if (sz < 16) { 834 /* Triangle is contained in a single 16x16 block: 835 */ 836 837 /* 838 * The 16x16 block is only 4x4 aligned, and can exceed the tile 839 * dimensions if the triangle is 16 pixels in one dimension but 4 840 * in the other. So budge the 16x16 back inside the tile. 841 */ 842 px = MIN2(px, TILE_SIZE - 16); 843 py = MIN2(py, TILE_SIZE - 16); 844 845 assert(px + 16 <= TILE_SIZE); 846 assert(py + 16 <= TILE_SIZE); 847 848 if (setup->multisample) 849 cmd = LP_RAST_OP_MS_TRIANGLE_3_16; 850 else 851 cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16; 852 return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 853 setup->fs.stored, cmd, 854 lp_rast_arg_triangle_contained(tri, px, py)); 855 } 856 } else if (nr_planes == 4 && sz < 16) { 857 px = MIN2(px, TILE_SIZE - 16); 858 py = MIN2(py, TILE_SIZE - 16); 859 860 assert(px + 16 <= TILE_SIZE); 861 assert(py + 16 <= TILE_SIZE); 862 863 if (setup->multisample) 864 cmd = LP_RAST_OP_MS_TRIANGLE_4_16; 865 else 866 cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16; 867 return lp_scene_bin_cmd_with_state(scene, ix0, iy0, 868 setup->fs.stored, cmd, 869 lp_rast_arg_triangle_contained(tri, px, py)); 870 } 871 872 /* Triangle is contained in a single tile: 873 */ 874 if (setup->multisample) 875 cmd = lp_rast_ms_tri_tab[nr_planes]; 876 else 877 cmd = use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes]; 878 return lp_scene_bin_cmd_with_state(scene, ix0, iy0, setup->fs.stored, cmd, 879 lp_rast_arg_triangle(tri, (1<<nr_planes)-1)); 880 } else { 881 struct lp_rast_plane *plane = GET_PLANES(tri); 882 int64_t c[MAX_PLANES]; 883 int64_t ei[MAX_PLANES]; 884 885 int64_t eo[MAX_PLANES]; 886 int64_t xstep[MAX_PLANES]; 887 int64_t ystep[MAX_PLANES]; 888 int x, y; 889 890 const int ix0 = trimmed_box.x0 / TILE_SIZE; 891 const int iy0 = trimmed_box.y0 / TILE_SIZE; 892 const int ix1 = trimmed_box.x1 / TILE_SIZE; 893 const int iy1 = trimmed_box.y1 / TILE_SIZE; 894 895 for (int i = 0; i < nr_planes; i++) { 896 c[i] = (plane[i].c + 897 IMUL64(plane[i].dcdy, iy0) * TILE_SIZE - 898 IMUL64(plane[i].dcdx, ix0) * TILE_SIZE); 899 900 ei[i] = (plane[i].dcdy - 901 plane[i].dcdx - 902 (int64_t)plane[i].eo) << TILE_ORDER; 903 904 eo[i] = (int64_t)plane[i].eo << TILE_ORDER; 905 xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER); 906 ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER; 907 } 908 909 tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs); 910 911 /* Test tile-sized blocks against the triangle. 912 * Discard blocks fully outside the tri. If the block is fully 913 * contained inside the tri, bin an lp_rast_shade_tile command. 914 * Else, bin a lp_rast_triangle command. 915 */ 916 for (y = iy0; y <= iy1; y++) { 917 boolean in = FALSE; /* are we inside the triangle? */ 918 int64_t cx[MAX_PLANES]; 919 920 for (int i = 0; i < nr_planes; i++) 921 cx[i] = c[i]; 922 923 for (x = ix0; x <= ix1; x++) { 924 int out = 0; 925 int partial = 0; 926 927 for (int i = 0; i < nr_planes; i++) { 928 int64_t planeout = cx[i] + eo[i]; 929 int64_t planepartial = cx[i] + ei[i] - 1; 930 out |= (int) (planeout >> 63); 931 partial |= ((int) (planepartial >> 63)) & (1<<i); 932 } 933 934 if (out) { 935 /* do nothing */ 936 if (in) 937 break; /* exiting triangle, all done with this row */ 938 LP_COUNT(nr_empty_64); 939 } else if (partial) { 940 /* Not trivially accepted by at least one plane - 941 * rasterize/shade partial tile 942 */ 943 int count = util_bitcount(partial); 944 in = TRUE; 945 946 if (setup->multisample) 947 cmd = lp_rast_ms_tri_tab[count]; 948 else 949 cmd = use_32bits ? lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count]; 950 if (!lp_scene_bin_cmd_with_state(scene, x, y, 951 setup->fs.stored, cmd, 952 lp_rast_arg_triangle(tri, partial))) 953 goto fail; 954 955 LP_COUNT(nr_partially_covered_64); 956 } else { 957 /* triangle covers the whole tile- shade whole tile */ 958 LP_COUNT(nr_fully_covered_64); 959 in = TRUE; 960 if (!lp_setup_whole_tile(setup, &tri->inputs, x, y, opaque)) 961 goto fail; 962 } 963 964 /* Iterate cx values across the region: */ 965 for (int i = 0; i < nr_planes; i++) 966 cx[i] += xstep[i]; 967 } 968 969 /* Iterate c values down the region: */ 970 for (int i = 0; i < nr_planes; i++) 971 c[i] += ystep[i]; 972 } 973 } 974 975 return TRUE; 976 977fail: 978 /* Need to disable any partially binned triangle. This is easier 979 * than trying to locate all the triangle, shade-tile, etc, 980 * commands which may have been binned. 981 */ 982 tri->inputs.disable = TRUE; 983 return FALSE; 984} 985 986 987/** 988 * Try to draw the triangle, restart the scene on failure. 989 */ 990static inline void 991retry_triangle_ccw(struct lp_setup_context *setup, 992 struct fixed_position *position, 993 const float (*v0)[4], 994 const float (*v1)[4], 995 const float (*v2)[4], 996 boolean front) 997{ 998 if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) { 999 if (!lp_setup_flush_and_restart(setup)) 1000 return; 1001 1002 if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) 1003 return; 1004 } 1005} 1006 1007 1008/** 1009 * Calculate fixed position data for a triangle 1010 * It is unfortunate we need to do that here (as we need area 1011 * calculated in fixed point), as there's quite some code duplication 1012 * to what is done in the jit setup prog. 1013 */ 1014static inline int8_t 1015calc_fixed_position(struct lp_setup_context *setup, 1016 struct fixed_position* position, 1017 const float (*v0)[4], 1018 const float (*v1)[4], 1019 const float (*v2)[4]) 1020{ 1021 float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset; 1022 /* 1023 * The rounding may not be quite the same with PIPE_ARCH_SSE 1024 * (util_iround right now only does nearest/even on x87, 1025 * otherwise nearest/away-from-zero). 1026 * Both should be acceptable, I think. 1027 */ 1028#if defined(PIPE_ARCH_SSE) 1029 __m128 v0r, v1r; 1030 __m128 vxy0xy2, vxy1xy0; 1031 __m128i vxy0xy2i, vxy1xy0i; 1032 __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; 1033 __m128 pix_offset = _mm_set1_ps(pixel_offset); 1034 __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); 1035 v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0])); 1036 vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]); 1037 v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0])); 1038 vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2); 1039 vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset); 1040 vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset); 1041 vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one); 1042 vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one); 1043 vxy0xy2i = _mm_cvtps_epi32(vxy0xy2); 1044 vxy1xy0i = _mm_cvtps_epi32(vxy1xy0); 1045 dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i); 1046 _mm_store_si128((__m128i *)&position->dx01, dxdy0120); 1047 /* 1048 * For the mul, would need some more shuffles, plus emulation 1049 * for the signed mul (without sse41), so don't bother. 1050 */ 1051 x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0)); 1052 x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0)); 1053 x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0); 1054 y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0); 1055 _mm_store_si128((__m128i *)&position->x[0], x0120); 1056 _mm_store_si128((__m128i *)&position->y[0], y0120); 1057 1058#else 1059 position->x[0] = subpixel_snap(v0[0][0] - pixel_offset); 1060 position->x[1] = subpixel_snap(v1[0][0] - pixel_offset); 1061 position->x[2] = subpixel_snap(v2[0][0] - pixel_offset); 1062 position->x[3] = 0; // should be unused 1063 1064 position->y[0] = subpixel_snap(v0[0][1] - pixel_offset); 1065 position->y[1] = subpixel_snap(v1[0][1] - pixel_offset); 1066 position->y[2] = subpixel_snap(v2[0][1] - pixel_offset); 1067 position->y[3] = 0; // should be unused 1068 1069 position->dx01 = position->x[0] - position->x[1]; 1070 position->dy01 = position->y[0] - position->y[1]; 1071 1072 position->dx20 = position->x[2] - position->x[0]; 1073 position->dy20 = position->y[2] - position->y[0]; 1074#endif 1075 1076 uint64_t area = IMUL64(position->dx01, position->dy20) - 1077 IMUL64(position->dx20, position->dy01); 1078 return area == 0 ? 0 : (area & (1ULL << 63)) ? -1 : 1; 1079} 1080 1081 1082/** 1083 * Rotate a triangle, flipping its clockwise direction, 1084 * Swaps values for xy[0] and xy[1] 1085 */ 1086static inline void 1087rotate_fixed_position_01(struct fixed_position* position) 1088{ 1089 int x = position->x[1]; 1090 int y = position->y[1]; 1091 1092 position->x[1] = position->x[0]; 1093 position->y[1] = position->y[0]; 1094 position->x[0] = x; 1095 position->y[0] = y; 1096 1097 position->dx01 = -position->dx01; 1098 position->dy01 = -position->dy01; 1099 position->dx20 = position->x[2] - position->x[0]; 1100 position->dy20 = position->y[2] - position->y[0]; 1101} 1102 1103 1104/** 1105 * Rotate a triangle, flipping its clockwise direction, 1106 * Swaps values for xy[1] and xy[2] 1107 */ 1108static inline void 1109rotate_fixed_position_12(struct fixed_position* position) 1110{ 1111 int x = position->x[2]; 1112 int y = position->y[2]; 1113 1114 position->x[2] = position->x[1]; 1115 position->y[2] = position->y[1]; 1116 position->x[1] = x; 1117 position->y[1] = y; 1118 1119 x = position->dx01; 1120 y = position->dy01; 1121 position->dx01 = -position->dx20; 1122 position->dy01 = -position->dy20; 1123 position->dx20 = -x; 1124 position->dy20 = -y; 1125} 1126 1127 1128/** 1129 * Draw triangle if it's CW, cull otherwise. 1130 */ 1131static void 1132triangle_cw(struct lp_setup_context *setup, 1133 const float (*v0)[4], 1134 const float (*v1)[4], 1135 const float (*v2)[4]) 1136{ 1137 alignas(16) struct fixed_position position; 1138 struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1139 1140 if (lp_context->active_statistics_queries) { 1141 lp_context->pipeline_statistics.c_primitives++; 1142 } 1143 1144 int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1145 1146 if (area_sign < 0) { 1147 if (setup->flatshade_first) { 1148 rotate_fixed_position_12(&position); 1149 retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface); 1150 } else { 1151 rotate_fixed_position_01(&position); 1152 retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface); 1153 } 1154 } 1155} 1156 1157 1158static void 1159triangle_ccw(struct lp_setup_context *setup, 1160 const float (*v0)[4], 1161 const float (*v1)[4], 1162 const float (*v2)[4]) 1163{ 1164 alignas(16) struct fixed_position position; 1165 struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1166 1167 if (lp_context->active_statistics_queries) { 1168 lp_context->pipeline_statistics.c_primitives++; 1169 } 1170 1171 int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1172 1173 if (area_sign > 0) 1174 retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface); 1175} 1176 1177 1178/** 1179 * Draw triangle whether it's CW or CCW. 1180 */ 1181static void 1182triangle_both(struct lp_setup_context *setup, 1183 const float (*v0)[4], 1184 const float (*v1)[4], 1185 const float (*v2)[4]) 1186{ 1187 alignas(16) struct fixed_position position; 1188 struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; 1189 1190 if (lp_context->active_statistics_queries) { 1191 lp_context->pipeline_statistics.c_primitives++; 1192 } 1193 1194 int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2); 1195 1196 if (0) { 1197 assert(!util_is_inf_or_nan(v0[0][0])); 1198 assert(!util_is_inf_or_nan(v0[0][1])); 1199 assert(!util_is_inf_or_nan(v1[0][0])); 1200 assert(!util_is_inf_or_nan(v1[0][1])); 1201 assert(!util_is_inf_or_nan(v2[0][0])); 1202 assert(!util_is_inf_or_nan(v2[0][1])); 1203 } 1204 1205 if (area_sign > 0) { 1206 retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface); 1207 } else if (area_sign < 0) { 1208 if (setup->flatshade_first) { 1209 rotate_fixed_position_12(&position); 1210 retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface); 1211 } else { 1212 rotate_fixed_position_01(&position); 1213 retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface); 1214 } 1215 } 1216} 1217 1218 1219static void 1220triangle_noop(struct lp_setup_context *setup, 1221 const float (*v0)[4], 1222 const float (*v1)[4], 1223 const float (*v2)[4]) 1224{ 1225} 1226 1227 1228void 1229lp_setup_choose_triangle(struct lp_setup_context *setup) 1230{ 1231 if (setup->rasterizer_discard) { 1232 setup->triangle = triangle_noop; 1233 return; 1234 } 1235 switch (setup->cullmode) { 1236 case PIPE_FACE_NONE: 1237 setup->triangle = triangle_both; 1238 break; 1239 case PIPE_FACE_BACK: 1240 setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw; 1241 break; 1242 case PIPE_FACE_FRONT: 1243 setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw; 1244 break; 1245 default: 1246 setup->triangle = triangle_noop; 1247 break; 1248 } 1249} 1250