1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * Binning code for triangles
30 */
31
32#include "util/u_math.h"
33#include "util/u_memory.h"
34#include "util/u_rect.h"
35#include "util/u_sse.h"
36#include "lp_perf.h"
37#include "lp_setup_context.h"
38#include "lp_rast.h"
39#include "lp_state_fs.h"
40#include "lp_state_setup.h"
41#include "lp_context.h"
42
43#include <inttypes.h>
44
45
46#if defined(PIPE_ARCH_SSE)
47#include <emmintrin.h>
48#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
49#include <altivec.h>
50#include "util/u_pwr8.h"
51#endif
52
53#if !defined(PIPE_ARCH_SSE)
54
55static inline int
56subpixel_snap(float a)
57{
58   return util_iround(FIXED_ONE * a);
59}
60
61#endif
62
/* Position and area in fixed point coordinates */
struct fixed_position {
   /* Vertex x coordinates.  do_triangle_ccw() uses entries 0..2; its SSE
    * path loads all four entries as a single vector.
    * NOTE(review): that load is _mm_load_si128, which needs 16-byte
    * alignment - confirm how instances of this struct are declared.
    */
   int32_t x[4];
   /* Vertex y coordinates, used the same way as x. */
   int32_t y[4];
   /* x[0] - x[1] and y[0] - y[1] (as recomputed by the vertex rotation in
    * do_triangle_ccw()).
    */
   int32_t dx01;
   int32_t dy01;
   /* x[2] - x[0] and y[2] - y[0] (as recomputed by the vertex rotation in
    * do_triangle_ccw()).
    */
   int32_t dx20;
   int32_t dy20;
};
72
73
74/**
75 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
76 * immediately after it.
77 * The memory is allocated from the per-scene pool, not per-tile.
78 * \param tri_size  returns number of bytes allocated
79 * \param num_inputs  number of fragment shader inputs
80 * \return pointer to triangle space
81 */
82struct lp_rast_triangle *
83lp_setup_alloc_triangle(struct lp_scene *scene,
84                        unsigned nr_inputs,
85                        unsigned nr_planes,
86                        unsigned *tri_size)
87{
88   // add 1 for XYZW position
89   unsigned input_array_sz = (nr_inputs + 1) * sizeof(float[4]);
90   unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
91
92   STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
93
94   *tri_size = (sizeof(struct lp_rast_triangle) +
95                3 * input_array_sz +   // 3 = da + dadx + dady
96                plane_sz);
97
98   struct lp_rast_triangle *tri = lp_scene_alloc_aligned(scene, *tri_size, 16);
99   if (!tri)
100      return NULL;
101
102   tri->inputs.stride = input_array_sz;
103
104   {
105      ASSERTED char *a = (char *)tri;
106      ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes];
107
108      assert(b - a == *tri_size);
109   }
110
111   return tri;
112}
113
114void
115lp_setup_print_vertex(struct lp_setup_context *setup,
116                      const char *name,
117                      const float (*v)[4])
118{
119   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
120
121   debug_printf("   wpos (%s[0]) xyzw %f %f %f %f\n",
122                name,
123                v[0][0], v[0][1], v[0][2], v[0][3]);
124
125   for (int i = 0; i < key->num_inputs; i++) {
126      const float *in = v[key->inputs[i].src_index];
127
128      debug_printf("  in[%d] (%s[%d]) %s%s%s%s ",
129                   i,
130                   name, key->inputs[i].src_index,
131                   (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
132                   (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
133                   (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
134                   (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
135
136      for (int j = 0; j < 4; j++)
137         if (key->inputs[i].usage_mask & (1<<j))
138            debug_printf("%.5f ", in[j]);
139
140      debug_printf("\n");
141   }
142}
143
144
/**
 * Print triangle vertex attribs (for debug).
 *
 * Prints the winding derived from the sign of the 2D cross product of the
 * edges (v0 - v2) and (v1 - v2), then dumps the three vertices.
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   debug_printf("triangle\n");

   /* Edge vectors from v2 to v0 (e) and from v2 to v1 (f). */
   const float e_x = v0[0][0] - v2[0][0];
   const float e_y = v0[0][1] - v2[0][1];
   const float f_x = v1[0][0] - v2[0][0];
   const float f_y = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   const float det = e_x * f_y - e_y * f_x;

   if (det > 0.0f) {
      debug_printf("   - cw\n");
   } else if (det < 0.0f) {
      debug_printf("   - ccw\n");
   } else {
      debug_printf("   - zero area\n");
   }

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}
176
177
178#define MAX_PLANES 8
179static unsigned
180lp_rast_tri_tab[MAX_PLANES+1] = {
181   0,               /* should be impossible */
182   LP_RAST_OP_TRIANGLE_1,
183   LP_RAST_OP_TRIANGLE_2,
184   LP_RAST_OP_TRIANGLE_3,
185   LP_RAST_OP_TRIANGLE_4,
186   LP_RAST_OP_TRIANGLE_5,
187   LP_RAST_OP_TRIANGLE_6,
188   LP_RAST_OP_TRIANGLE_7,
189   LP_RAST_OP_TRIANGLE_8
190};
191
192static unsigned
193lp_rast_32_tri_tab[MAX_PLANES+1] = {
194   0,               /* should be impossible */
195   LP_RAST_OP_TRIANGLE_32_1,
196   LP_RAST_OP_TRIANGLE_32_2,
197   LP_RAST_OP_TRIANGLE_32_3,
198   LP_RAST_OP_TRIANGLE_32_4,
199   LP_RAST_OP_TRIANGLE_32_5,
200   LP_RAST_OP_TRIANGLE_32_6,
201   LP_RAST_OP_TRIANGLE_32_7,
202   LP_RAST_OP_TRIANGLE_32_8
203};
204
205
206static unsigned
207lp_rast_ms_tri_tab[MAX_PLANES+1] = {
208   0,               /* should be impossible */
209   LP_RAST_OP_MS_TRIANGLE_1,
210   LP_RAST_OP_MS_TRIANGLE_2,
211   LP_RAST_OP_MS_TRIANGLE_3,
212   LP_RAST_OP_MS_TRIANGLE_4,
213   LP_RAST_OP_MS_TRIANGLE_5,
214   LP_RAST_OP_MS_TRIANGLE_6,
215   LP_RAST_OP_MS_TRIANGLE_7,
216   LP_RAST_OP_MS_TRIANGLE_8
217};
218
219
220/*
221 * Detect big primitives drawn with an alpha == 1.0.
222 *
223 * This is used when simulating anti-aliasing primitives in shaders, e.g.,
224 * when drawing the windows client area in Aero's flip-3d effect.
225 */
226static boolean
227check_opaque(const struct lp_setup_context *setup,
228             const float (*v1)[4],
229             const float (*v2)[4],
230             const float (*v3)[4])
231{
232   const struct lp_fragment_shader_variant *variant =
233      setup->fs.current.variant;
234
235   if (variant->opaque)
236      return TRUE;
237
238   if (!variant->potentially_opaque)
239      return FALSE;
240
241   const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3];
242   if (alpha_info->file == TGSI_FILE_CONSTANT) {
243      const float *constants = setup->fs.current.jit_context.constants[0];
244      float alpha = constants[alpha_info->u.index*4 +
245                              alpha_info->swizzle];
246      return alpha == 1.0f;
247   }
248
249   if (alpha_info->file == TGSI_FILE_INPUT) {
250      return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
251              v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
252              v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f);
253   }
254
255   return FALSE;
256}
257
258
259/**
260 * Do basic setup for triangle rasterization and determine which
261 * framebuffer tiles are touched.  Put the triangle in the scene's
262 * bins for the tiles which we overlap.
263 */
264static boolean
265do_triangle_ccw(struct lp_setup_context *setup,
266                struct fixed_position *position,
267                const float (*v0)[4],
268                const float (*v1)[4],
269                const float (*v2)[4],
270                boolean frontfacing)
271{
272   struct lp_scene *scene = setup->scene;
273
274   if (0)
275      lp_setup_print_triangle(setup, v0, v1, v2);
276
277   const float (*pv)[4];
278   if (setup->flatshade_first) {
279      pv = v0;
280   } else {
281      pv = v2;
282   }
283
284   unsigned viewport_index = 0;
285   if (setup->viewport_index_slot > 0) {
286      unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
287      viewport_index = lp_clamp_viewport_idx(*udata);
288   }
289
290   unsigned layer = 0;
291   if (setup->layer_slot > 0) {
292      layer = *(unsigned*)pv[setup->layer_slot];
293      layer = MIN2(layer, scene->fb_max_layer);
294   }
295
296   /* Bounding rectangle (in pixels) */
297   struct u_rect bbox;
298   {
299      /* Yes this is necessary to accurately calculate bounding boxes
300       * with the two fill-conventions we support.  GL (normally) ends
301       * up needing a bottom-left fill convention, which requires
302       * slightly different rounding.
303       */
304      int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
305
306      /* Inclusive x0, exclusive x1 */
307      bbox.x0 =  MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
308      bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
309
310      /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
311      bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
312      bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
313   }
314
315   if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
316      if (0) debug_printf("no intersection\n");
317      LP_COUNT(nr_culled_tris);
318      return TRUE;
319   }
320
321   int max_szorig = ((bbox.x1 - (bbox.x0 & ~3)) |
322                     (bbox.y1 - (bbox.y0 & ~3)));
323   boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32;
324#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
325   boolean pwr8_limit_check = (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
326      (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32;
327#endif
328
329   /* Can safely discard negative regions, but need to keep hold of
330    * information about when the triangle extends past screen
331    * boundaries.  See trimmed_box in lp_setup_bin_triangle().
332    */
333   bbox.x0 = MAX2(bbox.x0, 0);
334   bbox.y0 = MAX2(bbox.y0, 0);
335
336   int nr_planes = 3;
337
338   /*
339    * Determine how many scissor planes we need, that is drop scissor
340    * edges if the bounding box of the tri is fully inside that edge.
341    */
342   const struct u_rect *scissor = &setup->draw_regions[viewport_index];
343   boolean s_planes[4];
344   scissor_planes_needed(s_planes, &bbox, scissor);
345   nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
346
347   unsigned tri_bytes;
348   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
349   struct lp_rast_triangle *tri =
350      lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes);
351   if (!tri)
352      return FALSE;
353
354#ifdef DEBUG
355   tri->v[0][0] = v0[0][0];
356   tri->v[1][0] = v1[0][0];
357   tri->v[2][0] = v2[0][0];
358   tri->v[0][1] = v0[0][1];
359   tri->v[1][1] = v1[0][1];
360   tri->v[2][1] = v2[0][1];
361#endif
362
363   LP_COUNT(nr_tris);
364
365   /*
366    * Rotate the tri such that v0 is closest to the fb origin.
367    * This can give more accurate a0 value (which is at fb origin)
368    * when calculating the interpolants.
369    * It can't work when there's flat shading for instance in one
370    * of the attributes, hence restrict this to just a single attribute
371    * which is what causes some test failures.
372    * (This does not address the problem that interpolation may be
373    * inaccurate if gradients are relatively steep in small tris far
374    * away from the origin. It does however fix the (silly) wgf11rasterizer
375    * Interpolator test.)
376    * XXX This causes problems with mipgen -EmuTexture for not yet really
377    * understood reasons (if the vertices would be submitted in a different
378    * order, we'd also generate the same "wrong" results here without
379    * rotation). In any case, that we generate different values if a prim
380    * has the vertices rotated but is otherwise the same (which is due to
381    * numerical issues) is not a nice property. An additional problem by
382    * swapping the vertices here (which is possibly worse) is that
383    * the same primitive coming in twice might generate different values
384    * (in particular for z) due to the swapping potentially not happening
385    * both times, if the attributes to be interpolated are different. For now,
386    * just restrict this to not get used with dx9 (by checking pixel offset),
387    * could also restrict it further to only trigger with wgf11Interpolator
388    * Rasterizer test (the only place which needs it, with always the same
389    * vertices even).
390    */
391   if ((LP_DEBUG & DEBUG_ACCURATE_A0) &&
392       setup->pixel_offset == 0.5f &&
393       key->num_inputs == 1 &&
394       (key->inputs[0].interp == LP_INTERP_LINEAR ||
395        key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) {
396      float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1];
397      float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1];
398      float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1];
399      if (dist0 > dist1 && dist1 < dist2) {
400         const float (*vt)[4];
401         int x, y;
402         vt = v0;
403         v0 = v1;
404         v1 = v2;
405         v2 = vt;
406         x = position->x[0];
407         y = position->y[0];
408         position->x[0] = position->x[1];
409         position->y[0] = position->y[1];
410         position->x[1] = position->x[2];
411         position->y[1] = position->y[2];
412         position->x[2] = x;
413         position->y[2] = y;
414
415         position->dx20 = position->dx01;
416         position->dy20 = position->dy01;
417         position->dx01 = position->x[0] - position->x[1];
418         position->dy01 = position->y[0] - position->y[1];
419      } else if (dist0 > dist2) {
420         const float (*vt)[4];
421         int x, y;
422         vt = v0;
423         v0 = v2;
424         v2 = v1;
425         v1 = vt;
426         x = position->x[0];
427         y = position->y[0];
428         position->x[0] = position->x[2];
429         position->y[0] = position->y[2];
430         position->x[2] = position->x[1];
431         position->y[2] = position->y[1];
432         position->x[1] = x;
433         position->y[1] = y;
434
435         position->dx01 = position->dx20;
436         position->dy01 = position->dy20;
437         position->dx20 = position->x[2] - position->x[0];
438         position->dy20 = position->y[2] - position->y[0];
439      }
440   }
441
442   /* Setup parameter interpolants:
443    */
444   setup->setup.variant->jit_function(v0, v1, v2,
445                                      frontfacing,
446                                      GET_A0(&tri->inputs),
447                                      GET_DADX(&tri->inputs),
448                                      GET_DADY(&tri->inputs),
449                                      &setup->setup.variant->key);
450
451   tri->inputs.frontfacing = frontfacing;
452   tri->inputs.disable = FALSE;
453   tri->inputs.is_blit = FALSE;
454   tri->inputs.layer = layer;
455   tri->inputs.viewport_index = viewport_index;
456   tri->inputs.view_index = setup->view_index;
457
458   if (0)
459      lp_dump_setup_coef(&setup->setup.variant->key,
460                         GET_A0(&tri->inputs),
461                         GET_DADX(&tri->inputs),
462                         GET_DADY(&tri->inputs));
463
464   struct lp_rast_plane *plane = GET_PLANES(tri);
465
466#if defined(PIPE_ARCH_SSE)
467   if (1) {
468      __m128i vertx, verty;
469      __m128i shufx, shufy;
470      __m128i dcdx, dcdy;
471      __m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
472      __m128i c01, c23, unused;
473      __m128i dcdx_neg_mask;
474      __m128i dcdy_neg_mask;
475      __m128i dcdx_zero_mask;
476      __m128i top_left_flag, c_dec;
477      __m128i eo, p0, p1, p2;
478      __m128i zero = _mm_setzero_si128();
479
480      vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
481      verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
482
483      shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
484      shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
485
486      dcdx = _mm_sub_epi32(verty, shufy);
487      dcdy = _mm_sub_epi32(vertx, shufx);
488
489      dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
490      dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
491      dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
492
493      top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
494
495      c_dec = _mm_or_si128(dcdx_neg_mask,
496                           _mm_and_si128(dcdx_zero_mask,
497                                         _mm_xor_si128(dcdy_neg_mask,
498                                                       top_left_flag)));
499
500      /*
501       * 64 bit arithmetic.
502       * Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
503       */
504      cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
505      cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
506      c02 = _mm_sub_epi64(cdx02, cdy02);
507      c13 = _mm_sub_epi64(cdx13, cdy13);
508      c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
509                                                 _MM_SHUFFLE(2,2,0,0)));
510      c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
511                                                 _MM_SHUFFLE(3,3,1,1)));
512
513      /*
514       * Useful for very small fbs/tris (or fewer subpixel bits) only:
515       * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
516       *                   mm_mullo_epi32(dcdy, verty));
517       *
518       * c = _mm_sub_epi32(c, c_dec);
519       */
520
521      /* Scale up to match c:
522       */
523      dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
524      dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
525
526      /*
527       * Calculate trivial reject values:
528       * Note eo cannot overflow even if dcdx/dcdy would already have
529       * 31 bits (which they shouldn't have). This is because eo
530       * is never negative (albeit if we rely on that need to be careful...)
531       */
532      eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
533                         _mm_and_si128(dcdx_neg_mask, dcdx));
534
535      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
536
537      /*
538       * Pointless transpose which gets undone immediately in
539       * rasterization.
540       * It is actually difficult to do away with it - would essentially
541       * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
542       * for this then would need to depend on the number of planes.
543       * The transpose is quite special here due to c being 64bit...
544       * The store has to be unaligned (unless we'd make the plane size
545       * a multiple of 128), and of course storing eo separately...
546       */
547      c01 = _mm_unpacklo_epi64(c02, c13);
548      c23 = _mm_unpackhi_epi64(c02, c13);
549      transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
550                         &p0, &p1, &p2, &unused);
551      _mm_storeu_si128((__m128i *)&plane[0], p0);
552      plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
553      _mm_storeu_si128((__m128i *)&plane[1], p1);
554      eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
555      plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
556      _mm_storeu_si128((__m128i *)&plane[2], p2);
557      eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
558      plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
559   } else
560#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
561   /*
562    * XXX this code is effectively disabled for all practical purposes,
563    * as the allowed fb size is tiny if FIXED_ORDER is 8.
564    */
565   if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
566       setup->fb.height <= MAX_FIXED_LENGTH32 &&
567       pwr8_limit_check) {
568      unsigned int bottom_edge;
569      __m128i vertx, verty;
570      __m128i shufx, shufy;
571      __m128i dcdx, dcdy, c;
572      __m128i unused;
573      __m128i dcdx_neg_mask;
574      __m128i dcdy_neg_mask;
575      __m128i dcdx_zero_mask;
576      __m128i top_left_flag;
577      __m128i c_inc_mask, c_inc;
578      __m128i eo, p0, p1, p2;
579      __m128i_union vshuf_mask;
580      __m128i zero = vec_splats((unsigned char) 0);
581      alignas(16) int32_t temp_vec[4];
582
583#if UTIL_ARCH_LITTLE_ENDIAN
584      vshuf_mask.i[0] = 0x07060504;
585      vshuf_mask.i[1] = 0x0B0A0908;
586      vshuf_mask.i[2] = 0x03020100;
587      vshuf_mask.i[3] = 0x0F0E0D0C;
588#else
589      vshuf_mask.i[0] = 0x00010203;
590      vshuf_mask.i[1] = 0x0C0D0E0F;
591      vshuf_mask.i[2] = 0x04050607;
592      vshuf_mask.i[3] = 0x08090A0B;
593#endif
594
595      /* vertex x coords */
596      vertx = vec_load_si128((const uint32_t *) position->x);
597      /* vertex y coords */
598      verty = vec_load_si128((const uint32_t *) position->y);
599
600      shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
601      shufy = vec_perm (verty, verty, vshuf_mask.m128i);
602
603      dcdx = vec_sub_epi32(verty, shufy);
604      dcdy = vec_sub_epi32(vertx, shufx);
605
606      dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
607      dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
608      dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
609
610      bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
611      top_left_flag = (__m128i) vec_splats(bottom_edge);
612
613      c_inc_mask = vec_or(dcdx_neg_mask,
614                                vec_and(dcdx_zero_mask,
615                                              vec_xor(dcdy_neg_mask,
616                                                            top_left_flag)));
617
618      c_inc = vec_srli_epi32(c_inc_mask, 31);
619
620      c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
621                        vec_mullo_epi32(dcdy, verty));
622
623      c = vec_add_epi32(c, c_inc);
624
625      /* Scale up to match c:
626       */
627      dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
628      dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
629
630      /* Calculate trivial reject values:
631       */
632      eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy),
633                         vec_and(dcdx_neg_mask, dcdx));
634
635      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
636
637      /* Pointless transpose which gets undone immediately in
638       * rasterization:
639       */
640      transpose4_epi32(&c, &dcdx, &dcdy, &eo,
641                       &p0, &p1, &p2, &unused);
642
643#define STORE_PLANE(plane, vec) do {                  \
644         vec_store_si128((uint32_t *)&temp_vec, vec); \
645         plane.c    = (int64_t)temp_vec[0];           \
646         plane.dcdx = temp_vec[1];                    \
647         plane.dcdy = temp_vec[2];                    \
648         plane.eo   = temp_vec[3];                    \
649      } while(0)
650
651      STORE_PLANE(plane[0], p0);
652      STORE_PLANE(plane[1], p1);
653      STORE_PLANE(plane[2], p2);
654#undef STORE_PLANE
655   } else
656#endif
657   {
658      plane[0].dcdy = position->dx01;
659      plane[1].dcdy = position->x[1] - position->x[2];
660      plane[2].dcdy = position->dx20;
661      plane[0].dcdx = position->dy01;
662      plane[1].dcdx = position->y[1] - position->y[2];
663      plane[2].dcdx = position->dy20;
664
665      for (int i = 0; i < 3; i++) {
666         /* half-edge constants, will be iterated over the whole render
667          * target.
668          */
669         plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
670                      IMUL64(plane[i].dcdy, position->y[i]);
671
672         /* correct for top-left vs. bottom-left fill convention.
673          */
674         if (plane[i].dcdx < 0) {
675            /* both fill conventions want this - adjust for left edges */
676            plane[i].c++;
677         }
678         else if (plane[i].dcdx == 0) {
679            if (setup->bottom_edge_rule == 0) {
680               /* correct for top-left fill convention:
681                */
682               if (plane[i].dcdy > 0)
683                  plane[i].c++;
684            } else {
685               /* correct for bottom-left fill convention:
686                */
687               if (plane[i].dcdy < 0)
688                  plane[i].c++;
689            }
690         }
691
692         /* Scale up to match c:
693          */
694         assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx);
695         assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy);
696         plane[i].dcdx <<= FIXED_ORDER;
697         plane[i].dcdy <<= FIXED_ORDER;
698
699         /* find trivial reject offsets for each edge for a single-pixel
700          * sized block.  These will be scaled up at each recursive level to
701          * match the active blocksize.  Scaling in this way works best if
702          * the blocks are square.
703          */
704         plane[i].eo = 0;
705         if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
706         if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
707      }
708   }
709
710   if (0) {
711      debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
712                   plane[0].c,
713                   plane[0].dcdx,
714                   plane[0].dcdy,
715                   plane[0].eo);
716
717      debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
718                   plane[1].c,
719                   plane[1].dcdx,
720                   plane[1].dcdy,
721                   plane[1].eo);
722
723      debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
724                   plane[2].c,
725                   plane[2].dcdx,
726                   plane[2].dcdy,
727                   plane[2].eo);
728   }
729
730   if (nr_planes > 3) {
731      lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample);
732   }
733
734   return lp_setup_bin_triangle(setup, tri, use_32bits,
735                                check_opaque(setup, v0, v1, v2),
736                                &bbox, nr_planes, viewport_index);
737}
738
/*
 * Round the input down to the nearest power of two, i.e. return the
 * value of its highest set bit.
 *
 * Returns 0 when the input is 0.
 */
static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
   /* BSR leaves its destination undefined for a zero source. */
   if (n == 0)
      return 0;

   __asm__("bsr %1,%0"
          : "=r" (n)
          : "rm" (n)
          : "cc");
   /* n is now the index of the highest set bit (0..31).  Shift an
    * unsigned one: "1 << 31" on a signed int is undefined behavior.
    */
   return 1u << n;
#else
   /* Smear the highest set bit into every lower position... */
   n |= (n >>  1);
   n |= (n >>  2);
   n |= (n >>  4);
   n |= (n >>  8);
   n |= (n >> 16);
   /* ...then drop everything below it. */
   return n - (n >> 1);
#endif
}
765
766
767boolean
768lp_setup_bin_triangle(struct lp_setup_context *setup,
769                      struct lp_rast_triangle *tri,
770                      boolean use_32bits,
771                      boolean opaque,
772                      const struct u_rect *bbox,
773                      int nr_planes,
774                      unsigned viewport_index)
775{
776   struct lp_scene *scene = setup->scene;
777   unsigned cmd;
778
779   /* What is the largest power-of-two boundary this triangle crosses:
780    */
781   const int dx = floor_pot((bbox->x0 ^ bbox->x1) |
782                            (bbox->y0 ^ bbox->y1));
783
784   /* The largest dimension of the rasterized area of the triangle
785    * (aligned to a 4x4 grid), rounded down to the nearest power of two:
786    */
787   const int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
788                       (bbox->y1 - (bbox->y0 & ~3)));
789   const int sz = floor_pot(max_sz);
790
791   /*
792    * NOTE: It is important to use the original bounding box
793    * which might contain negative values here, because if the
794    * plane math may overflow or not with the 32bit rasterization
795    * functions depends on the original extent of the triangle.
796    */
797
798   /* Now apply scissor, etc to the bounding box.  Could do this
799    * earlier, but it confuses the logic for tri-16 and would force
800    * the rasterizer to also respect scissor, etc, just for the rare
801    * cases where a small triangle extends beyond the scissor.
802    */
803   struct u_rect trimmed_box = *bbox;
804   u_rect_find_intersection(&setup->draw_regions[viewport_index],
805                            &trimmed_box);
806
807   /* Determine which tile(s) intersect the triangle's bounding box
808    */
809   if (dx < TILE_SIZE) {
810      const int ix0 = bbox->x0 / TILE_SIZE;
811      const int iy0 = bbox->y0 / TILE_SIZE;
812      unsigned px = bbox->x0 & 63 & ~3;
813      unsigned py = bbox->y0 & 63 & ~3;
814
815      assert(iy0 == bbox->y1 / TILE_SIZE &&
816             ix0 == bbox->x1 / TILE_SIZE);
817
818      if (nr_planes == 3) {
819         if (sz < 4) {
820            /* Triangle is contained in a single 4x4 stamp:
821             */
822            assert(px + 4 <= TILE_SIZE);
823            assert(py + 4 <= TILE_SIZE);
824            if (setup->multisample)
825               cmd = LP_RAST_OP_MS_TRIANGLE_3_4;
826            else
827               cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4;
828            return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
829                                               setup->fs.stored, cmd,
830                                               lp_rast_arg_triangle_contained(tri, px, py));
831         }
832
833         if (sz < 16) {
834            /* Triangle is contained in a single 16x16 block:
835             */
836
837            /*
838             * The 16x16 block is only 4x4 aligned, and can exceed the tile
839             * dimensions if the triangle is 16 pixels in one dimension but 4
840             * in the other. So budge the 16x16 back inside the tile.
841             */
842            px = MIN2(px, TILE_SIZE - 16);
843            py = MIN2(py, TILE_SIZE - 16);
844
845            assert(px + 16 <= TILE_SIZE);
846            assert(py + 16 <= TILE_SIZE);
847
848            if (setup->multisample)
849               cmd = LP_RAST_OP_MS_TRIANGLE_3_16;
850            else
851               cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16;
852            return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
853                                               setup->fs.stored, cmd,
854                                               lp_rast_arg_triangle_contained(tri, px, py));
855         }
856      } else if (nr_planes == 4 && sz < 16) {
857         px = MIN2(px, TILE_SIZE - 16);
858         py = MIN2(py, TILE_SIZE - 16);
859
860         assert(px + 16 <= TILE_SIZE);
861         assert(py + 16 <= TILE_SIZE);
862
863         if (setup->multisample)
864            cmd = LP_RAST_OP_MS_TRIANGLE_4_16;
865         else
866            cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16;
867         return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
868                                            setup->fs.stored, cmd,
869                                            lp_rast_arg_triangle_contained(tri, px, py));
870      }
871
872      /* Triangle is contained in a single tile:
873       */
874      if (setup->multisample)
875         cmd = lp_rast_ms_tri_tab[nr_planes];
876      else
877         cmd = use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes];
878      return lp_scene_bin_cmd_with_state(scene, ix0, iy0, setup->fs.stored, cmd,
879                                  lp_rast_arg_triangle(tri, (1<<nr_planes)-1));
880   } else {
881      struct lp_rast_plane *plane = GET_PLANES(tri);
882      int64_t c[MAX_PLANES];
883      int64_t ei[MAX_PLANES];
884
885      int64_t eo[MAX_PLANES];
886      int64_t xstep[MAX_PLANES];
887      int64_t ystep[MAX_PLANES];
888      int x, y;
889
890      const int ix0 = trimmed_box.x0 / TILE_SIZE;
891      const int iy0 = trimmed_box.y0 / TILE_SIZE;
892      const int ix1 = trimmed_box.x1 / TILE_SIZE;
893      const int iy1 = trimmed_box.y1 / TILE_SIZE;
894
895      for (int i = 0; i < nr_planes; i++) {
896         c[i] = (plane[i].c +
897                 IMUL64(plane[i].dcdy, iy0) * TILE_SIZE -
898                 IMUL64(plane[i].dcdx, ix0) * TILE_SIZE);
899
900         ei[i] = (plane[i].dcdy -
901                  plane[i].dcdx -
902                  (int64_t)plane[i].eo) << TILE_ORDER;
903
904         eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
905         xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
906         ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
907      }
908
909      tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs);
910
911      /* Test tile-sized blocks against the triangle.
912       * Discard blocks fully outside the tri.  If the block is fully
913       * contained inside the tri, bin an lp_rast_shade_tile command.
914       * Else, bin a lp_rast_triangle command.
915       */
916      for (y = iy0; y <= iy1; y++) {
917         boolean in = FALSE;  /* are we inside the triangle? */
918         int64_t cx[MAX_PLANES];
919
920         for (int i = 0; i < nr_planes; i++)
921            cx[i] = c[i];
922
923         for (x = ix0; x <= ix1; x++) {
924            int out = 0;
925            int partial = 0;
926
927            for (int i = 0; i < nr_planes; i++) {
928               int64_t planeout = cx[i] + eo[i];
929               int64_t planepartial = cx[i] + ei[i] - 1;
930               out |= (int) (planeout >> 63);
931               partial |= ((int) (planepartial >> 63)) & (1<<i);
932            }
933
934            if (out) {
935               /* do nothing */
936               if (in)
937                  break;  /* exiting triangle, all done with this row */
938               LP_COUNT(nr_empty_64);
939            } else if (partial) {
940               /* Not trivially accepted by at least one plane -
941                * rasterize/shade partial tile
942                */
943               int count = util_bitcount(partial);
944               in = TRUE;
945
946               if (setup->multisample)
947                  cmd = lp_rast_ms_tri_tab[count];
948               else
949                  cmd = use_32bits ? lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count];
950               if (!lp_scene_bin_cmd_with_state(scene, x, y,
951                                                setup->fs.stored, cmd,
952                                                lp_rast_arg_triangle(tri, partial)))
953                  goto fail;
954
955               LP_COUNT(nr_partially_covered_64);
956            } else {
957               /* triangle covers the whole tile- shade whole tile */
958               LP_COUNT(nr_fully_covered_64);
959               in = TRUE;
960               if (!lp_setup_whole_tile(setup, &tri->inputs, x, y, opaque))
961                  goto fail;
962            }
963
964            /* Iterate cx values across the region: */
965            for (int i = 0; i < nr_planes; i++)
966               cx[i] += xstep[i];
967         }
968
969         /* Iterate c values down the region: */
970         for (int i = 0; i < nr_planes; i++)
971            c[i] += ystep[i];
972      }
973   }
974
975   return TRUE;
976
977fail:
978   /* Need to disable any partially binned triangle.  This is easier
979    * than trying to locate all the triangle, shade-tile, etc,
980    * commands which may have been binned.
981    */
982   tri->inputs.disable = TRUE;
983   return FALSE;
984}
985
986
987/**
988 * Try to draw the triangle, restart the scene on failure.
989 */
990static inline void
991retry_triangle_ccw(struct lp_setup_context *setup,
992                   struct fixed_position *position,
993                   const float (*v0)[4],
994                   const float (*v1)[4],
995                   const float (*v2)[4],
996                   boolean front)
997{
998   if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) {
999      if (!lp_setup_flush_and_restart(setup))
1000         return;
1001
1002      if (!do_triangle_ccw(setup, position, v0, v1, v2, front))
1003         return;
1004   }
1005}
1006
1007
1008/**
1009 * Calculate fixed position data for a triangle
1010 * It is unfortunate we need to do that here (as we need area
1011 * calculated in fixed point), as there's quite some code duplication
1012 * to what is done in the jit setup prog.
1013 */
1014static inline int8_t
1015calc_fixed_position(struct lp_setup_context *setup,
1016                    struct fixed_position* position,
1017                    const float (*v0)[4],
1018                    const float (*v1)[4],
1019                    const float (*v2)[4])
1020{
1021   float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
1022   /*
1023    * The rounding may not be quite the same with PIPE_ARCH_SSE
1024    * (util_iround right now only does nearest/even on x87,
1025    * otherwise nearest/away-from-zero).
1026    * Both should be acceptable, I think.
1027    */
1028#if defined(PIPE_ARCH_SSE)
1029   __m128 v0r, v1r;
1030   __m128 vxy0xy2, vxy1xy0;
1031   __m128i vxy0xy2i, vxy1xy0i;
1032   __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
1033   __m128 pix_offset = _mm_set1_ps(pixel_offset);
1034   __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
1035   v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
1036   vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
1037   v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
1038   vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
1039   vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
1040   vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
1041   vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
1042   vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
1043   vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
1044   vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
1045   dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
1046   _mm_store_si128((__m128i *)&position->dx01, dxdy0120);
1047   /*
1048    * For the mul, would need some more shuffles, plus emulation
1049    * for the signed mul (without sse41), so don't bother.
1050    */
1051   x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
1052   x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
1053   x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
1054   y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
1055   _mm_store_si128((__m128i *)&position->x[0], x0120);
1056   _mm_store_si128((__m128i *)&position->y[0], y0120);
1057
1058#else
1059   position->x[0] = subpixel_snap(v0[0][0] - pixel_offset);
1060   position->x[1] = subpixel_snap(v1[0][0] - pixel_offset);
1061   position->x[2] = subpixel_snap(v2[0][0] - pixel_offset);
1062   position->x[3] = 0; // should be unused
1063
1064   position->y[0] = subpixel_snap(v0[0][1] - pixel_offset);
1065   position->y[1] = subpixel_snap(v1[0][1] - pixel_offset);
1066   position->y[2] = subpixel_snap(v2[0][1] - pixel_offset);
1067   position->y[3] = 0; // should be unused
1068
1069   position->dx01 = position->x[0] - position->x[1];
1070   position->dy01 = position->y[0] - position->y[1];
1071
1072   position->dx20 = position->x[2] - position->x[0];
1073   position->dy20 = position->y[2] - position->y[0];
1074#endif
1075
1076   uint64_t area = IMUL64(position->dx01, position->dy20) -
1077      IMUL64(position->dx20, position->dy01);
1078   return area == 0 ? 0 : (area & (1ULL << 63)) ? -1 : 1;
1079}
1080
1081
1082/**
1083 * Rotate a triangle, flipping its clockwise direction,
1084 * Swaps values for xy[0] and xy[1]
1085 */
1086static inline void
1087rotate_fixed_position_01(struct fixed_position* position)
1088{
1089   int x = position->x[1];
1090   int y = position->y[1];
1091
1092   position->x[1] = position->x[0];
1093   position->y[1] = position->y[0];
1094   position->x[0] = x;
1095   position->y[0] = y;
1096
1097   position->dx01 = -position->dx01;
1098   position->dy01 = -position->dy01;
1099   position->dx20 = position->x[2] - position->x[0];
1100   position->dy20 = position->y[2] - position->y[0];
1101}
1102
1103
1104/**
1105 * Rotate a triangle, flipping its clockwise direction,
1106 * Swaps values for xy[1] and xy[2]
1107 */
1108static inline void
1109rotate_fixed_position_12(struct fixed_position* position)
1110{
1111   int x = position->x[2];
1112   int y = position->y[2];
1113
1114   position->x[2] = position->x[1];
1115   position->y[2] = position->y[1];
1116   position->x[1] = x;
1117   position->y[1] = y;
1118
1119   x = position->dx01;
1120   y = position->dy01;
1121   position->dx01 = -position->dx20;
1122   position->dy01 = -position->dy20;
1123   position->dx20 = -x;
1124   position->dy20 = -y;
1125}
1126
1127
1128/**
1129 * Draw triangle if it's CW, cull otherwise.
1130 */
1131static void
1132triangle_cw(struct lp_setup_context *setup,
1133            const float (*v0)[4],
1134            const float (*v1)[4],
1135            const float (*v2)[4])
1136{
1137   alignas(16) struct fixed_position position;
1138   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
1139
1140   if (lp_context->active_statistics_queries) {
1141      lp_context->pipeline_statistics.c_primitives++;
1142   }
1143
1144   int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
1145
1146   if (area_sign < 0) {
1147      if (setup->flatshade_first) {
1148         rotate_fixed_position_12(&position);
1149         retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
1150      } else {
1151         rotate_fixed_position_01(&position);
1152         retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
1153      }
1154   }
1155}
1156
1157
1158static void
1159triangle_ccw(struct lp_setup_context *setup,
1160             const float (*v0)[4],
1161             const float (*v1)[4],
1162             const float (*v2)[4])
1163{
1164   alignas(16) struct fixed_position position;
1165   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
1166
1167   if (lp_context->active_statistics_queries) {
1168      lp_context->pipeline_statistics.c_primitives++;
1169   }
1170
1171   int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
1172
1173   if (area_sign > 0)
1174      retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
1175}
1176
1177
1178/**
1179 * Draw triangle whether it's CW or CCW.
1180 */
1181static void
1182triangle_both(struct lp_setup_context *setup,
1183              const float (*v0)[4],
1184              const float (*v1)[4],
1185              const float (*v2)[4])
1186{
1187   alignas(16) struct fixed_position position;
1188   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
1189
1190   if (lp_context->active_statistics_queries) {
1191      lp_context->pipeline_statistics.c_primitives++;
1192   }
1193
1194   int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
1195
1196   if (0) {
1197      assert(!util_is_inf_or_nan(v0[0][0]));
1198      assert(!util_is_inf_or_nan(v0[0][1]));
1199      assert(!util_is_inf_or_nan(v1[0][0]));
1200      assert(!util_is_inf_or_nan(v1[0][1]));
1201      assert(!util_is_inf_or_nan(v2[0][0]));
1202      assert(!util_is_inf_or_nan(v2[0][1]));
1203   }
1204
1205   if (area_sign > 0) {
1206      retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
1207   } else if (area_sign < 0) {
1208      if (setup->flatshade_first) {
1209         rotate_fixed_position_12(&position);
1210         retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
1211      } else {
1212         rotate_fixed_position_01(&position);
1213         retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
1214      }
1215   }
1216}
1217
1218
/**
 * Triangle entry point that discards every triangle.
 *
 * Installed by lp_setup_choose_triangle() when rasterizer discard is
 * enabled or the cull mode is not one of none/back/front.
 */
static void
triangle_noop(struct lp_setup_context *setup,
              const float (*v0)[4],
              const float (*v1)[4],
              const float (*v2)[4])
{
   /* Intentionally empty; the parameters are deliberately unused. */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
}
1226
1227
1228void
1229lp_setup_choose_triangle(struct lp_setup_context *setup)
1230{
1231   if (setup->rasterizer_discard) {
1232      setup->triangle = triangle_noop;
1233      return;
1234   }
1235   switch (setup->cullmode) {
1236   case PIPE_FACE_NONE:
1237      setup->triangle = triangle_both;
1238      break;
1239   case PIPE_FACE_BACK:
1240      setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
1241      break;
1242   case PIPE_FACE_FRONT:
1243      setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
1244      break;
1245   default:
1246      setup->triangle = triangle_noop;
1247      break;
1248   }
1249}
1250