1/*
2 * Copyright © 2006 - 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_compiler.h"
25#include "brw_eu.h"
26#include "brw_prim.h"
27
28#include "dev/intel_debug.h"
29
30struct brw_sf_compile {
31   struct brw_codegen func;
32   struct brw_sf_prog_key key;
33   struct brw_sf_prog_data prog_data;
34
35   struct brw_reg pv;
36   struct brw_reg det;
37   struct brw_reg dx0;
38   struct brw_reg dx2;
39   struct brw_reg dy0;
40   struct brw_reg dy2;
41
42   /* z and 1/w passed in separately:
43    */
44   struct brw_reg z[3];
45   struct brw_reg inv_w[3];
46
47   /* The vertices:
48    */
49   struct brw_reg vert[3];
50
51    /* Temporaries, allocated after last vertex reg.
52    */
53   struct brw_reg inv_det;
54   struct brw_reg a1_sub_a0;
55   struct brw_reg a2_sub_a0;
56   struct brw_reg tmp;
57
58   struct brw_reg m1Cx;
59   struct brw_reg m2Cy;
60   struct brw_reg m3C0;
61
62   GLuint nr_verts;
63   GLuint nr_attr_regs;
64   GLuint nr_setup_regs;
65   int urb_entry_read_offset;
66
67   /** The last known value of the f0.0 flag register. */
68   unsigned flag_value;
69
70   struct brw_vue_map vue_map;
71};
72
73/**
74 * Determine the vue slot corresponding to the given half of the given register.
75 */
76static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
77                                       int half)
78{
79   return (reg + c->urb_entry_read_offset) * 2 + half;
80}
81
82/**
83 * Determine the varying corresponding to the given half of the given
84 * register.  half=0 means the first half of a register, half=1 means the
85 * second half.
86 */
87static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
88                                      int half)
89{
90   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
91   return c->vue_map.slot_to_varying[vue_slot];
92}
93
94/**
95 * Determine the register corresponding to the given vue slot
96 */
97static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
98                                   struct brw_reg vert,
99                                   int vue_slot)
100{
101   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
102   GLuint sub = vue_slot % 2;
103
104   return brw_vec4_grf(vert.nr + off, sub * 4);
105}
106
107/**
108 * Determine the register corresponding to the given varying.
109 */
110static struct brw_reg get_varying(struct brw_sf_compile *c,
111                                  struct brw_reg vert,
112                                  GLuint varying)
113{
114   int vue_slot = c->vue_map.varying_to_slot[varying];
115   assert (vue_slot >= c->urb_entry_read_offset);
116   return get_vue_slot(c, vert, vue_slot);
117}
118
119static bool
120have_attr(struct brw_sf_compile *c, GLuint attr)
121{
122   return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
123}
124
125/***********************************************************************
126 * Twoside lighting
127 */
128static void copy_bfc( struct brw_sf_compile *c,
129		      struct brw_reg vert )
130{
131   struct brw_codegen *p = &c->func;
132   GLuint i;
133
134   for (i = 0; i < 2; i++) {
135      if (have_attr(c, VARYING_SLOT_COL0+i) &&
136	  have_attr(c, VARYING_SLOT_BFC0+i))
137	 brw_MOV(p,
138		 get_varying(c, vert, VARYING_SLOT_COL0+i),
139		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
140   }
141}
142
143
144static void do_twoside_color( struct brw_sf_compile *c )
145{
146   struct brw_codegen *p = &c->func;
147   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
148
149   /* Already done in clip program:
150    */
151   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
152      return;
153
154   /* If the vertex shader provides backface color, do the selection. The VS
155    * promises to set up the front color if the backface color is provided, but
156    * it may contain junk if never written to.
157    */
158   if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
159       !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
160      return;
161
162   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
163    * to get all channels active inside the IF.  In the clipping code
164    * we run with NoMask, so it's not an option and we can use
165    * BRW_EXECUTE_1 for all comparisons.
166    */
167   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
168   brw_IF(p, BRW_EXECUTE_4);
169   {
170      switch (c->nr_verts) {
171      case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
172      case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
173      case 1: copy_bfc(c, c->vert[0]);
174      }
175   }
176   brw_ENDIF(p);
177}
178
179
180
181/***********************************************************************
182 * Flat shading
183 */
184
185static void copy_flatshaded_attributes(struct brw_sf_compile *c,
186                                       struct brw_reg dst,
187                                       struct brw_reg src)
188{
189   struct brw_codegen *p = &c->func;
190   int i;
191
192   for (i = 0; i < c->vue_map.num_slots; i++) {
193      if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
194         brw_MOV(p,
195                 get_vue_slot(c, dst, i),
196                 get_vue_slot(c, src, i));
197      }
198   }
199}
200
201static int count_flatshaded_attributes(struct brw_sf_compile *c)
202{
203   int i;
204   int count = 0;
205
206   for (i = 0; i < c->vue_map.num_slots; i++)
207      if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
208         count++;
209
210   return count;
211}
212
213
214
/* Need to use a computed jump to copy flatshaded attributes as the
 * vertices are ordered according to y-coordinate before reaching this
 * point, so the PV could be anywhere.
 *
 * Emits three back-to-back copy blocks, one per possible source vertex, and
 * a JMPI on c->pv that selects which of them executes:
 *
 *    block 0: vert[0] -> vert[1], vert[2]   then JMPI past blocks 1 and 2
 *    block 1: vert[1] -> vert[0], vert[2]   then JMPI past block 2
 *    block 2: vert[2] -> vert[0], vert[1]
 *
 * Every copy_flatshaded_attributes() call emits one MOV per flat-shaded
 * slot, i.e. nr instructions, so blocks 0 and 1 are 2*nr MOVs plus their
 * trailing JMPI.  The jump distances below are derived from those counts
 * and must be kept in sync with the emitted code.
 */
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   GLuint nr;
   GLuint jmpi = 1;

   /* Already done in clip program:
    */
   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
      return;

   /* All jump distances are doubled when devinfo->ver == 5.
    * NOTE(review): presumably the jump unit differs on that generation;
    * confirm against the hardware documentation.
    */
   if (p->devinfo->ver == 5)
       jmpi = 2;

   nr = count_flatshaded_attributes(c);

   /* Scale c->pv in place by the length of one block including its JMPI
    * (2*nr + 1), then jump by it: 0 falls through to block 0, 1 lands on
    * block 1 and 2 lands on block 2.
    */
   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);

   /* Block 0: skip block 1 (2*nr MOVs + JMPI) and block 2 (2*nr MOVs). */
   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
   brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);

   /* Block 1: skip the 2*nr MOVs of block 2. */
   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
   brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);

   /* Block 2: last block, execution simply continues after it. */
   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
}
249
250
/* Line variant of the flat-shading copy: emits two copy blocks and a JMPI
 * on c->pv that selects which one executes.
 *
 *    block 0: vert[0] -> vert[1]   then JMPI past block 1
 *    block 1: vert[1] -> vert[0]
 *
 * Each copy_flatshaded_attributes() call emits nr MOVs (one per flat-shaded
 * slot); the jump distances below are derived from that count and must be
 * kept in sync with the emitted code.
 */
static void do_flatshade_line( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   GLuint nr;
   GLuint jmpi = 1;

   /* Already done in clip program:
    */
   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
      return;

   /* All jump distances are doubled when devinfo->ver == 5.
    * NOTE(review): presumably the jump unit differs on that generation;
    * confirm against the hardware documentation.
    */
   if (p->devinfo->ver == 5)
       jmpi = 2;

   nr = count_flatshaded_attributes(c);

   /* Scale c->pv in place by the length of block 0 including its JMPI
    * (nr + 1), then jump by it: 0 falls through to block 0, 1 lands on
    * block 1.
    */
   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);

   /* Block 0 is done: skip the nr MOVs of block 1.
    * NOTE(review): this immediate is built with brw_imm_ud() whereas
    * do_flatshade_triangle() uses brw_imm_d() for its jump distances;
    * confirm whether the difference is intentional.
    */
   brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
}
274
275
276/***********************************************************************
277 * Triangle setup.
278 */
279
280
281static void alloc_regs( struct brw_sf_compile *c )
282{
283   GLuint reg, i;
284
285   /* Values computed by fixed function unit:
286    */
287   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
288   c->det = brw_vec1_grf(1, 2);
289   c->dx0 = brw_vec1_grf(1, 3);
290   c->dx2 = brw_vec1_grf(1, 4);
291   c->dy0 = brw_vec1_grf(1, 5);
292   c->dy2 = brw_vec1_grf(1, 6);
293
294   /* z and 1/w passed in separately:
295    */
296   c->z[0]     = brw_vec1_grf(2, 0);
297   c->inv_w[0] = brw_vec1_grf(2, 1);
298   c->z[1]     = brw_vec1_grf(2, 2);
299   c->inv_w[1] = brw_vec1_grf(2, 3);
300   c->z[2]     = brw_vec1_grf(2, 4);
301   c->inv_w[2] = brw_vec1_grf(2, 5);
302
303   /* The vertices:
304    */
305   reg = 3;
306   for (i = 0; i < c->nr_verts; i++) {
307      c->vert[i] = brw_vec8_grf(reg, 0);
308      reg += c->nr_attr_regs;
309   }
310
311   /* Temporaries, allocated after last vertex reg.
312    */
313   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
314   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
315   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
316   c->tmp = brw_vec8_grf(reg, 0);  reg++;
317
318   /* Note grf allocation:
319    */
320   c->prog_data.total_grf = reg;
321
322
323   /* Outputs of this program - interpolation coefficients for
324    * rasterization:
325    */
326   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
327   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
328   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
329}
330
331
332static void copy_z_inv_w( struct brw_sf_compile *c )
333{
334   struct brw_codegen *p = &c->func;
335   GLuint i;
336
337   /* Copy both scalars with a single MOV:
338    */
339   for (i = 0; i < c->nr_verts; i++)
340      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
341}
342
343
344static void invert_det( struct brw_sf_compile *c)
345{
346   /* Looks like we invert all 8 elements just to get 1/det in
347    * position 2 !?!
348    */
349   gfx4_math(&c->func,
350	     c->inv_det,
351	     BRW_MATH_FUNCTION_INV,
352	     0,
353	     c->det,
354	     BRW_MATH_PRECISION_FULL);
355
356}
357
358
359static bool
360calculate_masks(struct brw_sf_compile *c,
361                GLuint reg,
362                GLushort *pc,
363                GLushort *pc_persp,
364                GLushort *pc_linear)
365{
366   bool is_last_attr = (reg == c->nr_setup_regs - 1);
367   enum glsl_interp_mode interp;
368
369   *pc_persp = 0;
370   *pc_linear = 0;
371   *pc = 0xf;
372
373   interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
374   if (interp == INTERP_MODE_SMOOTH) {
375      *pc_linear = 0xf;
376      *pc_persp = 0xf;
377   } else if (interp == INTERP_MODE_NOPERSPECTIVE)
378      *pc_linear = 0xf;
379
380   /* Maybe only process one attribute on the final round:
381    */
382   if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
383      *pc |= 0xf0;
384
385      interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
386      if (interp == INTERP_MODE_SMOOTH) {
387         *pc_linear |= 0xf0;
388         *pc_persp |= 0xf0;
389      } else if (interp == INTERP_MODE_NOPERSPECTIVE)
390         *pc_linear |= 0xf0;
391   }
392
393   return is_last_attr;
394}
395
396/* Calculates the predicate control for which channels of a reg
397 * (containing 2 attrs) to do point sprite coordinate replacement on.
398 */
399static uint16_t
400calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
401{
402   int varying1, varying2;
403   uint16_t pc = 0;
404
405   varying1 = vert_reg_to_varying(c, reg, 0);
406   if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
407      if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
408	 pc |= 0x0f;
409   }
410   if (varying1 == BRW_VARYING_SLOT_PNTC)
411      pc |= 0x0f;
412
413   varying2 = vert_reg_to_varying(c, reg, 1);
414   if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
415      if (c->key.point_sprite_coord_replace & (1 << (varying2 -
416                                                     VARYING_SLOT_TEX0)))
417         pc |= 0xf0;
418   }
419   if (varying2 == BRW_VARYING_SLOT_PNTC)
420      pc |= 0xf0;
421
422   return pc;
423}
424
425static void
426set_predicate_control_flag_value(struct brw_codegen *p,
427                                 struct brw_sf_compile *c,
428                                 unsigned value)
429{
430   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
431
432   if (value != 0xff) {
433      if (value != c->flag_value) {
434         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
435         c->flag_value = value;
436      }
437
438      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
439   }
440}
441
442static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
443{
444   struct brw_codegen *p = &c->func;
445   GLuint i;
446
447   c->flag_value = 0xff;
448   c->nr_verts = 3;
449
450   if (allocate)
451      alloc_regs(c);
452
453   invert_det(c);
454   copy_z_inv_w(c);
455
456   if (c->key.do_twoside_color)
457      do_twoside_color(c);
458
459   if (c->key.contains_flat_varying)
460      do_flatshade_triangle(c);
461
462
463   for (i = 0; i < c->nr_setup_regs; i++)
464   {
465      /* Pair of incoming attributes:
466       */
467      struct brw_reg a0 = offset(c->vert[0], i);
468      struct brw_reg a1 = offset(c->vert[1], i);
469      struct brw_reg a2 = offset(c->vert[2], i);
470      GLushort pc, pc_persp, pc_linear;
471      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
472
473      if (pc_persp)
474      {
475	 set_predicate_control_flag_value(p, c, pc_persp);
476	 brw_MUL(p, a0, a0, c->inv_w[0]);
477	 brw_MUL(p, a1, a1, c->inv_w[1]);
478	 brw_MUL(p, a2, a2, c->inv_w[2]);
479      }
480
481
482      /* Calculate coefficients for interpolated values:
483       */
484      if (pc_linear)
485      {
486	 set_predicate_control_flag_value(p, c, pc_linear);
487
488	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
489	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
490
491	 /* calculate dA/dx
492	  */
493	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
494	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
495	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
496
497	 /* calculate dA/dy
498	  */
499	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
500	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
501	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
502      }
503
504      {
505	 set_predicate_control_flag_value(p, c, pc);
506	 /* start point for interpolation
507	  */
508	 brw_MOV(p, c->m3C0, a0);
509
510	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
511	  * the send instruction:
512	  */
513	 brw_urb_WRITE(p,
514		       brw_null_reg(),
515		       0,
516		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
517                       last ? BRW_URB_WRITE_EOT_COMPLETE
518                       : BRW_URB_WRITE_NO_FLAGS,
519		       4, 	/* msg len */
520		       0,	/* response len */
521		       i*4,	/* offset */
522		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
523      }
524   }
525
526   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
527}
528
529
530
531static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
532{
533   struct brw_codegen *p = &c->func;
534   GLuint i;
535
536   c->flag_value = 0xff;
537   c->nr_verts = 2;
538
539   if (allocate)
540      alloc_regs(c);
541
542   invert_det(c);
543   copy_z_inv_w(c);
544
545   if (c->key.contains_flat_varying)
546      do_flatshade_line(c);
547
548   for (i = 0; i < c->nr_setup_regs; i++)
549   {
550      /* Pair of incoming attributes:
551       */
552      struct brw_reg a0 = offset(c->vert[0], i);
553      struct brw_reg a1 = offset(c->vert[1], i);
554      GLushort pc, pc_persp, pc_linear;
555      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
556
557      if (pc_persp)
558      {
559	 set_predicate_control_flag_value(p, c, pc_persp);
560	 brw_MUL(p, a0, a0, c->inv_w[0]);
561	 brw_MUL(p, a1, a1, c->inv_w[1]);
562      }
563
564      /* Calculate coefficients for position, color:
565       */
566      if (pc_linear) {
567	 set_predicate_control_flag_value(p, c, pc_linear);
568
569	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
570
571	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
572	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
573
574	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
575	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
576      }
577
578      {
579	 set_predicate_control_flag_value(p, c, pc);
580
581	 /* start point for interpolation
582	  */
583	 brw_MOV(p, c->m3C0, a0);
584
585	 /* Copy m0..m3 to URB.
586	  */
587	 brw_urb_WRITE(p,
588		       brw_null_reg(),
589		       0,
590		       brw_vec8_grf(0, 0),
591                       last ? BRW_URB_WRITE_EOT_COMPLETE
592                       : BRW_URB_WRITE_NO_FLAGS,
593		       4, 	/* msg len */
594		       0,	/* response len */
595		       i*4,	/* urb destination offset */
596		       BRW_URB_SWIZZLE_TRANSPOSE);
597      }
598   }
599
600   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
601}
602
603static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
604{
605   struct brw_codegen *p = &c->func;
606   GLuint i;
607
608   c->flag_value = 0xff;
609   c->nr_verts = 1;
610
611   if (allocate)
612      alloc_regs(c);
613
614   copy_z_inv_w(c);
615   for (i = 0; i < c->nr_setup_regs; i++)
616   {
617      struct brw_reg a0 = offset(c->vert[0], i);
618      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
619      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
620
621      pc_coord_replace = calculate_point_sprite_mask(c, i);
622      pc_persp &= ~pc_coord_replace;
623
624      if (pc_persp) {
625	 set_predicate_control_flag_value(p, c, pc_persp);
626	 brw_MUL(p, a0, a0, c->inv_w[0]);
627      }
628
629      /* Point sprite coordinate replacement: A texcoord with this
630       * enabled gets replaced with the value (x, y, 0, 1) where x and
631       * y vary from 0 to 1 across the horizontal and vertical of the
632       * point.
633       */
634      if (pc_coord_replace) {
635	 set_predicate_control_flag_value(p, c, pc_coord_replace);
636	 /* Calculate 1.0/PointWidth */
637	 gfx4_math(&c->func,
638		   c->tmp,
639		   BRW_MATH_FUNCTION_INV,
640		   0,
641		   c->dx0,
642		   BRW_MATH_PRECISION_FULL);
643
644	 brw_set_default_access_mode(p, BRW_ALIGN_16);
645
646	 /* dA/dx, dA/dy */
647	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
648	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
649	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
650	 if (c->key.sprite_origin_lower_left) {
651	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
652	 } else {
653	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
654	 }
655
656	 /* attribute constant offset */
657	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
658	 if (c->key.sprite_origin_lower_left) {
659	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
660	 } else {
661	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
662	 }
663
664	 brw_set_default_access_mode(p, BRW_ALIGN_1);
665      }
666
667      if (pc & ~pc_coord_replace) {
668	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
669	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
670	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
671	 brw_MOV(p, c->m3C0, a0); /* constant value */
672      }
673
674
675      set_predicate_control_flag_value(p, c, pc);
676      /* Copy m0..m3 to URB. */
677      brw_urb_WRITE(p,
678		    brw_null_reg(),
679		    0,
680		    brw_vec8_grf(0, 0),
681                    last ? BRW_URB_WRITE_EOT_COMPLETE
682                    : BRW_URB_WRITE_NO_FLAGS,
683		    4, 	/* msg len */
684		    0,	/* response len */
685		    i*4,	/* urb destination offset */
686		    BRW_URB_SWIZZLE_TRANSPOSE);
687   }
688
689   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
690}
691
692/* Points setup - several simplifications as all attributes are
693 * constant across the face of the point (point sprites excluded!)
694 */
695static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
696{
697   struct brw_codegen *p = &c->func;
698   GLuint i;
699
700   c->flag_value = 0xff;
701   c->nr_verts = 1;
702
703   if (allocate)
704      alloc_regs(c);
705
706   copy_z_inv_w(c);
707
708   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
709   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
710
711   for (i = 0; i < c->nr_setup_regs; i++)
712   {
713      struct brw_reg a0 = offset(c->vert[0], i);
714      GLushort pc, pc_persp, pc_linear;
715      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
716
717      if (pc_persp)
718      {
719	 /* This seems odd as the values are all constant, but the
720	  * fragment shader will be expecting it:
721	  */
722	 set_predicate_control_flag_value(p, c, pc_persp);
723	 brw_MUL(p, a0, a0, c->inv_w[0]);
724      }
725
726
727      /* The delta values are always zero, just send the starting
728       * coordinate.  Again, this is to fit in with the interpolation
729       * code in the fragment shader.
730       */
731      {
732	 set_predicate_control_flag_value(p, c, pc);
733
734	 brw_MOV(p, c->m3C0, a0); /* constant value */
735
736	 /* Copy m0..m3 to URB.
737	  */
738	 brw_urb_WRITE(p,
739		       brw_null_reg(),
740		       0,
741		       brw_vec8_grf(0, 0),
742                       last ? BRW_URB_WRITE_EOT_COMPLETE
743                       : BRW_URB_WRITE_NO_FLAGS,
744		       4, 	/* msg len */
745		       0,	/* response len */
746		       i*4,	/* urb destination offset */
747		       BRW_URB_SWIZZLE_TRANSPOSE);
748      }
749   }
750
751   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
752}
753
754static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
755{
756   struct brw_codegen *p = &c->func;
757   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
758   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
759   struct brw_reg primmask;
760   int jmp;
761   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
762
763   c->nr_verts = 3;
764   alloc_regs(c);
765
766   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
767
768   brw_MOV(p, primmask, brw_imm_ud(1));
769   brw_SHL(p, primmask, primmask, payload_prim);
770
771   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
772					       (1<<_3DPRIM_TRISTRIP) |
773					       (1<<_3DPRIM_TRIFAN) |
774					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
775					       (1<<_3DPRIM_POLYGON) |
776					       (1<<_3DPRIM_RECTLIST) |
777					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
778   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
779   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
780   brw_emit_tri_setup(c, false);
781   brw_land_fwd_jump(p, jmp);
782
783   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
784					       (1<<_3DPRIM_LINESTRIP) |
785					       (1<<_3DPRIM_LINELOOP) |
786					       (1<<_3DPRIM_LINESTRIP_CONT) |
787					       (1<<_3DPRIM_LINESTRIP_BF) |
788					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
789   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
790   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
791   brw_emit_line_setup(c, false);
792   brw_land_fwd_jump(p, jmp);
793
794   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
795   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
796   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
797   brw_emit_point_sprite_setup(c, false);
798   brw_land_fwd_jump(p, jmp);
799
800   brw_emit_point_setup( c, false );
801}
802
803const unsigned *
804brw_compile_sf(const struct brw_compiler *compiler,
805               void *mem_ctx,
806               const struct brw_sf_prog_key *key,
807               struct brw_sf_prog_data *prog_data,
808               struct brw_vue_map *vue_map,
809               unsigned *final_assembly_size)
810{
811   struct brw_sf_compile c;
812   memset(&c, 0, sizeof(c));
813
814   /* Begin the compilation:
815    */
816   brw_init_codegen(&compiler->isa, &c.func, mem_ctx);
817
818   c.key = *key;
819   c.vue_map = *vue_map;
820   if (c.key.do_point_coord) {
821      /*
822       * gl_PointCoord is a FS instead of VS builtin variable, thus it's
823       * not included in c.vue_map generated in VS stage. Here we add
824       * it manually to let SF shader generate the needed interpolation
825       * coefficient for FS shader.
826       */
827      c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
828      c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
829   }
830   c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
831   c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
832   c.nr_setup_regs = c.nr_attr_regs;
833
834   c.prog_data.urb_read_length = c.nr_attr_regs;
835   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
836
837   /* Which primitive?  Or all three?
838    */
839   switch (key->primitive) {
840   case BRW_SF_PRIM_TRIANGLES:
841      c.nr_verts = 3;
842      brw_emit_tri_setup( &c, true );
843      break;
844   case BRW_SF_PRIM_LINES:
845      c.nr_verts = 2;
846      brw_emit_line_setup( &c, true );
847      break;
848   case BRW_SF_PRIM_POINTS:
849      c.nr_verts = 1;
850      if (key->do_point_sprite)
851	  brw_emit_point_sprite_setup( &c, true );
852      else
853	  brw_emit_point_setup( &c, true );
854      break;
855   case BRW_SF_PRIM_UNFILLED_TRIS:
856      c.nr_verts = 3;
857      brw_emit_anyprim_setup( &c );
858      break;
859   default:
860      unreachable("not reached");
861   }
862
863   /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
864    * source). Compacting would be difficult.
865    */
866   /* brw_compact_instructions(&c.func, 0, 0, NULL); */
867
868   *prog_data = c.prog_data;
869
870   const unsigned *program = brw_get_program(&c.func, final_assembly_size);
871
872   if (INTEL_DEBUG(DEBUG_SF)) {
873      fprintf(stderr, "sf:\n");
874      brw_disassemble_with_labels(&compiler->isa,
875                                  program, 0, *final_assembly_size, stderr);
876      fprintf(stderr, "\n");
877   }
878
879   return program;
880}
881