1/**********************************************************
2 * Copyright 1998-2022 VMware, Inc.  All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26/**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35#include "pipe/p_compiler.h"
36#include "pipe/p_shader_tokens.h"
37#include "pipe/p_defines.h"
38#include "tgsi/tgsi_build.h"
39#include "tgsi/tgsi_dump.h"
40#include "tgsi/tgsi_info.h"
41#include "tgsi/tgsi_parse.h"
42#include "tgsi/tgsi_scan.h"
43#include "tgsi/tgsi_strings.h"
44#include "tgsi/tgsi_two_side.h"
45#include "tgsi/tgsi_aa_point.h"
46#include "tgsi/tgsi_util.h"
47#include "util/u_math.h"
48#include "util/u_memory.h"
49#include "util/u_bitmask.h"
50#include "util/u_debug.h"
51#include "util/u_pstipple.h"
52
53#include "svga_context.h"
54#include "svga_debug.h"
55#include "svga_link.h"
56#include "svga_shader.h"
57#include "svga_tgsi.h"
58
59#include "VGPU10ShaderTokens.h"
60
61
/*
 * File-local limits and sentinel values.
 */

/* NOTE(review): presumably a "no register assigned" sentinel; its users
 * are outside this part of the file -- confirm.
 */
#define INVALID_INDEX 99999
/* NOTE(review): presumably bounds internal_temp_count; confirm at its users. */
#define MAX_INTERNAL_TEMPS 4
/* Size of the emitter's system_value_indexes[] array */
#define MAX_SYSTEM_VALUES 4
/* Number of 4-component immediates the emitter can hold (sizes the
 * emitter's immediates[] array and is the limit checked for immediate
 * constant buffer operands).
 */
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
/* Size of the emitter's temp_arrays[] array */
#define MAX_TEMP_ARRAYS 64  /* Enough? */
68
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation.  The selected case is stored
 * in the emitter's clip_mode field.
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                  * has code that writes to the register.  We convert the
                  * clipvertex position into one or more clip distances.
                  */
};
88
89
/**
 * Shader signature info: a header followed by fixed-size arrays of
 * signature entries for inputs, outputs and patch constants.
 * Individual entries are filled in with set_shader_signature_entry().
 */
struct svga_shader_signature
{
   SVGA3dDXShaderSignatureHeader header;
   /* one entry array per signature kind, sized by the gallium limits */
   SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
   SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
   SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
};
98
99static inline void
100set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
101                           unsigned index,
102                           SVGA3dDXSignatureSemanticName sgnName,
103                           unsigned mask,
104                           SVGA3dDXSignatureRegisterComponentType compType,
105                           SVGA3dDXSignatureMinPrecision minPrecision)
106{
107   e->registerIndex = index;
108   e->semanticName = sgnName;
109   e->mask = mask;
110   e->componentType = compType;
111   e->minPrecision = minPrecision;
112};
113
/**
 * Lookup table from TGSI semantic to SVGA signature semantic name.
 *
 * The table is positional: it is indexed by the enum tgsi_semantic value
 * (see map_tgsi_semantic_to_sgn_name(), which also spot-checks a few
 * slots with asserts).  Any change to the order of the TGSI semantic
 * enum therefore requires a matching change here.
 * Presumably, slots holding ..._UNDEFINED are semantics without a
 * dedicated signature semantic name -- confirm against the SVGA headers.
 */
static const SVGA3dDXSignatureSemanticName
tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
   SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
};
162
163
164/**
165 * Map tgsi semantic name to SVGA signature semantic name
166 */
167static inline SVGA3dDXSignatureSemanticName
168map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
169{
170   assert(name < TGSI_SEMANTIC_COUNT);
171
172   /* Do a few asserts here to spot check the mapping */
173   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
174          SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
175   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
176          SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
177   assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
178          SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
179
180   return tgsi_semantic_to_sgn_name[name];
181}
182
/**
 * Values for the emitter's reemit_rawbuf_instruction field.
 * NOTE(review): the exact meaning of each state is defined by the raw
 * buffer instruction code, which is outside this part of the file.
 */
enum reemit_mode {
   REEMIT_FALSE = 0,
   REEMIT_TRUE = 1,
   REEMIT_IN_PROGRESS = 2
};
188
/**
 * Describes one temporary used for a raw buffer source (see the
 * emitter's raw_buf_tmp[] array).  The three index fields are 8-bit
 * bitfields.
 * NOTE(review): field meanings below are inferred from their names;
 * confirm against the code that fills them in.
 */
struct svga_raw_buf_tmp {
   bool indirect;             /* presumably: element is indirectly addressed */
   unsigned buffer_index:8;   /* presumably: which buffer is referenced */
   unsigned element_index:8;  /* presumably: element within the buffer */
   unsigned element_rel:8;    /* presumably: relative-address register info */
};
195
/**
 * All state for translating one TGSI shader into a VGPU10 token stream:
 * the growable output buffer, the (unchanging) compile key and shader
 * info, and per-shader-stage bookkeeping of register indexes.
 * Objects are created with alloc_emitter() and released with
 * free_emitter().
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;   /* capacity of buf, in bytes */
   char *buf;       /* start of the buffer */
   char *ptr;       /* next write position within buf */

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;    /* shader stage; compared against PIPE_SHADER_x */
   unsigned version; /**< Either 40, 41, 50 or 51 at this time */

   unsigned cur_tgsi_token;     /**< current tgsi token position */
   unsigned inst_start_token;
   boolean discard_instruction; /**< throw away current instruction? */
   boolean reemit_instruction;  /**< reemit current instruction */
   boolean reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
   boolean skip_instruction;    /**< skip current instruction */
   boolean use_sampler_state_mapping; /* use sampler state mapping */
   enum reemit_mode reemit_rawbuf_instruction;

   /* Immediates */
   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   double (*immediates_dbl)[2];
   unsigned num_immediates;      /**< Number of immediates emitted */
   unsigned common_immediate_pos[20];  /**< literals for common immediates */
   unsigned num_common_immediates;
   boolean immediates_emitted;

   unsigned num_outputs;      /**< include any extra outputs */
                              /**  The first extra output is reserved for
                               *   non-adjusted vertex position for
                               *   stream output purpose
                               */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count;  /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
      boolean initialized;
   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */

   unsigned initialize_temp_index;

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Raw constant buffers */
   unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
   unsigned raw_bufs;                 /* raw buffers bitmask */
   unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
   unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
   struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */

   /* Samplers */
   unsigned num_samplers;
   boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Images */
   unsigned num_images;
   unsigned image_mask;
   struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
   unsigned image_size_index;  /* starting index to cbuf for image size */

   /* Shader buffers */
   unsigned num_shader_bufs;

   /* HW atomic buffers */
   unsigned num_atomic_bufs;
   unsigned atomic_bufs_mask;
   unsigned max_atomic_counter_index;
   VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */

   boolean uav_declared;  /* True if uav is declared */

   /* Index Range declaration */
   struct {
      unsigned start_index;
      unsigned count;
      boolean required;
      unsigned operandType;
      unsigned size;
      unsigned dim;
   } index_range;

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   ubyte system_value_indexes[MAX_SYSTEM_VALUES];

   /* Vertex position output handling */
   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
      unsigned prescale_scale_index, prescale_trans_index;
      unsigned num_prescale;      /* number of prescale factor in const buf */
      unsigned viewport_index;
      unsigned need_prescale:1;
      unsigned have_prescale:1;
   } vposition;

   /* Shader limits (used by check_register_index()) */
   unsigned max_vs_inputs;
   unsigned max_vs_outputs;
   unsigned max_gs_inputs;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      unsigned vertex_id_bias_index;
      unsigned vertex_id_sys_index;
      unsigned vertex_id_tmp_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned num_color_outputs;
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;
      unsigned pstipple_sampler_state_index;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */

      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */

      unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
      unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */

      /** TGSI index of sample mask input sys value */
      unsigned sample_mask_in_sys_index;

      /* layer */
      unsigned layer_input_index;    /**< TGSI index of layer */
      unsigned layer_imm_index;      /**< immediate for default layer 0 */

      boolean forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
      unsigned input_size;       /**< size of input arrays */
      unsigned prim_id_index;    /**< primitive id register index */
      unsigned max_out_vertices; /**< maximum number of output vertices */
      unsigned invocations;
      unsigned invocation_id_sys_index;

      unsigned viewport_index_out_index;
      unsigned viewport_index_tmp_index;
   } gs;

   /* For tessellation control shaders only */
   struct {
      unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
      unsigned imm_index;                    /**< immediate for tcs */
      unsigned invocation_id_sys_index;      /**< invocation id */
      unsigned invocation_id_tmp_index;
      unsigned instruction_token_pos;        /* token pos for the first instruction */
      unsigned control_point_input_index;    /* control point input register index */
      unsigned control_point_addr_index;     /* control point input address register */
      unsigned control_point_out_index;      /* control point output register index */
      unsigned control_point_tmp_index;      /* control point temporary register */
      unsigned control_point_out_count;      /* control point output count */
      boolean  control_point_phase;          /* true if in control point phase */
      boolean  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
      unsigned patch_generic_out_count;      /* per-patch generic output count */
      unsigned patch_generic_out_index;      /* per-patch generic output register index*/
      unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
      unsigned prim_id_index;                /* primitive id */
      struct {
         unsigned out_index;      /* real tessinner output register */
         unsigned temp_index;     /* tessinner temp register */
         unsigned tgsi_index;     /* tgsi tessinner output register */
      } inner;
      struct {
         unsigned out_index;      /* real tessouter output register */
         unsigned temp_index;     /* tessouter temp register */
         unsigned tgsi_index;     /* tgsi tessouter output register */
      } outer;
   } tcs;

   /* For tessellation evaluation shaders only */
   struct {
      enum pipe_prim_type prim_mode;
      enum pipe_tess_spacing spacing;
      boolean vertices_order_cw;
      boolean point_mode;
      unsigned tesscoord_sys_index;
      unsigned swizzle_max;
      unsigned prim_id_index;                /* primitive id */
      struct {
         unsigned in_index;       /* real tessinner input register */
         unsigned temp_index;     /* tessinner temp register */
         unsigned tgsi_index;     /* tgsi tessinner input register */
      } inner;
      struct {
         unsigned in_index;       /* real tessouter input register */
         unsigned temp_index;     /* tessouter temp register */
         unsigned tgsi_index;     /* tgsi tessouter input register */
      } outer;
   } tes;

   /* For compute shaders */
   struct {
      unsigned block_width;       /* thread group size in x dimension */
      unsigned block_height;      /* thread group size in y dimension */
      unsigned block_depth;       /* thread group size in z dimension */
      unsigned thread_id_index;   /* thread id tgsi index */
      unsigned block_id_index;    /* block id tgsi index */
      bool shared_memory_declared;    /* set if shared memory is declared */
      struct {
         unsigned tgsi_index;   /* grid size tgsi index */
         unsigned imm_index;    /* grid size imm index */
      } grid_size;
   } cs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode; /**< set by determine_clipping_mode() */
   unsigned clip_dist_out_index; /**< clip distance output register index */
   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
   unsigned clip_dist_so_index;  /**< clip distance shadow copy */

   /* Clip vertex registers */
   unsigned clip_vertex_out_index; /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   boolean constant_color_output;

   boolean uses_flat_interp;

   unsigned reserved_token;        /* index to the reserved token */
   boolean uses_precise_qualifier;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /** Which texture units are doing shadow comparison in the shader code */
   unsigned shadow_compare_units;

   /* VS/TCS/TES/GS/FS Linkage info */
   struct shader_linkage linkage;
   struct tgsi_shader_info *prevShaderInfo;

   /* Shader signature */
   struct svga_shader_signature signature;

   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */

   /* For util_debug_message */
   struct util_debug_callback svga_debug_callback;

   /* current loop depth in shader */
   unsigned current_loop_depth;
};
488
489
/*
 * Forward declarations of file-local (static) functions, so that they
 * can be called before the point where they are defined.
 */
static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);

static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);

static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst);

static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst);

static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       unsigned opcodeType, unsigned operandType,
                       unsigned dim, unsigned index, unsigned size,
                       unsigned name, unsigned numComp,
                       unsigned selMode, unsigned usageMask,
                       unsigned interpMode,
                       boolean addSignature,
                       SVGA3dDXSignatureSemanticName sgnName);

static boolean
emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst);

static void
create_temp_array(struct svga_shader_emitter_v10 *emit,
                  unsigned arrayID, unsigned first, unsigned count,
                  unsigned startIndex);
534
535static char err_buf[128];
536
537static boolean
538expand(struct svga_shader_emitter_v10 *emit)
539{
540   char *new_buf;
541   unsigned newsize = emit->size * 2;
542
543   if (emit->buf != err_buf)
544      new_buf = REALLOC(emit->buf, emit->size, newsize);
545   else
546      new_buf = NULL;
547
548   if (!new_buf) {
549      emit->ptr = err_buf;
550      emit->buf = err_buf;
551      emit->size = sizeof(err_buf);
552      return FALSE;
553   }
554
555   emit->size = newsize;
556   emit->ptr = new_buf + (emit->ptr - emit->buf);
557   emit->buf = new_buf;
558   return TRUE;
559}
560
561/**
562 * Create and initialize a new svga_shader_emitter_v10 object.
563 */
564static struct svga_shader_emitter_v10 *
565alloc_emitter(void)
566{
567   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
568
569   if (!emit)
570      return NULL;
571
572   /* to initialize the output buffer */
573   emit->size = 512;
574   if (!expand(emit)) {
575      FREE(emit);
576      return NULL;
577   }
578   return emit;
579}
580
581/**
582 * Free an svga_shader_emitter_v10 object.
583 */
584static void
585free_emitter(struct svga_shader_emitter_v10 *emit)
586{
587   assert(emit);
588   FREE(emit->buf);    /* will be NULL if translation succeeded */
589   FREE(emit);
590}
591
592static inline boolean
593reserve(struct svga_shader_emitter_v10 *emit,
594        unsigned nr_dwords)
595{
596   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
597      if (!expand(emit))
598         return FALSE;
599   }
600
601   return TRUE;
602}
603
604static boolean
605emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
606{
607   if (!reserve(emit, 1))
608      return FALSE;
609
610   *(uint32 *)emit->ptr = dword;
611   emit->ptr += sizeof dword;
612   return TRUE;
613}
614
615static boolean
616emit_dwords(struct svga_shader_emitter_v10 *emit,
617            const uint32 *dwords,
618            unsigned nr)
619{
620   if (!reserve(emit, nr))
621      return FALSE;
622
623   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
624   emit->ptr += nr * sizeof *dwords;
625   return TRUE;
626}
627
628/** Return the number of tokens in the emitter's buffer */
629static unsigned
630emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
631{
632   return (emit->ptr - emit->buf) / sizeof(unsigned);
633}
634
635
636/**
637 * Check for register overflow.  If we overflow we'll set an
638 * error flag.  This function can be called for register declarations
639 * or use as src/dst instruction operands.
640 * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
641                or VGPU10_OPCODE_DCL_x
642 * \param index  the register index
643 */
644static void
645check_register_index(struct svga_shader_emitter_v10 *emit,
646                     unsigned operandType, unsigned index)
647{
648   bool overflow_before = emit->register_overflow;
649
650   switch (operandType) {
651   case VGPU10_OPERAND_TYPE_TEMP:
652   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
653   case VGPU10_OPCODE_DCL_TEMPS:
654      if (index >= VGPU10_MAX_TEMPS) {
655         emit->register_overflow = TRUE;
656      }
657      break;
658   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
659   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
660      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
661         emit->register_overflow = TRUE;
662      }
663      break;
664   case VGPU10_OPERAND_TYPE_INPUT:
665   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
666   case VGPU10_OPCODE_DCL_INPUT:
667   case VGPU10_OPCODE_DCL_INPUT_SGV:
668   case VGPU10_OPCODE_DCL_INPUT_SIV:
669   case VGPU10_OPCODE_DCL_INPUT_PS:
670   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
671   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
672      if ((emit->unit == PIPE_SHADER_VERTEX &&
673           index >= emit->max_vs_inputs) ||
674          (emit->unit == PIPE_SHADER_GEOMETRY &&
675           index >= emit->max_gs_inputs) ||
676          (emit->unit == PIPE_SHADER_FRAGMENT &&
677           index >= VGPU10_MAX_FS_INPUTS) ||
678          (emit->unit == PIPE_SHADER_TESS_CTRL &&
679           index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
680          (emit->unit == PIPE_SHADER_TESS_EVAL &&
681           index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
682         emit->register_overflow = TRUE;
683      }
684      break;
685   case VGPU10_OPERAND_TYPE_OUTPUT:
686   case VGPU10_OPCODE_DCL_OUTPUT:
687   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
688   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
689      /* Note: we are skipping two output indices in tcs for
690       * tessinner/outer levels. Implementation will not exceed
691       * number of output count but it allows index to go beyond
692       * VGPU11_MAX_HS_OUTPUTS.
693       * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
694       */
695      if ((emit->unit == PIPE_SHADER_VERTEX &&
696           index >= emit->max_vs_outputs) ||
697          (emit->unit == PIPE_SHADER_GEOMETRY &&
698           index >= VGPU10_MAX_GS_OUTPUTS) ||
699          (emit->unit == PIPE_SHADER_FRAGMENT &&
700           index >= VGPU10_MAX_FS_OUTPUTS) ||
701          (emit->unit == PIPE_SHADER_TESS_CTRL &&
702           index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
703          (emit->unit == PIPE_SHADER_TESS_EVAL &&
704           index >= VGPU11_MAX_DS_OUTPUTS)) {
705         emit->register_overflow = TRUE;
706      }
707      break;
708   case VGPU10_OPERAND_TYPE_SAMPLER:
709   case VGPU10_OPCODE_DCL_SAMPLER:
710      if (index >= VGPU10_MAX_SAMPLERS) {
711         emit->register_overflow = TRUE;
712      }
713      break;
714   case VGPU10_OPERAND_TYPE_RESOURCE:
715   case VGPU10_OPCODE_DCL_RESOURCE:
716      if (index >= VGPU10_MAX_RESOURCES) {
717         emit->register_overflow = TRUE;
718      }
719      break;
720   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
721      if (index >= MAX_IMMEDIATE_COUNT) {
722         emit->register_overflow = TRUE;
723      }
724      break;
725   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
726   case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
727   case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
728   case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
729   case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
730   case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
731   case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
732   case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
733      /* nothing */
734      break;
735   default:
736      assert(0);
737      ; /* nothing */
738   }
739
740   if (emit->register_overflow && !overflow_before) {
741      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
742                   operandType, index);
743   }
744}
745
746
747/**
748 * Examine misc state to determine the clipping mode.
749 */
750static void
751determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
752{
753   /* num_written_clipdistance in the shader info for tessellation
754    * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
755    * is not defined for this shader. So we go through all the output declarations
756    * to set the num_written_clipdistance. This is just to determine the
757    * clipping mode.
758    */
759   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
760      unsigned i;
761      for (i = 0; i < emit->info.num_outputs; i++) {
762         if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
763            emit->info.num_written_clipdistance =
764               4 * (emit->info.output_semantic_index[i] + 1);
765         }
766      }
767   }
768
769   if (emit->info.num_written_clipdistance > 0) {
770      emit->clip_mode = CLIP_DISTANCE;
771   }
772   else if (emit->info.writes_clipvertex) {
773      emit->clip_mode = CLIP_VERTEX;
774   }
775   else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
776      /*
777       * Only the last shader in the vertex processing stage needs to
778       * handle the legacy clip mode.
779       */
780      emit->clip_mode = CLIP_LEGACY;
781   }
782   else {
783      emit->clip_mode = CLIP_NONE;
784   }
785}
786
787
788/**
789 * For clip distance register declarations and clip distance register
790 * writes we need to mask the declaration usage or instruction writemask
791 * (respectively) against the set of the really-enabled clipping planes.
792 *
793 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
794 * has a VS that writes to all 8 clip distance registers, but the plane enable
795 * flags are a subset of that.
796 *
797 * This function is used to apply the plane enable flags to the register
798 * declaration or instruction writemask.
799 *
800 * \param writemask  the declaration usage mask or instruction writemask
801 * \param clip_reg_index  which clip plane register is being declared/written.
802 *                        The legal values are 0 and 1 (two clip planes per
803 *                        register, for a total of 8 clip planes)
804 */
805static unsigned
806apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
807                      unsigned writemask, unsigned clip_reg_index)
808{
809   unsigned shift;
810
811   assert(clip_reg_index < 2);
812
813   /* four clip planes per clip register: */
814   shift = clip_reg_index * 4;
815   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
816
817   return writemask;
818}
819
820
821/**
822 * Translate gallium shader type into VGPU10 type.
823 */
824static VGPU10_PROGRAM_TYPE
825translate_shader_type(unsigned type)
826{
827   switch (type) {
828   case PIPE_SHADER_VERTEX:
829      return VGPU10_VERTEX_SHADER;
830   case PIPE_SHADER_GEOMETRY:
831      return VGPU10_GEOMETRY_SHADER;
832   case PIPE_SHADER_FRAGMENT:
833      return VGPU10_PIXEL_SHADER;
834   case PIPE_SHADER_TESS_CTRL:
835      return VGPU10_HULL_SHADER;
836   case PIPE_SHADER_TESS_EVAL:
837      return VGPU10_DOMAIN_SHADER;
838   case PIPE_SHADER_COMPUTE:
839      return VGPU10_COMPUTE_SHADER;
840   default:
841      assert(!"Unexpected shader type");
842      return VGPU10_VERTEX_SHADER;
843   }
844}
845
846
847/**
848 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
849 * Note: we only need to translate the opcodes for "simple" instructions,
850 * as seen below.  All other opcodes are handled/translated specially.
851 */
852static VGPU10_OPCODE_TYPE
853translate_opcode(enum tgsi_opcode opcode)
854{
855   switch (opcode) {
856   case TGSI_OPCODE_MOV:
857      return VGPU10_OPCODE_MOV;
858   case TGSI_OPCODE_MUL:
859      return VGPU10_OPCODE_MUL;
860   case TGSI_OPCODE_ADD:
861      return VGPU10_OPCODE_ADD;
862   case TGSI_OPCODE_DP3:
863      return VGPU10_OPCODE_DP3;
864   case TGSI_OPCODE_DP4:
865      return VGPU10_OPCODE_DP4;
866   case TGSI_OPCODE_MIN:
867      return VGPU10_OPCODE_MIN;
868   case TGSI_OPCODE_MAX:
869      return VGPU10_OPCODE_MAX;
870   case TGSI_OPCODE_MAD:
871      return VGPU10_OPCODE_MAD;
872   case TGSI_OPCODE_SQRT:
873      return VGPU10_OPCODE_SQRT;
874   case TGSI_OPCODE_FRC:
875      return VGPU10_OPCODE_FRC;
876   case TGSI_OPCODE_FLR:
877      return VGPU10_OPCODE_ROUND_NI;
878   case TGSI_OPCODE_FSEQ:
879      return VGPU10_OPCODE_EQ;
880   case TGSI_OPCODE_FSGE:
881      return VGPU10_OPCODE_GE;
882   case TGSI_OPCODE_FSNE:
883      return VGPU10_OPCODE_NE;
884   case TGSI_OPCODE_DDX:
885      return VGPU10_OPCODE_DERIV_RTX;
886   case TGSI_OPCODE_DDY:
887      return VGPU10_OPCODE_DERIV_RTY;
888   case TGSI_OPCODE_RET:
889      return VGPU10_OPCODE_RET;
890   case TGSI_OPCODE_DIV:
891      return VGPU10_OPCODE_DIV;
892   case TGSI_OPCODE_IDIV:
893      return VGPU10_OPCODE_VMWARE;
894   case TGSI_OPCODE_DP2:
895      return VGPU10_OPCODE_DP2;
896   case TGSI_OPCODE_BRK:
897      return VGPU10_OPCODE_BREAK;
898   case TGSI_OPCODE_IF:
899      return VGPU10_OPCODE_IF;
900   case TGSI_OPCODE_ELSE:
901      return VGPU10_OPCODE_ELSE;
902   case TGSI_OPCODE_ENDIF:
903      return VGPU10_OPCODE_ENDIF;
904   case TGSI_OPCODE_CEIL:
905      return VGPU10_OPCODE_ROUND_PI;
906   case TGSI_OPCODE_I2F:
907      return VGPU10_OPCODE_ITOF;
908   case TGSI_OPCODE_NOT:
909      return VGPU10_OPCODE_NOT;
910   case TGSI_OPCODE_TRUNC:
911      return VGPU10_OPCODE_ROUND_Z;
912   case TGSI_OPCODE_SHL:
913      return VGPU10_OPCODE_ISHL;
914   case TGSI_OPCODE_AND:
915      return VGPU10_OPCODE_AND;
916   case TGSI_OPCODE_OR:
917      return VGPU10_OPCODE_OR;
918   case TGSI_OPCODE_XOR:
919      return VGPU10_OPCODE_XOR;
920   case TGSI_OPCODE_CONT:
921      return VGPU10_OPCODE_CONTINUE;
922   case TGSI_OPCODE_EMIT:
923      return VGPU10_OPCODE_EMIT;
924   case TGSI_OPCODE_ENDPRIM:
925      return VGPU10_OPCODE_CUT;
926   case TGSI_OPCODE_BGNLOOP:
927      return VGPU10_OPCODE_LOOP;
928   case TGSI_OPCODE_ENDLOOP:
929      return VGPU10_OPCODE_ENDLOOP;
930   case TGSI_OPCODE_ENDSUB:
931      return VGPU10_OPCODE_RET;
932   case TGSI_OPCODE_NOP:
933      return VGPU10_OPCODE_NOP;
934   case TGSI_OPCODE_END:
935      return VGPU10_OPCODE_RET;
936   case TGSI_OPCODE_F2I:
937      return VGPU10_OPCODE_FTOI;
938   case TGSI_OPCODE_IMAX:
939      return VGPU10_OPCODE_IMAX;
940   case TGSI_OPCODE_IMIN:
941      return VGPU10_OPCODE_IMIN;
942   case TGSI_OPCODE_UDIV:
943   case TGSI_OPCODE_UMOD:
944   case TGSI_OPCODE_MOD:
945      return VGPU10_OPCODE_UDIV;
946   case TGSI_OPCODE_IMUL_HI:
947      return VGPU10_OPCODE_IMUL;
948   case TGSI_OPCODE_INEG:
949      return VGPU10_OPCODE_INEG;
950   case TGSI_OPCODE_ISHR:
951      return VGPU10_OPCODE_ISHR;
952   case TGSI_OPCODE_ISGE:
953      return VGPU10_OPCODE_IGE;
954   case TGSI_OPCODE_ISLT:
955      return VGPU10_OPCODE_ILT;
956   case TGSI_OPCODE_F2U:
957      return VGPU10_OPCODE_FTOU;
958   case TGSI_OPCODE_UADD:
959      return VGPU10_OPCODE_IADD;
960   case TGSI_OPCODE_U2F:
961      return VGPU10_OPCODE_UTOF;
962   case TGSI_OPCODE_UCMP:
963      return VGPU10_OPCODE_MOVC;
964   case TGSI_OPCODE_UMAD:
965      return VGPU10_OPCODE_UMAD;
966   case TGSI_OPCODE_UMAX:
967      return VGPU10_OPCODE_UMAX;
968   case TGSI_OPCODE_UMIN:
969      return VGPU10_OPCODE_UMIN;
970   case TGSI_OPCODE_UMUL:
971   case TGSI_OPCODE_UMUL_HI:
972      return VGPU10_OPCODE_UMUL;
973   case TGSI_OPCODE_USEQ:
974      return VGPU10_OPCODE_IEQ;
975   case TGSI_OPCODE_USGE:
976      return VGPU10_OPCODE_UGE;
977   case TGSI_OPCODE_USHR:
978      return VGPU10_OPCODE_USHR;
979   case TGSI_OPCODE_USLT:
980      return VGPU10_OPCODE_ULT;
981   case TGSI_OPCODE_USNE:
982      return VGPU10_OPCODE_INE;
983   case TGSI_OPCODE_SWITCH:
984      return VGPU10_OPCODE_SWITCH;
985   case TGSI_OPCODE_CASE:
986      return VGPU10_OPCODE_CASE;
987   case TGSI_OPCODE_DEFAULT:
988      return VGPU10_OPCODE_DEFAULT;
989   case TGSI_OPCODE_ENDSWITCH:
990      return VGPU10_OPCODE_ENDSWITCH;
991   case TGSI_OPCODE_FSLT:
992      return VGPU10_OPCODE_LT;
993   case TGSI_OPCODE_ROUND:
994      return VGPU10_OPCODE_ROUND_NE;
995   /* Begin SM5 opcodes */
996   case TGSI_OPCODE_F2D:
997      return VGPU10_OPCODE_FTOD;
998   case TGSI_OPCODE_D2F:
999      return VGPU10_OPCODE_DTOF;
1000   case TGSI_OPCODE_DMUL:
1001      return VGPU10_OPCODE_DMUL;
1002   case TGSI_OPCODE_DADD:
1003      return VGPU10_OPCODE_DADD;
1004   case TGSI_OPCODE_DMAX:
1005      return VGPU10_OPCODE_DMAX;
1006   case TGSI_OPCODE_DMIN:
1007      return VGPU10_OPCODE_DMIN;
1008   case TGSI_OPCODE_DSEQ:
1009      return VGPU10_OPCODE_DEQ;
1010   case TGSI_OPCODE_DSGE:
1011      return VGPU10_OPCODE_DGE;
1012   case TGSI_OPCODE_DSLT:
1013      return VGPU10_OPCODE_DLT;
1014   case TGSI_OPCODE_DSNE:
1015      return VGPU10_OPCODE_DNE;
1016   case TGSI_OPCODE_IBFE:
1017      return VGPU10_OPCODE_IBFE;
1018   case TGSI_OPCODE_UBFE:
1019      return VGPU10_OPCODE_UBFE;
1020   case TGSI_OPCODE_BFI:
1021      return VGPU10_OPCODE_BFI;
1022   case TGSI_OPCODE_BREV:
1023      return VGPU10_OPCODE_BFREV;
1024   case TGSI_OPCODE_POPC:
1025      return VGPU10_OPCODE_COUNTBITS;
1026   case TGSI_OPCODE_LSB:
1027      return VGPU10_OPCODE_FIRSTBIT_LO;
1028   case TGSI_OPCODE_IMSB:
1029      return VGPU10_OPCODE_FIRSTBIT_SHI;
1030   case TGSI_OPCODE_UMSB:
1031      return VGPU10_OPCODE_FIRSTBIT_HI;
1032   case TGSI_OPCODE_INTERP_CENTROID:
1033      return VGPU10_OPCODE_EVAL_CENTROID;
1034   case TGSI_OPCODE_INTERP_SAMPLE:
1035      return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1036   case TGSI_OPCODE_BARRIER:
1037      return VGPU10_OPCODE_SYNC;
1038   case TGSI_OPCODE_DFMA:
1039      return VGPU10_OPCODE_DFMA;
1040   case TGSI_OPCODE_FMA:
1041      return VGPU10_OPCODE_MAD;
1042
1043   /* DX11.1 Opcodes */
1044   case TGSI_OPCODE_DDIV:
1045      return VGPU10_OPCODE_DDIV;
1046   case TGSI_OPCODE_DRCP:
1047      return VGPU10_OPCODE_DRCP;
1048   case TGSI_OPCODE_D2I:
1049      return VGPU10_OPCODE_DTOI;
1050   case TGSI_OPCODE_D2U:
1051      return VGPU10_OPCODE_DTOU;
1052   case TGSI_OPCODE_I2D:
1053      return VGPU10_OPCODE_ITOD;
1054   case TGSI_OPCODE_U2D:
1055      return VGPU10_OPCODE_UTOD;
1056
1057   case TGSI_OPCODE_SAMPLE_POS:
1058      /* Note: we never actually get this opcode because there's no GLSL
1059       * function to query multisample resource sample positions.  There's
1060       * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1061       * position of the current sample in the render target.
1062       */
1063      FALLTHROUGH;
1064   case TGSI_OPCODE_SAMPLE_INFO:
1065      /* NOTE: we never actually get this opcode because the GLSL compiler
1066       * implements the gl_NumSamples variable with a simple constant in the
1067       * constant buffer.
1068       */
1069      FALLTHROUGH;
1070   default:
1071      assert(!"Unexpected TGSI opcode in translate_opcode()");
1072      return VGPU10_OPCODE_NOP;
1073   }
1074}
1075
1076
1077/**
1078 * Translate a TGSI register file type into a VGPU10 operand type.
1079 * \param array  is the TGSI_FILE_TEMPORARY register an array?
1080 */
1081static VGPU10_OPERAND_TYPE
1082translate_register_file(enum tgsi_file_type file, boolean array)
1083{
1084   switch (file) {
1085   case TGSI_FILE_CONSTANT:
1086      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1087   case TGSI_FILE_INPUT:
1088      return VGPU10_OPERAND_TYPE_INPUT;
1089   case TGSI_FILE_OUTPUT:
1090      return VGPU10_OPERAND_TYPE_OUTPUT;
1091   case TGSI_FILE_TEMPORARY:
1092      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1093                   : VGPU10_OPERAND_TYPE_TEMP;
1094   case TGSI_FILE_IMMEDIATE:
1095      /* all immediates are 32-bit values at this time so
1096       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1097       */
1098      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1099   case TGSI_FILE_SAMPLER:
1100      return VGPU10_OPERAND_TYPE_SAMPLER;
1101   case TGSI_FILE_SYSTEM_VALUE:
1102      return VGPU10_OPERAND_TYPE_INPUT;
1103
1104   /* XXX TODO more cases to finish */
1105
1106   default:
1107      assert(!"Bad tgsi register file!");
1108      return VGPU10_OPERAND_TYPE_NULL;
1109   }
1110}
1111
1112
1113/**
1114 * Emit a null dst register
1115 */
1116static void
1117emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1118{
1119   VGPU10OperandToken0 operand;
1120
1121   operand.value = 0;
1122   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1123   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1124
1125   emit_dword(emit, operand.value);
1126}
1127
1128
1129/**
1130 * If the given register is a temporary, return the array ID.
1131 * Else return zero.
1132 */
1133static unsigned
1134get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1135                  enum tgsi_file_type file, unsigned index)
1136{
1137   if (file == TGSI_FILE_TEMPORARY) {
1138      return emit->temp_map[index].arrayId;
1139   }
1140   else {
1141      return 0;
1142   }
1143}
1144
1145
1146/**
1147 * If the given register is a temporary, convert the index from a TGSI
1148 * TEMPORARY index to a VGPU10 temp index.
1149 */
1150static unsigned
1151remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1152                 enum tgsi_file_type file, unsigned index)
1153{
1154   if (file == TGSI_FILE_TEMPORARY) {
1155      return emit->temp_map[index].index;
1156   }
1157   else {
1158      return index;
1159   }
1160}
1161
1162
1163/**
1164 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1165 * Note: the operandType field must already be initialized.
1166 * \param file  the register file being accessed
1167 * \param indirect  using indirect addressing of the register file?
1168 * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1169 * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1170 */
1171static VGPU10OperandToken0
1172setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1173                        VGPU10OperandToken0 operand0,
1174                        enum tgsi_file_type file,
1175                        boolean indirect,
1176                        boolean index2D, bool indirect2D)
1177{
1178   VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1179   VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1180
1181   /*
1182    * Compute index dimensions
1183    */
1184   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1185       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1186       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1187       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1188       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1189       operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1190      /* there's no swizzle for in-line immediates */
1191      indexDim = VGPU10_OPERAND_INDEX_0D;
1192      assert(operand0.selectionMode == 0);
1193   }
1194   else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1195      indexDim = VGPU10_OPERAND_INDEX_0D;
1196   }
1197   else {
1198      indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1199   }
1200
1201   /*
1202    * Compute index representation(s) (immediate vs relative).
1203    */
1204   if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1205      index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1206         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1207
1208      index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1209         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1210   }
1211   else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1212      index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1213         : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1214
1215      index1Rep = 0;
1216   }
1217   else {
1218      index0Rep = 0;
1219      index1Rep = 0;
1220   }
1221
1222   operand0.indexDimension = indexDim;
1223   operand0.index0Representation = index0Rep;
1224   operand0.index1Representation = index1Rep;
1225
1226   return operand0;
1227}
1228
1229
1230/**
1231 * Emit the operand for expressing an address register for indirect indexing.
1232 * Note that the address register is really just a temp register.
1233 * \param addr_reg_index  which address register to use
1234 */
1235static void
1236emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1237                       unsigned addr_reg_index)
1238{
1239   unsigned tmp_reg_index;
1240   VGPU10OperandToken0 operand0;
1241
1242   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1243
1244   tmp_reg_index = emit->address_reg_index[addr_reg_index];
1245
1246   /* operand0 is a simple temporary register, selecting one component */
1247   operand0.value = 0;
1248   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1249   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1250   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1251   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1252   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1253   operand0.swizzleX = 0;
1254   operand0.swizzleY = 1;
1255   operand0.swizzleZ = 2;
1256   operand0.swizzleW = 3;
1257
1258   emit_dword(emit, operand0.value);
1259   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1260}
1261
1262
1263/**
1264 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1265 * \param emit  the emitter context
1266 * \param reg  the TGSI dst register to translate
1267 */
1268static void
1269emit_dst_register(struct svga_shader_emitter_v10 *emit,
1270                  const struct tgsi_full_dst_register *reg)
1271{
1272   enum tgsi_file_type file = reg->Register.File;
1273   unsigned index = reg->Register.Index;
1274   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1275   const unsigned sem_index = emit->info.output_semantic_index[index];
1276   unsigned writemask = reg->Register.WriteMask;
1277   const boolean indirect = reg->Register.Indirect;
1278   unsigned tempArrayId = get_temp_array_id(emit, file, index);
1279   boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1280   VGPU10OperandToken0 operand0;
1281
1282   if (file == TGSI_FILE_TEMPORARY) {
1283      emit->temp_map[index].initialized = TRUE;
1284   }
1285
1286   if (file == TGSI_FILE_OUTPUT) {
1287      if (emit->unit == PIPE_SHADER_VERTEX ||
1288          emit->unit == PIPE_SHADER_GEOMETRY ||
1289          emit->unit == PIPE_SHADER_TESS_EVAL) {
1290         if (index == emit->vposition.out_index &&
1291             emit->vposition.tmp_index != INVALID_INDEX) {
1292            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1293             * vertex position result in a temporary so that we can modify
1294             * it in the post_helper() code.
1295             */
1296            file = TGSI_FILE_TEMPORARY;
1297            index = emit->vposition.tmp_index;
1298         }
1299         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1300                  emit->clip_dist_tmp_index != INVALID_INDEX) {
1301            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1302             * We store the clip distance in a temporary first, then
1303             * we'll copy it to the shadow copy and to CLIPDIST with the
1304             * enabled planes mask in emit_clip_distance_instructions().
1305             */
1306            file = TGSI_FILE_TEMPORARY;
1307            index = emit->clip_dist_tmp_index + sem_index;
1308         }
1309         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1310                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
1311            /* replace the CLIPVERTEX output register with a temporary */
1312            assert(emit->clip_mode == CLIP_VERTEX);
1313            assert(sem_index == 0);
1314            file = TGSI_FILE_TEMPORARY;
1315            index = emit->clip_vertex_tmp_index;
1316         }
1317         else if (sem_name == TGSI_SEMANTIC_COLOR &&
1318                  emit->key.clamp_vertex_color) {
1319
1320            /* set the saturate modifier of the instruction
1321             * to clamp the vertex color.
1322             */
1323            VGPU10OpcodeToken0 *token =
1324               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1325            token->saturate = TRUE;
1326         }
1327         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1328                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
1329            file = TGSI_FILE_TEMPORARY;
1330            index = emit->gs.viewport_index_tmp_index;
1331         }
1332      }
1333      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1334         if (sem_name == TGSI_SEMANTIC_POSITION) {
1335            /* Fragment depth output register */
1336            operand0.value = 0;
1337            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1338            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1339            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1340            emit_dword(emit, operand0.value);
1341            return;
1342         }
1343         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1344            /* Fragment sample mask output */
1345            operand0.value = 0;
1346            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1347            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1348            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1349            emit_dword(emit, operand0.value);
1350            return;
1351         }
1352         else if (index == emit->fs.color_out_index[0] &&
1353             emit->fs.color_tmp_index != INVALID_INDEX) {
1354            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1355             * fragment color result in a temporary so that we can read it
1356             * it in the post_helper() code.
1357             */
1358            file = TGSI_FILE_TEMPORARY;
1359            index = emit->fs.color_tmp_index;
1360         }
1361         else {
1362            /* Typically, for fragment shaders, the output register index
1363             * matches the color semantic index.  But not when we write to
1364             * the fragment depth register.  In that case, OUT[0] will be
1365             * fragdepth and OUT[1] will be the 0th color output.  We need
1366             * to use the semantic index for color outputs.
1367             */
1368            assert(sem_name == TGSI_SEMANTIC_COLOR);
1369            index = emit->info.output_semantic_index[index];
1370
1371            emit->num_output_writes++;
1372         }
1373      }
1374      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1375         if (index == emit->tcs.inner.tgsi_index) {
1376            /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1377             * in temporary for now so that will be store into appropriate
1378             * registers in post_helper() in patch constant phase.
1379             */
1380            if (emit->tcs.control_point_phase) {
1381               /* Discard writing into tessfactor in control point phase */
1382               emit->discard_instruction =  TRUE;
1383            }
1384            else {
1385               file = TGSI_FILE_TEMPORARY;
1386               index = emit->tcs.inner.temp_index;
1387            }
1388         }
1389         else if (index == emit->tcs.outer.tgsi_index) {
1390            /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1391             * in temporary for now so that will be store into appropriate
1392             * registers in post_helper().
1393             */
1394            if (emit->tcs.control_point_phase) {
1395               /* Discard writing into tessfactor in control point phase */
1396               emit->discard_instruction =  TRUE;
1397            }
1398            else {
1399               file = TGSI_FILE_TEMPORARY;
1400               index = emit->tcs.outer.temp_index;
1401            }
1402         }
1403         else if (index >= emit->tcs.patch_generic_out_index &&
1404                  index < (emit->tcs.patch_generic_out_index +
1405                          emit->tcs.patch_generic_out_count)) {
1406            if (emit->tcs.control_point_phase) {
1407               /* Discard writing into generic patch constant outputs in
1408                  control point phase */
1409               emit->discard_instruction =  TRUE;
1410            }
1411            else {
1412               if (emit->reemit_instruction) {
1413                  /* Store results of reemitted instruction in temporary register. */
1414                  file = TGSI_FILE_TEMPORARY;
1415                  index = emit->tcs.patch_generic_tmp_index +
1416                          (index - emit->tcs.patch_generic_out_index);
1417                  /**
1418                   * Temporaries for patch constant data can be done
1419                   * as indexable temporaries.
1420                   */
1421                  tempArrayId = get_temp_array_id(emit, file, index);
1422                  index2d = tempArrayId > 0;
1423
1424                  emit->reemit_instruction = FALSE;
1425               }
1426               else {
1427                  /* If per-patch outputs is been read in shader, we
1428                   * reemit instruction and store results in temporaries in
1429                   * patch constant phase. */
1430                  if (emit->info.reads_perpatch_outputs) {
1431                     emit->reemit_instruction = TRUE;
1432                  }
1433               }
1434            }
1435         }
1436         else if (reg->Register.Dimension) {
1437            /* Only control point outputs are declared 2D in tgsi */
1438            if (emit->tcs.control_point_phase) {
1439               if (emit->reemit_instruction) {
1440                  /* Store results of reemitted instruction in temporary register. */
1441                  index2d = FALSE;
1442                  file = TGSI_FILE_TEMPORARY;
1443                  index = emit->tcs.control_point_tmp_index +
1444                          (index - emit->tcs.control_point_out_index);
1445                  emit->reemit_instruction = FALSE;
1446               }
1447               else {
1448                  /* The mapped control point outputs are 1-D */
1449                  index2d = FALSE;
1450                  if (emit->info.reads_pervertex_outputs) {
1451                     /* If per-vertex outputs is been read in shader, we
1452                      * reemit instruction and store results in temporaries
1453                      * control point phase. */
1454                     emit->reemit_instruction = TRUE;
1455                  }
1456               }
1457
1458               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1459                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1460                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1461                   * We store the clip distance in a temporary first, then
1462                   * we'll copy it to the shadow copy and to CLIPDIST with the
1463                   * enabled planes mask in emit_clip_distance_instructions().
1464                   */
1465                  file = TGSI_FILE_TEMPORARY;
1466                  index = emit->clip_dist_tmp_index + sem_index;
1467               }
1468               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1469                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
1470                  /* replace the CLIPVERTEX output register with a temporary */
1471                  assert(emit->clip_mode == CLIP_VERTEX);
1472                  assert(sem_index == 0);
1473                  file = TGSI_FILE_TEMPORARY;
1474                  index = emit->clip_vertex_tmp_index;
1475               }
1476            }
1477            else {
1478               /* Discard writing into control point outputs in
1479                  patch constant phase */
1480               emit->discard_instruction =  TRUE;
1481            }
1482         }
1483      }
1484   }
1485
1486   /* init operand tokens to all zero */
1487   operand0.value = 0;
1488
1489   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1490
1491   /* the operand has a writemask */
1492   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1493
1494   /* Which of the four dest components to write to. Note that we can use a
1495    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1496    */
1497   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1498   operand0.mask = writemask;
1499
1500   /* translate TGSI register file type to VGPU10 operand type */
1501   operand0.operandType = translate_register_file(file, tempArrayId > 0);
1502
1503   check_register_index(emit, operand0.operandType, index);
1504
1505   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1506                                      index2d, FALSE);
1507
1508   /* Emit tokens */
1509   emit_dword(emit, operand0.value);
1510   if (tempArrayId > 0) {
1511      emit_dword(emit, tempArrayId);
1512   }
1513
1514   emit_dword(emit, remap_temp_index(emit, file, index));
1515
1516   if (indirect) {
1517      emit_indirect_register(emit, reg->Indirect.Index);
1518   }
1519}
1520
1521
1522/**
1523 * Check if temporary register needs to be initialize when
1524 * shader is not using indirect addressing for temporary and uninitialized
1525 * temporary is not used in loop. In these two scenarios, we cannot
1526 * determine if temporary is initialized or not.
1527 */
1528static boolean
1529need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1530                             unsigned index)
1531{
1532   if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1533       && emit->current_loop_depth == 0) {
1534      if (!emit->temp_map[index].initialized &&
1535          emit->temp_map[index].index < emit->num_shader_temps) {
1536         return TRUE;
1537      }
1538   }
1539
1540   return FALSE;
1541}
1542
1543
1544/**
1545 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1546 * In quite a few cases, we do register substitution.  For example, if
1547 * the TGSI register is the front/back-face register, we replace that with
1548 * a temp register containing a value we computed earlier.
1549 */
1550static void
1551emit_src_register(struct svga_shader_emitter_v10 *emit,
1552                  const struct tgsi_full_src_register *reg)
1553{
1554   enum tgsi_file_type file = reg->Register.File;
1555   unsigned index = reg->Register.Index;
1556   boolean indirect = reg->Register.Indirect;
1557   unsigned tempArrayId = get_temp_array_id(emit, file, index);
1558   boolean index2d = (reg->Register.Dimension ||
1559                            tempArrayId > 0 ||
1560                            file == TGSI_FILE_CONSTANT);
1561   unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1562   boolean indirect2d = reg->Dimension.Indirect;
1563   unsigned swizzleX = reg->Register.SwizzleX;
1564   unsigned swizzleY = reg->Register.SwizzleY;
1565   unsigned swizzleZ = reg->Register.SwizzleZ;
1566   unsigned swizzleW = reg->Register.SwizzleW;
1567   const boolean absolute = reg->Register.Absolute;
1568   const boolean negate = reg->Register.Negate;
1569   VGPU10OperandToken0 operand0;
1570   VGPU10OperandToken1 operand1;
1571
1572   operand0.value = operand1.value = 0;
1573
1574   if (emit->unit == PIPE_SHADER_FRAGMENT){
1575      if (file == TGSI_FILE_INPUT) {
1576         if (index == emit->fs.face_input_index) {
1577            /* Replace INPUT[FACE] with TEMP[FACE] */
1578            file = TGSI_FILE_TEMPORARY;
1579            index = emit->fs.face_tmp_index;
1580         }
1581         else if (index == emit->fs.fragcoord_input_index) {
1582            /* Replace INPUT[POSITION] with TEMP[POSITION] */
1583            file = TGSI_FILE_TEMPORARY;
1584            index = emit->fs.fragcoord_tmp_index;
1585         }
1586         else if (index == emit->fs.layer_input_index) {
1587            /* Replace INPUT[LAYER] with zero.x */
1588            file = TGSI_FILE_IMMEDIATE;
1589            index = emit->fs.layer_imm_index;
1590            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1591         }
1592         else {
1593            /* We remap fragment shader inputs to that FS input indexes
1594             * match up with VS/GS output indexes.
1595             */
1596            index = emit->linkage.input_map[index];
1597         }
1598      }
1599      else if (file == TGSI_FILE_SYSTEM_VALUE) {
1600         if (index == emit->fs.sample_pos_sys_index) {
1601            assert(emit->version >= 41);
1602            /* Current sample position is in a temp register */
1603            file = TGSI_FILE_TEMPORARY;
1604            index = emit->fs.sample_pos_tmp_index;
1605         }
1606         else if (index == emit->fs.sample_mask_in_sys_index) {
1607            /* Emitted as vCoverage0.x */
1608            /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1609             * elements where s is the maximum number of color samples supported
1610             * by the implementation.
1611             */
1612            operand0.value = 0;
1613            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1614            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1615            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1616            operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1617            emit_dword(emit, operand0.value);
1618            return;
1619         }
1620         else {
1621            /* Map the TGSI system value to a VGPU10 input register */
1622            assert(index < ARRAY_SIZE(emit->system_value_indexes));
1623            file = TGSI_FILE_INPUT;
1624            index = emit->system_value_indexes[index];
1625         }
1626      }
1627   }
1628   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1629      if (file == TGSI_FILE_INPUT) {
1630         if (index == emit->gs.prim_id_index) {
1631            operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1632            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1633         }
1634         index = emit->linkage.input_map[index];
1635      }
1636      else if (file == TGSI_FILE_SYSTEM_VALUE &&
1637               index == emit->gs.invocation_id_sys_index) {
1638         /* Emitted as vGSInstanceID0.x */
1639         operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1640         operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1641         index = 0;
1642      }
1643   }
1644   else if (emit->unit == PIPE_SHADER_VERTEX) {
1645      if (file == TGSI_FILE_INPUT) {
1646         /* if input is adjusted... */
1647         if ((emit->key.vs.adjust_attrib_w_1 |
1648              emit->key.vs.adjust_attrib_itof |
1649              emit->key.vs.adjust_attrib_utof |
1650              emit->key.vs.attrib_is_bgra |
1651              emit->key.vs.attrib_puint_to_snorm |
1652              emit->key.vs.attrib_puint_to_uscaled |
1653              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1654            file = TGSI_FILE_TEMPORARY;
1655            index = emit->vs.adjusted_input[index];
1656         }
1657      }
1658      else if (file == TGSI_FILE_SYSTEM_VALUE) {
1659         if (index == emit->vs.vertex_id_sys_index &&
1660             emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1661            file = TGSI_FILE_TEMPORARY;
1662            index = emit->vs.vertex_id_tmp_index;
1663            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1664         }
1665         else {
1666            /* Map the TGSI system value to a VGPU10 input register */
1667            assert(index < ARRAY_SIZE(emit->system_value_indexes));
1668            file = TGSI_FILE_INPUT;
1669            index = emit->system_value_indexes[index];
1670         }
1671      }
1672   }
1673   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1674
1675      if (file == TGSI_FILE_SYSTEM_VALUE) {
1676         if (index == emit->tcs.vertices_per_patch_index) {
1677            /**
1678             * if source register is the system value for vertices_per_patch,
1679             * replace it with the immediate.
1680             */
1681            file = TGSI_FILE_IMMEDIATE;
1682            index = emit->tcs.imm_index;
1683            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1684         }
1685         else if (index == emit->tcs.invocation_id_sys_index) {
1686            if (emit->tcs.control_point_phase) {
1687               /**
1688                * Emitted as vOutputControlPointID.x
1689                */
1690               operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1691               operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1692               operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1693               operand0.mask = 0;
1694               emit_dword(emit, operand0.value);
1695               return;
1696            }
1697            else {
1698               /* There is no control point ID input declaration in
1699                * the patch constant phase in hull shader.
1700                * Since for now we are emitting all instructions in
1701                * the patch constant phase, we are replacing the
1702                * control point ID reference with the immediate 0.
1703                */
1704               file = TGSI_FILE_IMMEDIATE;
1705               index = emit->tcs.imm_index;
1706               swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1707            }
1708         }
1709         else if (index == emit->tcs.prim_id_index) {
1710            /**
1711             * Emitted as vPrim.x
1712             */
1713            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1714            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1715            index = 0;
1716         }
1717      }
1718      else if (file == TGSI_FILE_INPUT) {
1719         index = emit->linkage.input_map[index];
1720         if (!emit->tcs.control_point_phase) {
1721            /* Emitted as vicp */
1722            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1723            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1724            assert(reg->Register.Dimension);
1725         }
1726      }
1727      else if (file == TGSI_FILE_OUTPUT) {
1728         if ((index >= emit->tcs.patch_generic_out_index &&
1729             index < (emit->tcs.patch_generic_out_index +
1730                      emit->tcs.patch_generic_out_count)) ||
1731             index == emit->tcs.inner.tgsi_index ||
1732             index == emit->tcs.outer.tgsi_index) {
1733            if (emit->tcs.control_point_phase) {
1734               emit->discard_instruction = TRUE;
1735            }
1736            else {
1737               /* Device doesn't allow reading from output so
1738                * use corresponding temporary register as source */
1739               file = TGSI_FILE_TEMPORARY;
1740               if (index == emit->tcs.inner.tgsi_index) {
1741                  index = emit->tcs.inner.temp_index;
1742               }
1743               else if (index == emit->tcs.outer.tgsi_index) {
1744                  index = emit->tcs.outer.temp_index;
1745               }
1746               else {
1747                  index = emit->tcs.patch_generic_tmp_index +
1748                          (index - emit->tcs.patch_generic_out_index);
1749               }
1750
1751               /**
1752                * Temporaries for patch constant data can be done
1753                * as indexable temporaries.
1754                */
1755               tempArrayId = get_temp_array_id(emit, file, index);
1756               index2d = tempArrayId > 0;
1757               index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1758            }
1759         }
1760         else if (index2d) {
1761            if (emit->tcs.control_point_phase) {
1762               /* Device doesn't allow reading from output so
1763                * use corresponding temporary register as source */
1764               file = TGSI_FILE_TEMPORARY;
1765               index2d = FALSE;
1766               index = emit->tcs.control_point_tmp_index +
1767                       (index - emit->tcs.control_point_out_index);
1768            }
1769            else {
1770               emit->discard_instruction = TRUE;
1771            }
1772         }
1773      }
1774   }
1775   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1776      if (file == TGSI_FILE_SYSTEM_VALUE) {
1777         if (index == emit->tes.tesscoord_sys_index) {
1778            /**
1779             * Emitted as vDomain
1780             */
1781            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1782            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1783            index = 0;
1784
1785            /* Make sure swizzles are of those components allowed according
1786             * to the tessellator domain.
1787             */
1788            swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1789            swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1790            swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1791            swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1792         }
1793         else if (index == emit->tes.inner.tgsi_index) {
1794            file = TGSI_FILE_TEMPORARY;
1795            index = emit->tes.inner.temp_index;
1796         }
1797         else if (index == emit->tes.outer.tgsi_index) {
1798            file = TGSI_FILE_TEMPORARY;
1799            index = emit->tes.outer.temp_index;
1800         }
1801         else if (index == emit->tes.prim_id_index) {
1802            /**
1803             * Emitted as vPrim.x
1804             */
1805            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1806            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1807            index = 0;
1808         }
1809
1810      }
1811      else if (file == TGSI_FILE_INPUT) {
1812         if (index2d) {
1813            /* 2D input is emitted as vcp (input control point). */
1814            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1815            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1816
1817            /* index specifies the element index and is remapped
1818             * to align with the tcs output index.
1819             */
1820            index = emit->linkage.input_map[index];
1821
1822            assert(index2 < emit->key.tes.vertices_per_patch);
1823         }
1824         else {
1825            if (index < emit->key.tes.tessfactor_index)
1826               /* index specifies the generic patch index.
1827                * Remapped to match up with the tcs output index.
1828                */
1829               index = emit->linkage.input_map[index];
1830
1831            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1832            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1833         }
1834      }
1835   }
1836   else if (emit->unit == PIPE_SHADER_COMPUTE) {
1837      if (file == TGSI_FILE_SYSTEM_VALUE) {
1838         if (index == emit->cs.thread_id_index) {
1839            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1840            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1841            index = 0;
1842         } else if (index == emit->cs.block_id_index) {
1843            operand0.value = 0;
1844            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1845            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1846            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1847            operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1848            operand0.swizzleX = swizzleX;
1849            operand0.swizzleY = swizzleY;
1850            operand0.swizzleZ = swizzleZ;
1851            operand0.swizzleW = swizzleW;
1852            emit_dword(emit, operand0.value);
1853            return;
1854         } else if (index == emit->cs.grid_size.tgsi_index) {
1855            file = TGSI_FILE_IMMEDIATE;
1856            index = emit->cs.grid_size.imm_index;
1857         }
1858      }
1859   }
1860
1861   if (file == TGSI_FILE_ADDRESS) {
1862      index = emit->address_reg_index[index];
1863      file = TGSI_FILE_TEMPORARY;
1864   }
1865
1866   if (file == TGSI_FILE_CONSTANT) {
1867      /**
1868       * If this constant buffer is to be bound as srv raw buffer,
1869       * then we have to load the constant to a temp first before
1870       * it can be used as a source in the instruction.
1871       * This is accomplished in two passes. The first pass is to
1872       * identify if there is any constbuf to rawbuf translation.
1873       * If there isn't, emit the instruction as usual.
1874       * If there is, then we save the constant buffer reference info,
1875       * and then instead of emitting the instruction at the end
1876       * of the instruction, it will trigger a second pass of parsing
1877       * this instruction. Before it starts the parsing, it will
1878       * load the referenced raw buffer elements to temporaries.
1879       * Then it will emit the instruction that replaces the
1880       * constant buffer replaces with the corresponding temporaries.
1881       */
1882      if (emit->raw_bufs & (1 << index2)) {
1883         if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
1884            unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1885
1886            emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1887
1888            /* Save whether the element index is indirect indexing */
1889            emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1890
1891            /* If it is indirect index, save the temporary
1892             * address index, otherwise, save the immediate index.
1893             */
1894            if (indirect) {
1895               emit->raw_buf_tmp[tmpIdx].element_index =
1896                  emit->address_reg_index[reg->Indirect.Index];
1897               emit->raw_buf_tmp[tmpIdx].element_rel =
1898                  reg->Register.Index;
1899            }
1900            else {
1901               emit->raw_buf_tmp[tmpIdx].element_index = index;
1902               emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1903            }
1904
1905            emit->raw_buf_cur_tmp_index++;
1906            emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1907            emit->discard_instruction = TRUE;
1908            emit->reemit_tgsi_instruction = TRUE;
1909         }
1910         else {
1911            /* In the reemitting process, replace the constant buffer
1912             * reference with temporary.
1913             */
1914            file = TGSI_FILE_TEMPORARY;
1915            index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1916            index2d = FALSE;
1917            indirect = FALSE;
1918            emit->raw_buf_cur_tmp_index++;
1919         }
1920      }
1921   }
1922
1923   if (file == TGSI_FILE_TEMPORARY) {
1924      if (need_temp_reg_initialization(emit, index)) {
1925         emit->initialize_temp_index = index;
1926         emit->discard_instruction = TRUE;
1927      }
1928   }
1929
1930   if (operand0.value == 0) {
1931      /* if operand0 was not set above for a special case, do the general
1932       * case now.
1933       */
1934      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1935      operand0.operandType = translate_register_file(file, tempArrayId > 0);
1936   }
1937   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1938                                      index2d, indirect2d);
1939
1940   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1941       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1942      /* there's no swizzle for in-line immediates */
1943      if (swizzleX == swizzleY &&
1944          swizzleX == swizzleZ &&
1945          swizzleX == swizzleW) {
1946         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1947      }
1948      else {
1949         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1950      }
1951
1952      operand0.swizzleX = swizzleX;
1953      operand0.swizzleY = swizzleY;
1954      operand0.swizzleZ = swizzleZ;
1955      operand0.swizzleW = swizzleW;
1956
1957      if (absolute || negate) {
1958         operand0.extended = 1;
1959         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1960         if (absolute && !negate)
1961            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1962         if (!absolute && negate)
1963            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1964         if (absolute && negate)
1965            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1966      }
1967   }
1968
1969   check_register_index(emit, operand0.operandType, index);
1970
1971   /* Emit the operand tokens */
1972   emit_dword(emit, operand0.value);
1973   if (operand0.extended)
1974      emit_dword(emit, operand1.value);
1975
1976   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1977      /* Emit the four float/int in-line immediate values */
1978      unsigned *c;
1979      assert(index < ARRAY_SIZE(emit->immediates));
1980      assert(file == TGSI_FILE_IMMEDIATE);
1981      assert(swizzleX < 4);
1982      assert(swizzleY < 4);
1983      assert(swizzleZ < 4);
1984      assert(swizzleW < 4);
1985      c = (unsigned *) emit->immediates[index];
1986      emit_dword(emit, c[swizzleX]);
1987      emit_dword(emit, c[swizzleY]);
1988      emit_dword(emit, c[swizzleZ]);
1989      emit_dword(emit, c[swizzleW]);
1990   }
1991   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1992      /* Emit the register index(es) */
1993      if (index2d) {
1994         emit_dword(emit, index2);
1995
1996         if (indirect2d) {
1997            emit_indirect_register(emit, reg->DimIndirect.Index);
1998         }
1999      }
2000
2001      emit_dword(emit, remap_temp_index(emit, file, index));
2002
2003      if (indirect) {
2004         assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
2005         emit_indirect_register(emit, reg->Indirect.Index);
2006      }
2007   }
2008}
2009
2010
2011/**
2012 * Emit a resource operand (for use with a SAMPLE instruction).
2013 */
2014static void
2015emit_resource_register(struct svga_shader_emitter_v10 *emit,
2016                       unsigned resource_number)
2017{
2018   VGPU10OperandToken0 operand0;
2019
2020   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
2021
2022   /* init */
2023   operand0.value = 0;
2024
2025   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2026   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2027   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2028   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2029   operand0.swizzleX = VGPU10_COMPONENT_X;
2030   operand0.swizzleY = VGPU10_COMPONENT_Y;
2031   operand0.swizzleZ = VGPU10_COMPONENT_Z;
2032   operand0.swizzleW = VGPU10_COMPONENT_W;
2033
2034   emit_dword(emit, operand0.value);
2035   emit_dword(emit, resource_number);
2036}
2037
2038
2039/**
2040 * Emit a sampler operand (for use with a SAMPLE instruction).
2041 */
2042static void
2043emit_sampler_register(struct svga_shader_emitter_v10 *emit,
2044                      unsigned unit)
2045{
2046   VGPU10OperandToken0 operand0;
2047   unsigned sampler_number;
2048
2049   sampler_number = emit->key.tex[unit].sampler_index;
2050
2051   if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
2052      sampler_number++;
2053
2054   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
2055
2056   /* init */
2057   operand0.value = 0;
2058
2059   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2060   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2061
2062   emit_dword(emit, operand0.value);
2063   emit_dword(emit, sampler_number);
2064}
2065
2066
2067/**
2068 * Emit an operand which reads the IS_FRONT_FACING register.
2069 */
2070static void
2071emit_face_register(struct svga_shader_emitter_v10 *emit)
2072{
2073   VGPU10OperandToken0 operand0;
2074   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
2075
2076   /* init */
2077   operand0.value = 0;
2078
2079   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
2080   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2081   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
2082   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2083
2084   operand0.swizzleX = VGPU10_COMPONENT_X;
2085   operand0.swizzleY = VGPU10_COMPONENT_X;
2086   operand0.swizzleZ = VGPU10_COMPONENT_X;
2087   operand0.swizzleW = VGPU10_COMPONENT_X;
2088
2089   emit_dword(emit, operand0.value);
2090   emit_dword(emit, index);
2091}
2092
2093
2094/**
2095 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
2096 * instruction.
2097 */
2098static void
2099emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
2100{
2101   VGPU10OperandToken0 operand0;
2102
2103   /* init */
2104   operand0.value = 0;
2105
2106   /* No register index for rasterizer index (there's only one) */
2107   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
2108   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2109   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2110   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2111   operand0.swizzleX = VGPU10_COMPONENT_X;
2112   operand0.swizzleY = VGPU10_COMPONENT_Y;
2113   operand0.swizzleZ = VGPU10_COMPONENT_Z;
2114   operand0.swizzleW = VGPU10_COMPONENT_W;
2115
2116   emit_dword(emit, operand0.value);
2117}
2118
2119
2120/**
2121 * Emit tokens for the "stream" register used by the
2122 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
2123 */
2124static void
2125emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
2126{
2127   VGPU10OperandToken0 operand0;
2128
2129   /* init */
2130   operand0.value = 0;
2131
2132   /* No register index for rasterizer index (there's only one) */
2133   operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
2134   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2135   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2136
2137   emit_dword(emit, operand0.value);
2138   emit_dword(emit, index);
2139}
2140
2141
2142/**
2143 * Emit the token for a VGPU10 opcode, with precise parameter.
2144 * \param saturate   clamp result to [0,1]?
2145 */
2146static void
2147emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
2148                    unsigned vgpu10_opcode, boolean saturate, boolean precise)
2149{
2150   VGPU10OpcodeToken0 token0;
2151
2152   token0.value = 0;  /* init all fields to zero */
2153   token0.opcodeType = vgpu10_opcode;
2154   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2155   token0.saturate = saturate;
2156
2157   /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
2158    * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
2159    */
2160   token0.preciseValues = precise && emit->version >= 50;
2161
2162   emit_dword(emit, token0.value);
2163
2164   emit->uses_precise_qualifier |= token0.preciseValues;
2165}
2166
2167
2168/**
2169 * Emit the token for a VGPU10 opcode.
2170 * \param saturate   clamp result to [0,1]?
2171 */
2172static void
2173emit_opcode(struct svga_shader_emitter_v10 *emit,
2174            unsigned vgpu10_opcode, boolean saturate)
2175{
2176   emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2177}
2178
2179
2180/**
2181 * Emit the token for a VGPU10 resinfo instruction.
2182 * \param modifier   return type modifier, _uint or _rcpFloat.
2183 *                   TODO: We may want to remove this parameter if it will
2184 *                   only ever be used as _uint.
2185 */
2186static void
2187emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2188                    VGPU10_RESINFO_RETURN_TYPE modifier)
2189{
2190   VGPU10OpcodeToken0 token0;
2191
2192   token0.value = 0;  /* init all fields to zero */
2193   token0.opcodeType = VGPU10_OPCODE_RESINFO;
2194   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2195   token0.resinfoReturnType = modifier;
2196
2197   emit_dword(emit, token0.value);
2198}
2199
2200
2201/**
2202 * Emit opcode tokens for a texture sample instruction.  Texture instructions
2203 * can be rather complicated (texel offsets, etc) so we have this specialized
2204 * function.
2205 */
2206static void
2207emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2208                   unsigned vgpu10_opcode, boolean saturate,
2209                   const int offsets[3])
2210{
2211   VGPU10OpcodeToken0 token0;
2212   VGPU10OpcodeToken1 token1;
2213
2214   token0.value = 0;  /* init all fields to zero */
2215   token0.opcodeType = vgpu10_opcode;
2216   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2217   token0.saturate = saturate;
2218
2219   if (offsets[0] || offsets[1] || offsets[2]) {
2220      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2221      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2222      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2223      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2224      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2225      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2226
2227      token0.extended = 1;
2228      token1.value = 0;
2229      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2230      token1.offsetU = offsets[0];
2231      token1.offsetV = offsets[1];
2232      token1.offsetW = offsets[2];
2233   }
2234
2235   emit_dword(emit, token0.value);
2236   if (token0.extended) {
2237      emit_dword(emit, token1.value);
2238   }
2239}
2240
2241
2242/**
2243 * Emit a DISCARD opcode token.
2244 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2245 * Otherwise, we'll discard the fragment if the X component is 0.
2246 */
2247static void
2248emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2249{
2250   VGPU10OpcodeToken0 opcode0;
2251
2252   opcode0.value = 0;
2253   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2254   if (nonzero)
2255      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2256
2257   emit_dword(emit, opcode0.value);
2258}
2259
2260
2261/**
2262 * We need to call this before we begin emitting a VGPU10 instruction.
2263 */
2264static void
2265begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2266{
2267   assert(emit->inst_start_token == 0);
2268   /* Save location of the instruction's VGPU10OpcodeToken0 token.
2269    * Note, we can't save a pointer because it would become invalid if
2270    * we have to realloc the output buffer.
2271    */
2272   emit->inst_start_token = emit_get_num_tokens(emit);
2273}
2274
2275
2276/**
2277 * We need to call this after we emit the last token of a VGPU10 instruction.
2278 * This function patches in the opcode token's instructionLength field.
2279 */
2280static void
2281end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2282{
2283   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2284   unsigned inst_length;
2285
2286   assert(emit->inst_start_token > 0);
2287
2288   if (emit->discard_instruction) {
2289      /* Back up the emit->ptr to where this instruction started so
2290       * that we discard the current instruction.
2291       */
2292      emit->ptr = (char *) (tokens + emit->inst_start_token);
2293   }
2294   else {
2295      /* Compute instruction length and patch that into the start of
2296       * the instruction.
2297       */
2298      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2299
2300      assert(inst_length > 0);
2301
2302      tokens[emit->inst_start_token].instructionLength = inst_length;
2303   }
2304
2305   emit->inst_start_token = 0; /* reset to zero for error checking */
2306   emit->discard_instruction = FALSE;
2307}
2308
2309
2310/**
2311 * Return index for a free temporary register.
2312 */
2313static unsigned
2314get_temp_index(struct svga_shader_emitter_v10 *emit)
2315{
2316   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2317   return emit->num_shader_temps + emit->internal_temp_count++;
2318}
2319
2320
2321/**
2322 * Release the temporaries which were generated by get_temp_index().
2323 */
2324static void
2325free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2326{
2327   emit->internal_temp_count = 0;
2328}
2329
2330
2331/**
2332 * Create a tgsi_full_src_register.
2333 */
2334static struct tgsi_full_src_register
2335make_src_reg(enum tgsi_file_type file, unsigned index)
2336{
2337   struct tgsi_full_src_register reg;
2338
2339   memset(&reg, 0, sizeof(reg));
2340   reg.Register.File = file;
2341   reg.Register.Index = index;
2342   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2343   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2344   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2345   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2346   return reg;
2347}
2348
2349
2350/**
2351 * Create a tgsi_full_src_register with a swizzle such that all four
2352 * vector components have the same scalar value.
2353 */
2354static struct tgsi_full_src_register
2355make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2356{
2357   struct tgsi_full_src_register reg;
2358
2359   assert(component >= TGSI_SWIZZLE_X);
2360   assert(component <= TGSI_SWIZZLE_W);
2361
2362   memset(&reg, 0, sizeof(reg));
2363   reg.Register.File = file;
2364   reg.Register.Index = index;
2365   reg.Register.SwizzleX =
2366   reg.Register.SwizzleY =
2367   reg.Register.SwizzleZ =
2368   reg.Register.SwizzleW = component;
2369   return reg;
2370}
2371
2372
2373/**
2374 * Create a tgsi_full_src_register for a temporary.
2375 */
2376static struct tgsi_full_src_register
2377make_src_temp_reg(unsigned index)
2378{
2379   return make_src_reg(TGSI_FILE_TEMPORARY, index);
2380}
2381
2382
2383/**
2384 * Create a tgsi_full_src_register for a constant.
2385 */
2386static struct tgsi_full_src_register
2387make_src_const_reg(unsigned index)
2388{
2389   return make_src_reg(TGSI_FILE_CONSTANT, index);
2390}
2391
2392
2393/**
2394 * Create a tgsi_full_src_register for an immediate constant.
2395 */
2396static struct tgsi_full_src_register
2397make_src_immediate_reg(unsigned index)
2398{
2399   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2400}
2401
2402
2403/**
2404 * Create a tgsi_full_dst_register.
2405 */
2406static struct tgsi_full_dst_register
2407make_dst_reg(enum tgsi_file_type file, unsigned index)
2408{
2409   struct tgsi_full_dst_register reg;
2410
2411   memset(&reg, 0, sizeof(reg));
2412   reg.Register.File = file;
2413   reg.Register.Index = index;
2414   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2415   return reg;
2416}
2417
2418
2419/**
2420 * Create a tgsi_full_dst_register for a temporary.
2421 */
2422static struct tgsi_full_dst_register
2423make_dst_temp_reg(unsigned index)
2424{
2425   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2426}
2427
2428
2429/**
2430 * Create a tgsi_full_dst_register for an output.
2431 */
2432static struct tgsi_full_dst_register
2433make_dst_output_reg(unsigned index)
2434{
2435   return make_dst_reg(TGSI_FILE_OUTPUT, index);
2436}
2437
2438
2439/**
2440 * Create negated tgsi_full_src_register.
2441 */
2442static struct tgsi_full_src_register
2443negate_src(const struct tgsi_full_src_register *reg)
2444{
2445   struct tgsi_full_src_register neg = *reg;
2446   neg.Register.Negate = !reg->Register.Negate;
2447   return neg;
2448}
2449
2450/**
2451 * Create absolute value of a tgsi_full_src_register.
2452 */
2453static struct tgsi_full_src_register
2454absolute_src(const struct tgsi_full_src_register *reg)
2455{
2456   struct tgsi_full_src_register absolute = *reg;
2457   absolute.Register.Absolute = 1;
2458   return absolute;
2459}
2460
2461
2462/** Return the named swizzle term from the src register */
2463static inline unsigned
2464get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2465{
2466   switch (term) {
2467   case TGSI_SWIZZLE_X:
2468      return reg->Register.SwizzleX;
2469   case TGSI_SWIZZLE_Y:
2470      return reg->Register.SwizzleY;
2471   case TGSI_SWIZZLE_Z:
2472      return reg->Register.SwizzleZ;
2473   case TGSI_SWIZZLE_W:
2474      return reg->Register.SwizzleW;
2475   default:
2476      assert(!"Bad swizzle");
2477      return TGSI_SWIZZLE_X;
2478   }
2479}
2480
2481
2482/**
2483 * Create swizzled tgsi_full_src_register.
2484 */
2485static struct tgsi_full_src_register
2486swizzle_src(const struct tgsi_full_src_register *reg,
2487            enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2488            enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2489{
2490   struct tgsi_full_src_register swizzled = *reg;
2491   /* Note: we swizzle the current swizzle */
2492   swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2493   swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2494   swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2495   swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2496   return swizzled;
2497}
2498
2499
2500/**
2501 * Create swizzled tgsi_full_src_register where all the swizzle
2502 * terms are the same.
2503 */
2504static struct tgsi_full_src_register
2505scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2506{
2507   struct tgsi_full_src_register swizzled = *reg;
2508   /* Note: we swizzle the current swizzle */
2509   swizzled.Register.SwizzleX =
2510   swizzled.Register.SwizzleY =
2511   swizzled.Register.SwizzleZ =
2512   swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2513   return swizzled;
2514}
2515
2516
2517/**
2518 * Create new tgsi_full_dst_register with writemask.
2519 * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2520 */
2521static struct tgsi_full_dst_register
2522writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2523{
2524   struct tgsi_full_dst_register masked = *reg;
2525   masked.Register.WriteMask = mask;
2526   return masked;
2527}
2528
2529
2530/**
2531 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2532 */
2533static boolean
2534same_swizzle_terms(const struct tgsi_full_src_register *reg)
2535{
2536   return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2537           reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2538           reg->Register.SwizzleZ == reg->Register.SwizzleW);
2539}
2540
2541
2542/**
2543 * Search the vector for the value 'x' and return its position.
2544 */
2545static int
2546find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2547                 union tgsi_immediate_data x)
2548{
2549   unsigned i;
2550   for (i = 0; i < 4; i++) {
2551      if (vec[i].Int == x.Int)
2552         return i;
2553   }
2554   return -1;
2555}
2556
2557
2558/**
2559 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2560 */
2561static int
2562find_immediate(struct svga_shader_emitter_v10 *emit,
2563               union tgsi_immediate_data x, unsigned startIndex)
2564{
2565   const unsigned endIndex = emit->num_immediates;
2566   unsigned i;
2567
2568   assert(emit->immediates_emitted);
2569
2570   /* Search immediates for x, y, z, w */
2571   for (i = startIndex; i < endIndex; i++) {
2572      if (x.Int == emit->immediates[i][0].Int ||
2573          x.Int == emit->immediates[i][1].Int ||
2574          x.Int == emit->immediates[i][2].Int ||
2575          x.Int == emit->immediates[i][3].Int) {
2576         return i;
2577      }
2578   }
2579   /* Should never try to use an immediate value that wasn't pre-declared */
2580   assert(!"find_immediate() failed!");
2581   return -1;
2582}
2583
2584
2585/**
2586 * As above, but search for a double[2] pair.
2587 */
2588static int
2589find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2590                   double x, double y)
2591{
2592   const unsigned endIndex = emit->num_immediates;
2593   unsigned i;
2594
2595   assert(emit->immediates_emitted);
2596
2597   /* Search immediates for x, y, z, w */
2598   for (i = 0; i < endIndex; i++) {
2599      if (x == emit->immediates_dbl[i][0] &&
2600          y == emit->immediates_dbl[i][1]) {
2601         return i;
2602      }
2603   }
2604   /* Should never try to use an immediate value that wasn't pre-declared */
2605   assert(!"find_immediate_dbl() failed!");
2606   return -1;
2607}
2608
2609
2610
2611/**
2612 * Return a tgsi_full_src_register for an immediate/literal
2613 * union tgsi_immediate_data[4] value.
2614 * Note: the values must have been previously declared/allocated in
2615 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2616 * vec4 immediate.
2617 */
2618static struct tgsi_full_src_register
2619make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2620                     const union tgsi_immediate_data imm[4])
2621{
2622   struct tgsi_full_src_register reg;
2623   unsigned i;
2624
2625   for (i = 0; i < emit->num_common_immediates; i++) {
2626      /* search for first component value */
2627      int immpos = find_immediate(emit, imm[0], i);
2628      int x, y, z, w;
2629
2630      assert(immpos >= 0);
2631
2632      /* find remaining components within the immediate vector */
2633      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2634      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2635      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2636      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2637
2638      if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2639         /* found them all */
2640         memset(&reg, 0, sizeof(reg));
2641         reg.Register.File = TGSI_FILE_IMMEDIATE;
2642         reg.Register.Index = immpos;
2643         reg.Register.SwizzleX = x;
2644         reg.Register.SwizzleY = y;
2645         reg.Register.SwizzleZ = z;
2646         reg.Register.SwizzleW = w;
2647         return reg;
2648      }
2649      /* else, keep searching */
2650   }
2651
2652   assert(!"Failed to find immediate register!");
2653
2654   /* Just return IMM[0].xxxx */
2655   memset(&reg, 0, sizeof(reg));
2656   reg.Register.File = TGSI_FILE_IMMEDIATE;
2657   return reg;
2658}
2659
2660
2661/**
2662 * Return a tgsi_full_src_register for an immediate/literal
2663 * union tgsi_immediate_data value of the form {value, value, value, value}.
2664 * \sa make_immediate_reg_4() regarding allowed values.
2665 */
2666static struct tgsi_full_src_register
2667make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2668                   union tgsi_immediate_data value)
2669{
2670   struct tgsi_full_src_register reg;
2671   int immpos = find_immediate(emit, value, 0);
2672
2673   assert(immpos >= 0);
2674
2675   memset(&reg, 0, sizeof(reg));
2676   reg.Register.File = TGSI_FILE_IMMEDIATE;
2677   reg.Register.Index = immpos;
2678   reg.Register.SwizzleX =
2679   reg.Register.SwizzleY =
2680   reg.Register.SwizzleZ =
2681   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2682
2683   return reg;
2684}
2685
2686
2687/**
2688 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2689 * \sa make_immediate_reg_4() regarding allowed values.
2690 */
2691static struct tgsi_full_src_register
2692make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2693                          float x, float y, float z, float w)
2694{
2695   union tgsi_immediate_data imm[4];
2696   imm[0].Float = x;
2697   imm[1].Float = y;
2698   imm[2].Float = z;
2699   imm[3].Float = w;
2700   return make_immediate_reg_4(emit, imm);
2701}
2702
2703
2704/**
2705 * Return a tgsi_full_src_register for an immediate/literal float value
2706 * of the form {value, value, value, value}.
2707 * \sa make_immediate_reg_4() regarding allowed values.
2708 */
2709static struct tgsi_full_src_register
2710make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2711{
2712   union tgsi_immediate_data imm;
2713   imm.Float = value;
2714   return make_immediate_reg(emit, imm);
2715}
2716
2717
2718/**
2719 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2720 */
2721static struct tgsi_full_src_register
2722make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2723                        int x, int y, int z, int w)
2724{
2725   union tgsi_immediate_data imm[4];
2726   imm[0].Int = x;
2727   imm[1].Int = y;
2728   imm[2].Int = z;
2729   imm[3].Int = w;
2730   return make_immediate_reg_4(emit, imm);
2731}
2732
2733
2734/**
2735 * Return a tgsi_full_src_register for an immediate/literal int value
2736 * of the form {value, value, value, value}.
2737 * \sa make_immediate_reg_4() regarding allowed values.
2738 */
2739static struct tgsi_full_src_register
2740make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2741{
2742   union tgsi_immediate_data imm;
2743   imm.Int = value;
2744   return make_immediate_reg(emit, imm);
2745}
2746
2747
2748static struct tgsi_full_src_register
2749make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2750{
2751   struct tgsi_full_src_register reg;
2752   int immpos = find_immediate_dbl(emit, value, value);
2753
2754   assert(immpos >= 0);
2755
2756   memset(&reg, 0, sizeof(reg));
2757   reg.Register.File = TGSI_FILE_IMMEDIATE;
2758   reg.Register.Index = immpos;
2759   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2760   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2761   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2762   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2763
2764   return reg;
2765}
2766
2767
2768/**
2769 * Allocate space for a union tgsi_immediate_data[4] immediate.
2770 * \return  the index/position of the immediate.
2771 */
2772static unsigned
2773alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2774                  const union tgsi_immediate_data imm[4])
2775{
2776   unsigned n = emit->num_immediates++;
2777   assert(!emit->immediates_emitted);
2778   assert(n < ARRAY_SIZE(emit->immediates));
2779   emit->immediates[n][0] = imm[0];
2780   emit->immediates[n][1] = imm[1];
2781   emit->immediates[n][2] = imm[2];
2782   emit->immediates[n][3] = imm[3];
2783   return n;
2784}
2785
2786
2787/**
2788 * Allocate space for a float[4] immediate.
2789 * \return  the index/position of the immediate.
2790 */
2791static unsigned
2792alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2793                       float x, float y, float z, float w)
2794{
2795   union tgsi_immediate_data imm[4];
2796   imm[0].Float = x;
2797   imm[1].Float = y;
2798   imm[2].Float = z;
2799   imm[3].Float = w;
2800   return alloc_immediate_4(emit, imm);
2801}
2802
2803
2804/**
2805 * Allocate space for an int[4] immediate.
2806 * \return  the index/position of the immediate.
2807 */
2808static unsigned
2809alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2810                       int x, int y, int z, int w)
2811{
2812   union tgsi_immediate_data imm[4];
2813   imm[0].Int = x;
2814   imm[1].Int = y;
2815   imm[2].Int = z;
2816   imm[3].Int = w;
2817   return alloc_immediate_4(emit, imm);
2818}
2819
2820
2821static unsigned
2822alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2823                        double x, double y)
2824{
2825   unsigned n = emit->num_immediates++;
2826   assert(!emit->immediates_emitted);
2827   assert(n < ARRAY_SIZE(emit->immediates));
2828   emit->immediates_dbl[n][0] = x;
2829   emit->immediates_dbl[n][1] = y;
2830   return n;
2831
2832}
2833
2834
2835/**
2836 * Allocate a shader input to store a system value.
2837 */
2838static unsigned
2839alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2840{
2841   const unsigned n = emit->linkage.input_map_max + 1 + index;
2842   assert(index < ARRAY_SIZE(emit->system_value_indexes));
2843   emit->system_value_indexes[index] = n;
2844   return n;
2845}
2846
2847
2848/**
2849 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2850 */
2851static boolean
2852emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2853                      const struct tgsi_full_immediate *imm)
2854{
2855   /* We don't actually emit any code here.  We just save the
2856    * immediate values and emit them later.
2857    */
2858   alloc_immediate_4(emit, imm->u);
2859   return TRUE;
2860}
2861
2862
2863/**
2864 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2865 * containing all the immediate values previously allocated
2866 * with alloc_immediate_4().
2867 */
2868static boolean
2869emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2870{
2871   VGPU10OpcodeToken0 token;
2872
2873   assert(!emit->immediates_emitted);
2874
2875   token.value = 0;
2876   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2877   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2878
2879   /* Note: no begin/end_emit_instruction() calls */
2880   emit_dword(emit, token.value);
2881   emit_dword(emit, 2 + 4 * emit->num_immediates);
2882   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2883
2884   emit->immediates_emitted = TRUE;
2885
2886   return TRUE;
2887}
2888
2889
2890/**
2891 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2892 * interpolation mode.
2893 * \return a VGPU10_INTERPOLATION_x value
2894 */
2895static unsigned
2896translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2897                        enum tgsi_interpolate_mode interp,
2898                        enum tgsi_interpolate_loc interpolate_loc)
2899{
2900   if (interp == TGSI_INTERPOLATE_COLOR) {
2901      interp = emit->key.fs.flatshade ?
2902         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2903   }
2904
2905   switch (interp) {
2906   case TGSI_INTERPOLATE_CONSTANT:
2907      return VGPU10_INTERPOLATION_CONSTANT;
2908   case TGSI_INTERPOLATE_LINEAR:
2909      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2910         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2911      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2912                 emit->version >= 41) {
2913         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2914      } else {
2915         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2916      }
2917      break;
2918   case TGSI_INTERPOLATE_PERSPECTIVE:
2919      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2920         return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2921      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2922                 emit->version >= 41) {
2923         return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2924      } else {
2925         return VGPU10_INTERPOLATION_LINEAR;
2926      }
2927      break;
2928   default:
2929      assert(!"Unexpected interpolation mode");
2930      return VGPU10_INTERPOLATION_CONSTANT;
2931   }
2932}
2933
2934
2935/**
2936 * Translate a TGSI property to VGPU10.
2937 * Don't emit any instructions yet, only need to gather the primitive property
2938 * information.  The output primitive topology might be changed later. The
2939 * final property instructions will be emitted as part of the pre-helper code.
2940 */
2941static boolean
2942emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2943                     const struct tgsi_full_property *prop)
2944{
2945   static const VGPU10_PRIMITIVE primType[] = {
2946      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
2947      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
2948      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
2949      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
2950      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
2951      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
2952      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
2953      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
2954      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
2955      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
2956      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
2957      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2958      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2959      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2960   };
2961
2962   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2963      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
2964      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
2965      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
2966      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
2967      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
2968      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2969      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2970      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
2971      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
2972      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
2973      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
2974      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2975      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2976      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2977   };
2978
2979   static const unsigned inputArraySize[] = {
2980      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
2981      1,       /* VGPU10_PRIMITIVE_POINT */
2982      2,       /* VGPU10_PRIMITIVE_LINE */
2983      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
2984      0,
2985      0,
2986      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
2987      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2988   };
2989
2990   switch (prop->Property.PropertyName) {
2991   case TGSI_PROPERTY_GS_INPUT_PRIM:
2992      assert(prop->u[0].Data < ARRAY_SIZE(primType));
2993      emit->gs.prim_type = primType[prop->u[0].Data];
2994      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2995      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2996      break;
2997
2998   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2999      assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
3000      emit->gs.prim_topology = primTopology[prop->u[0].Data];
3001      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
3002      break;
3003
3004   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
3005      emit->gs.max_out_vertices = prop->u[0].Data;
3006      break;
3007
3008   case TGSI_PROPERTY_GS_INVOCATIONS:
3009      emit->gs.invocations = prop->u[0].Data;
3010      break;
3011
3012   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
3013   case TGSI_PROPERTY_NEXT_SHADER:
3014   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
3015      /* no-op */
3016      break;
3017
3018   case TGSI_PROPERTY_TCS_VERTICES_OUT:
3019      /* This info is already captured in the shader key */
3020      break;
3021
3022   case TGSI_PROPERTY_TES_PRIM_MODE:
3023      emit->tes.prim_mode = prop->u[0].Data;
3024      break;
3025
3026   case TGSI_PROPERTY_TES_SPACING:
3027      emit->tes.spacing = prop->u[0].Data;
3028      break;
3029
3030   case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
3031      emit->tes.vertices_order_cw = prop->u[0].Data;
3032      break;
3033
3034   case TGSI_PROPERTY_TES_POINT_MODE:
3035      emit->tes.point_mode = prop->u[0].Data;
3036      break;
3037
3038   case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
3039      emit->cs.block_width = prop->u[0].Data;
3040      break;
3041
3042   case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
3043      emit->cs.block_height = prop->u[0].Data;
3044      break;
3045
3046   case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
3047      emit->cs.block_depth = prop->u[0].Data;
3048      break;
3049
3050   case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
3051      emit->fs.forceEarlyDepthStencil = TRUE;
3052      break;
3053
3054   default:
3055      debug_printf("Unexpected TGSI property %s\n",
3056                   tgsi_property_names[prop->Property.PropertyName]);
3057   }
3058
3059   return TRUE;
3060}
3061
3062
3063static void
3064emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3065                          VGPU10OpcodeToken0 opcode0, unsigned nData,
3066                          unsigned data)
3067{
3068   begin_emit_instruction(emit);
3069   emit_dword(emit, opcode0.value);
3070   if (nData)
3071      emit_dword(emit, data);
3072   end_emit_instruction(emit);
3073}
3074
3075
3076/**
3077 * Emit property instructions
3078 */
3079static void
3080emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3081{
3082   VGPU10OpcodeToken0 opcode0;
3083
3084   assert(emit->unit == PIPE_SHADER_GEOMETRY);
3085
3086   /* emit input primitive type declaration */
3087   opcode0.value = 0;
3088   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3089   opcode0.primitive = emit->gs.prim_type;
3090   emit_property_instruction(emit, opcode0, 0, 0);
3091
3092   /* emit max output vertices */
3093   opcode0.value = 0;
3094   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3095   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3096
3097   if (emit->version >= 50 && emit->gs.invocations > 0) {
3098      opcode0.value = 0;
3099      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3100      emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3101   }
3102}
3103
3104
3105/**
3106 * A helper function to declare tessellator domain in a hull shader or
3107 * in the domain shader.
3108 */
3109static void
3110emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3111                        enum pipe_prim_type prim_mode)
3112{
3113   VGPU10OpcodeToken0 opcode0;
3114
3115   opcode0.value = 0;
3116   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3117   switch (prim_mode) {
3118   case PIPE_PRIM_QUADS:
3119   case PIPE_PRIM_LINES:
3120      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3121      break;
3122   case PIPE_PRIM_TRIANGLES:
3123      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3124      break;
3125   default:
3126      debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3127      opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3128   }
3129   begin_emit_instruction(emit);
3130   emit_dword(emit, opcode0.value);
3131   end_emit_instruction(emit);
3132}
3133
3134
3135/**
3136 * Emit domain shader declarations.
3137 */
3138static void
3139emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3140{
3141   VGPU10OpcodeToken0 opcode0;
3142
3143   assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3144
3145   /* Emit the input control point count */
3146   assert(emit->key.tes.vertices_per_patch >= 0 &&
3147          emit->key.tes.vertices_per_patch <= 32);
3148
3149   opcode0.value = 0;
3150   opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3151   opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3152   begin_emit_instruction(emit);
3153   emit_dword(emit, opcode0.value);
3154   end_emit_instruction(emit);
3155
3156   emit_tessellator_domain(emit, emit->tes.prim_mode);
3157
3158   /* Specify a max for swizzles of the domain point according to the
3159    * tessellator domain type.
3160    */
3161   emit->tes.swizzle_max = emit->tes.prim_mode == PIPE_PRIM_TRIANGLES ?
3162                              TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3163}
3164
3165
3166/**
3167 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
3168 * to implement some instructions.  We pre-allocate those values here
3169 * in the immediate constant buffer.
3170 */
3171static void
3172alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
3173{
3174   unsigned n = 0;
3175
3176   emit->common_immediate_pos[n++] =
3177      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
3178
3179   if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
3180      emit->common_immediate_pos[n++] =
3181         alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
3182   }
3183
3184   emit->common_immediate_pos[n++] =
3185      alloc_immediate_int4(emit, 0, 1, 2, -1);
3186
3187   emit->common_immediate_pos[n++] =
3188      alloc_immediate_int4(emit, 3, 4, 5, 6);
3189
3190   if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
3191       emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
3192      emit->common_immediate_pos[n++] =
3193         alloc_immediate_int4(emit, 31, 0, 0, 0);
3194   }
3195
3196   if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
3197       emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
3198       emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3199      emit->common_immediate_pos[n++] =
3200         alloc_immediate_int4(emit, 32, 0, 0, 0);
3201   }
3202
3203   if (emit->key.vs.attrib_puint_to_snorm) {
3204      emit->common_immediate_pos[n++] =
3205         alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3206   }
3207
3208   if (emit->key.vs.attrib_puint_to_uscaled) {
3209      emit->common_immediate_pos[n++] =
3210         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3211   }
3212
3213   if (emit->key.vs.attrib_puint_to_sscaled) {
3214      emit->common_immediate_pos[n++] =
3215         alloc_immediate_int4(emit, 22, 12, 2, 0);
3216
3217      emit->common_immediate_pos[n++] =
3218         alloc_immediate_int4(emit, 22, 30, 0, 0);
3219   }
3220
3221   if (emit->vposition.num_prescale > 1) {
3222      unsigned i;
3223      for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3224         emit->common_immediate_pos[n++] =
3225            alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3226      }
3227   }
3228
3229   emit->immediates_dbl = (double (*)[2]) emit->immediates;
3230
3231   if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3232      emit->common_immediate_pos[n++] =
3233         alloc_immediate_double2(emit, -1.0, -1.0);
3234   }
3235
3236   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
3237       emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
3238      emit->common_immediate_pos[n++] =
3239         alloc_immediate_double2(emit, 0.0, 0.0);
3240      emit->common_immediate_pos[n++] =
3241         alloc_immediate_double2(emit, 1.0, 1.0);
3242   }
3243
3244   if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3245      emit->common_immediate_pos[n++] =
3246         alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3247   }
3248
3249   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3250
3251   unsigned i;
3252
3253   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3254      if (emit->key.tex[i].texel_bias) {
3255         /* Replace 0.0f if more immediate float value is needed */
3256         emit->common_immediate_pos[n++] =
3257            alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3258         break;
3259      }
3260   }
3261
3262   /** TODO: allocate immediates for all possible element byte offset?
3263    */
3264   if (emit->raw_bufs) {
3265      unsigned i;
3266      for (i = 7; i < 12; i+=4) {
3267         emit->common_immediate_pos[n++] =
3268            alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3269      }
3270   }
3271
3272   if (emit->info.indirect_files &
3273       (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
3274      unsigned i;
3275      for (i = 7; i < 8; i+=4) {
3276         emit->common_immediate_pos[n++] =
3277            alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3278      }
3279   }
3280
3281   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3282   emit->num_common_immediates = n;
3283}
3284
3285
3286/**
3287 * Emit hull shader declarations.
3288*/
3289static void
3290emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3291{
3292   VGPU10OpcodeToken0 opcode0;
3293
3294   /* Emit the input control point count */
3295   assert(emit->key.tcs.vertices_per_patch > 0 &&
3296          emit->key.tcs.vertices_per_patch <= 32);
3297
3298   opcode0.value = 0;
3299   opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3300   opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3301   begin_emit_instruction(emit);
3302   emit_dword(emit, opcode0.value);
3303   end_emit_instruction(emit);
3304
3305   /* Emit the output control point count */
3306   assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3307
3308   opcode0.value = 0;
3309   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3310   opcode0.controlPointCount = emit->key.tcs.vertices_out;
3311   begin_emit_instruction(emit);
3312   emit_dword(emit, opcode0.value);
3313   end_emit_instruction(emit);
3314
3315   /* Emit tessellator domain */
3316   emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3317
3318   /* Emit tessellator output primitive */
3319   opcode0.value = 0;
3320   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3321   if (emit->key.tcs.point_mode) {
3322      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3323   }
3324   else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3325      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3326   }
3327   else {
3328      assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3329             emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3330
3331      if (emit->key.tcs.vertices_order_cw)
3332         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3333      else
3334         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3335   }
3336   begin_emit_instruction(emit);
3337   emit_dword(emit, opcode0.value);
3338   end_emit_instruction(emit);
3339
3340   /* Emit tessellator partitioning */
3341   opcode0.value = 0;
3342   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3343   switch (emit->key.tcs.spacing) {
3344   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3345      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3346      break;
3347   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3348      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3349      break;
3350   case PIPE_TESS_SPACING_EQUAL:
3351      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3352      break;
3353   default:
3354      debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3355      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3356   }
3357   begin_emit_instruction(emit);
3358   emit_dword(emit, opcode0.value);
3359   end_emit_instruction(emit);
3360
3361   alloc_common_immediates(emit);
3362
3363   /* Declare constant registers */
3364   emit_constant_declaration(emit);
3365
3366   /* Declare samplers and resources */
3367   emit_sampler_declarations(emit);
3368   emit_resource_declarations(emit);
3369
3370   /* Declare images */
3371   emit_image_declarations(emit);
3372
3373   /* Declare shader buffers */
3374   emit_shader_buf_declarations(emit);
3375
3376   /* Declare atomic buffers */
3377   emit_atomic_buf_declarations(emit);
3378
3379   int nVertices = emit->key.tcs.vertices_per_patch;
3380   emit->tcs.imm_index =
3381      alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3382
3383   /* Now, emit the constant block containing all the immediates
3384    * declared by shader, as well as the extra ones seen above.
3385    */
3386   emit_vgpu10_immediates_block(emit);
3387
3388}
3389
3390
3391/**
3392 * A helper function to determine if control point phase is needed.
3393 * Returns TRUE if there is control point output.
3394 */
3395static boolean
3396needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3397{
3398   unsigned i;
3399
3400   assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3401
3402   /* If output control point count does not match the input count,
3403    * we need a control point phase to explicitly set the output control
3404    * points.
3405    */
3406   if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3407       emit->key.tcs.vertices_out)
3408      return TRUE;
3409
3410   for (i = 0; i < emit->info.num_outputs; i++) {
3411      switch (emit->info.output_semantic_name[i]) {
3412      case TGSI_SEMANTIC_PATCH:
3413      case TGSI_SEMANTIC_TESSOUTER:
3414      case TGSI_SEMANTIC_TESSINNER:
3415         break;
3416      default:
3417         return TRUE;
3418      }
3419   }
3420   return FALSE;
3421}
3422
3423
3424/**
3425 * A helper function to add shader signature for passthrough control point
3426 * phase. This signature is also generated for passthrough control point
3427 * phase from HLSL compiler and is needed by Metal Renderer.
3428 */
3429static void
3430emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3431{
3432   struct svga_shader_signature *sgn = &emit->signature;
3433   SVGA3dDXShaderSignatureEntry *sgnEntry;
3434   unsigned i;
3435
3436   for (i = 0; i < emit->info.num_inputs; i++) {
3437      unsigned index = emit->linkage.input_map[i];
3438      enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3439
3440      sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3441
3442      set_shader_signature_entry(sgnEntry, index,
3443                                 tgsi_semantic_to_sgn_name[sem_name],
3444                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3445                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3446                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3447
3448      sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3449
3450      set_shader_signature_entry(sgnEntry, i,
3451                                 tgsi_semantic_to_sgn_name[sem_name],
3452                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3453                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3454                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3455   }
3456}
3457
3458
3459/**
3460 * A helper function to emit an instruction to start the control point phase
3461 * in the hull shader.
3462 */
3463static void
3464emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3465{
3466   VGPU10OpcodeToken0 opcode0;
3467
3468   opcode0.value = 0;
3469   opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3470   begin_emit_instruction(emit);
3471   emit_dword(emit, opcode0.value);
3472   end_emit_instruction(emit);
3473}
3474
3475
3476/**
3477 * Start the hull shader control point phase
3478 */
3479static boolean
3480emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3481{
3482   /* If there is no control point output, skip the control point phase. */
3483   if (!needs_control_point_phase(emit)) {
3484      if (!emit->key.tcs.vertices_out) {
3485         /**
3486          * If the tcs does not explicitly generate any control point output
3487          * and the tes does not use any input control point, then
3488          * emit an empty control point phase with zero output control
3489          * point count.
3490          */
3491         emit_control_point_phase_instruction(emit);
3492
3493         /**
3494          * Since this is an empty control point phase, we will need to
3495          * add input signatures when we parse the tcs again in the
3496          * patch constant phase.
3497          */
3498         emit->tcs.fork_phase_add_signature = TRUE;
3499      }
3500      else {
3501         /**
3502          * Before skipping the control point phase, add the signature for
3503          * the passthrough control point.
3504          */
3505         emit_passthrough_control_point_signature(emit);
3506      }
3507      return FALSE;
3508   }
3509
3510   /* Start the control point phase in the hull shader */
3511   emit_control_point_phase_instruction(emit);
3512
3513   /* Declare the output control point ID */
3514   if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3515      /* Add invocation id declaration if it does not exist */
3516      emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3517   }
3518
3519   emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3520                          VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3521                          VGPU10_OPERAND_INDEX_0D,
3522                          0, 1,
3523                          VGPU10_NAME_UNDEFINED,
3524                          VGPU10_OPERAND_0_COMPONENT, 0,
3525                          0,
3526                          VGPU10_INTERPOLATION_CONSTANT, TRUE,
3527                          SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3528
3529   if (emit->tcs.prim_id_index != INVALID_INDEX) {
3530      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3531                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3532                             VGPU10_OPERAND_INDEX_0D,
3533                             0, 1,
3534                             VGPU10_NAME_UNDEFINED,
3535                             VGPU10_OPERAND_0_COMPONENT,
3536                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3537                             0,
3538                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3539                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3540   }
3541
3542   return TRUE;
3543}
3544
3545
3546/**
3547 * Start the hull shader patch constant phase and
3548 * do the second pass of the tcs translation and emit
3549 * the relevant declarations and instructions for this phase.
3550 */
3551static boolean
3552emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3553                                      struct tgsi_parse_context *parse)
3554{
3555   unsigned inst_number = 0;
3556   boolean ret = TRUE;
3557   VGPU10OpcodeToken0 opcode0;
3558
3559   emit->skip_instruction = FALSE;
3560
3561   /* Start the patch constant phase */
3562   opcode0.value = 0;
3563   opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3564   begin_emit_instruction(emit);
3565   emit_dword(emit, opcode0.value);
3566   end_emit_instruction(emit);
3567
3568   /* Set the current phase to patch constant phase */
3569   emit->tcs.control_point_phase = FALSE;
3570
3571   if (emit->tcs.prim_id_index != INVALID_INDEX) {
3572      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3573                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3574                             VGPU10_OPERAND_INDEX_0D,
3575                             0, 1,
3576                             VGPU10_NAME_UNDEFINED,
3577                             VGPU10_OPERAND_0_COMPONENT,
3578                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3579                             0,
3580                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3581                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3582   }
3583
3584   /* Emit declarations for this phase */
3585   emit->index_range.required =
3586      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3587   emit_tcs_input_declarations(emit);
3588
3589   if (emit->index_range.start_index != INVALID_INDEX) {
3590      emit_index_range_declaration(emit);
3591   }
3592
3593   emit->index_range.required =
3594      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3595   emit_tcs_output_declarations(emit);
3596
3597   if (emit->index_range.start_index != INVALID_INDEX) {
3598      emit_index_range_declaration(emit);
3599   }
3600   emit->index_range.required = FALSE;
3601
3602   emit_temporaries_declaration(emit);
3603
3604   /* Reset the token position to the first instruction token
3605    * in preparation for the second pass of the shader
3606    */
3607   parse->Position = emit->tcs.instruction_token_pos;
3608
3609   while (!tgsi_parse_end_of_tokens(parse)) {
3610      tgsi_parse_token(parse);
3611
3612      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3613      ret = emit_vgpu10_instruction(emit, inst_number++,
3614                                    &parse->FullToken.FullInstruction);
3615
3616      /* Usually this applies to TCS only. If shader is reading output of
3617       * patch constant in fork phase, we should reemit all instructions
3618       * which are writting into output of patch constant in fork phase
3619       * to store results into temporaries.
3620       */
3621      assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
3622      if (emit->reemit_instruction) {
3623         assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3624         ret = emit_vgpu10_instruction(emit, inst_number,
3625                                       &parse->FullToken.FullInstruction);
3626      } else if (emit->reemit_rawbuf_instruction) {
3627         ret = emit_rawbuf_instruction(emit, inst_number,
3628                                       &parse->FullToken.FullInstruction);
3629      }
3630
3631      if (!ret)
3632         return FALSE;
3633   }
3634
3635   return TRUE;
3636}
3637
3638
3639/**
3640 * Emit the thread group declaration for compute shader.
3641 */
3642static void
3643emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3644{
3645   VGPU10OpcodeToken0 opcode0;
3646
3647   opcode0.value = 0;
3648   opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3649   begin_emit_instruction(emit);
3650   emit_dword(emit, opcode0.value);
3651   emit_dword(emit, emit->cs.block_width);
3652   emit_dword(emit, emit->cs.block_height);
3653   emit_dword(emit, emit->cs.block_depth);
3654   end_emit_instruction(emit);
3655}
3656
3657
3658/**
3659 * Emit index range declaration.
3660 */
3661static boolean
3662emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3663{
3664   if (emit->version < 50)
3665      return TRUE;
3666
3667   assert(emit->index_range.start_index != INVALID_INDEX);
3668   assert(emit->index_range.count != 0);
3669   assert(emit->index_range.required);
3670   assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3671   assert(emit->index_range.dim != 0);
3672   assert(emit->index_range.size != 0);
3673
3674   VGPU10OpcodeToken0 opcode0;
3675   VGPU10OperandToken0 operand0;
3676
3677   opcode0.value = 0;
3678   opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3679
3680   operand0.value = 0;
3681   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3682   operand0.indexDimension = emit->index_range.dim;
3683   operand0.operandType = emit->index_range.operandType;
3684   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3685   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3686
3687   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3688      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3689
3690   begin_emit_instruction(emit);
3691   emit_dword(emit, opcode0.value);
3692   emit_dword(emit, operand0.value);
3693
3694   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3695      emit_dword(emit, emit->index_range.size);
3696      emit_dword(emit, emit->index_range.start_index);
3697      emit_dword(emit, emit->index_range.count);
3698   }
3699   else {
3700      emit_dword(emit, emit->index_range.start_index);
3701      emit_dword(emit, emit->index_range.count);
3702   }
3703
3704   end_emit_instruction(emit);
3705
3706   /* Reset fields in emit->index_range struct except
3707    * emit->index_range.required which will be reset afterwards
3708    */
3709   emit->index_range.count = 0;
3710   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3711   emit->index_range.start_index = INVALID_INDEX;
3712   emit->index_range.size = 0;
3713   emit->index_range.dim = 0;
3714
3715   return TRUE;
3716}
3717
3718
3719/**
3720 * Emit a vgpu10 declaration "instruction".
3721 * \param index  the register index
3722 * \param size   array size of the operand. In most cases, it is 1,
3723 *               but for inputs to geometry shader, the array size varies
3724 *               depending on the primitive type.
3725 */
3726static void
3727emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3728                      VGPU10OpcodeToken0 opcode0,
3729                      VGPU10OperandToken0 operand0,
3730                      VGPU10NameToken name_token,
3731                      unsigned index, unsigned size)
3732{
3733   assert(opcode0.opcodeType);
3734   assert(operand0.mask ||
3735          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3736          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3737          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3738          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3739          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3740          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3741          (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3742          (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3743
3744   begin_emit_instruction(emit);
3745   emit_dword(emit, opcode0.value);
3746
3747   emit_dword(emit, operand0.value);
3748
3749   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3750      /* Next token is the index of the register to declare */
3751      emit_dword(emit, index);
3752   }
3753   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3754      /* Next token is the size of the register */
3755      emit_dword(emit, size);
3756
3757      /* Followed by the index of the register */
3758      emit_dword(emit, index);
3759   }
3760
3761   if (name_token.value) {
3762      emit_dword(emit, name_token.value);
3763   }
3764
3765   end_emit_instruction(emit);
3766}
3767
3768
3769/**
3770 * Emit the declaration for a shader input.
3771 * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3772 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3773 * \param dim         index dimension
3774 * \param index       the input register index
3775 * \param size        array size of the operand. In most cases, it is 1,
3776 *                    but for inputs to geometry shader, the array size varies
3777 *                    depending on the primitive type. For tessellation control
3778 *                    shader, the array size is the vertex count per patch.
3779 * \param name        one of VGPU10_NAME_x
3780 * \parma numComp     number of components
3781 * \param selMode     component selection mode
3782 * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3783 * \param interpMode  interpolation mode
3784 */
3785static void
3786emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3787                       VGPU10_OPCODE_TYPE opcodeType,
3788                       VGPU10_OPERAND_TYPE operandType,
3789                       VGPU10_OPERAND_INDEX_DIMENSION dim,
3790                       unsigned index, unsigned size,
3791                       VGPU10_SYSTEM_NAME name,
3792                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
3793                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3794                       unsigned usageMask,
3795                       VGPU10_INTERPOLATION_MODE interpMode,
3796                       boolean addSignature,
3797                       SVGA3dDXSignatureSemanticName sgnName)
3798{
3799   VGPU10OpcodeToken0 opcode0;
3800   VGPU10OperandToken0 operand0;
3801   VGPU10NameToken name_token;
3802
3803   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3804   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3805          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3806          opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3807          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3808          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3809          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3810   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3811          operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3812          operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3813          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3814          operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3815          operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3816          operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3817          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3818          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3819          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3820          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3821
3822   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3823   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3824   assert(dim <= VGPU10_OPERAND_INDEX_3D);
3825   assert(name == VGPU10_NAME_UNDEFINED ||
3826          name == VGPU10_NAME_POSITION ||
3827          name == VGPU10_NAME_INSTANCE_ID ||
3828          name == VGPU10_NAME_VERTEX_ID ||
3829          name == VGPU10_NAME_PRIMITIVE_ID ||
3830          name == VGPU10_NAME_IS_FRONT_FACE ||
3831          name == VGPU10_NAME_SAMPLE_INDEX ||
3832          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3833          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3834
3835   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3836          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3837          interpMode == VGPU10_INTERPOLATION_LINEAR ||
3838          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3839          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3840          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3841          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3842          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3843
3844   check_register_index(emit, opcodeType, index);
3845
3846   opcode0.value = operand0.value = name_token.value = 0;
3847
3848   opcode0.opcodeType = opcodeType;
3849   opcode0.interpolationMode = interpMode;
3850
3851   operand0.operandType = operandType;
3852   operand0.numComponents = numComp;
3853   operand0.selectionMode = selMode;
3854   operand0.mask = usageMask;
3855   operand0.indexDimension = dim;
3856   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3857   if (dim == VGPU10_OPERAND_INDEX_2D)
3858      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3859
3860   name_token.name = name;
3861
3862   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3863
3864   if (addSignature) {
3865      struct svga_shader_signature *sgn = &emit->signature;
3866      if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3867         /* Set patch constant signature */
3868         SVGA3dDXShaderSignatureEntry *sgnEntry =
3869            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3870         set_shader_signature_entry(sgnEntry, index,
3871                                    sgnName, usageMask,
3872                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3873                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3874
3875      } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3876                 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3877         /* Set input signature */
3878         SVGA3dDXShaderSignatureEntry *sgnEntry =
3879            &sgn->inputs[sgn->header.numInputSignatures++];
3880         set_shader_signature_entry(sgnEntry, index,
3881                                    sgnName, usageMask,
3882                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3883                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3884      }
3885   }
3886
3887   if (emit->index_range.required) {
3888      /* Here, index_range declaration is only applicable for opcodeType
3889       * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3890       * for operandType VGPU10_OPERAND_TYPE_INPUT,
3891       * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3892       * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3893       */
3894      if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3895           opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3896          (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3897           operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3898           operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3899         if (emit->index_range.start_index != INVALID_INDEX) {
3900            emit_index_range_declaration(emit);
3901         }
3902         return;
3903      }
3904
3905      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3906         /* Need record new index_range */
3907         emit->index_range.count = 1;
3908         emit->index_range.operandType = operandType;
3909         emit->index_range.start_index = index;
3910         emit->index_range.size = size;
3911         emit->index_range.dim = dim;
3912      }
3913      else if (index !=
3914               (emit->index_range.start_index + emit->index_range.count) ||
3915               emit->index_range.operandType != operandType) {
3916         /* Input index is not contiguous with index range or operandType is
3917          * different from index range's operandType. We need to emit current
3918          * index_range first and then start recording next index range.
3919          */
3920         emit_index_range_declaration(emit);
3921
3922         emit->index_range.count = 1;
3923         emit->index_range.operandType = operandType;
3924         emit->index_range.start_index = index;
3925         emit->index_range.size = size;
3926         emit->index_range.dim = dim;
3927      }
3928      else if (emit->index_range.operandType == operandType) {
3929         /* Since input index is contiguous with index range and operandType
3930          * is same as index range's operandType, increment index range count.
3931          */
3932         emit->index_range.count++;
3933      }
3934   }
3935}
3936
3937
3938/**
3939 * Emit the declaration for a shader output.
3940 * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
3941 * \param index  the output register index
3942 * \param name  one of VGPU10_NAME_x
3943 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3944 */
3945static void
3946emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3947                        VGPU10_OPCODE_TYPE type, unsigned index,
3948                        VGPU10_SYSTEM_NAME name,
3949                        unsigned writemask,
3950                        boolean addSignature,
3951                        SVGA3dDXSignatureSemanticName sgnName)
3952{
3953   VGPU10OpcodeToken0 opcode0;
3954   VGPU10OperandToken0 operand0;
3955   VGPU10NameToken name_token;
3956
3957   assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3958   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3959          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3960          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3961   assert(name == VGPU10_NAME_UNDEFINED ||
3962          name == VGPU10_NAME_POSITION ||
3963          name == VGPU10_NAME_PRIMITIVE_ID ||
3964          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3965          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3966          name == VGPU10_NAME_CLIP_DISTANCE);
3967
3968   check_register_index(emit, type, index);
3969
3970   opcode0.value = operand0.value = name_token.value = 0;
3971
3972   opcode0.opcodeType = type;
3973   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3974   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3975   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3976   operand0.mask = writemask;
3977   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3978   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3979
3980   name_token.name = name;
3981
3982   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3983
3984   /* Capture output signature */
3985   if (addSignature) {
3986      struct svga_shader_signature *sgn = &emit->signature;
3987      SVGA3dDXShaderSignatureEntry *sgnEntry =
3988         &sgn->outputs[sgn->header.numOutputSignatures++];
3989      set_shader_signature_entry(sgnEntry, index,
3990                                 sgnName, writemask,
3991                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3992                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3993   }
3994
3995   if (emit->index_range.required) {
3996      /* Here, index_range declaration is only applicable for opcodeType
3997       * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3998       * VGPU10_OPERAND_TYPE_OUTPUT.
3999       */
4000      if (type != VGPU10_OPCODE_DCL_OUTPUT) {
4001         if (emit->index_range.start_index != INVALID_INDEX) {
4002            emit_index_range_declaration(emit);
4003         }
4004         return;
4005      }
4006
4007      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
4008         /* Need record new index_range */
4009         emit->index_range.count = 1;
4010         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4011         emit->index_range.start_index = index;
4012         emit->index_range.size = 1;
4013         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4014      }
4015      else if (index !=
4016               (emit->index_range.start_index + emit->index_range.count)) {
4017         /* Output index is not contiguous with index range. We need to
4018          * emit current index_range first and then start recording next
4019          * index range.
4020          */
4021         emit_index_range_declaration(emit);
4022
4023         emit->index_range.count = 1;
4024         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4025         emit->index_range.start_index = index;
4026         emit->index_range.size = 1;
4027         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4028      }
4029      else {
4030         /* Since output index is contiguous with index range, increment
4031          * index range count.
4032          */
4033         emit->index_range.count++;
4034      }
4035   }
4036}
4037
4038
4039/**
4040 * Emit the declaration for the fragment depth output.
4041 */
4042static void
4043emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4044{
4045   VGPU10OpcodeToken0 opcode0;
4046   VGPU10OperandToken0 operand0;
4047   VGPU10NameToken name_token;
4048
4049   assert(emit->unit == PIPE_SHADER_FRAGMENT);
4050
4051   opcode0.value = operand0.value = name_token.value = 0;
4052
4053   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4054   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4055   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4056   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4057   operand0.mask = 0;
4058
4059   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4060}
4061
4062
4063/**
4064 * Emit the declaration for the fragment sample mask/coverage output.
4065 */
4066static void
4067emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4068{
4069   VGPU10OpcodeToken0 opcode0;
4070   VGPU10OperandToken0 operand0;
4071   VGPU10NameToken name_token;
4072
4073   assert(emit->unit == PIPE_SHADER_FRAGMENT);
4074   assert(emit->version >= 41);
4075
4076   opcode0.value = operand0.value = name_token.value = 0;
4077
4078   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4079   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4080   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4081   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4082   operand0.mask = 0;
4083
4084   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4085}
4086
4087
4088/**
4089 * Emit output declarations for fragment shader.
4090 */
4091static void
4092emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4093{
4094   unsigned int i;
4095
4096   for (i = 0; i < emit->info.num_outputs; i++) {
4097      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4098      const enum tgsi_semantic semantic_name =
4099         emit->info.output_semantic_name[i];
4100      const unsigned semantic_index = emit->info.output_semantic_index[i];
4101      unsigned index = i;
4102
4103      if (semantic_name == TGSI_SEMANTIC_COLOR) {
4104         assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4105
4106         emit->fs.color_out_index[semantic_index] = index;
4107
4108         emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4109                                              index + 1);
4110
4111         /* The semantic index is the shader's color output/buffer index */
4112         emit_output_declaration(emit,
4113                                 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4114                                 VGPU10_NAME_UNDEFINED,
4115                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4116                                 TRUE,
4117                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4118
4119         if (semantic_index == 0) {
4120            if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4121               /* Emit declarations for the additional color outputs
4122                * for broadcasting.
4123                */
4124               unsigned j;
4125               for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4126                  /* Allocate a new output index */
4127                  unsigned idx = emit->info.num_outputs + j - 1;
4128                  emit->fs.color_out_index[j] = idx;
4129                  emit_output_declaration(emit,
4130                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
4131                                        VGPU10_NAME_UNDEFINED,
4132                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4133                                        TRUE,
4134                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4135                  emit->info.output_semantic_index[idx] = j;
4136               }
4137
4138               emit->fs.num_color_outputs =
4139                     emit->key.fs.write_color0_to_n_cbufs;
4140            }
4141         }
4142      }
4143      else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4144         /* Fragment depth output */
4145         emit_fragdepth_output_declaration(emit);
4146      }
4147      else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4148         /* Sample mask output */
4149         emit_samplemask_output_declaration(emit);
4150      }
4151      else {
4152         assert(!"Bad output semantic name");
4153      }
4154   }
4155}
4156
4157
4158/**
4159 * Emit common output declaration for vertex processing.
4160 */
4161static void
4162emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4163                               unsigned index, unsigned writemask,
4164                               boolean addSignature)
4165{
4166   const enum tgsi_semantic semantic_name =
4167         emit->info.output_semantic_name[index];
4168   const unsigned semantic_index = emit->info.output_semantic_index[index];
4169   unsigned name, type;
4170   unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4171
4172   assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4173          emit->unit != PIPE_SHADER_COMPUTE);
4174
4175   switch (semantic_name) {
4176   case TGSI_SEMANTIC_POSITION:
4177      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4178         /* position will be declared in control point only */
4179         assert(emit->tcs.control_point_phase);
4180         type = VGPU10_OPCODE_DCL_OUTPUT;
4181         name = VGPU10_NAME_UNDEFINED;
4182         emit_output_declaration(emit, type, index, name, final_mask, TRUE,
4183                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4184         return;
4185      }
4186      else {
4187         type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4188         name = VGPU10_NAME_POSITION;
4189      }
4190      /* Save the index of the vertex position output register */
4191      emit->vposition.out_index = index;
4192      break;
4193   case TGSI_SEMANTIC_CLIPDIST:
4194      type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4195      name = VGPU10_NAME_CLIP_DISTANCE;
4196      /* save the starting index of the clip distance output register */
4197      if (semantic_index == 0)
4198         emit->clip_dist_out_index = index;
4199      final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4200      if (final_mask == 0x0)
4201         return; /* discard this do-nothing declaration */
4202      break;
4203   case TGSI_SEMANTIC_CLIPVERTEX:
4204      type = VGPU10_OPCODE_DCL_OUTPUT;
4205      name = VGPU10_NAME_UNDEFINED;
4206      emit->clip_vertex_out_index = index;
4207      break;
4208   default:
4209      /* generic output */
4210      type = VGPU10_OPCODE_DCL_OUTPUT;
4211      name = VGPU10_NAME_UNDEFINED;
4212   }
4213
4214   emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4215                           map_tgsi_semantic_to_sgn_name(semantic_name));
4216}
4217
4218
4219/**
4220 * Emit declaration for outputs in vertex shader.
4221 */
4222static void
4223emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4224{
4225   unsigned i;
4226   for (i = 0; i < emit->info.num_outputs; i++) {
4227      emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4228   }
4229}
4230
4231
4232/**
4233 * A helper function to determine the writemask for an output
4234 * for the specified stream.
4235 */
4236static unsigned
4237output_writemask_for_stream(unsigned stream, ubyte output_streams,
4238                                 ubyte output_usagemask)
4239{
4240   unsigned i;
4241   unsigned writemask = 0;
4242
4243   for (i = 0; i < 4; i++) {
4244      if ((output_streams & 0x3) == stream)
4245         writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4246      output_streams >>= 2;
4247   }
4248   return writemask & output_usagemask;
4249}
4250
4251
4252/**
4253 * Emit declaration for outputs in geometry shader.
4254 */
4255static void
4256emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4257{
4258   unsigned i;
4259   VGPU10OpcodeToken0 opcode0;
4260   unsigned numStreamsSupported = 1;
4261   int s;
4262
4263   if (emit->version >= 50) {
4264      numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4265   }
4266
4267   /**
4268    * Start emitting from the last stream first, so we end with
4269    * stream 0, so any of the auxiliary output declarations will
4270    * go to stream 0.
4271    */
4272   for (s = numStreamsSupported-1; s >= 0; s--) {
4273
4274      if (emit->info.num_stream_output_components[s] == 0)
4275         continue;
4276
4277      if (emit->version >= 50) {
4278         /* DCL_STREAM stream */
4279         begin_emit_instruction(emit);
4280         emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4281         emit_stream_register(emit, s);
4282         end_emit_instruction(emit);
4283      }
4284
4285      /* emit output primitive topology declaration */
4286      opcode0.value = 0;
4287      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4288      opcode0.primitiveTopology = emit->gs.prim_topology;
4289      emit_property_instruction(emit, opcode0, 0, 0);
4290
4291      for (i = 0; i < emit->info.num_outputs; i++) {
4292         unsigned writemask;
4293
4294         /* find out the writemask for this stream */
4295         writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4296                                                 emit->output_usage_mask[i]);
4297
4298         if (writemask) {
4299            enum tgsi_semantic semantic_name =
4300               emit->info.output_semantic_name[i];
4301
4302            /* TODO: Still need to take care of a special case where a
4303             *       single varying spans across multiple output registers.
4304             */
4305            switch(semantic_name) {
4306            case TGSI_SEMANTIC_PRIMID:
4307               emit_output_declaration(emit,
4308                                       VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4309                                       VGPU10_NAME_PRIMITIVE_ID,
4310                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4311                                       FALSE,
4312                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4313               break;
4314            case TGSI_SEMANTIC_LAYER:
4315               emit_output_declaration(emit,
4316                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4317                                       VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4318                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
4319                                       FALSE,
4320                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4321               break;
4322            case TGSI_SEMANTIC_VIEWPORT_INDEX:
4323               emit_output_declaration(emit,
4324                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4325                                       VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4326                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
4327                                       FALSE,
4328                                       map_tgsi_semantic_to_sgn_name(semantic_name));
4329               emit->gs.viewport_index_out_index = i;
4330               break;
4331            default:
4332               emit_vertex_output_declaration(emit, i, writemask, FALSE);
4333            }
4334         }
4335      }
4336   }
4337
4338   /* For geometry shader outputs, it is possible the same register is
4339    * declared multiple times for different streams. So to avoid
4340    * redundant signature entries, geometry shader output signature is done
4341    * outside of the declaration.
4342    */
4343   struct svga_shader_signature *sgn = &emit->signature;
4344   SVGA3dDXShaderSignatureEntry *sgnEntry;
4345
4346   for (i = 0; i < emit->info.num_outputs; i++) {
4347      if (emit->output_usage_mask[i]) {
4348         enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4349
4350         sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4351         set_shader_signature_entry(sgnEntry, i,
4352                                    map_tgsi_semantic_to_sgn_name(sem_name),
4353                                    emit->output_usage_mask[i],
4354                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4355                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4356      }
4357   }
4358}
4359
4360
4361/**
4362 * Emit the declaration for the tess inner/outer output.
4363 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4364 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4365 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4366 */
4367static void
4368emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4369                           unsigned index, unsigned opcodeType,
4370                           unsigned operandType, VGPU10_SYSTEM_NAME name,
4371                           SVGA3dDXSignatureSemanticName sgnName)
4372{
4373   VGPU10OpcodeToken0 opcode0;
4374   VGPU10OperandToken0 operand0;
4375   VGPU10NameToken name_token;
4376
4377   assert(emit->version >= 50);
4378   assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4379          (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4380           name == VGPU10_NAME_UNDEFINED));
4381   assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4382
4383   assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4384          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4385
4386   opcode0.value = operand0.value = name_token.value = 0;
4387
4388   opcode0.opcodeType = opcodeType;
4389   operand0.operandType = operandType;
4390   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4391   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4392   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4393   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4394   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4395
4396   name_token.name = name;
4397   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4398
4399   /* Capture patch constant signature */
4400   struct svga_shader_signature *sgn = &emit->signature;
4401   SVGA3dDXShaderSignatureEntry *sgnEntry =
4402      &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4403   set_shader_signature_entry(sgnEntry, index,
4404                              sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4405                              SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4406                              SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4407}
4408
4409
4410/**
4411 * Emit output declarations for tessellation control shader.
4412 */
4413static void
4414emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4415{
4416   unsigned int i;
4417   unsigned outputIndex = emit->num_outputs;
4418   struct svga_shader_signature *sgn = &emit->signature;
4419
4420   /**
4421    * Initialize patch_generic_out_count so it won't be counted twice
4422    * since this function is called twice, one for control point phase
4423    * and another time for patch constant phase.
4424    */
4425   emit->tcs.patch_generic_out_count = 0;
4426
4427   for (i = 0; i < emit->info.num_outputs; i++) {
4428      unsigned index = i;
4429      const enum tgsi_semantic semantic_name =
4430         emit->info.output_semantic_name[i];
4431
4432      switch (semantic_name) {
4433      case TGSI_SEMANTIC_TESSINNER:
4434         emit->tcs.inner.tgsi_index = i;
4435
4436         /* skip per-patch output declarations in control point phase */
4437         if (emit->tcs.control_point_phase)
4438            break;
4439
4440         emit->tcs.inner.out_index = outputIndex;
4441         switch (emit->key.tcs.prim_mode) {
4442         case PIPE_PRIM_QUADS:
4443            emit_tesslevel_declaration(emit, outputIndex++,
4444               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4445               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4446               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4447
4448            emit_tesslevel_declaration(emit, outputIndex++,
4449               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4450               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4451               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4452            break;
4453         case PIPE_PRIM_TRIANGLES:
4454            emit_tesslevel_declaration(emit, outputIndex++,
4455               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4456               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4457               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4458            break;
4459         case PIPE_PRIM_LINES:
4460            break;
4461         default:
4462            debug_printf("Unsupported primitive type");
4463         }
4464         break;
4465
4466      case TGSI_SEMANTIC_TESSOUTER:
4467         emit->tcs.outer.tgsi_index = i;
4468
4469         /* skip per-patch output declarations in control point phase */
4470         if (emit->tcs.control_point_phase)
4471            break;
4472
4473         emit->tcs.outer.out_index = outputIndex;
4474         switch (emit->key.tcs.prim_mode) {
4475         case PIPE_PRIM_QUADS:
4476            for (int j = 0; j < 4; j++) {
4477               emit_tesslevel_declaration(emit, outputIndex++,
4478                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4479                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4480                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4481            }
4482            break;
4483         case PIPE_PRIM_TRIANGLES:
4484            for (int j = 0; j < 3; j++) {
4485               emit_tesslevel_declaration(emit, outputIndex++,
4486                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4487                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4488                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4489            }
4490            break;
4491         case PIPE_PRIM_LINES:
4492            for (int j = 0; j < 2; j++) {
4493               emit_tesslevel_declaration(emit, outputIndex++,
4494                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4495                  VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4496                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4497            }
4498            break;
4499         default:
4500            debug_printf("Unsupported primitive type");
4501         }
4502         break;
4503
4504      case TGSI_SEMANTIC_PATCH:
4505         if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4506            emit->tcs.patch_generic_out_index= i;
4507         emit->tcs.patch_generic_out_count++;
4508
4509         /* skip per-patch output declarations in control point phase */
4510         if (emit->tcs.control_point_phase)
4511            break;
4512
4513         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4514                                 VGPU10_NAME_UNDEFINED,
4515                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4516                                 FALSE,
4517                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4518
4519         SVGA3dDXShaderSignatureEntry *sgnEntry =
4520            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4521         set_shader_signature_entry(sgnEntry, index,
4522                                    map_tgsi_semantic_to_sgn_name(semantic_name),
4523                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4524                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4525                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4526
4527         break;
4528
4529      default:
4530         /* save the starting index of control point outputs */
4531         if (emit->tcs.control_point_out_index == INVALID_INDEX)
4532            emit->tcs.control_point_out_index = i;
4533         emit->tcs.control_point_out_count++;
4534
4535         /* skip control point output declarations in patch constant phase */
4536         if (!emit->tcs.control_point_phase)
4537            break;
4538
4539         emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4540                                        TRUE);
4541
4542      }
4543   }
4544
4545   if (emit->tcs.control_point_phase) {
4546      /**
4547       * Add missing control point output in control point phase.
4548       */
4549      if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4550         /* use register index after tessellation factors */
4551         switch (emit->key.tcs.prim_mode) {
4552         case PIPE_PRIM_QUADS:
4553            emit->tcs.control_point_out_index = outputIndex + 6;
4554            break;
4555         case PIPE_PRIM_TRIANGLES:
4556            emit->tcs.control_point_out_index = outputIndex + 4;
4557            break;
4558         default:
4559            emit->tcs.control_point_out_index = outputIndex + 2;
4560            break;
4561         }
4562         emit->tcs.control_point_out_count++;
4563         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4564                                 emit->tcs.control_point_out_index,
4565                                 VGPU10_NAME_POSITION,
4566                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4567                                 TRUE,
4568                                 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4569
4570         /* If tcs does not output any control point output,
4571          * we can end the hull shader control point phase here
4572          * after emitting the default control point output.
4573          */
4574         emit->skip_instruction = TRUE;
4575      }
4576   }
4577   else {
4578      if (emit->tcs.outer.out_index == INVALID_INDEX) {
4579         /* since the TCS did not declare out outer tess level output register,
4580          * we declare it here for patch constant phase only.
4581          */
4582         emit->tcs.outer.out_index = outputIndex;
4583         if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4584            for (int i = 0; i < 4; i++) {
4585               emit_tesslevel_declaration(emit, outputIndex++,
4586                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4587                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4588                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4589            }
4590         }
4591         else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4592            for (int i = 0; i < 3; i++) {
4593               emit_tesslevel_declaration(emit, outputIndex++,
4594                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4595                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4596                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4597            }
4598         }
4599      }
4600
4601      if (emit->tcs.inner.out_index == INVALID_INDEX) {
4602         /* since the TCS did not declare out inner tess level output register,
4603          * we declare it here
4604          */
4605         emit->tcs.inner.out_index = outputIndex;
4606         if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4607            emit_tesslevel_declaration(emit, outputIndex++,
4608               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4609               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4610               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4611            emit_tesslevel_declaration(emit, outputIndex++,
4612               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4613               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4614               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4615         }
4616         else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4617            emit_tesslevel_declaration(emit, outputIndex++,
4618               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4619               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4620               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4621         }
4622      }
4623   }
4624   emit->num_outputs = outputIndex;
4625}
4626
4627
4628/**
4629 * Emit output declarations for tessellation evaluation shader.
4630 */
4631static void
4632emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4633{
4634   unsigned int i;
4635
4636   for (i = 0; i < emit->info.num_outputs; i++) {
4637      emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4638   }
4639}
4640
4641
4642/**
4643 * Emit the declaration for a system value input/output.
4644 */
4645static void
4646emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4647                              enum tgsi_semantic semantic_name, unsigned index)
4648{
4649   switch (semantic_name) {
4650   case TGSI_SEMANTIC_INSTANCEID:
4651      index = alloc_system_value_index(emit, index);
4652      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4653                             VGPU10_OPERAND_TYPE_INPUT,
4654                             VGPU10_OPERAND_INDEX_1D,
4655                             index, 1,
4656                             VGPU10_NAME_INSTANCE_ID,
4657                             VGPU10_OPERAND_4_COMPONENT,
4658                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4659                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4660                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4661                             map_tgsi_semantic_to_sgn_name(semantic_name));
4662      break;
4663   case TGSI_SEMANTIC_VERTEXID:
4664      emit->vs.vertex_id_sys_index = index;
4665      index = alloc_system_value_index(emit, index);
4666      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4667                             VGPU10_OPERAND_TYPE_INPUT,
4668                             VGPU10_OPERAND_INDEX_1D,
4669                             index, 1,
4670                             VGPU10_NAME_VERTEX_ID,
4671                             VGPU10_OPERAND_4_COMPONENT,
4672                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4673                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4674                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4675                             map_tgsi_semantic_to_sgn_name(semantic_name));
4676      break;
4677   case TGSI_SEMANTIC_SAMPLEID:
4678      assert(emit->unit == PIPE_SHADER_FRAGMENT);
4679      emit->fs.sample_id_sys_index = index;
4680      index = alloc_system_value_index(emit, index);
4681      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4682                             VGPU10_OPERAND_TYPE_INPUT,
4683                             VGPU10_OPERAND_INDEX_1D,
4684                             index, 1,
4685                             VGPU10_NAME_SAMPLE_INDEX,
4686                             VGPU10_OPERAND_4_COMPONENT,
4687                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4688                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
4689                             VGPU10_INTERPOLATION_CONSTANT, TRUE,
4690                             map_tgsi_semantic_to_sgn_name(semantic_name));
4691      break;
4692   case TGSI_SEMANTIC_SAMPLEPOS:
4693      /* This system value contains the position of the current sample
4694       * when using per-sample shading.  We implement this by calling
4695       * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4696       * index as the argument.  See emit_sample_position_instructions().
4697       */
4698      assert(emit->version >= 41);
4699      emit->fs.sample_pos_sys_index = index;
4700      index = alloc_system_value_index(emit, index);
4701      break;
4702   case TGSI_SEMANTIC_INVOCATIONID:
4703      /* Note: invocation id input is mapped to different register depending
4704       * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4705       * In TCS, it will be mapped to vOutputControlPointID#.
4706       * Since in both cases, the mapped name is unique rather than
4707       * just a generic input name ("v#"), so there is no need to remap
4708       * the index value.
4709       */
4710      assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4711             emit->unit == PIPE_SHADER_TESS_CTRL);
4712      assert(emit->version >= 50);
4713
4714      if (emit->unit == PIPE_SHADER_GEOMETRY) {
4715         emit->gs.invocation_id_sys_index = index;
4716         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4717                                VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4718                                VGPU10_OPERAND_INDEX_0D,
4719                                index, 1,
4720                                VGPU10_NAME_UNDEFINED,
4721                                VGPU10_OPERAND_0_COMPONENT,
4722                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4723                                0,
4724                                VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4725                                SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4726      } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4727         /* The emission of the control point id will be done
4728          * in the control point phase in emit_hull_shader_control_point_phase().
4729          */
4730         emit->tcs.invocation_id_sys_index = index;
4731      }
4732      break;
4733   case TGSI_SEMANTIC_SAMPLEMASK:
4734      /* Note: the PS sample mask input has a unique name ("vCoverage#")
4735       * rather than just a generic input name ("v#") so no need to remap the
4736       * index value.
4737       */
4738      assert(emit->unit == PIPE_SHADER_FRAGMENT);
4739      assert(emit->version >= 50);
4740      emit->fs.sample_mask_in_sys_index = index;
4741      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4742                             VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4743                             VGPU10_OPERAND_INDEX_0D,
4744                             index, 1,
4745                             VGPU10_NAME_UNDEFINED,
4746                             VGPU10_OPERAND_1_COMPONENT,
4747                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4748                             0,
4749                             VGPU10_INTERPOLATION_CONSTANT, TRUE,
4750                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4751      break;
4752   case TGSI_SEMANTIC_TESSCOORD:
4753      assert(emit->version >= 50);
4754
4755      unsigned usageMask = 0;
4756
4757      if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4758         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4759      }
4760      else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4761               emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4762         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4763      }
4764
4765      emit->tes.tesscoord_sys_index = index;
4766      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4767                             VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4768                             VGPU10_OPERAND_INDEX_0D,
4769                             index, 1,
4770                             VGPU10_NAME_UNDEFINED,
4771                             VGPU10_OPERAND_4_COMPONENT,
4772                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4773                             usageMask,
4774                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4775                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4776      break;
4777   case TGSI_SEMANTIC_TESSINNER:
4778      assert(emit->version >= 50);
4779      emit->tes.inner.tgsi_index = index;
4780      break;
4781   case TGSI_SEMANTIC_TESSOUTER:
4782      assert(emit->version >= 50);
4783      emit->tes.outer.tgsi_index = index;
4784      break;
4785   case TGSI_SEMANTIC_VERTICESIN:
4786      assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4787      assert(emit->version >= 50);
4788
4789      /* save the system value index */
4790      emit->tcs.vertices_per_patch_index = index;
4791      break;
4792   case TGSI_SEMANTIC_PRIMID:
4793      assert(emit->version >= 50);
4794      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4795         emit->tcs.prim_id_index = index;
4796      }
4797      else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4798         emit->tes.prim_id_index = index;
4799         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4800                                VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4801                                VGPU10_OPERAND_INDEX_0D,
4802                                index, 1,
4803                                VGPU10_NAME_UNDEFINED,
4804                                VGPU10_OPERAND_0_COMPONENT,
4805                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4806                                0,
4807                                VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4808                                map_tgsi_semantic_to_sgn_name(semantic_name));
4809      }
4810      break;
4811   case TGSI_SEMANTIC_THREAD_ID:
4812      assert(emit->unit >= PIPE_SHADER_COMPUTE);
4813      assert(emit->version >= 50);
4814      emit->cs.thread_id_index = index;
4815      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4816                             VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
4817                             VGPU10_OPERAND_INDEX_0D,
4818                             index, 1,
4819                             VGPU10_NAME_UNDEFINED,
4820                             VGPU10_OPERAND_4_COMPONENT,
4821                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4822                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4823                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4824                             map_tgsi_semantic_to_sgn_name(semantic_name));
4825      break;
4826   case TGSI_SEMANTIC_BLOCK_ID:
4827      assert(emit->unit >= PIPE_SHADER_COMPUTE);
4828      assert(emit->version >= 50);
4829      emit->cs.block_id_index = index;
4830      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4831                             VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
4832                             VGPU10_OPERAND_INDEX_0D,
4833                             index, 1,
4834                             VGPU10_NAME_UNDEFINED,
4835                             VGPU10_OPERAND_4_COMPONENT,
4836                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4837                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4838                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4839                             map_tgsi_semantic_to_sgn_name(semantic_name));
4840      break;
4841   case TGSI_SEMANTIC_GRID_SIZE:
4842      assert(emit->unit == PIPE_SHADER_COMPUTE);
4843      assert(emit->version >= 50);
4844      emit->cs.grid_size.tgsi_index = index;
4845      break;
4846   default:
4847      debug_printf("unexpected system value semantic index %u / %s\n",
4848                   semantic_name, tgsi_semantic_names[semantic_name]);
4849   }
4850}
4851
4852/**
4853 * Translate a TGSI declaration to VGPU10.
4854 */
4855static boolean
4856emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4857                        const struct tgsi_full_declaration *decl)
4858{
4859   switch (decl->Declaration.File) {
4860   case TGSI_FILE_INPUT:
4861      /* do nothing - see emit_input_declarations() */
4862      return TRUE;
4863
4864   case TGSI_FILE_OUTPUT:
4865      assert(decl->Range.First == decl->Range.Last);
4866      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4867      return TRUE;
4868
4869   case TGSI_FILE_TEMPORARY:
4870      /* Don't declare the temps here.  Just keep track of how many
4871       * and emit the declaration later.
4872       */
4873      if (decl->Declaration.Array) {
4874         /* Indexed temporary array.  Save the start index of the array
4875          * and the size of the array.
4876          */
4877         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4878         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4879
4880         /* Save this array so we can emit the declaration for it later */
4881         create_temp_array(emit, arrayID, decl->Range.First,
4882                           decl->Range.Last - decl->Range.First + 1,
4883                           decl->Range.First);
4884      }
4885
4886      /* for all temps, indexed or not, keep track of highest index */
4887      emit->num_shader_temps = MAX2(emit->num_shader_temps,
4888                                    decl->Range.Last + 1);
4889      return TRUE;
4890
4891   case TGSI_FILE_CONSTANT:
4892      /* Don't declare constants here.  Just keep track and emit later. */
4893      {
4894         unsigned constbuf = 0, num_consts;
4895         if (decl->Declaration.Dimension) {
4896            constbuf = decl->Dim.Index2D;
4897         }
4898         /* We throw an assertion here when, in fact, the shader should never
4899          * have linked due to constbuf index out of bounds, so we shouldn't
4900          * have reached here.
4901          */
4902         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4903
4904         num_consts = MAX2(emit->num_shader_consts[constbuf],
4905                           decl->Range.Last + 1);
4906
4907         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4908            debug_printf("Warning: constant buffer is declared to size [%u]"
4909                         " but [%u] is the limit.\n",
4910                         num_consts,
4911                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4912            emit->register_overflow = TRUE;
4913         }
4914         /* The linker doesn't enforce the max UBO size so we clamp here */
4915         emit->num_shader_consts[constbuf] =
4916            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4917      }
4918      return TRUE;
4919
4920   case TGSI_FILE_IMMEDIATE:
4921      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4922      return FALSE;
4923
4924   case TGSI_FILE_SYSTEM_VALUE:
4925      emit_system_value_declaration(emit, decl->Semantic.Name,
4926                                    decl->Range.First);
4927      return TRUE;
4928
4929   case TGSI_FILE_SAMPLER:
4930      /* Don't declare samplers here.  Just keep track and emit later. */
4931      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4932      return TRUE;
4933
4934#if 0
4935   case TGSI_FILE_RESOURCE:
4936      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4937      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4938      assert(!"TGSI_FILE_RESOURCE not handled yet");
4939      return FALSE;
4940#endif
4941
4942   case TGSI_FILE_ADDRESS:
4943      emit->num_address_regs = MAX2(emit->num_address_regs,
4944                                    decl->Range.Last + 1);
4945      return TRUE;
4946
4947   case TGSI_FILE_SAMPLER_VIEW:
4948      {
4949         unsigned unit = decl->Range.First;
4950         assert(decl->Range.First == decl->Range.Last);
4951         emit->sampler_target[unit] = decl->SamplerView.Resource;
4952
4953         /* Note: we can ignore YZW return types for now */
4954         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4955         emit->sampler_view[unit] = TRUE;
4956      }
4957      return TRUE;
4958
4959   case TGSI_FILE_IMAGE:
4960      {
4961         unsigned unit = decl->Range.First;
4962         assert(decl->Range.First == decl->Range.Last);
4963         assert(unit < PIPE_MAX_SHADER_IMAGES);
4964         emit->image[unit] = decl->Image;
4965         emit->image_mask |= 1 << unit;
4966         emit->num_images++;
4967      }
4968      return TRUE;
4969
4970   case TGSI_FILE_HW_ATOMIC:
4971      /* Declare the atomic buffer if it is not already declared. */
4972      if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
4973         emit->num_atomic_bufs++;
4974         emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
4975      }
4976
4977      /* Remember the maximum atomic counter index encountered */
4978      emit->max_atomic_counter_index =
4979         MAX2(emit->max_atomic_counter_index, decl->Range.Last);
4980      return TRUE;
4981
4982   case TGSI_FILE_MEMORY:
4983      /* Record memory has been used. */
4984      if (emit->unit == PIPE_SHADER_COMPUTE &&
4985          decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
4986         emit->cs.shared_memory_declared = TRUE;
4987      }
4988
4989      return TRUE;
4990
4991   case TGSI_FILE_BUFFER:
4992      assert(emit->version >= 50);
4993      emit->num_shader_bufs++;
4994      return TRUE;
4995
4996   default:
4997      assert(!"Unexpected type of declaration");
4998      return FALSE;
4999   }
5000}
5001
5002
5003/**
5004 * Emit input declarations for fragment shader.
5005 */
5006static void
5007emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
5008{
5009   unsigned i;
5010
5011   for (i = 0; i < emit->linkage.num_inputs; i++) {
5012      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5013      unsigned usage_mask = emit->info.input_usage_mask[i];
5014      unsigned index = emit->linkage.input_map[i];
5015      unsigned type, interpolationMode, name;
5016      unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
5017
5018      if (usage_mask == 0)
5019         continue;  /* register is not actually used */
5020
5021      if (semantic_name == TGSI_SEMANTIC_POSITION) {
5022         /* fragment position input */
5023         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5024         interpolationMode = VGPU10_INTERPOLATION_LINEAR;
5025         name = VGPU10_NAME_POSITION;
5026         if (usage_mask & TGSI_WRITEMASK_W) {
5027            /* we need to replace use of 'w' with '1/w' */
5028            emit->fs.fragcoord_input_index = i;
5029         }
5030      }
5031      else if (semantic_name == TGSI_SEMANTIC_FACE) {
5032         /* fragment front-facing input */
5033         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5034         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5035         name = VGPU10_NAME_IS_FRONT_FACE;
5036         emit->fs.face_input_index = i;
5037      }
5038      else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5039         /* primitive ID */
5040         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5041         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5042         name = VGPU10_NAME_PRIMITIVE_ID;
5043      }
5044      else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
5045         /* sample index / ID */
5046         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5047         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5048         name = VGPU10_NAME_SAMPLE_INDEX;
5049      }
5050      else if (semantic_name == TGSI_SEMANTIC_LAYER) {
5051         /* render target array index */
5052         if (emit->key.fs.layer_to_zero) {
5053            /**
5054             * The shader from the previous stage does not write to layer,
5055             * so reading the layer index in fragment shader should return 0.
5056             */
5057            emit->fs.layer_input_index = i;
5058            continue;
5059         } else {
5060            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5061            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5062            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
5063            mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5064         }
5065      }
5066      else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
5067         /* viewport index */
5068         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5069         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5070         name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
5071         mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5072      }
5073      else {
5074         /* general fragment input */
5075         type = VGPU10_OPCODE_DCL_INPUT_PS;
5076         interpolationMode =
5077               translate_interpolation(emit,
5078                                       emit->info.input_interpolate[i],
5079                                       emit->info.input_interpolate_loc[i]);
5080
5081         /* keeps track if flat interpolation mode is being used */
5082         emit->uses_flat_interp = emit->uses_flat_interp ||
5083               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
5084
5085         name = VGPU10_NAME_UNDEFINED;
5086      }
5087
5088      emit_input_declaration(emit, type,
5089                             VGPU10_OPERAND_TYPE_INPUT,
5090                             VGPU10_OPERAND_INDEX_1D, index, 1,
5091                             name,
5092                             VGPU10_OPERAND_4_COMPONENT,
5093                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5094                             mask,
5095                             interpolationMode, TRUE,
5096                             map_tgsi_semantic_to_sgn_name(semantic_name));
5097   }
5098}
5099
5100
5101/**
5102 * Emit input declarations for vertex shader.
5103 */
5104static void
5105emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5106{
5107   unsigned i;
5108
5109   for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5110      unsigned usage_mask = emit->info.input_usage_mask[i];
5111      unsigned index = i;
5112
5113      if (usage_mask == 0)
5114         continue;  /* register is not actually used */
5115
5116      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5117                             VGPU10_OPERAND_TYPE_INPUT,
5118                             VGPU10_OPERAND_INDEX_1D, index, 1,
5119                             VGPU10_NAME_UNDEFINED,
5120                             VGPU10_OPERAND_4_COMPONENT,
5121                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5122                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5123                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
5124                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5125   }
5126}
5127
5128
5129/**
5130 * Emit input declarations for geometry shader.
5131 */
5132static void
5133emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5134{
5135   unsigned i;
5136
5137   for (i = 0; i < emit->info.num_inputs; i++) {
5138      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5139      unsigned usage_mask = emit->info.input_usage_mask[i];
5140      unsigned index = emit->linkage.input_map[i];
5141      unsigned opcodeType, operandType;
5142      unsigned numComp, selMode;
5143      unsigned name;
5144      unsigned dim;
5145
5146      if (usage_mask == 0)
5147         continue;  /* register is not actually used */
5148
5149      opcodeType = VGPU10_OPCODE_DCL_INPUT;
5150      operandType = VGPU10_OPERAND_TYPE_INPUT;
5151      numComp = VGPU10_OPERAND_4_COMPONENT;
5152      selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5153      name = VGPU10_NAME_UNDEFINED;
5154
5155      /* all geometry shader inputs are two dimensional except
5156       * gl_PrimitiveID
5157       */
5158      dim = VGPU10_OPERAND_INDEX_2D;
5159
5160      if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5161         /* Primitive ID */
5162         operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5163         dim = VGPU10_OPERAND_INDEX_0D;
5164         numComp = VGPU10_OPERAND_0_COMPONENT;
5165         selMode = 0;
5166
5167         /* also save the register index so we can check for
5168          * primitive id when emit src register. We need to modify the
5169          * operand type, index dimension when emit primitive id src reg.
5170          */
5171          emit->gs.prim_id_index = i;
5172      }
5173      else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5174         /* vertex position input */
5175         opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5176         name = VGPU10_NAME_POSITION;
5177      }
5178
5179      emit_input_declaration(emit, opcodeType, operandType,
5180                             dim, index,
5181                             emit->gs.input_size,
5182                             name,
5183                             numComp, selMode,
5184                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5185                             VGPU10_INTERPOLATION_UNDEFINED, TRUE,
5186                             map_tgsi_semantic_to_sgn_name(semantic_name));
5187   }
5188}
5189
5190
5191/**
5192 * Emit input declarations for tessellation control shader.
5193 */
5194static void
5195emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
5196{
5197   unsigned i;
5198   unsigned size = emit->key.tcs.vertices_per_patch;
5199   unsigned indicesMask = 0;
5200   boolean addSignature = TRUE;
5201
5202   if (!emit->tcs.control_point_phase)
5203      addSignature = emit->tcs.fork_phase_add_signature;
5204
5205   for (i = 0; i < emit->info.num_inputs; i++) {
5206      unsigned usage_mask = emit->info.input_usage_mask[i];
5207      unsigned index = emit->linkage.input_map[i];
5208      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5209      VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
5210      VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
5211      SVGA3dDXSignatureSemanticName sgn_name =
5212         map_tgsi_semantic_to_sgn_name(semantic_name);
5213
5214      /* indices that are declared */
5215      indicesMask |= 1 << index;
5216
5217      if (semantic_name == TGSI_SEMANTIC_POSITION ||
5218          index == emit->linkage.position_index) {
5219         /* save the input control point index for later use */
5220         emit->tcs.control_point_input_index = i;
5221      }
5222      else if (usage_mask == 0) {
5223         continue;  /* register is not actually used */
5224      }
5225      else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
5226         /* The shadow copy is being used here. So set the signature name
5227          * to UNDEFINED.
5228          */
5229         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5230      }
5231
5232      /* input control points in the patch constant phase are emitted in the
5233       * vicp register rather than the v register.
5234       */
5235      if (!emit->tcs.control_point_phase) {
5236         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5237      }
5238
5239      /* Tessellation control shader inputs are two dimensional.
5240       * The array size is determined by the patch vertex count.
5241       */
5242      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5243                             operandType,
5244                             VGPU10_OPERAND_INDEX_2D,
5245                             index, size, name,
5246                             VGPU10_OPERAND_4_COMPONENT,
5247                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5248                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5249                             VGPU10_INTERPOLATION_UNDEFINED,
5250                             addSignature, sgn_name);
5251   }
5252
5253   if (emit->tcs.control_point_phase) {
5254
5255      /* Also add an address register for the indirection to the
5256       * input control points
5257       */
5258      emit->tcs.control_point_addr_index = emit->num_address_regs++;
5259   }
5260}
5261
5262
5263static void
5264emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5265{
5266
5267   /* In tcs, tess factors are emitted as extra outputs.
5268    * The starting register index for the tess factors is captured
5269    * in the compile key.
5270    */
5271   unsigned inputIndex = emit->key.tes.tessfactor_index;
5272
5273   if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
5274      if (emit->key.tes.need_tessouter) {
5275         emit->tes.outer.in_index = inputIndex;
5276         for (int i = 0; i < 4; i++) {
5277            emit_tesslevel_declaration(emit, inputIndex++,
5278               VGPU10_OPCODE_DCL_INPUT_SIV,
5279               VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5280               VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5281               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5282         }
5283      }
5284
5285      if (emit->key.tes.need_tessinner) {
5286         emit->tes.inner.in_index = inputIndex;
5287         emit_tesslevel_declaration(emit, inputIndex++,
5288            VGPU10_OPCODE_DCL_INPUT_SIV,
5289            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5290            VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5291            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5292
5293         emit_tesslevel_declaration(emit, inputIndex++,
5294            VGPU10_OPCODE_DCL_INPUT_SIV,
5295            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5296            VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5297            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5298      }
5299   }
5300   else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
5301      if (emit->key.tes.need_tessouter) {
5302         emit->tes.outer.in_index = inputIndex;
5303         for (int i = 0; i < 3; i++) {
5304            emit_tesslevel_declaration(emit, inputIndex++,
5305               VGPU10_OPCODE_DCL_INPUT_SIV,
5306               VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5307               VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5308               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5309         }
5310      }
5311
5312      if (emit->key.tes.need_tessinner) {
5313         emit->tes.inner.in_index = inputIndex;
5314         emit_tesslevel_declaration(emit, inputIndex++,
5315            VGPU10_OPCODE_DCL_INPUT_SIV,
5316            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5317            VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5318            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5319      }
5320   }
5321   else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5322      if (emit->key.tes.need_tessouter) {
5323         emit->tes.outer.in_index = inputIndex;
5324         emit_tesslevel_declaration(emit, inputIndex++,
5325            VGPU10_OPCODE_DCL_INPUT_SIV,
5326            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5327            VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5328            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5329
5330         emit_tesslevel_declaration(emit, inputIndex++,
5331            VGPU10_OPCODE_DCL_INPUT_SIV,
5332            VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5333            VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5334            SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5335      }
5336   }
5337}
5338
5339
5340/**
5341 * Emit input declarations for tessellation evaluation shader.
5342 */
5343static void
5344emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5345{
5346   unsigned i;
5347
5348   for (i = 0; i < emit->info.num_inputs; i++) {
5349      unsigned usage_mask = emit->info.input_usage_mask[i];
5350      unsigned index = emit->linkage.input_map[i];
5351      unsigned size;
5352      const enum tgsi_semantic semantic_name =
5353         emit->info.input_semantic_name[i];
5354      SVGA3dDXSignatureSemanticName sgn_name;
5355      VGPU10_OPERAND_TYPE operandType;
5356      VGPU10_OPERAND_INDEX_DIMENSION dim;
5357
5358      if (usage_mask == 0)
5359         usage_mask = 1;  /* at least set usage mask to one */
5360
5361      if (semantic_name == TGSI_SEMANTIC_PATCH) {
5362         operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5363         dim = VGPU10_OPERAND_INDEX_1D;
5364         size = 1;
5365         sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5366      }
5367      else {
5368         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5369         dim = VGPU10_OPERAND_INDEX_2D;
5370         size = emit->key.tes.vertices_per_patch;
5371         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5372      }
5373
5374      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5375                             dim, index, size, VGPU10_NAME_UNDEFINED,
5376                             VGPU10_OPERAND_4_COMPONENT,
5377                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5378                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5379                             VGPU10_INTERPOLATION_UNDEFINED,
5380                             TRUE, sgn_name);
5381   }
5382
5383   emit_tessfactor_input_declarations(emit);
5384
5385   /* DX spec requires DS input controlpoint/patch-constant signatures to match
5386    * the HS output controlpoint/patch-constant signatures exactly.
5387    * Add missing input declarations even if they are not used in the shader.
5388    */
5389   if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5390      struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5391      for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5392
5393          /* If a tcs output does not have a corresponding input register in
5394           * tes, add one.
5395           */
5396          if (emit->linkage.prevShader.output_map[i] >
5397              emit->linkage.input_map_max) {
5398             const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5399
5400             if (sem_name == TGSI_SEMANTIC_PATCH) {
5401                emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5402                                       VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5403                                       VGPU10_OPERAND_INDEX_1D,
5404                                       i, 1, VGPU10_NAME_UNDEFINED,
5405                                       VGPU10_OPERAND_4_COMPONENT,
5406                                       VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5407                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5408                                       VGPU10_INTERPOLATION_UNDEFINED,
5409                                       TRUE,
5410                                       map_tgsi_semantic_to_sgn_name(sem_name));
5411
5412             } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5413                        sem_name != TGSI_SEMANTIC_TESSOUTER) {
5414                emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5415                                       VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5416                                       VGPU10_OPERAND_INDEX_2D,
5417                                       i, emit->key.tes.vertices_per_patch,
5418                                       VGPU10_NAME_UNDEFINED,
5419                                       VGPU10_OPERAND_4_COMPONENT,
5420                                       VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5421                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5422                                       VGPU10_INTERPOLATION_UNDEFINED,
5423                                       TRUE,
5424                                       map_tgsi_semantic_to_sgn_name(sem_name));
5425             }
5426             /* tessellation factors are taken care of in
5427              * emit_tessfactor_input_declarations().
5428              */
5429         }
5430      }
5431   }
5432}
5433
5434
5435/**
5436 * Emit all input declarations.
5437 */
5438static boolean
5439emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5440{
5441   emit->index_range.required =
5442      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5443
5444   switch (emit->unit) {
5445   case PIPE_SHADER_FRAGMENT:
5446      emit_fs_input_declarations(emit);
5447      break;
5448   case PIPE_SHADER_GEOMETRY:
5449      emit_gs_input_declarations(emit);
5450      break;
5451   case PIPE_SHADER_VERTEX:
5452      emit_vs_input_declarations(emit);
5453      break;
5454   case PIPE_SHADER_TESS_CTRL:
5455      emit_tcs_input_declarations(emit);
5456      break;
5457   case PIPE_SHADER_TESS_EVAL:
5458      emit_tes_input_declarations(emit);
5459      break;
5460   case PIPE_SHADER_COMPUTE:
5461      //XXX emit_cs_input_declarations(emit);
5462      break;
5463   default:
5464      assert(0);
5465   }
5466
5467   if (emit->index_range.start_index != INVALID_INDEX) {
5468      emit_index_range_declaration(emit);
5469   }
5470   emit->index_range.required = FALSE;
5471   return TRUE;
5472}
5473
5474
5475/**
5476 * Emit all output declarations.
5477 */
5478static boolean
5479emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5480{
5481   emit->index_range.required =
5482      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5483
5484   switch (emit->unit) {
5485   case PIPE_SHADER_FRAGMENT:
5486      emit_fs_output_declarations(emit);
5487      break;
5488   case PIPE_SHADER_GEOMETRY:
5489      emit_gs_output_declarations(emit);
5490      break;
5491   case PIPE_SHADER_VERTEX:
5492      emit_vs_output_declarations(emit);
5493      break;
5494   case PIPE_SHADER_TESS_CTRL:
5495      emit_tcs_output_declarations(emit);
5496      break;
5497   case PIPE_SHADER_TESS_EVAL:
5498      emit_tes_output_declarations(emit);
5499      break;
5500   case PIPE_SHADER_COMPUTE:
5501      //XXX emit_cs_output_declarations(emit);
5502      break;
5503   default:
5504      assert(0);
5505   }
5506
5507   if (emit->vposition.so_index != INVALID_INDEX &&
5508       emit->vposition.out_index != INVALID_INDEX) {
5509
5510      assert(emit->unit != PIPE_SHADER_FRAGMENT);
5511
5512      /* Emit the declaration for the non-adjusted vertex position
5513       * for stream output purpose
5514       */
5515      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5516                              emit->vposition.so_index,
5517                              VGPU10_NAME_UNDEFINED,
5518                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5519                              TRUE,
5520                              SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5521   }
5522
5523   if (emit->clip_dist_so_index != INVALID_INDEX &&
5524       emit->clip_dist_out_index != INVALID_INDEX) {
5525
5526      assert(emit->unit != PIPE_SHADER_FRAGMENT);
5527
5528      /* Emit the declaration for the clip distance shadow copy which
5529       * will be used for stream output purpose and for clip distance
5530       * varying variable. Note all clip distances
5531       * will be written regardless of the enabled clipping planes.
5532       */
5533      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5534                              emit->clip_dist_so_index,
5535                              VGPU10_NAME_UNDEFINED,
5536                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5537                              TRUE,
5538                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5539
5540      if (emit->info.num_written_clipdistance > 4) {
5541         /* for the second clip distance register, each handles 4 planes */
5542         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5543                                 emit->clip_dist_so_index + 1,
5544                                 VGPU10_NAME_UNDEFINED,
5545                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5546                                 TRUE,
5547                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5548      }
5549   }
5550
5551   if (emit->index_range.start_index != INVALID_INDEX) {
5552      emit_index_range_declaration(emit);
5553   }
5554   emit->index_range.required = FALSE;
5555   return TRUE;
5556}
5557
5558
5559/**
5560 * A helper function to create a temporary indexable array
5561 * and initialize the corresponding entries in the temp_map array.
5562 */
5563static void
5564create_temp_array(struct svga_shader_emitter_v10 *emit,
5565                  unsigned arrayID, unsigned first, unsigned count,
5566                  unsigned startIndex)
5567{
5568   unsigned i, tempIndex = startIndex;
5569
5570   emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5571   assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5572   emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5573
5574   emit->temp_arrays[arrayID].start = first;
5575   emit->temp_arrays[arrayID].size = count;
5576
5577   /* Fill in the temp_map entries for this temp array */
5578   for (i = 0; i < count; i++, tempIndex++) {
5579      emit->temp_map[tempIndex].arrayId = arrayID;
5580      emit->temp_map[tempIndex].index = i;
5581   }
5582}
5583
5584
5585/**
5586 * Emit the declaration for the temporary registers.
5587 */
5588static boolean
5589emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5590{
5591   unsigned total_temps, reg, i;
5592
5593   total_temps = emit->num_shader_temps;
5594
5595   /* If there is indirect access to non-indexable temps in the shader,
5596    * convert those temps to indexable temps. This works around a bug
5597    * in the GLSL->TGSI translator exposed in piglit test
5598    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5599    * Internal temps added by the driver remain as non-indexable temps.
5600    */
5601   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5602       emit->num_temp_arrays == 0) {
5603      create_temp_array(emit, 1, 0, total_temps, 0);
5604   }
5605
5606   /* Allocate extra temps for specially-implemented instructions,
5607    * such as LIT.
5608    */
5609   total_temps += MAX_INTERNAL_TEMPS;
5610
5611   /* Allocate extra temps for clip distance or clip vertex.
5612    */
5613   if (emit->clip_mode == CLIP_DISTANCE) {
5614      /* We need to write the clip distance to a temporary register
5615       * first. Then it will be copied to the shadow copy for
5616       * the clip distance varying variable and stream output purpose.
5617       * It will also be copied to the actual CLIPDIST register
5618       * according to the enabled clip planes
5619       */
5620      emit->clip_dist_tmp_index = total_temps++;
5621      if (emit->info.num_written_clipdistance > 4)
5622         total_temps++; /* second clip register */
5623   }
5624   else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5625      /* If the current shader is in the last vertex processing stage,
5626       * We need to convert the TGSI CLIPVERTEX output to one or more
5627       * clip distances.  Allocate a temp reg for the clipvertex here.
5628       */
5629      assert(emit->info.writes_clipvertex > 0);
5630      emit->clip_vertex_tmp_index = total_temps;
5631      total_temps++;
5632   }
5633
5634   if (emit->info.uses_vertexid) {
5635      assert(emit->unit == PIPE_SHADER_VERTEX);
5636      emit->vs.vertex_id_tmp_index = total_temps++;
5637   }
5638
5639   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5640      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5641          emit->key.clip_plane_enable ||
5642          emit->vposition.so_index != INVALID_INDEX) {
5643         emit->vposition.tmp_index = total_temps;
5644         total_temps += 1;
5645      }
5646
5647      if (emit->vposition.need_prescale) {
5648         emit->vposition.prescale_scale_index = total_temps++;
5649         emit->vposition.prescale_trans_index = total_temps++;
5650      }
5651
5652      if (emit->unit == PIPE_SHADER_VERTEX) {
5653         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5654                                 emit->key.vs.adjust_attrib_itof |
5655                                 emit->key.vs.adjust_attrib_utof |
5656                                 emit->key.vs.attrib_is_bgra |
5657                                 emit->key.vs.attrib_puint_to_snorm |
5658                                 emit->key.vs.attrib_puint_to_uscaled |
5659                                 emit->key.vs.attrib_puint_to_sscaled);
5660         while (attrib_mask) {
5661            unsigned index = u_bit_scan(&attrib_mask);
5662            emit->vs.adjusted_input[index] = total_temps++;
5663         }
5664      }
5665      else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5666         if (emit->key.gs.writes_viewport_index)
5667            emit->gs.viewport_index_tmp_index = total_temps++;
5668      }
5669   }
5670   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5671      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5672          emit->key.fs.write_color0_to_n_cbufs > 1) {
5673         /* Allocate a temp to hold the output color */
5674         emit->fs.color_tmp_index = total_temps;
5675         total_temps += 1;
5676      }
5677
5678      if (emit->fs.face_input_index != INVALID_INDEX) {
5679         /* Allocate a temp for the +/-1 face register */
5680         emit->fs.face_tmp_index = total_temps;
5681         total_temps += 1;
5682      }
5683
5684      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5685         /* Allocate a temp for modified fragment position register */
5686         emit->fs.fragcoord_tmp_index = total_temps;
5687         total_temps += 1;
5688      }
5689
5690      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5691         /* Allocate a temp for the sample position */
5692         emit->fs.sample_pos_tmp_index = total_temps++;
5693      }
5694   }
5695   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5696      if (emit->vposition.need_prescale) {
5697         emit->vposition.tmp_index = total_temps++;
5698         emit->vposition.prescale_scale_index = total_temps++;
5699         emit->vposition.prescale_trans_index = total_temps++;
5700      }
5701
5702      if (emit->tes.inner.tgsi_index) {
5703         emit->tes.inner.temp_index = total_temps;
5704         total_temps += 1;
5705      }
5706
5707      if (emit->tes.outer.tgsi_index) {
5708         emit->tes.outer.temp_index = total_temps;
5709         total_temps += 1;
5710      }
5711   }
5712   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5713      if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5714         if (!emit->tcs.control_point_phase) {
5715            emit->tcs.inner.temp_index = total_temps;
5716            total_temps += 1;
5717         }
5718      }
5719      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5720         if (!emit->tcs.control_point_phase) {
5721            emit->tcs.outer.temp_index = total_temps;
5722            total_temps += 1;
5723         }
5724      }
5725
5726      if (emit->tcs.control_point_phase &&
5727          emit->info.reads_pervertex_outputs) {
5728         emit->tcs.control_point_tmp_index = total_temps;
5729         total_temps += emit->tcs.control_point_out_count;
5730      }
5731      else if (!emit->tcs.control_point_phase &&
5732               emit->info.reads_perpatch_outputs) {
5733
5734         /* If there is indirect access to the patch constant outputs
5735          * in the control point phase, then an indexable temporary array
5736          * will be created for these patch constant outputs.
5737          * Note, indirect access can only be applicable to
5738          * patch constant outputs in the control point phase.
5739          */
5740         if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5741            unsigned arrayID =
5742               emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5743            create_temp_array(emit, arrayID, 0,
5744                              emit->tcs.patch_generic_out_count, total_temps);
5745         }
5746         emit->tcs.patch_generic_tmp_index = total_temps;
5747         total_temps += emit->tcs.patch_generic_out_count;
5748      }
5749
5750      emit->tcs.invocation_id_tmp_index = total_temps++;
5751   }
5752
5753   if (emit->raw_bufs) {
5754      /**
5755       * Add 3 more temporaries if we need to translate constant buffer
5756       * to srv raw buffer. Since we need to load the value to a temporary
5757       * before it can be used as a source. There could be three source
5758       * register in an instruction.
5759       */
5760      emit->raw_buf_tmp_index = total_temps;
5761      total_temps+=3;
5762   }
5763
5764   for (i = 0; i < emit->num_address_regs; i++) {
5765      emit->address_reg_index[i] = total_temps++;
5766   }
5767
5768   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5769    * temp indexes.  Basically, we compact all the non-array temp register
5770    * indexes into a consecutive series.
5771    *
5772    * Before, we may have some TGSI declarations like:
5773    *   DCL TEMP[0..1], LOCAL
5774    *   DCL TEMP[2..4], ARRAY(1), LOCAL
5775    *   DCL TEMP[5..7], ARRAY(2), LOCAL
5776    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5777    *
5778    * After, we'll have a map like this:
5779    *   temp_map[0] = { array 0, index 0 }
5780    *   temp_map[1] = { array 0, index 1 }
5781    *   temp_map[2] = { array 1, index 0 }
5782    *   temp_map[3] = { array 1, index 1 }
5783    *   temp_map[4] = { array 1, index 2 }
5784    *   temp_map[5] = { array 2, index 0 }
5785    *   temp_map[6] = { array 2, index 1 }
5786    *   temp_map[7] = { array 2, index 2 }
5787    *   temp_map[8] = { array 0, index 2 }
5788    *   temp_map[9] = { array 0, index 3 }
5789    *
5790    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5791    * temps numbered 0..3
5792    *
5793    * Any time we emit a temporary register index, we'll have to use the
5794    * temp_map[] table to convert the TGSI index to the VGPU10 index.
5795    *
5796    * Finally, we recompute the total_temps value here.
5797    */
5798   reg = 0;
5799   for (i = 0; i < total_temps; i++) {
5800      if (emit->temp_map[i].arrayId == 0) {
5801         emit->temp_map[i].index = reg++;
5802      }
5803   }
5804
5805   if (0) {
5806      debug_printf("total_temps %u\n", total_temps);
5807      for (i = 0; i < total_temps; i++) {
5808         debug_printf("temp %u ->  array %u  index %u\n",
5809                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5810      }
5811   }
5812
5813   total_temps = reg;
5814
5815   /* Emit declaration of ordinary temp registers */
5816   if (total_temps > 0) {
5817      VGPU10OpcodeToken0 opcode0;
5818
5819      opcode0.value = 0;
5820      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5821
5822      begin_emit_instruction(emit);
5823      emit_dword(emit, opcode0.value);
5824      emit_dword(emit, total_temps);
5825      end_emit_instruction(emit);
5826   }
5827
5828   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
5829    * it's unused.
5830    */
5831   for (i = 1; i < emit->num_temp_arrays; i++) {
5832      unsigned num_temps = emit->temp_arrays[i].size;
5833
5834      if (num_temps > 0) {
5835         VGPU10OpcodeToken0 opcode0;
5836
5837         opcode0.value = 0;
5838         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5839
5840         begin_emit_instruction(emit);
5841         emit_dword(emit, opcode0.value);
5842         emit_dword(emit, i); /* which array */
5843         emit_dword(emit, num_temps);
5844         emit_dword(emit, 4); /* num components */
5845         end_emit_instruction(emit);
5846
5847         total_temps += num_temps;
5848      }
5849   }
5850
5851   /* Check that the grand total of all regular and indexed temps is
5852    * under the limit.
5853    */
5854   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5855
5856   return TRUE;
5857}
5858
5859
5860static boolean
5861emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5862{
5863   VGPU10OpcodeToken0 opcode0;
5864   VGPU10OperandToken0 operand0;
5865   unsigned total_consts, i;
5866
5867   opcode0.value = 0;
5868   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5869   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5870   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5871
5872   operand0.value = 0;
5873   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5874   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5875   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5876   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5877   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5878   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5879   operand0.swizzleX = 0;
5880   operand0.swizzleY = 1;
5881   operand0.swizzleZ = 2;
5882   operand0.swizzleW = 3;
5883
5884   /**
5885    * Emit declaration for constant buffer [0].  We also allocate
5886    * room for the extra constants here.
5887    */
5888   total_consts = emit->num_shader_consts[0];
5889
5890   /* Now, allocate constant slots for the "extra" constants.
5891    * Note: it's critical that these extra constant locations
5892    * exactly match what's emitted by the "extra" constants code
5893    * in svga_state_constants.c
5894    */
5895
5896   /* Vertex position scale/translation */
5897   if (emit->vposition.need_prescale) {
5898      emit->vposition.prescale_cbuf_index = total_consts;
5899      total_consts += (2 * emit->vposition.num_prescale);
5900   }
5901
5902   if (emit->unit == PIPE_SHADER_VERTEX) {
5903      if (emit->key.vs.undo_viewport) {
5904         emit->vs.viewport_index = total_consts++;
5905      }
5906      if (emit->key.vs.need_vertex_id_bias) {
5907         emit->vs.vertex_id_bias_index = total_consts++;
5908      }
5909   }
5910
5911   /* user-defined clip planes */
5912   if (emit->key.clip_plane_enable) {
5913      unsigned n = util_bitcount(emit->key.clip_plane_enable);
5914      assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5915             emit->unit != PIPE_SHADER_COMPUTE);
5916      for (i = 0; i < n; i++) {
5917         emit->clip_plane_const[i] = total_consts++;
5918      }
5919   }
5920
5921   for (i = 0; i < emit->num_samplers; i++) {
5922
5923      if (emit->key.tex[i].sampler_view) {
5924         /* Texcoord scale factors for RECT textures */
5925         if (emit->key.tex[i].unnormalized) {
5926            emit->texcoord_scale_index[i] = total_consts++;
5927         }
5928
5929         /* Texture buffer sizes */
5930         if (emit->key.tex[i].target == PIPE_BUFFER) {
5931            emit->texture_buffer_size_index[i] = total_consts++;
5932         }
5933      }
5934   }
5935   if (emit->key.image_size_used) {
5936      emit->image_size_index = total_consts;
5937      total_consts += emit->num_images;
5938   }
5939
5940   if (total_consts > 0) {
5941      if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
5942         debug_printf("Warning: Too many constants [%u] declared in constant"
5943                      " buffer 0. %u is the limit.\n",
5944                      total_consts,
5945                      VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
5946         total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
5947         emit->register_overflow = TRUE;
5948      }
5949      begin_emit_instruction(emit);
5950      emit_dword(emit, opcode0.value);
5951      emit_dword(emit, operand0.value);
5952      emit_dword(emit, 0);  /* which const buffer slot */
5953      emit_dword(emit, total_consts);
5954      end_emit_instruction(emit);
5955   }
5956
5957   /* Declare remaining constant buffers (UBOs) */
5958
5959   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5960      if (emit->num_shader_consts[i] > 0) {
5961         if (emit->raw_bufs & (1 << i)) {
5962            /* UBO declared as srv raw buffer */
5963
5964            VGPU10OpcodeToken0 opcode1;
5965            VGPU10OperandToken0 operand1;
5966
5967            opcode1.value = 0;
5968            opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5969            opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5970
5971            operand1.value = 0;
5972            operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5973            operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5974            operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5975            operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5976
5977            begin_emit_instruction(emit);
5978            emit_dword(emit, opcode1.value);
5979            emit_dword(emit, operand1.value);
5980            emit_dword(emit, i + emit->raw_buf_srv_start_index);
5981            end_emit_instruction(emit);
5982         }
5983         else {
5984
5985            /* UBO declared as const buffer */
5986            begin_emit_instruction(emit);
5987            emit_dword(emit, opcode0.value);
5988            emit_dword(emit, operand0.value);
5989            emit_dword(emit, i);  /* which const buffer slot */
5990            emit_dword(emit, emit->num_shader_consts[i]);
5991            end_emit_instruction(emit);
5992         }
5993      }
5994   }
5995
5996   return TRUE;
5997}
5998
5999
6000/**
6001 * Emit declarations for samplers.
6002 */
6003static boolean
6004emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6005{
6006   unsigned i;
6007
6008   for (i = 0; i < emit->key.num_samplers; i++) {
6009
6010      VGPU10OpcodeToken0 opcode0;
6011      VGPU10OperandToken0 operand0;
6012
6013      opcode0.value = 0;
6014      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6015      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6016
6017      operand0.value = 0;
6018      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6019      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6020      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6021      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6022
6023      begin_emit_instruction(emit);
6024      emit_dword(emit, opcode0.value);
6025      emit_dword(emit, operand0.value);
6026      emit_dword(emit, i);
6027      end_emit_instruction(emit);
6028   }
6029
6030   return TRUE;
6031}
6032
6033
6034/**
6035 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6036 */
6037static unsigned
6038pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
6039                                   unsigned num_samples,
6040                                   boolean is_array,
6041                                   boolean is_uav)
6042{
6043   switch (target) {
6044   case PIPE_BUFFER:
6045      return VGPU10_RESOURCE_DIMENSION_BUFFER;
6046   case PIPE_TEXTURE_1D:
6047      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6048   case PIPE_TEXTURE_2D:
6049      return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
6050         VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6051   case PIPE_TEXTURE_RECT:
6052      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6053   case PIPE_TEXTURE_3D:
6054      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6055   case PIPE_TEXTURE_CUBE:
6056      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6057   case PIPE_TEXTURE_1D_ARRAY:
6058      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6059         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6060   case PIPE_TEXTURE_2D_ARRAY:
6061      if (num_samples > 2 && is_array)
6062         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
6063      else if (is_array)
6064         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
6065      else
6066         return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6067   case PIPE_TEXTURE_CUBE_ARRAY:
6068      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6069             (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6070                         VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6071   default:
6072      assert(!"Unexpected resource type");
6073      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6074   }
6075}
6076
6077
6078/**
6079 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6080 */
6081static unsigned
6082tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
6083                                   unsigned num_samples,
6084                                   boolean is_array,
6085                                   boolean is_uav)
6086{
6087   if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
6088      target = TGSI_TEXTURE_2D;
6089   }
6090   else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
6091      target = TGSI_TEXTURE_2D_ARRAY;
6092   }
6093
6094   switch (target) {
6095   case TGSI_TEXTURE_BUFFER:
6096      return VGPU10_RESOURCE_DIMENSION_BUFFER;
6097   case TGSI_TEXTURE_1D:
6098      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6099   case TGSI_TEXTURE_2D:
6100   case TGSI_TEXTURE_RECT:
6101      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6102   case TGSI_TEXTURE_3D:
6103      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6104   case TGSI_TEXTURE_CUBE:
6105   case TGSI_TEXTURE_SHADOWCUBE:
6106      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6107                      VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6108   case TGSI_TEXTURE_SHADOW1D:
6109      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6110   case TGSI_TEXTURE_SHADOW2D:
6111   case TGSI_TEXTURE_SHADOWRECT:
6112      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6113   case TGSI_TEXTURE_1D_ARRAY:
6114   case TGSI_TEXTURE_SHADOW1D_ARRAY:
6115      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6116         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6117   case TGSI_TEXTURE_2D_ARRAY:
6118   case TGSI_TEXTURE_SHADOW2D_ARRAY:
6119      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
6120         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6121   case TGSI_TEXTURE_2D_MSAA:
6122      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6123   case TGSI_TEXTURE_2D_ARRAY_MSAA:
6124      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
6125         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6126   case TGSI_TEXTURE_CUBE_ARRAY:
6127      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6128             (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6129                         VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6130   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
6131      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
6132         : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6133   default:
6134      assert(!"Unexpected resource type");
6135      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6136   }
6137}
6138
6139
6140/**
6141 * Given a tgsi_return_type, return true iff it is an integer type.
6142 */
6143static boolean
6144is_integer_type(enum tgsi_return_type type)
6145{
6146   switch (type) {
6147      case TGSI_RETURN_TYPE_SINT:
6148      case TGSI_RETURN_TYPE_UINT:
6149         return TRUE;
6150      case TGSI_RETURN_TYPE_FLOAT:
6151      case TGSI_RETURN_TYPE_UNORM:
6152      case TGSI_RETURN_TYPE_SNORM:
6153         return FALSE;
6154      case TGSI_RETURN_TYPE_COUNT:
6155      default:
6156         assert(!"is_integer_type: Unknown tgsi_return_type");
6157         return FALSE;
6158   }
6159}
6160
6161
6162/**
6163 * Emit declarations for resources.
6164 * XXX When we're sure that all TGSI shaders will be generated with
6165 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6166 * rework this code.
6167 */
6168static boolean
6169emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6170{
6171   unsigned i;
6172
6173   /* Emit resource decl for each sampler */
6174   for (i = 0; i < emit->num_samplers; i++) {
6175      if (!(emit->info.samplers_declared & (1 << i)))
6176         continue;
6177
6178      VGPU10OpcodeToken0 opcode0;
6179      VGPU10OperandToken0 operand0;
6180      VGPU10ResourceReturnTypeToken return_type;
6181      VGPU10_RESOURCE_RETURN_TYPE rt;
6182
6183      opcode0.value = 0;
6184      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6185      if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6186         opcode0.resourceDimension =
6187            tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6188                                               emit->key.tex[i].num_samples,
6189                                               emit->key.tex[i].is_array,
6190                                               FALSE);
6191      }
6192      else {
6193         opcode0.resourceDimension =
6194            pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6195                                               emit->key.tex[i].num_samples,
6196                                               emit->key.tex[i].is_array,
6197                                               FALSE);
6198      }
6199      opcode0.sampleCount = emit->key.tex[i].num_samples;
6200      operand0.value = 0;
6201      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6202      operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6203      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6204      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6205
6206#if 1
6207      /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6208      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6209      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6210      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6211      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6212      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6213      assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6214      if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6215         rt = emit->sampler_return_type[i] + 1;
6216      }
6217      else {
6218         rt = emit->key.tex[i].sampler_return_type;
6219      }
6220#else
6221      switch (emit->sampler_return_type[i]) {
6222         case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6223         case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6224         case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
6225         case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
6226         case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6227         case TGSI_RETURN_TYPE_COUNT:
6228         default:
6229            rt = VGPU10_RETURN_TYPE_FLOAT;
6230            assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6231      }
6232#endif
6233
6234      return_type.value = 0;
6235      return_type.component0 = rt;
6236      return_type.component1 = rt;
6237      return_type.component2 = rt;
6238      return_type.component3 = rt;
6239
6240      begin_emit_instruction(emit);
6241      emit_dword(emit, opcode0.value);
6242      emit_dword(emit, operand0.value);
6243      emit_dword(emit, i);
6244      emit_dword(emit, return_type.value);
6245      end_emit_instruction(emit);
6246   }
6247
6248   return TRUE;
6249}
6250
6251
6252/**
6253 * Emit instruction to declare uav for the shader image
6254 */
6255static void
6256emit_image_declarations(struct svga_shader_emitter_v10 *emit)
6257{
6258   unsigned i = 0;
6259   unsigned unit = 0;
6260   unsigned uav_mask = 0;
6261
6262   /* Emit uav decl for each image */
6263   for (i = 0; i < emit->num_images; i++, unit++) {
6264
6265      /* Find the unit index of the next declared image.
6266       */
6267      while (!(emit->image_mask & (1 << unit))) {
6268         unit++;
6269      }
6270
6271      VGPU10OpcodeToken0 opcode0;
6272      VGPU10OperandToken0 operand0;
6273      VGPU10ResourceReturnTypeToken return_type;
6274
6275      /* If the corresponding uav for the image is already declared,
6276       * skip this image declaration.
6277       */
6278      if (uav_mask & (1 << emit->key.images[unit].uav_index))
6279         continue;
6280
6281      opcode0.value = 0;
6282      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
6283      opcode0.uavResourceDimension =
6284         tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
6285                                            0, emit->key.images[unit].is_array,
6286                                            TRUE);
6287
6288      if (emit->key.images[unit].is_single_layer &&
6289          emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
6290         opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6291      }
6292
6293      /* Declare the uav as global coherent if the shader includes memory
6294       * barrier instructions.
6295       */
6296      opcode0.globallyCoherent =
6297         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6298
6299      operand0.value = 0;
6300      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6301      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6302      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6303      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6304
6305      return_type.value = 0;
6306      return_type.component0 =
6307         return_type.component1 =
6308         return_type.component2 =
6309         return_type.component3 = emit->key.images[unit].return_type + 1;
6310
6311      assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
6312      begin_emit_instruction(emit);
6313      emit_dword(emit, opcode0.value);
6314      emit_dword(emit, operand0.value);
6315      emit_dword(emit, emit->key.images[unit].uav_index);
6316      emit_dword(emit, return_type.value);
6317      end_emit_instruction(emit);
6318
6319      /* Mark the uav is already declared */
6320      uav_mask |= 1 << emit->key.images[unit].uav_index;
6321   }
6322
6323   emit->uav_declared |= uav_mask;
6324}
6325
6326
6327/**
6328 * Emit instruction to declare uav for the shader buffer
6329 */
6330static void
6331emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6332{
6333   unsigned i;
6334   unsigned uav_mask = 0;
6335
6336   /* Emit uav decl for each shader buffer */
6337   for (i = 0; i < emit->num_shader_bufs; i++) {
6338      VGPU10OpcodeToken0 opcode0;
6339      VGPU10OperandToken0 operand0;
6340
6341      /* If the corresponding uav for the shader buf is already declared,
6342       * skip this shader buffer declaration.
6343       */
6344      if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6345         continue;
6346
6347      opcode0.value = 0;
6348      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6349
6350      /* Declare the uav as global coherent if the shader includes memory
6351       * barrier instructions.
6352       */
6353      opcode0.globallyCoherent =
6354         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6355
6356      operand0.value = 0;
6357      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6358      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6359      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6360      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6361
6362      assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6363      begin_emit_instruction(emit);
6364      emit_dword(emit, opcode0.value);
6365      emit_dword(emit, operand0.value);
6366      emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6367      end_emit_instruction(emit);
6368
6369      /* Mark the uav is already declared */
6370      uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6371   }
6372
6373   emit->uav_declared |= uav_mask;
6374}
6375
6376
6377/**
6378 * Emit instruction to declare thread group shared memory(tgsm) for shared memory
6379 */
6380static void
6381emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6382{
6383   if (emit->cs.shared_memory_declared) {
6384      VGPU10OpcodeToken0 opcode0;
6385      VGPU10OperandToken0 operand0;
6386
6387      opcode0.value = 0;
6388      opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6389
6390      /* Declare the uav as global coherent if the shader includes memory
6391       * barrier instructions.
6392       */
6393      opcode0.globallyCoherent =
6394         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6395
6396      operand0.value = 0;
6397      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6398      operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6399      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6400      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6401
6402      begin_emit_instruction(emit);
6403      emit_dword(emit, opcode0.value);
6404      emit_dword(emit, operand0.value);
6405
6406      /* Current state tracker only declares one shared memory for GLSL.
6407       * Use index 0 for this shared memory.
6408       */
6409      emit_dword(emit, 0);
6410      emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6411      end_emit_instruction(emit);
6412   }
6413}
6414
6415
6416/**
6417 * Emit instruction to declare uav for atomic buffers
6418 */
6419static void
6420emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6421{
6422   unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6423   unsigned uav_mask = 0;
6424
6425   /* Emit uav decl for each atomic buffer */
6426   while (atomic_bufs_mask) {
6427      unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6428      unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6429
6430      /* If the corresponding uav for the shader buf is already declared,
6431       * skip this shader buffer declaration.
6432       */
6433      if (uav_mask & (1 << uav_index))
6434         continue;
6435
6436      VGPU10OpcodeToken0 opcode0;
6437      VGPU10OperandToken0 operand0;
6438
6439      assert(uav_index != SVGA3D_INVALID_ID);
6440
6441      opcode0.value = 0;
6442      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6443      opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6444
6445      /* Declare the uav as global coherent if the shader includes memory
6446       * barrier instructions.
6447       */
6448      opcode0.globallyCoherent =
6449         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6450      opcode0.uavHasCounter = 1;
6451
6452      operand0.value = 0;
6453      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6454      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6455      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6456      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6457
6458      begin_emit_instruction(emit);
6459      emit_dword(emit, opcode0.value);
6460      emit_dword(emit, operand0.value);
6461      emit_dword(emit, uav_index);
6462      end_emit_instruction(emit);
6463
6464      /* Mark the uav is already declared */
6465      uav_mask |= 1 << uav_index;
6466   }
6467
6468   emit->uav_declared |= uav_mask;
6469
6470   /* Allocate immediates to be used for index to the atomic buffers */
6471   unsigned j = 0;
6472   for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6473      alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6474   }
6475
6476   /* Allocate immediates for the atomic counter index */
6477   for (; j <= emit->max_atomic_counter_index; j+=4) {
6478      alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6479   }
6480}
6481
6482
6483/**
6484 * Emit instruction with n=1, 2 or 3 source registers.
6485 */
6486static void
6487emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
6488                     unsigned opcode,
6489                     const struct tgsi_full_dst_register *dst,
6490                     const struct tgsi_full_src_register *src1,
6491                     const struct tgsi_full_src_register *src2,
6492                     const struct tgsi_full_src_register *src3,
6493                     boolean saturate, bool precise)
6494{
6495   begin_emit_instruction(emit);
6496   emit_opcode_precise(emit, opcode, saturate, precise);
6497   emit_dst_register(emit, dst);
6498   emit_src_register(emit, src1);
6499   if (src2) {
6500      emit_src_register(emit, src2);
6501   }
6502   if (src3) {
6503      emit_src_register(emit, src3);
6504   }
6505   end_emit_instruction(emit);
6506}
6507
6508static void
6509emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
6510                     unsigned opcode,
6511                     const struct tgsi_full_dst_register *dst,
6512                     const struct tgsi_full_src_register *src)
6513{
6514   emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
6515}
6516
6517static void
6518emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6519                     VGPU10_OPCODE_TYPE opcode,
6520                     const struct tgsi_full_dst_register *dst,
6521                     const struct tgsi_full_src_register *src1,
6522                     const struct tgsi_full_src_register *src2)
6523{
6524   emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
6525}
6526
6527static void
6528emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6529                     VGPU10_OPCODE_TYPE opcode,
6530                     const struct tgsi_full_dst_register *dst,
6531                     const struct tgsi_full_src_register *src1,
6532                     const struct tgsi_full_src_register *src2,
6533                     const struct tgsi_full_src_register *src3)
6534{
6535   emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
6536}
6537
6538static void
6539emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6540                     VGPU10_OPCODE_TYPE opcode)
6541{
6542   begin_emit_instruction(emit);
6543   emit_opcode(emit, opcode, FALSE);
6544   end_emit_instruction(emit);
6545}
6546
6547/**
6548 * Tessellation inner/outer levels needs to be store into its
6549 * appropriate registers depending on prim_mode.
6550 */
6551static void
6552store_tesslevels(struct svga_shader_emitter_v10 *emit)
6553{
6554   int i;
6555
6556   /* tessellation levels are required input/out in hull shader.
6557    * emitting the inner/outer tessellation levels, either from
6558    * values provided in tcs or fallback default values which is 1.0
6559    */
6560   if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
6561      struct tgsi_full_src_register temp_src;
6562
6563      if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6564         temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6565      else
6566         temp_src = make_immediate_reg_float(emit, 1.0f);
6567
6568      for (i = 0; i < 2; i++) {
6569         struct tgsi_full_src_register src =
6570            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6571         struct tgsi_full_dst_register dst =
6572            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6573         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6574         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6575      }
6576
6577      if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6578         temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6579      else
6580         temp_src = make_immediate_reg_float(emit, 1.0f);
6581
6582      for (i = 0; i < 4; i++) {
6583         struct tgsi_full_src_register src =
6584            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6585         struct tgsi_full_dst_register dst =
6586            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6587         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6588         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6589      }
6590   }
6591   else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
6592      struct tgsi_full_src_register temp_src;
6593
6594      if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6595         temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6596      else
6597         temp_src = make_immediate_reg_float(emit, 1.0f);
6598
6599      struct tgsi_full_src_register src =
6600         scalar_src(&temp_src, TGSI_SWIZZLE_X);
6601      struct tgsi_full_dst_register dst =
6602         make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6603      dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6604      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6605
6606      if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6607         temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6608      else
6609         temp_src = make_immediate_reg_float(emit, 1.0f);
6610
6611      for (i = 0; i < 3; i++) {
6612         struct tgsi_full_src_register src =
6613            scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6614         struct tgsi_full_dst_register dst =
6615            make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6616         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6617         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6618      }
6619   }
6620   else if (emit->key.tcs.prim_mode ==  PIPE_PRIM_LINES) {
6621      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6622         struct tgsi_full_src_register temp_src =
6623            make_src_temp_reg(emit->tcs.outer.temp_index);
6624         for (i = 0; i < 2; i++) {
6625            struct tgsi_full_src_register src =
6626               scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6627            struct tgsi_full_dst_register dst =
6628               make_dst_reg(TGSI_FILE_OUTPUT,
6629                            emit->tcs.outer.out_index + i);
6630            dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6631            emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6632         }
6633      }
6634   }
6635   else {
6636      debug_printf("Unsupported primitive type");
6637   }
6638}
6639
6640
6641/**
6642 * Emit the actual clip distance instructions to be used for clipping
6643 * by copying the clip distance from the temporary registers to the
6644 * CLIPDIST registers written with the enabled planes mask.
6645 * Also copy the clip distance from the temporary to the clip distance
6646 * shadow copy register which will be referenced by the input shader
6647 */
6648static void
6649emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6650{
6651   struct tgsi_full_src_register tmp_clip_dist_src;
6652   struct tgsi_full_dst_register clip_dist_dst;
6653
6654   unsigned i;
6655   unsigned clip_plane_enable = emit->key.clip_plane_enable;
6656   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6657   int num_written_clipdist = emit->info.num_written_clipdistance;
6658
6659   assert(emit->clip_dist_out_index != INVALID_INDEX);
6660   assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6661
6662   /**
6663    * Temporary reset the temporary clip dist register index so
6664    * that the copy to the real clip dist register will not
6665    * attempt to copy to the temporary register again
6666    */
6667   emit->clip_dist_tmp_index = INVALID_INDEX;
6668
6669   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6670
6671      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6672
6673      /**
6674       * copy to the shadow copy for use by varying variable and
6675       * stream output. All clip distances
6676       * will be written regardless of the enabled clipping planes.
6677       */
6678      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6679                                   emit->clip_dist_so_index + i);
6680
6681      /* MOV clip_dist_so, tmp_clip_dist */
6682      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6683                           &tmp_clip_dist_src);
6684
6685      /**
6686       * copy those clip distances to enabled clipping planes
6687       * to CLIPDIST registers for clipping
6688       */
6689      if (clip_plane_enable & 0xf) {
6690         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6691                                      emit->clip_dist_out_index + i);
6692         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6693
6694         /* MOV CLIPDIST, tmp_clip_dist */
6695         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6696                              &tmp_clip_dist_src);
6697      }
6698      /* four clip planes per clip register */
6699      clip_plane_enable >>= 4;
6700   }
6701   /**
6702    * set the temporary clip dist register index back to the
6703    * temporary index for the next vertex
6704    */
6705   emit->clip_dist_tmp_index = clip_dist_tmp_index;
6706}
6707
6708/* Declare clip distance output registers for user-defined clip planes
6709 * or the TGSI_CLIPVERTEX output.
6710 */
6711static void
6712emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6713{
6714   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6715   unsigned index = emit->num_outputs;
6716   unsigned plane_mask;
6717
6718   assert(emit->unit != PIPE_SHADER_FRAGMENT);
6719   assert(num_clip_planes <= 8);
6720
6721   if (emit->clip_mode != CLIP_LEGACY &&
6722       emit->clip_mode != CLIP_VERTEX) {
6723      return;
6724   }
6725
6726   if (num_clip_planes == 0)
6727      return;
6728
6729   /* Convert clip vertex to clip distances only in the last vertex stage */
6730   if (!emit->key.last_vertex_stage)
6731      return;
6732
6733   /* Declare one or two clip output registers.  The number of components
6734    * in the mask reflects the number of clip planes.  For example, if 5
6735    * clip planes are needed, we'll declare outputs similar to:
6736    * dcl_output_siv o2.xyzw, clip_distance
6737    * dcl_output_siv o3.x, clip_distance
6738    */
6739   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6740
6741   plane_mask = (1 << num_clip_planes) - 1;
6742   if (plane_mask & 0xf) {
6743      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6744      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6745                              VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6746                              SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6747      emit->num_outputs++;
6748   }
6749   if (plane_mask & 0xf0) {
6750      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6751      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6752                              VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6753                              SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6754      emit->num_outputs++;
6755   }
6756}
6757
6758
6759/**
6760 * Emit the instructions for writing to the clip distance registers
6761 * to handle legacy/automatic clip planes.
6762 * For each clip plane, the distance is the dot product of the vertex
6763 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6764 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6765 * output registers already declared.
6766 */
6767static void
6768emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6769                             unsigned vpos_tmp_index)
6770{
6771   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6772
6773   assert(emit->clip_mode == CLIP_LEGACY);
6774   assert(num_clip_planes <= 8);
6775
6776   assert(emit->unit == PIPE_SHADER_VERTEX ||
6777          emit->unit == PIPE_SHADER_GEOMETRY ||
6778          emit->unit == PIPE_SHADER_TESS_EVAL);
6779
6780   for (i = 0; i < num_clip_planes; i++) {
6781      struct tgsi_full_dst_register dst;
6782      struct tgsi_full_src_register plane_src, vpos_src;
6783      unsigned reg_index = emit->clip_dist_out_index + i / 4;
6784      unsigned comp = i % 4;
6785      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6786
6787      /* create dst, src regs */
6788      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6789      dst = writemask_dst(&dst, writemask);
6790
6791      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6792      vpos_src = make_src_temp_reg(vpos_tmp_index);
6793
6794      /* DP4 clip_dist, plane, vpos */
6795      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6796                           &plane_src, &vpos_src);
6797   }
6798}
6799
6800
6801/**
6802 * Emit the instructions for computing the clip distance results from
6803 * the clip vertex temporary.
6804 * For each clip plane, the distance is the dot product of the clip vertex
6805 * position (found in a temp reg) and the clip plane coefficients.
6806 */
6807static void
6808emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6809{
6810   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6811   unsigned i;
6812   struct tgsi_full_dst_register dst;
6813   struct tgsi_full_src_register clipvert_src;
6814   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6815
6816   assert(emit->unit == PIPE_SHADER_VERTEX ||
6817          emit->unit == PIPE_SHADER_GEOMETRY ||
6818          emit->unit == PIPE_SHADER_TESS_EVAL);
6819
6820   assert(emit->clip_mode == CLIP_VERTEX);
6821
6822   clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6823
6824   for (i = 0; i < num_clip; i++) {
6825      struct tgsi_full_src_register plane_src;
6826      unsigned reg_index = emit->clip_dist_out_index + i / 4;
6827      unsigned comp = i % 4;
6828      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6829
6830      /* create dst, src regs */
6831      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6832      dst = writemask_dst(&dst, writemask);
6833
6834      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6835
6836      /* DP4 clip_dist, plane, vpos */
6837      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6838                           &plane_src, &clipvert_src);
6839   }
6840
6841   /* copy temporary clip vertex register to the clip vertex register */
6842
6843   assert(emit->clip_vertex_out_index != INVALID_INDEX);
6844
6845   /**
6846    * temporary reset the temporary clip vertex register index so
6847    * that copy to the clip vertex register will not attempt
6848    * to copy to the temporary register again
6849    */
6850   emit->clip_vertex_tmp_index = INVALID_INDEX;
6851
6852   /* MOV clip_vertex, clip_vertex_tmp */
6853   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6854   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6855                        &dst, &clipvert_src);
6856
6857   /**
6858    * set the temporary clip vertex register index back to the
6859    * temporary index for the next vertex
6860    */
6861   emit->clip_vertex_tmp_index = clip_vertex_tmp;
6862}
6863
6864/**
6865 * Emit code to convert RGBA to BGRA
6866 */
6867static void
6868emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6869                     const struct tgsi_full_dst_register *dst,
6870                     const struct tgsi_full_src_register *src)
6871{
6872   struct tgsi_full_src_register bgra_src =
6873      swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6874
6875   begin_emit_instruction(emit);
6876   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6877   emit_dst_register(emit, dst);
6878   emit_src_register(emit, &bgra_src);
6879   end_emit_instruction(emit);
6880}
6881
6882
6883/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6884static void
6885emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6886                    const struct tgsi_full_dst_register *dst,
6887                    const struct tgsi_full_src_register *src)
6888{
6889   struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6890   struct tgsi_full_src_register two =
6891      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6892   struct tgsi_full_src_register neg_two =
6893      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6894
6895   unsigned val_tmp = get_temp_index(emit);
6896   struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6897   struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6898
6899   unsigned bias_tmp = get_temp_index(emit);
6900   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6901   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6902
6903   /* val = src * 2.0 */
6904   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6905
6906   /* bias = src > 0.5 */
6907   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6908
6909   /* bias = bias & -2.0 */
6910   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6911                        &bias_src, &neg_two);
6912
6913   /* dst = val + bias */
6914   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6915                        &val_src, &bias_src);
6916
6917   free_temp_indexes(emit);
6918}
6919
6920
6921/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6922static void
6923emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6924                      const struct tgsi_full_dst_register *dst,
6925                      const struct tgsi_full_src_register *src)
6926{
6927   struct tgsi_full_src_register scale =
6928      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6929
6930   /* dst = src * scale */
6931   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6932}
6933
6934
6935/** Convert from R32_UINT to 10_10_10_2_sscaled */
6936static void
6937emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6938                      const struct tgsi_full_dst_register *dst,
6939                      const struct tgsi_full_src_register *src)
6940{
6941   struct tgsi_full_src_register lshift =
6942      make_immediate_reg_int4(emit, 22, 12, 2, 0);
6943   struct tgsi_full_src_register rshift =
6944      make_immediate_reg_int4(emit, 22, 22, 22, 30);
6945
6946   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6947
6948   unsigned tmp = get_temp_index(emit);
6949   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6950   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6951
6952   /*
6953    * r = (pixel << 22) >> 22;   # signed int in [511, -512]
6954    * g = (pixel << 12) >> 22;   # signed int in [511, -512]
6955    * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
6956    * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
6957    * dst = i_to_f(r,g,b,a);     # convert to float
6958    */
6959   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6960                        &src_xxxx, &lshift);
6961   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6962                        &tmp_src, &rshift);
6963   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6964
6965   free_temp_indexes(emit);
6966}
6967
6968
6969/**
6970 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6971 */
6972static boolean
6973emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6974              const struct tgsi_full_instruction *inst)
6975{
6976   unsigned index = inst->Dst[0].Register.Index;
6977   struct tgsi_full_dst_register dst;
6978   VGPU10_OPCODE_TYPE opcode;
6979
6980   assert(index < MAX_VGPU10_ADDR_REGS);
6981   dst = make_dst_temp_reg(emit->address_reg_index[index]);
6982   dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6983
6984   /* ARL dst, s0
6985    * Translates into:
6986    * FTOI address_tmp, s0
6987    *
6988    * UARL dst, s0
6989    * Translates into:
6990    * MOV address_tmp, s0
6991    */
6992   if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6993      opcode = VGPU10_OPCODE_FTOI;
6994   else
6995      opcode = VGPU10_OPCODE_MOV;
6996
6997   emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6998
6999   return TRUE;
7000}
7001
7002
7003/**
7004 * Emit code for TGSI_OPCODE_CAL instruction.
7005 */
7006static boolean
7007emit_cal(struct svga_shader_emitter_v10 *emit,
7008         const struct tgsi_full_instruction *inst)
7009{
7010   unsigned label = inst->Label.Label;
7011   VGPU10OperandToken0 operand;
7012   operand.value = 0;
7013   operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7014
7015   begin_emit_instruction(emit);
7016   emit_dword(emit, operand.value);
7017   emit_dword(emit, label);
7018   end_emit_instruction(emit);
7019
7020   return TRUE;
7021}
7022
7023
7024/**
7025 * Emit code for TGSI_OPCODE_IABS instruction.
7026 */
7027static boolean
7028emit_iabs(struct svga_shader_emitter_v10 *emit,
7029          const struct tgsi_full_instruction *inst)
7030{
7031   /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7032    * dst.y = (src0.y < 0) ? -src0.y : src0.y
7033    * dst.z = (src0.z < 0) ? -src0.z : src0.z
7034    * dst.w = (src0.w < 0) ? -src0.w : src0.w
7035    *
7036    * Translates into
7037    *   IMAX dst, src, neg(src)
7038    */
7039   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7040   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7041                        &inst->Src[0], &neg_src);
7042
7043   return TRUE;
7044}
7045
7046
7047/**
7048 * Emit code for TGSI_OPCODE_CMP instruction.
7049 */
7050static boolean
7051emit_cmp(struct svga_shader_emitter_v10 *emit,
7052         const struct tgsi_full_instruction *inst)
7053{
7054   /* dst.x = (src0.x < 0) ? src1.x : src2.x
7055    * dst.y = (src0.y < 0) ? src1.y : src2.y
7056    * dst.z = (src0.z < 0) ? src1.z : src2.z
7057    * dst.w = (src0.w < 0) ? src1.w : src2.w
7058    *
7059    * Translates into
7060    *   LT tmp, src0, 0.0
7061    *   MOVC dst, tmp, src1, src2
7062    */
7063   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7064   unsigned tmp = get_temp_index(emit);
7065   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7066   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7067
7068   emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7069                        &inst->Src[0], &zero, NULL, FALSE,
7070                        inst->Instruction.Precise);
7071   emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7072                        &tmp_src, &inst->Src[1], &inst->Src[2],
7073                        inst->Instruction.Saturate, FALSE);
7074
7075   free_temp_indexes(emit);
7076
7077   return TRUE;
7078}
7079
7080
7081/**
7082 * Emit code for TGSI_OPCODE_DST instruction.
7083 */
7084static boolean
7085emit_dst(struct svga_shader_emitter_v10 *emit,
7086         const struct tgsi_full_instruction *inst)
7087{
7088   /*
7089    * dst.x = 1
7090    * dst.y = src0.y * src1.y
7091    * dst.z = src0.z
7092    * dst.w = src1.w
7093    */
7094
7095   struct tgsi_full_src_register s0_yyyy =
7096      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7097   struct tgsi_full_src_register s0_zzzz =
7098      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7099   struct tgsi_full_src_register s1_yyyy =
7100      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7101   struct tgsi_full_src_register s1_wwww =
7102      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7103
7104   /*
7105    * If dst and either src0 and src1 are the same we need
7106    * to create a temporary for it and insert a extra move.
7107    */
7108   unsigned tmp_move = get_temp_index(emit);
7109   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7110   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7111
7112   /* MOV dst.x, 1.0 */
7113   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7114      struct tgsi_full_dst_register dst_x =
7115         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7116      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7117
7118      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7119   }
7120
7121   /* MUL dst.y, s0.y, s1.y */
7122   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7123      struct tgsi_full_dst_register dst_y =
7124         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7125
7126      emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7127                           &s1_yyyy, NULL, inst->Instruction.Saturate,
7128                           inst->Instruction.Precise);
7129   }
7130
7131   /* MOV dst.z, s0.z */
7132   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7133      struct tgsi_full_dst_register dst_z =
7134         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7135
7136      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7137                           &dst_z, &s0_zzzz, NULL, NULL,
7138                           inst->Instruction.Saturate,
7139                           inst->Instruction.Precise);
7140  }
7141
7142   /* MOV dst.w, s1.w */
7143   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7144      struct tgsi_full_dst_register dst_w =
7145         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7146
7147      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7148                           &dst_w, &s1_wwww, NULL, NULL,
7149                           inst->Instruction.Saturate,
7150                           inst->Instruction.Precise);
7151   }
7152
7153   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7154   free_temp_indexes(emit);
7155
7156   return TRUE;
7157}
7158
7159
7160/**
7161 * A helper function to return the stream index as specified in
7162 * the immediate register
7163 */
7164static inline unsigned
7165find_stream_index(struct svga_shader_emitter_v10 *emit,
7166                  const struct tgsi_full_src_register *src)
7167{
7168   return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7169}
7170
7171
7172/**
7173 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7174 */
7175static boolean
7176emit_endprim(struct svga_shader_emitter_v10 *emit,
7177             const struct tgsi_full_instruction *inst)
7178{
7179   assert(emit->unit == PIPE_SHADER_GEOMETRY);
7180
7181   begin_emit_instruction(emit);
7182   if (emit->version >= 50) {
7183      unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7184
7185      if (emit->info.num_stream_output_components[streamIndex] == 0) {
7186         /**
7187          * If there is no output for this stream, discard this instruction.
7188          */
7189         emit->discard_instruction = TRUE;
7190      }
7191      else {
7192         emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
7193         assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7194         emit_stream_register(emit, streamIndex);
7195      }
7196   }
7197   else {
7198      emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
7199   }
7200   end_emit_instruction(emit);
7201   return TRUE;
7202}
7203
7204
7205/**
7206 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7207 */
7208static boolean
7209emit_ex2(struct svga_shader_emitter_v10 *emit,
7210         const struct tgsi_full_instruction *inst)
7211{
7212   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7213    * while VGPU10 computes four values.
7214    *
7215    * dst = EX2(src):
7216    *   dst.xyzw = 2.0 ^ src.x
7217    */
7218
7219   struct tgsi_full_src_register src_xxxx =
7220      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7221                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7222
7223   /* EXP tmp, s0.xxxx */
7224   emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7225                        NULL, NULL,
7226                        inst->Instruction.Saturate,
7227                        inst->Instruction.Precise);
7228
7229   return TRUE;
7230}
7231
7232
7233/**
7234 * Emit code for TGSI_OPCODE_EXP instruction.
7235 */
7236static boolean
7237emit_exp(struct svga_shader_emitter_v10 *emit,
7238         const struct tgsi_full_instruction *inst)
7239{
7240   /*
7241    * dst.x = 2 ^ floor(s0.x)
7242    * dst.y = s0.x - floor(s0.x)
7243    * dst.z = 2 ^ s0.x
7244    * dst.w = 1.0
7245    */
7246
7247   struct tgsi_full_src_register src_xxxx =
7248      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7249   unsigned tmp = get_temp_index(emit);
7250   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7251   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7252
7253   /*
7254    * If dst and src are the same we need to create
7255    * a temporary for it and insert a extra move.
7256    */
7257   unsigned tmp_move = get_temp_index(emit);
7258   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7259   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7260
7261   /* only use X component of temp reg */
7262   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7263   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7264
7265   /* ROUND_NI tmp.x, s0.x */
7266   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7267                        &src_xxxx); /* round to -infinity */
7268
7269   /* EXP dst.x, tmp.x */
7270   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7271      struct tgsi_full_dst_register dst_x =
7272         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7273
7274      emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7275                           NULL, NULL,
7276                           inst->Instruction.Saturate,
7277                           inst->Instruction.Precise);
7278   }
7279
7280   /* ADD dst.y, s0.x, -tmp */
7281   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7282      struct tgsi_full_dst_register dst_y =
7283         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7284      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7285
7286      emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7287                           &neg_tmp_src, NULL,
7288                           inst->Instruction.Saturate,
7289                           inst->Instruction.Precise);
7290   }
7291
7292   /* EXP dst.z, s0.x */
7293   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7294      struct tgsi_full_dst_register dst_z =
7295         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7296
7297      emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7298                           NULL, NULL,
7299                           inst->Instruction.Saturate,
7300                           inst->Instruction.Precise);
7301   }
7302
7303   /* MOV dst.w, 1.0 */
7304   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7305      struct tgsi_full_dst_register dst_w =
7306         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7307      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7308
7309      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7310   }
7311
7312   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7313
7314   free_temp_indexes(emit);
7315
7316   return TRUE;
7317}
7318
7319
7320/**
7321 * Emit code for TGSI_OPCODE_IF instruction.
7322 */
7323static boolean
7324emit_if(struct svga_shader_emitter_v10 *emit,
7325        const struct tgsi_full_src_register *src)
7326{
7327   VGPU10OpcodeToken0 opcode0;
7328
7329   /* The src register should be a scalar */
7330   assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7331          src->Register.SwizzleX == src->Register.SwizzleZ &&
7332          src->Register.SwizzleX == src->Register.SwizzleW);
7333
7334   /* The only special thing here is that we need to set the
7335    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7336    * src.x is non-zero.
7337    */
7338   opcode0.value = 0;
7339   opcode0.opcodeType = VGPU10_OPCODE_IF;
7340   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7341
7342   begin_emit_instruction(emit);
7343   emit_dword(emit, opcode0.value);
7344   emit_src_register(emit, src);
7345   end_emit_instruction(emit);
7346
7347   return TRUE;
7348}
7349
7350
7351/**
7352 * Emit code for conditional discard instruction (discard fragment if any of
7353 * the register components are negative).
7354 */
7355static boolean
7356emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7357                  const struct tgsi_full_instruction *inst)
7358{
7359   unsigned tmp = get_temp_index(emit);
7360   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7361   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7362
7363   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7364
7365   struct tgsi_full_dst_register tmp_dst_x =
7366      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7367   struct tgsi_full_src_register tmp_src_xxxx =
7368      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7369
7370   /* tmp = src[0] < 0.0 */
7371   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7372
7373   if (!same_swizzle_terms(&inst->Src[0])) {
7374      /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7375       * logically OR the swizzle terms.  Most uses of this conditional
7376       * discard instruction only test one channel so it's good to
7377       * avoid these extra steps.
7378       */
7379      struct tgsi_full_src_register tmp_src_yyyy =
7380         scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7381      struct tgsi_full_src_register tmp_src_zzzz =
7382         scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7383      struct tgsi_full_src_register tmp_src_wwww =
7384         scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7385
7386      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7387                           &tmp_src_yyyy);
7388      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7389                           &tmp_src_zzzz);
7390      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7391                           &tmp_src_wwww);
7392   }
7393
7394   begin_emit_instruction(emit);
7395   emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
7396   emit_src_register(emit, &tmp_src_xxxx);
7397   end_emit_instruction(emit);
7398
7399   free_temp_indexes(emit);
7400
7401   return TRUE;
7402}
7403
7404
7405/**
7406 * Emit code for the unconditional discard instruction.
7407 */
7408static boolean
7409emit_discard(struct svga_shader_emitter_v10 *emit,
7410             const struct tgsi_full_instruction *inst)
7411{
7412   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7413
7414   /* DISCARD if 0.0 is zero */
7415   begin_emit_instruction(emit);
7416   emit_discard_opcode(emit, FALSE);
7417   emit_src_register(emit, &zero);
7418   end_emit_instruction(emit);
7419
7420   return TRUE;
7421}
7422
7423
7424/**
7425 * Emit code for TGSI_OPCODE_LG2 instruction.
7426 */
7427static boolean
7428emit_lg2(struct svga_shader_emitter_v10 *emit,
7429         const struct tgsi_full_instruction *inst)
7430{
7431   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7432    * while VGPU10 computes four values.
7433    *
7434    * dst = LG2(src):
7435    *   dst.xyzw = log2(src.x)
7436    */
7437
7438   struct tgsi_full_src_register src_xxxx =
7439      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7440                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7441
7442   /* LOG tmp, s0.xxxx */
7443   emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7444                        &inst->Dst[0], &src_xxxx, NULL, NULL,
7445                        inst->Instruction.Saturate,
7446                        inst->Instruction.Precise);
7447
7448   return TRUE;
7449}
7450
7451
7452/**
7453 * Emit code for TGSI_OPCODE_LIT instruction.
7454 */
7455static boolean
7456emit_lit(struct svga_shader_emitter_v10 *emit,
7457         const struct tgsi_full_instruction *inst)
7458{
7459   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7460
7461   /*
7462    * If dst and src are the same we need to create
7463    * a temporary for it and insert a extra move.
7464    */
7465   unsigned tmp_move = get_temp_index(emit);
7466   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7467   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7468
7469   /*
7470    * dst.x = 1
7471    * dst.y = max(src.x, 0)
7472    * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7473    * dst.w = 1
7474    */
7475
7476   /* MOV dst.x, 1.0 */
7477   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7478      struct tgsi_full_dst_register dst_x =
7479         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7480      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7481   }
7482
7483   /* MOV dst.w, 1.0 */
7484   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7485      struct tgsi_full_dst_register dst_w =
7486         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7487      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7488   }
7489
7490   /* MAX dst.y, src.x, 0.0 */
7491   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7492      struct tgsi_full_dst_register dst_y =
7493         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7494      struct tgsi_full_src_register zero =
7495         make_immediate_reg_float(emit, 0.0f);
7496      struct tgsi_full_src_register src_xxxx =
7497         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7498                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7499
7500      emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7501                           &zero, NULL, inst->Instruction.Saturate, FALSE);
7502   }
7503
7504   /*
7505    * tmp1 = clamp(src.w, -128, 128);
7506    *   MAX tmp1, src.w, -128
7507    *   MIN tmp1, tmp1, 128
7508    *
7509    * tmp2 = max(tmp2, 0);
7510    *   MAX tmp2, src.y, 0
7511    *
7512    * tmp1 = pow(tmp2, tmp1);
7513    *   LOG tmp2, tmp2
7514    *   MUL tmp1, tmp2, tmp1
7515    *   EXP tmp1, tmp1
7516    *
7517    * tmp1 = (src.w == 0) ? 1 : tmp1;
7518    *   EQ tmp2, 0, src.w
7519    *   MOVC tmp1, tmp2, 1.0, tmp1
7520    *
7521    * dst.z = (0 < src.x) ? tmp1 : 0;
7522    *   LT tmp2, 0, src.x
7523    *   MOVC dst.z, tmp2, tmp1, 0.0
7524    */
7525   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7526      struct tgsi_full_dst_register dst_z =
7527         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7528
7529      unsigned tmp1 = get_temp_index(emit);
7530      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7531      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7532      unsigned tmp2 = get_temp_index(emit);
7533      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7534      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7535
7536      struct tgsi_full_src_register src_xxxx =
7537         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7538      struct tgsi_full_src_register src_yyyy =
7539         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7540      struct tgsi_full_src_register src_wwww =
7541         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7542
7543      struct tgsi_full_src_register zero =
7544         make_immediate_reg_float(emit, 0.0f);
7545      struct tgsi_full_src_register lowerbound =
7546         make_immediate_reg_float(emit, -128.0f);
7547      struct tgsi_full_src_register upperbound =
7548         make_immediate_reg_float(emit, 128.0f);
7549
7550      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7551                           &lowerbound);
7552      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7553                           &upperbound);
7554      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7555                           &zero);
7556
7557      /* POW tmp1, tmp2, tmp1 */
7558      /* LOG tmp2, tmp2 */
7559      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7560
7561      /* MUL tmp1, tmp2, tmp1 */
7562      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7563                           &tmp1_src);
7564
7565      /* EXP tmp1, tmp1 */
7566      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7567
7568      /* EQ tmp2, 0, src.w */
7569      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7570      /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7571      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7572                           &tmp2_src, &one, &tmp1_src);
7573
7574      /* LT tmp2, 0, src.x */
7575      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7576      /* MOVC dst.z, tmp2, tmp1, 0.0 */
7577      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7578                           &tmp2_src, &tmp1_src, &zero);
7579   }
7580
7581   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7582   free_temp_indexes(emit);
7583
7584   return TRUE;
7585}
7586
7587
7588/**
7589 * Emit Level Of Detail Query (LODQ) instruction.
7590 */
7591static boolean
7592emit_lodq(struct svga_shader_emitter_v10 *emit,
7593          const struct tgsi_full_instruction *inst)
7594{
7595   const uint unit = inst->Src[1].Register.Index;
7596
7597   assert(emit->version >= 41);
7598
7599   /* LOD dst, coord, resource, sampler */
7600   begin_emit_instruction(emit);
7601   emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
7602   emit_dst_register(emit, &inst->Dst[0]);
7603   emit_src_register(emit, &inst->Src[0]); /* coord */
7604   emit_resource_register(emit, unit);
7605   emit_sampler_register(emit, unit);
7606   end_emit_instruction(emit);
7607
7608   return TRUE;
7609}
7610
7611
7612/**
7613 * Emit code for TGSI_OPCODE_LOG instruction.
7614 */
7615static boolean
7616emit_log(struct svga_shader_emitter_v10 *emit,
7617         const struct tgsi_full_instruction *inst)
7618{
7619   /*
7620    * dst.x = floor(lg2(abs(s0.x)))
7621    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7622    * dst.z = lg2(abs(s0.x))
7623    * dst.w = 1.0
7624    */
7625
7626   struct tgsi_full_src_register src_xxxx =
7627      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7628   unsigned tmp = get_temp_index(emit);
7629   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7630   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7631   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7632
7633   /* only use X component of temp reg */
7634   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7635   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7636
7637   /* LOG tmp.x, abs(s0.x) */
7638   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7639      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7640   }
7641
7642   /* MOV dst.z, tmp.x */
7643   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7644      struct tgsi_full_dst_register dst_z =
7645         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7646
7647      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7648                           &dst_z, &tmp_src, NULL, NULL,
7649                           inst->Instruction.Saturate, FALSE);
7650   }
7651
7652   /* FLR tmp.x, tmp.x */
7653   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7654      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7655   }
7656
7657   /* MOV dst.x, tmp.x */
7658   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7659      struct tgsi_full_dst_register dst_x =
7660         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7661
7662      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7663                           &dst_x, &tmp_src, NULL, NULL,
7664                           inst->Instruction.Saturate, FALSE);
7665   }
7666
7667   /* EXP tmp.x, tmp.x */
7668   /* DIV dst.y, abs(s0.x), tmp.x */
7669   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7670      struct tgsi_full_dst_register dst_y =
7671         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7672
7673      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7674      emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7675                           &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7676   }
7677
7678   /* MOV dst.w, 1.0 */
7679   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7680      struct tgsi_full_dst_register dst_w =
7681         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7682      struct tgsi_full_src_register one =
7683         make_immediate_reg_float(emit, 1.0f);
7684
7685      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7686   }
7687
7688   free_temp_indexes(emit);
7689
7690   return TRUE;
7691}
7692
7693
7694/**
7695 * Emit code for TGSI_OPCODE_LRP instruction.
7696 */
7697static boolean
7698emit_lrp(struct svga_shader_emitter_v10 *emit,
7699         const struct tgsi_full_instruction *inst)
7700{
7701   /* dst = LRP(s0, s1, s2):
7702    *   dst = s0 * (s1 - s2) + s2
7703    * Translates into:
7704    *   SUB tmp, s1, s2;        tmp = s1 - s2
7705    *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7706    */
7707   unsigned tmp = get_temp_index(emit);
7708   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7709   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7710   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7711
7712   /* ADD tmp, s1, -s2 */
7713   emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7714                        &inst->Src[1], &neg_src2, NULL, FALSE,
7715                        inst->Instruction.Precise);
7716
7717   /* MAD dst, s1, tmp, s3 */
7718   emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7719                        &inst->Src[0], &src_tmp, &inst->Src[2],
7720                        inst->Instruction.Saturate,
7721                        inst->Instruction.Precise);
7722
7723   free_temp_indexes(emit);
7724
7725   return TRUE;
7726}
7727
7728
7729/**
7730 * Emit code for TGSI_OPCODE_POW instruction.
7731 */
7732static boolean
7733emit_pow(struct svga_shader_emitter_v10 *emit,
7734         const struct tgsi_full_instruction *inst)
7735{
7736   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7737    * src1.x while VGPU10 computes four values.
7738    *
7739    * dst = POW(src0, src1):
7740    *   dst.xyzw = src0.x ^ src1.x
7741    */
7742   unsigned tmp = get_temp_index(emit);
7743   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7744   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7745   struct tgsi_full_src_register src0_xxxx =
7746      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7747                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7748   struct tgsi_full_src_register src1_xxxx =
7749      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7750                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7751
7752   /* LOG tmp, s0.xxxx */
7753   emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7754                        &tmp_dst, &src0_xxxx, NULL, NULL,
7755                        FALSE, inst->Instruction.Precise);
7756
7757   /* MUL tmp, tmp, s1.xxxx */
7758   emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7759                        &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7760                        FALSE, inst->Instruction.Precise);
7761
7762   /* EXP tmp, s0.xxxx */
7763   emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7764                        &inst->Dst[0], &tmp_src, NULL, NULL,
7765                        inst->Instruction.Saturate,
7766                        inst->Instruction.Precise);
7767
7768   /* free tmp */
7769   free_temp_indexes(emit);
7770
7771   return TRUE;
7772}
7773
7774
7775/**
7776 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7777 */
7778static boolean
7779emit_rcp(struct svga_shader_emitter_v10 *emit,
7780         const struct tgsi_full_instruction *inst)
7781{
7782   if (emit->version >= 50) {
7783      /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7784       * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7785       * to manipulate the src register's swizzle.
7786       */
7787      struct tgsi_full_src_register src = inst->Src[0];
7788      src.Register.SwizzleY =
7789      src.Register.SwizzleZ =
7790      src.Register.SwizzleW = src.Register.SwizzleX;
7791
7792      begin_emit_instruction(emit);
7793      emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7794                          inst->Instruction.Saturate,
7795                          inst->Instruction.Precise);
7796      emit_dst_register(emit, &inst->Dst[0]);
7797      emit_src_register(emit, &src);
7798      end_emit_instruction(emit);
7799   }
7800   else {
7801      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7802
7803      unsigned tmp = get_temp_index(emit);
7804      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7805      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7806
7807      struct tgsi_full_dst_register tmp_dst_x =
7808         writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7809      struct tgsi_full_src_register tmp_src_xxxx =
7810         scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7811
7812      /* DIV tmp.x, 1.0, s0 */
7813      emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7814                           &tmp_dst_x, &one, &inst->Src[0], NULL,
7815                           FALSE, inst->Instruction.Precise);
7816
7817      /* MOV dst, tmp.xxxx */
7818      emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7819                           &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7820                           inst->Instruction.Saturate,
7821                           inst->Instruction.Precise);
7822
7823      free_temp_indexes(emit);
7824   }
7825
7826   return TRUE;
7827}
7828
7829
7830/**
7831 * Emit code for TGSI_OPCODE_RSQ instruction.
7832 */
7833static boolean
7834emit_rsq(struct svga_shader_emitter_v10 *emit,
7835         const struct tgsi_full_instruction *inst)
7836{
7837   /* dst = RSQ(src):
7838    *   dst.xyzw = 1 / sqrt(src.x)
7839    * Translates into:
7840    *   RSQ tmp, src.x
7841    *   MOV dst, tmp.xxxx
7842    */
7843
7844   unsigned tmp = get_temp_index(emit);
7845   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7846   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7847
7848   struct tgsi_full_dst_register tmp_dst_x =
7849      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7850   struct tgsi_full_src_register tmp_src_xxxx =
7851      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7852
7853   /* RSQ tmp, src.x */
7854   emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7855                        &tmp_dst_x, &inst->Src[0], NULL, NULL,
7856                        FALSE, inst->Instruction.Precise);
7857
7858   /* MOV dst, tmp.xxxx */
7859   emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7860                        &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7861                        inst->Instruction.Saturate,
7862                        inst->Instruction.Precise);
7863
7864   /* free tmp */
7865   free_temp_indexes(emit);
7866
7867   return TRUE;
7868}
7869
7870
7871/**
7872 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7873 */
7874static boolean
7875emit_seq(struct svga_shader_emitter_v10 *emit,
7876         const struct tgsi_full_instruction *inst)
7877{
7878   /* dst = SEQ(s0, s1):
7879    *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7880    * Translates into:
7881    *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7882    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7883    */
7884   unsigned tmp = get_temp_index(emit);
7885   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7886   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7887   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7888   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7889
7890   /* EQ tmp, s0, s1 */
7891   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7892                        &inst->Src[1]);
7893
7894   /* MOVC dst, tmp, one, zero */
7895   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7896                        &one, &zero);
7897
7898   free_temp_indexes(emit);
7899
7900   return TRUE;
7901}
7902
7903
7904/**
7905 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7906 */
7907static boolean
7908emit_sge(struct svga_shader_emitter_v10 *emit,
7909         const struct tgsi_full_instruction *inst)
7910{
7911   /* dst = SGE(s0, s1):
7912    *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7913    * Translates into:
7914    *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7915    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7916    */
7917   unsigned tmp = get_temp_index(emit);
7918   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7919   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7920   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7921   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7922
7923   /* GE tmp, s0, s1 */
7924   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7925                        &inst->Src[1]);
7926
7927   /* MOVC dst, tmp, one, zero */
7928   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7929                        &one, &zero);
7930
7931   free_temp_indexes(emit);
7932
7933   return TRUE;
7934}
7935
7936
7937/**
7938 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7939 */
7940static boolean
7941emit_sgt(struct svga_shader_emitter_v10 *emit,
7942         const struct tgsi_full_instruction *inst)
7943{
7944   /* dst = SGT(s0, s1):
7945    *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
7946    * Translates into:
7947    *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7948    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7949    */
7950   unsigned tmp = get_temp_index(emit);
7951   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7952   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7953   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7954   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7955
7956   /* LT tmp, s1, s0 */
7957   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7958                        &inst->Src[0]);
7959
7960   /* MOVC dst, tmp, one, zero */
7961   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7962                        &one, &zero);
7963
7964   free_temp_indexes(emit);
7965
7966   return TRUE;
7967}
7968
7969
7970/**
7971 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7972 */
7973static boolean
7974emit_sincos(struct svga_shader_emitter_v10 *emit,
7975         const struct tgsi_full_instruction *inst)
7976{
7977   unsigned tmp = get_temp_index(emit);
7978   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7979   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7980
7981   struct tgsi_full_src_register tmp_src_xxxx =
7982      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7983   struct tgsi_full_dst_register tmp_dst_x =
7984      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7985
7986   begin_emit_instruction(emit);
7987   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7988
7989   if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7990   {
7991      emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
7992      emit_null_dst_register(emit);  /* second destination register */
7993   }
7994   else {
7995      emit_null_dst_register(emit);
7996      emit_dst_register(emit, &tmp_dst_x);
7997   }
7998
7999   emit_src_register(emit, &inst->Src[0]);
8000   end_emit_instruction(emit);
8001
8002   emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8003                        &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8004                        inst->Instruction.Saturate,
8005                        inst->Instruction.Precise);
8006
8007   free_temp_indexes(emit);
8008
8009   return TRUE;
8010}
8011
8012
8013/**
8014 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8015 */
8016static boolean
8017emit_sle(struct svga_shader_emitter_v10 *emit,
8018         const struct tgsi_full_instruction *inst)
8019{
8020   /* dst = SLE(s0, s1):
8021    *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
8022    * Translates into:
8023    *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
8024    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8025    */
8026   unsigned tmp = get_temp_index(emit);
8027   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8028   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8029   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8030   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8031
8032   /* GE tmp, s1, s0 */
8033   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8034                        &inst->Src[0]);
8035
8036   /* MOVC dst, tmp, one, zero */
8037   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8038                        &one, &zero);
8039
8040   free_temp_indexes(emit);
8041
8042   return TRUE;
8043}
8044
8045
8046/**
8047 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8048 */
8049static boolean
8050emit_slt(struct svga_shader_emitter_v10 *emit,
8051         const struct tgsi_full_instruction *inst)
8052{
8053   /* dst = SLT(s0, s1):
8054    *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
8055    * Translates into:
8056    *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8057    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8058    */
8059   unsigned tmp = get_temp_index(emit);
8060   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8061   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8062   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8063   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8064
8065   /* LT tmp, s0, s1 */
8066   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8067                        &inst->Src[1]);
8068
8069   /* MOVC dst, tmp, one, zero */
8070   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8071                        &one, &zero);
8072
8073   free_temp_indexes(emit);
8074
8075   return TRUE;
8076}
8077
8078
8079/**
8080 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
8081 */
8082static boolean
8083emit_sne(struct svga_shader_emitter_v10 *emit,
8084         const struct tgsi_full_instruction *inst)
8085{
8086   /* dst = SNE(s0, s1):
8087    *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
8088    * Translates into:
8089    *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
8090    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8091    */
8092   unsigned tmp = get_temp_index(emit);
8093   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8094   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8095   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8096   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8097
8098   /* NE tmp, s0, s1 */
8099   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
8100                        &inst->Src[1]);
8101
8102   /* MOVC dst, tmp, one, zero */
8103   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8104                        &one, &zero);
8105
8106   free_temp_indexes(emit);
8107
8108   return TRUE;
8109}
8110
8111
8112/**
8113 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8114 */
8115static boolean
8116emit_ssg(struct svga_shader_emitter_v10 *emit,
8117         const struct tgsi_full_instruction *inst)
8118{
8119   /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8120    * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8121    * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8122    * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8123    * Translates into:
8124    *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8125    *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8126    *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8127    *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
8128    */
8129   struct tgsi_full_src_register zero =
8130      make_immediate_reg_float(emit, 0.0f);
8131   struct tgsi_full_src_register one =
8132      make_immediate_reg_float(emit, 1.0f);
8133   struct tgsi_full_src_register neg_one =
8134      make_immediate_reg_float(emit, -1.0f);
8135
8136   unsigned tmp1 = get_temp_index(emit);
8137   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8138   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8139
8140   unsigned tmp2 = get_temp_index(emit);
8141   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8142   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8143
8144   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8145                        &zero);
8146   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8147                        &neg_one, &zero);
8148   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8149                        &inst->Src[0]);
8150   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8151                        &one, &tmp2_src);
8152
8153   free_temp_indexes(emit);
8154
8155   return TRUE;
8156}
8157
8158
8159/**
8160 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8161 */
8162static boolean
8163emit_issg(struct svga_shader_emitter_v10 *emit,
8164          const struct tgsi_full_instruction *inst)
8165{
8166   /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8167    * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8168    * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8169    * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8170    * Translates into:
8171    *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
8172    *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
8173    *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
8174    */
8175   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8176
8177   unsigned tmp1 = get_temp_index(emit);
8178   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8179   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8180
8181   unsigned tmp2 = get_temp_index(emit);
8182   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8183   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8184
8185   struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8186
8187   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8188                        &inst->Src[0], &zero);
8189   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8190                        &zero, &inst->Src[0]);
8191   emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8192                        &tmp1_src, &neg_tmp2);
8193
8194   free_temp_indexes(emit);
8195
8196   return TRUE;
8197}
8198
8199
8200/**
8201 * Emit a comparison instruction.  The dest register will get
8202 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
8203 */
8204static void
8205emit_comparison(struct svga_shader_emitter_v10 *emit,
8206                SVGA3dCmpFunc func,
8207                const struct tgsi_full_dst_register *dst,
8208                const struct tgsi_full_src_register *src0,
8209                const struct tgsi_full_src_register *src1)
8210{
8211   struct tgsi_full_src_register immediate;
8212   VGPU10OpcodeToken0 opcode0;
8213   boolean swapSrc = FALSE;
8214
8215   /* Sanity checks for svga vs. gallium enums */
8216   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
8217   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
8218
8219   opcode0.value = 0;
8220
8221   switch (func) {
8222   case SVGA3D_CMP_NEVER:
8223      immediate = make_immediate_reg_int(emit, 0);
8224      /* MOV dst, {0} */
8225      begin_emit_instruction(emit);
8226      emit_dword(emit, VGPU10_OPCODE_MOV);
8227      emit_dst_register(emit, dst);
8228      emit_src_register(emit, &immediate);
8229      end_emit_instruction(emit);
8230      return;
8231   case SVGA3D_CMP_ALWAYS:
8232      immediate = make_immediate_reg_int(emit, -1);
8233      /* MOV dst, {-1} */
8234      begin_emit_instruction(emit);
8235      emit_dword(emit, VGPU10_OPCODE_MOV);
8236      emit_dst_register(emit, dst);
8237      emit_src_register(emit, &immediate);
8238      end_emit_instruction(emit);
8239      return;
8240   case SVGA3D_CMP_LESS:
8241      opcode0.opcodeType = VGPU10_OPCODE_LT;
8242      break;
8243   case SVGA3D_CMP_EQUAL:
8244      opcode0.opcodeType = VGPU10_OPCODE_EQ;
8245      break;
8246   case SVGA3D_CMP_LESSEQUAL:
8247      opcode0.opcodeType = VGPU10_OPCODE_GE;
8248      swapSrc = TRUE;
8249      break;
8250   case SVGA3D_CMP_GREATER:
8251      opcode0.opcodeType = VGPU10_OPCODE_LT;
8252      swapSrc = TRUE;
8253      break;
8254   case SVGA3D_CMP_NOTEQUAL:
8255      opcode0.opcodeType = VGPU10_OPCODE_NE;
8256      break;
8257   case SVGA3D_CMP_GREATEREQUAL:
8258      opcode0.opcodeType = VGPU10_OPCODE_GE;
8259      break;
8260   default:
8261      assert(!"Unexpected comparison mode");
8262      opcode0.opcodeType = VGPU10_OPCODE_EQ;
8263   }
8264
8265   begin_emit_instruction(emit);
8266   emit_dword(emit, opcode0.value);
8267   emit_dst_register(emit, dst);
8268   if (swapSrc) {
8269      emit_src_register(emit, src1);
8270      emit_src_register(emit, src0);
8271   }
8272   else {
8273      emit_src_register(emit, src0);
8274      emit_src_register(emit, src1);
8275   }
8276   end_emit_instruction(emit);
8277}
8278
8279
8280/**
8281 * Get texel/address offsets for a texture instruction.
8282 */
8283static void
8284get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8285                  const struct tgsi_full_instruction *inst, int offsets[3])
8286{
8287   if (inst->Texture.NumOffsets == 1) {
8288      /* According to OpenGL Shader Language spec the offsets are only
8289       * fetched from a previously-declared immediate/literal.
8290       */
8291      const struct tgsi_texture_offset *off = inst->TexOffsets;
8292      const unsigned index = off[0].Index;
8293      const unsigned swizzleX = off[0].SwizzleX;
8294      const unsigned swizzleY = off[0].SwizzleY;
8295      const unsigned swizzleZ = off[0].SwizzleZ;
8296      const union tgsi_immediate_data *imm = emit->immediates[index];
8297
8298      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8299
8300      offsets[0] = imm[swizzleX].Int;
8301      offsets[1] = imm[swizzleY].Int;
8302      offsets[2] = imm[swizzleZ].Int;
8303   }
8304   else {
8305      offsets[0] = offsets[1] = offsets[2] = 0;
8306   }
8307}
8308
8309
8310/**
8311 * Set up the coordinate register for texture sampling.
8312 * When we're sampling from a RECT texture we have to scale the
8313 * unnormalized coordinate to a normalized coordinate.
8314 * We do that by multiplying the coordinate by an "extra" constant.
8315 * An alternative would be to use the RESINFO instruction to query the
8316 * texture's size.
8317 */
8318static struct tgsi_full_src_register
8319setup_texcoord(struct svga_shader_emitter_v10 *emit,
8320               unsigned unit,
8321               const struct tgsi_full_src_register *coord)
8322{
8323   if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8324      unsigned scale_index = emit->texcoord_scale_index[unit];
8325      unsigned tmp = get_temp_index(emit);
8326      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8327      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8328      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8329
8330      if (emit->key.tex[unit].texel_bias) {
8331         /* to fix texture coordinate rounding issue, 0.0001 offset is
8332          * been added. This fixes piglit test fbo-blit-scaled-linear. */
8333         struct tgsi_full_src_register offset =
8334            make_immediate_reg_float(emit, 0.0001f);
8335
8336         /* ADD tmp, coord, offset */
8337         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8338                              coord, &offset);
8339         /* MUL tmp, tmp, scale */
8340         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8341                              &tmp_src, &scale_src);
8342      }
8343      else {
8344         /* MUL tmp, coord, const[] */
8345         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8346                              coord, &scale_src);
8347      }
8348      return tmp_src;
8349   }
8350   else {
8351      /* use texcoord as-is */
8352      return *coord;
8353   }
8354}
8355
8356
8357/**
8358 * For SAMPLE_C instructions, emit the extra src register which indicates
8359 * the reference/comparision value.
8360 */
8361static void
8362emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8363                          enum tgsi_texture_type target,
8364                          const struct tgsi_full_src_register *coord)
8365{
8366   struct tgsi_full_src_register coord_src_ref;
8367   int component;
8368
8369   assert(tgsi_is_shadow_target(target));
8370
8371   component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8372   assert(component >= 0);
8373
8374   coord_src_ref = scalar_src(coord, component);
8375
8376   emit_src_register(emit, &coord_src_ref);
8377}
8378
8379
8380/**
8381 * Info for implementing texture swizzles.
8382 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
8383 * functions use this to encapsulate the extra steps needed to perform
8384 * a texture swizzle, or shadow/depth comparisons.
8385 * The shadow/depth comparison is only done here if for the cases where
8386 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
8387 */
8388struct tex_swizzle_info
8389{
8390   boolean swizzled;
8391   boolean shadow_compare;
8392   unsigned unit;
8393   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
8394   struct tgsi_full_src_register tmp_src;
8395   struct tgsi_full_dst_register tmp_dst;
8396   const struct tgsi_full_dst_register *inst_dst;
8397   const struct tgsi_full_src_register *coord_src;
8398};
8399
8400
8401/**
8402 * Do setup for handling texture swizzles or shadow compares.
8403 * \param unit  the texture unit
8404 * \param inst  the TGSI texture instruction
8405 * \param shadow_compare  do shadow/depth comparison?
8406 * \param swz  returns the swizzle info
8407 */
8408static void
8409begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8410                  unsigned unit,
8411                  const struct tgsi_full_instruction *inst,
8412                  boolean shadow_compare,
8413                  struct tex_swizzle_info *swz)
8414{
8415   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
8416                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
8417                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
8418                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
8419
8420   swz->shadow_compare = shadow_compare;
8421   swz->texture_target = inst->Texture.Texture;
8422
8423   if (swz->swizzled || shadow_compare) {
8424      /* Allocate temp register for the result of the SAMPLE instruction
8425       * and the source of the MOV/compare/swizzle instructions.
8426       */
8427      unsigned tmp = get_temp_index(emit);
8428      swz->tmp_src = make_src_temp_reg(tmp);
8429      swz->tmp_dst = make_dst_temp_reg(tmp);
8430
8431      swz->unit = unit;
8432   }
8433   swz->inst_dst = &inst->Dst[0];
8434   swz->coord_src = &inst->Src[0];
8435
8436   emit->shadow_compare_units |= shadow_compare << unit;
8437}
8438
8439
8440/**
8441 * Returns the register to put the SAMPLE instruction results into.
8442 * This will either be the original instruction dst reg (if no swizzle
8443 * and no shadow comparison) or a temporary reg if there is a swizzle.
8444 */
8445static const struct tgsi_full_dst_register *
8446get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8447{
8448   return (swz->swizzled || swz->shadow_compare)
8449      ? &swz->tmp_dst : swz->inst_dst;
8450}
8451
8452
8453/**
8454 * This emits the MOV instruction that actually implements a texture swizzle
8455 * and/or shadow comparison.
8456 */
8457static void
8458end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8459                const struct tex_swizzle_info *swz)
8460{
8461   if (swz->shadow_compare) {
8462      /* Emit extra instructions to compare the fetched texel value against
8463       * a texture coordinate component.  The result of the comparison
8464       * is 0.0 or 1.0.
8465       */
8466      struct tgsi_full_src_register coord_src;
8467      struct tgsi_full_src_register texel_src =
8468         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
8469      struct tgsi_full_src_register one =
8470         make_immediate_reg_float(emit, 1.0f);
8471      /* convert gallium comparison func to SVGA comparison func */
8472      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
8473
8474      int component =
8475         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
8476      assert(component >= 0);
8477      coord_src = scalar_src(swz->coord_src, component);
8478
8479      /* COMPARE tmp, coord, texel */
8480      emit_comparison(emit, compare_func,
8481                      &swz->tmp_dst, &coord_src, &texel_src);
8482
8483      /* AND dest, tmp, {1.0} */
8484      begin_emit_instruction(emit);
8485      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
8486      if (swz->swizzled) {
8487         emit_dst_register(emit, &swz->tmp_dst);
8488      }
8489      else {
8490         emit_dst_register(emit, swz->inst_dst);
8491      }
8492      emit_src_register(emit, &swz->tmp_src);
8493      emit_src_register(emit, &one);
8494      end_emit_instruction(emit);
8495   }
8496
8497   if (swz->swizzled) {
8498      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
8499      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
8500      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
8501      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
8502      unsigned writemask_0 = 0, writemask_1 = 0;
8503      boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
8504
8505      /* Swizzle w/out zero/one terms */
8506      struct tgsi_full_src_register src_swizzled =
8507         swizzle_src(&swz->tmp_src,
8508                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
8509                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
8510                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
8511                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
8512
8513      /* MOV dst, color(tmp).<swizzle> */
8514      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
8515                           swz->inst_dst, &src_swizzled);
8516
8517      /* handle swizzle zero terms */
8518      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
8519                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
8520                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
8521                     ((swz_a == PIPE_SWIZZLE_0) << 3));
8522      writemask_0 &= swz->inst_dst->Register.WriteMask;
8523
8524      if (writemask_0) {
8525         struct tgsi_full_src_register zero = int_tex ?
8526            make_immediate_reg_int(emit, 0) :
8527            make_immediate_reg_float(emit, 0.0f);
8528         struct tgsi_full_dst_register dst =
8529            writemask_dst(swz->inst_dst, writemask_0);
8530
8531         /* MOV dst.writemask_0, {0,0,0,0} */
8532         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
8533      }
8534
8535      /* handle swizzle one terms */
8536      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
8537                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
8538                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
8539                     ((swz_a == PIPE_SWIZZLE_1) << 3));
8540      writemask_1 &= swz->inst_dst->Register.WriteMask;
8541
8542      if (writemask_1) {
8543         struct tgsi_full_src_register one = int_tex ?
8544            make_immediate_reg_int(emit, 1) :
8545            make_immediate_reg_float(emit, 1.0f);
8546         struct tgsi_full_dst_register dst =
8547            writemask_dst(swz->inst_dst, writemask_1);
8548
8549         /* MOV dst.writemask_1, {1,1,1,1} */
8550         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
8551      }
8552   }
8553}
8554
8555
8556/**
8557 * Emit code for TGSI_OPCODE_SAMPLE instruction.
8558 */
8559static boolean
8560emit_sample(struct svga_shader_emitter_v10 *emit,
8561            const struct tgsi_full_instruction *inst)
8562{
8563   const unsigned resource_unit = inst->Src[1].Register.Index;
8564   const unsigned sampler_unit = inst->Src[2].Register.Index;
8565   struct tgsi_full_src_register coord;
8566   int offsets[3];
8567   struct tex_swizzle_info swz_info;
8568
8569   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
8570
8571   get_texel_offsets(emit, inst, offsets);
8572
8573   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
8574
8575   /* SAMPLE dst, coord(s0), resource, sampler */
8576   begin_emit_instruction(emit);
8577
8578   /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
8579    * with LOD=0.  But our virtual GPU accepts this as-is.
8580    */
8581   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
8582                      inst->Instruction.Saturate, offsets);
8583   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8584   emit_src_register(emit, &coord);
8585   emit_resource_register(emit, resource_unit);
8586   emit_sampler_register(emit, sampler_unit);
8587   end_emit_instruction(emit);
8588
8589   end_tex_swizzle(emit, &swz_info);
8590
8591   free_temp_indexes(emit);
8592
8593   return TRUE;
8594}
8595
8596
8597/**
8598 * Check if a texture instruction is valid.
8599 * An example of an invalid texture instruction is doing shadow comparison
8600 * with an integer-valued texture.
8601 * If we detect an invalid texture instruction, we replace it with:
8602 *   MOV dst, {1,1,1,1};
8603 * \return TRUE if valid, FALSE if invalid.
8604 */
8605static boolean
8606is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8607                         const struct tgsi_full_instruction *inst)
8608{
8609   const unsigned unit = inst->Src[1].Register.Index;
8610   const enum tgsi_texture_type target = inst->Texture.Texture;
8611   boolean valid = TRUE;
8612
8613   if (tgsi_is_shadow_target(target) &&
8614       is_integer_type(emit->sampler_return_type[unit])) {
8615      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8616      valid = FALSE;
8617   }
8618   /* XXX might check for other conditions in the future here */
8619
8620   if (!valid) {
8621      /* emit a MOV dst, {1,1,1,1} instruction. */
8622      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8623      begin_emit_instruction(emit);
8624      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
8625      emit_dst_register(emit, &inst->Dst[0]);
8626      emit_src_register(emit, &one);
8627      end_emit_instruction(emit);
8628   }
8629
8630   return valid;
8631}
8632
8633
8634/**
8635 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8636 */
8637static boolean
8638emit_tex(struct svga_shader_emitter_v10 *emit,
8639         const struct tgsi_full_instruction *inst)
8640{
8641   const uint unit = inst->Src[1].Register.Index;
8642   const enum tgsi_texture_type target = inst->Texture.Texture;
8643   VGPU10_OPCODE_TYPE opcode;
8644   struct tgsi_full_src_register coord;
8645   int offsets[3];
8646   struct tex_swizzle_info swz_info;
8647   boolean compare_in_shader;
8648
8649   /* check that the sampler returns a float */
8650   if (!is_valid_tex_instruction(emit, inst))
8651      return TRUE;
8652
8653   compare_in_shader = tgsi_is_shadow_target(target) &&
8654                       emit->key.tex[unit].compare_in_shader;
8655
8656   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8657
8658   get_texel_offsets(emit, inst, offsets);
8659
8660   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8661
8662   /* SAMPLE dst, coord(s0), resource, sampler */
8663   begin_emit_instruction(emit);
8664
8665   if (tgsi_is_shadow_target(target) && !compare_in_shader)
8666      opcode = VGPU10_OPCODE_SAMPLE_C;
8667   else
8668      opcode = VGPU10_OPCODE_SAMPLE;
8669
8670   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8671   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8672   emit_src_register(emit, &coord);
8673   emit_resource_register(emit, unit);
8674   emit_sampler_register(emit, unit);
8675   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8676      emit_tex_compare_refcoord(emit, target, &coord);
8677   }
8678   end_emit_instruction(emit);
8679
8680   end_tex_swizzle(emit, &swz_info);
8681
8682   free_temp_indexes(emit);
8683
8684   return TRUE;
8685}
8686
8687/**
8688 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8689 */
8690static boolean
8691emit_tg4(struct svga_shader_emitter_v10 *emit,
8692         const struct tgsi_full_instruction *inst)
8693{
8694   const uint unit = inst->Src[2].Register.Index;
8695   struct tgsi_full_src_register src;
8696   struct tgsi_full_src_register offset_src, sampler, ref;
8697   int offsets[3];
8698
8699   /* check that the sampler returns a float */
8700   if (!is_valid_tex_instruction(emit, inst))
8701      return TRUE;
8702
8703   if (emit->version >= 50) {
8704      unsigned target = inst->Texture.Texture;
8705      int index = inst->Src[1].Register.Index;
8706      const union tgsi_immediate_data *imm = emit->immediates[index];
8707      int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8708      unsigned select_swizzle = PIPE_SWIZZLE_X;
8709
8710      if (!tgsi_is_shadow_target(target)) {
8711         switch (select_comp) {
8712         case 0:
8713            select_swizzle = emit->key.tex[unit].swizzle_r;
8714            break;
8715         case 1:
8716            select_swizzle = emit->key.tex[unit].swizzle_g;
8717            break;
8718         case 2:
8719            select_swizzle = emit->key.tex[unit].swizzle_b;
8720            break;
8721         case 3:
8722            select_swizzle = emit->key.tex[unit].swizzle_a;
8723            break;
8724         default:
8725            assert(!"Unexpected component in texture gather swizzle");
8726         }
8727      }
8728      else {
8729         select_swizzle = emit->key.tex[unit].swizzle_r;
8730      }
8731
8732      if (select_swizzle == PIPE_SWIZZLE_1) {
8733         src = make_immediate_reg_float(emit, 1.0);
8734         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8735         return TRUE;
8736      }
8737      else if (select_swizzle == PIPE_SWIZZLE_0) {
8738         src = make_immediate_reg_float(emit, 0.0);
8739         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8740         return TRUE;
8741      }
8742
8743      src = setup_texcoord(emit, unit, &inst->Src[0]);
8744
8745      /* GATHER4 dst, coord, resource, sampler */
8746      /* GATHER4_C dst, coord, resource, sampler ref */
8747      /* GATHER4_PO dst, coord, offset resource, sampler */
8748      /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8749      begin_emit_instruction(emit);
8750      if (inst->Texture.NumOffsets == 1) {
8751         if (tgsi_is_shadow_target(target)) {
8752            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8753                        inst->Instruction.Saturate);
8754         }
8755         else {
8756            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8757                        inst->Instruction.Saturate);
8758         }
8759      }
8760      else {
8761         if (tgsi_is_shadow_target(target)) {
8762            emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8763                        inst->Instruction.Saturate);
8764         }
8765         else {
8766            emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8767                        inst->Instruction.Saturate);
8768         }
8769      }
8770
8771      emit_dst_register(emit, &inst->Dst[0]);
8772      emit_src_register(emit, &src);
8773      if (inst->Texture.NumOffsets == 1) {
8774         /* offset */
8775         offset_src = make_src_reg(inst->TexOffsets[0].File,
8776                                   inst->TexOffsets[0].Index);
8777         offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8778                                  inst->TexOffsets[0].SwizzleY,
8779                                  inst->TexOffsets[0].SwizzleZ,
8780                                  TGSI_SWIZZLE_W);
8781         emit_src_register(emit, &offset_src);
8782      }
8783
8784      /* resource */
8785      emit_resource_register(emit, unit);
8786
8787      /* sampler */
8788      sampler = make_src_reg(TGSI_FILE_SAMPLER,
8789                             emit->key.tex[unit].sampler_index);
8790      sampler.Register.SwizzleX =
8791      sampler.Register.SwizzleY =
8792      sampler.Register.SwizzleZ =
8793      sampler.Register.SwizzleW = select_swizzle;
8794      emit_src_register(emit, &sampler);
8795
8796      if (tgsi_is_shadow_target(target)) {
8797         /* ref */
8798         if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8799            ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8800            emit_tex_compare_refcoord(emit, target, &ref);
8801         }
8802         else {
8803            emit_tex_compare_refcoord(emit, target, &src);
8804         }
8805      }
8806
8807      end_emit_instruction(emit);
8808      free_temp_indexes(emit);
8809   }
8810   else {
8811      /* Only a single channel is supported in SM4_1 and we report
8812       * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8813       * Only the 0th component will be gathered.
8814       */
8815      switch (emit->key.tex[unit].swizzle_r) {
8816      case PIPE_SWIZZLE_X:
8817         get_texel_offsets(emit, inst, offsets);
8818         src = setup_texcoord(emit, unit, &inst->Src[0]);
8819
8820         /* Gather dst, coord, resource, sampler */
8821         begin_emit_instruction(emit);
8822         emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8823                            inst->Instruction.Saturate, offsets);
8824         emit_dst_register(emit, &inst->Dst[0]);
8825         emit_src_register(emit, &src);
8826         emit_resource_register(emit, unit);
8827
8828         /* sampler */
8829         sampler = make_src_reg(TGSI_FILE_SAMPLER,
8830                                emit->key.tex[unit].sampler_index);
8831         sampler.Register.SwizzleX =
8832         sampler.Register.SwizzleY =
8833         sampler.Register.SwizzleZ =
8834         sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8835         emit_src_register(emit, &sampler);
8836
8837         end_emit_instruction(emit);
8838         break;
8839      case PIPE_SWIZZLE_W:
8840      case PIPE_SWIZZLE_1:
8841         src = make_immediate_reg_float(emit, 1.0);
8842         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8843         break;
8844      case PIPE_SWIZZLE_Y:
8845      case PIPE_SWIZZLE_Z:
8846      case PIPE_SWIZZLE_0:
8847      default:
8848         src = make_immediate_reg_float(emit, 0.0);
8849         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8850         break;
8851      }
8852   }
8853
8854   return TRUE;
8855}
8856
8857
8858
8859/**
8860 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8861 */
8862static boolean
8863emit_tex2(struct svga_shader_emitter_v10 *emit,
8864         const struct tgsi_full_instruction *inst)
8865{
8866   const uint unit = inst->Src[2].Register.Index;
8867   unsigned target = inst->Texture.Texture;
8868   struct tgsi_full_src_register coord, ref;
8869   int offsets[3];
8870   struct tex_swizzle_info swz_info;
8871   VGPU10_OPCODE_TYPE opcode;
8872   boolean compare_in_shader;
8873
8874   /* check that the sampler returns a float */
8875   if (!is_valid_tex_instruction(emit, inst))
8876      return TRUE;
8877
8878   compare_in_shader = emit->key.tex[unit].compare_in_shader;
8879   if (compare_in_shader)
8880      opcode = VGPU10_OPCODE_SAMPLE;
8881   else
8882      opcode = VGPU10_OPCODE_SAMPLE_C;
8883
8884   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8885
8886   get_texel_offsets(emit, inst, offsets);
8887
8888   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8889   ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8890
8891   /* SAMPLE_C dst, coord, resource, sampler, ref */
8892   begin_emit_instruction(emit);
8893   emit_sample_opcode(emit, opcode,
8894                      inst->Instruction.Saturate, offsets);
8895   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8896   emit_src_register(emit, &coord);
8897   emit_resource_register(emit, unit);
8898   emit_sampler_register(emit, unit);
8899   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8900      emit_tex_compare_refcoord(emit, target, &ref);
8901   }
8902   end_emit_instruction(emit);
8903
8904   end_tex_swizzle(emit, &swz_info);
8905
8906   free_temp_indexes(emit);
8907
8908   return TRUE;
8909}
8910
8911
8912/**
8913 * Emit code for TGSI_OPCODE_TXP (projective texture)
8914 */
8915static boolean
8916emit_txp(struct svga_shader_emitter_v10 *emit,
8917         const struct tgsi_full_instruction *inst)
8918{
8919   const uint unit = inst->Src[1].Register.Index;
8920   const enum tgsi_texture_type target = inst->Texture.Texture;
8921   VGPU10_OPCODE_TYPE opcode;
8922   int offsets[3];
8923   unsigned tmp = get_temp_index(emit);
8924   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8925   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8926   struct tgsi_full_src_register src0_wwww =
8927      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8928   struct tgsi_full_src_register coord;
8929   struct tex_swizzle_info swz_info;
8930   boolean compare_in_shader;
8931
8932   /* check that the sampler returns a float */
8933   if (!is_valid_tex_instruction(emit, inst))
8934      return TRUE;
8935
8936   compare_in_shader = tgsi_is_shadow_target(target) &&
8937                       emit->key.tex[unit].compare_in_shader;
8938
8939   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8940
8941   get_texel_offsets(emit, inst, offsets);
8942
8943   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8944
8945   /* DIV tmp, coord, coord.wwww */
8946   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8947                        &coord, &src0_wwww);
8948
8949   /* SAMPLE dst, coord(tmp), resource, sampler */
8950   begin_emit_instruction(emit);
8951
8952   if (tgsi_is_shadow_target(target) && !compare_in_shader)
8953      /* NOTE: for non-fragment shaders, we should use
8954       * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8955       */
8956      opcode = VGPU10_OPCODE_SAMPLE_C;
8957   else
8958      opcode = VGPU10_OPCODE_SAMPLE;
8959
8960   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8961   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8962   emit_src_register(emit, &tmp_src);  /* projected coord */
8963   emit_resource_register(emit, unit);
8964   emit_sampler_register(emit, unit);
8965   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8966      emit_tex_compare_refcoord(emit, target, &tmp_src);
8967   }
8968   end_emit_instruction(emit);
8969
8970   end_tex_swizzle(emit, &swz_info);
8971
8972   free_temp_indexes(emit);
8973
8974   return TRUE;
8975}
8976
8977
8978/**
8979 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8980 */
8981static boolean
8982emit_txd(struct svga_shader_emitter_v10 *emit,
8983         const struct tgsi_full_instruction *inst)
8984{
8985   const uint unit = inst->Src[3].Register.Index;
8986   const enum tgsi_texture_type target = inst->Texture.Texture;
8987   int offsets[3];
8988   struct tgsi_full_src_register coord;
8989   struct tex_swizzle_info swz_info;
8990
8991   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8992                     &swz_info);
8993
8994   get_texel_offsets(emit, inst, offsets);
8995
8996   coord = setup_texcoord(emit, unit, &inst->Src[0]);
8997
8998   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8999   begin_emit_instruction(emit);
9000   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
9001                      inst->Instruction.Saturate, offsets);
9002   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9003   emit_src_register(emit, &coord);
9004   emit_resource_register(emit, unit);
9005   emit_sampler_register(emit, unit);
9006   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
9007   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
9008   end_emit_instruction(emit);
9009
9010   end_tex_swizzle(emit, &swz_info);
9011
9012   free_temp_indexes(emit);
9013
9014   return TRUE;
9015}
9016
9017
9018/**
9019 * Emit code for TGSI_OPCODE_TXF (texel fetch)
9020 */
9021static boolean
9022emit_txf(struct svga_shader_emitter_v10 *emit,
9023         const struct tgsi_full_instruction *inst)
9024{
9025   const uint unit = inst->Src[1].Register.Index;
9026   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
9027      && emit->key.tex[unit].num_samples > 1;
9028   int offsets[3];
9029   struct tex_swizzle_info swz_info;
9030
9031   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
9032
9033   get_texel_offsets(emit, inst, offsets);
9034
9035   if (msaa) {
9036      assert(emit->key.tex[unit].num_samples > 1);
9037
9038      /* Fetch one sample from an MSAA texture */
9039      struct tgsi_full_src_register sampleIndex =
9040         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9041      /* LD_MS dst, coord(s0), resource, sampleIndex */
9042      begin_emit_instruction(emit);
9043      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
9044                         inst->Instruction.Saturate, offsets);
9045      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9046      emit_src_register(emit, &inst->Src[0]);
9047      emit_resource_register(emit, unit);
9048      emit_src_register(emit, &sampleIndex);
9049      end_emit_instruction(emit);
9050   }
9051   else {
9052      /* Fetch one texel specified by integer coordinate */
9053      /* LD dst, coord(s0), resource */
9054      begin_emit_instruction(emit);
9055      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
9056                         inst->Instruction.Saturate, offsets);
9057      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9058      emit_src_register(emit, &inst->Src[0]);
9059      emit_resource_register(emit, unit);
9060      end_emit_instruction(emit);
9061   }
9062
9063   end_tex_swizzle(emit, &swz_info);
9064
9065   free_temp_indexes(emit);
9066
9067   return TRUE;
9068}
9069
9070
9071/**
9072 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
9073 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
9074 */
9075static boolean
9076emit_txl_txb(struct svga_shader_emitter_v10 *emit,
9077             const struct tgsi_full_instruction *inst)
9078{
9079   const enum tgsi_texture_type target = inst->Texture.Texture;
9080   VGPU10_OPCODE_TYPE opcode;
9081   unsigned unit;
9082   int offsets[3];
9083   struct tgsi_full_src_register coord, lod_bias;
9084   struct tex_swizzle_info swz_info;
9085
9086   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
9087          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
9088          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
9089
9090   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
9091      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9092      unit = inst->Src[2].Register.Index;
9093   }
9094   else {
9095      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9096      unit = inst->Src[1].Register.Index;
9097   }
9098
9099   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9100                     &swz_info);
9101
9102   get_texel_offsets(emit, inst, offsets);
9103
9104   coord = setup_texcoord(emit, unit, &inst->Src[0]);
9105
9106   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
9107   begin_emit_instruction(emit);
9108   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
9109      opcode = VGPU10_OPCODE_SAMPLE_L;
9110   }
9111   else {
9112      opcode = VGPU10_OPCODE_SAMPLE_B;
9113   }
9114   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9115   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9116   emit_src_register(emit, &coord);
9117   emit_resource_register(emit, unit);
9118   emit_sampler_register(emit, unit);
9119   emit_src_register(emit, &lod_bias);
9120   end_emit_instruction(emit);
9121
9122   end_tex_swizzle(emit, &swz_info);
9123
9124   free_temp_indexes(emit);
9125
9126   return TRUE;
9127}
9128
9129
9130/**
9131 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9132 */
9133static boolean
9134emit_txl2(struct svga_shader_emitter_v10 *emit,
9135          const struct tgsi_full_instruction *inst)
9136{
9137   unsigned target = inst->Texture.Texture;
9138   unsigned opcode, unit;
9139   int offsets[3];
9140   struct tgsi_full_src_register coord, lod;
9141   struct tex_swizzle_info swz_info;
9142
9143   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9144
9145   lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9146   unit = inst->Src[2].Register.Index;
9147
9148   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9149                     &swz_info);
9150
9151   get_texel_offsets(emit, inst, offsets);
9152
9153   coord = setup_texcoord(emit, unit, &inst->Src[0]);
9154
9155   /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9156   begin_emit_instruction(emit);
9157   opcode = VGPU10_OPCODE_SAMPLE_L;
9158   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9159   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9160   emit_src_register(emit, &coord);
9161   emit_resource_register(emit, unit);
9162   emit_sampler_register(emit, unit);
9163   emit_src_register(emit, &lod);
9164   end_emit_instruction(emit);
9165
9166   end_tex_swizzle(emit, &swz_info);
9167
9168   free_temp_indexes(emit);
9169
9170   return TRUE;
9171}
9172
9173
9174/**
9175 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9176 */
9177static boolean
9178emit_txq(struct svga_shader_emitter_v10 *emit,
9179         const struct tgsi_full_instruction *inst)
9180{
9181   const uint unit = inst->Src[1].Register.Index;
9182
9183   if (emit->key.tex[unit].target == PIPE_BUFFER) {
9184      /* RESINFO does not support querying texture buffers, so we instead
9185       * store texture buffer sizes in shader constants, then copy them to
9186       * implement TXQ instead of emitting RESINFO.
9187       * MOV dst, const[texture_buffer_size_index[unit]]
9188       */
9189      struct tgsi_full_src_register size_src =
9190         make_src_const_reg(emit->texture_buffer_size_index[unit]);
9191      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9192   } else {
9193      /* RESINFO dst, srcMipLevel, resource */
9194      begin_emit_instruction(emit);
9195      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9196      emit_dst_register(emit, &inst->Dst[0]);
9197      emit_src_register(emit, &inst->Src[0]);
9198      emit_resource_register(emit, unit);
9199      end_emit_instruction(emit);
9200   }
9201
9202   free_temp_indexes(emit);
9203
9204   return TRUE;
9205}
9206
9207
9208/**
9209 * Does this opcode produce a double-precision result?
9210 * XXX perhaps move this to a TGSI utility.
9211 */
9212static bool
9213opcode_has_dbl_dst(unsigned opcode)
9214{
9215   switch (opcode) {
9216   case TGSI_OPCODE_F2D:
9217   case TGSI_OPCODE_DABS:
9218   case TGSI_OPCODE_DADD:
9219   case TGSI_OPCODE_DFRAC:
9220   case TGSI_OPCODE_DMAX:
9221   case TGSI_OPCODE_DMIN:
9222   case TGSI_OPCODE_DMUL:
9223   case TGSI_OPCODE_DNEG:
9224   case TGSI_OPCODE_I2D:
9225   case TGSI_OPCODE_U2D:
9226   case TGSI_OPCODE_DFMA:
9227      // XXX more TBD
9228      return true;
9229   default:
9230      return false;
9231   }
9232}
9233
9234
9235/**
9236 * Does this opcode use double-precision source registers?
9237 */
9238static bool
9239opcode_has_dbl_src(unsigned opcode)
9240{
9241   switch (opcode) {
9242   case TGSI_OPCODE_D2F:
9243   case TGSI_OPCODE_DABS:
9244   case TGSI_OPCODE_DADD:
9245   case TGSI_OPCODE_DFRAC:
9246   case TGSI_OPCODE_DMAX:
9247   case TGSI_OPCODE_DMIN:
9248   case TGSI_OPCODE_DMUL:
9249   case TGSI_OPCODE_DNEG:
9250   case TGSI_OPCODE_D2I:
9251   case TGSI_OPCODE_D2U:
9252   case TGSI_OPCODE_DFMA:
9253   case TGSI_OPCODE_DSLT:
9254   case TGSI_OPCODE_DSGE:
9255   case TGSI_OPCODE_DSEQ:
9256   case TGSI_OPCODE_DSNE:
9257   case TGSI_OPCODE_DRCP:
9258   case TGSI_OPCODE_DSQRT:
9259   case TGSI_OPCODE_DMAD:
9260   case TGSI_OPCODE_DLDEXP:
9261   case TGSI_OPCODE_DFRACEXP:
9262   case TGSI_OPCODE_DRSQ:
9263   case TGSI_OPCODE_DTRUNC:
9264   case TGSI_OPCODE_DCEIL:
9265   case TGSI_OPCODE_DFLR:
9266   case TGSI_OPCODE_DROUND:
9267   case TGSI_OPCODE_DSSG:
9268      return true;
9269   default:
9270      return false;
9271   }
9272}
9273
9274
9275/**
9276 * Check that the swizzle for reading from a double-precision register
9277 * is valid. If not valid, move the source to a temporary register first.
9278 */
9279static struct tgsi_full_src_register
9280check_double_src(struct svga_shader_emitter_v10 *emit,
9281                 const struct tgsi_full_src_register *reg)
9282{
9283   struct tgsi_full_src_register src;
9284
9285   if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
9286         reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
9287        (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
9288         reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
9289       ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
9290         reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
9291        (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
9292         reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
9293      src = *reg;
9294   } else {
9295      /* move the src to a temporary to fix the swizzle */
9296      unsigned tmp = get_temp_index(emit);
9297      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9298      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9299      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
9300      src = tmp_src;
9301
9302      /* The temporary index will be released in the caller */
9303   }
9304   return src;
9305}
9306
9307/**
9308 * Check that the writemask for a double-precision instruction is valid.
9309 */
9310static void
9311check_double_dst_writemask(const struct tgsi_full_instruction *inst)
9312{
9313   ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
9314
9315   switch (inst->Instruction.Opcode) {
9316   case TGSI_OPCODE_DABS:
9317   case TGSI_OPCODE_DADD:
9318   case TGSI_OPCODE_DFRAC:
9319   case TGSI_OPCODE_DNEG:
9320   case TGSI_OPCODE_DMAD:
9321   case TGSI_OPCODE_DMAX:
9322   case TGSI_OPCODE_DMIN:
9323   case TGSI_OPCODE_DMUL:
9324   case TGSI_OPCODE_DRCP:
9325   case TGSI_OPCODE_DSQRT:
9326   case TGSI_OPCODE_F2D:
9327   case TGSI_OPCODE_DFMA:
9328      assert(writemask == TGSI_WRITEMASK_XYZW ||
9329             writemask == TGSI_WRITEMASK_XY ||
9330             writemask == TGSI_WRITEMASK_ZW);
9331      break;
9332   case TGSI_OPCODE_DSEQ:
9333   case TGSI_OPCODE_DSGE:
9334   case TGSI_OPCODE_DSNE:
9335   case TGSI_OPCODE_DSLT:
9336   case TGSI_OPCODE_D2I:
9337   case TGSI_OPCODE_D2U:
9338      /* Write to 1 or 2 components only */
9339      assert(util_bitcount(writemask) <= 2);
9340      break;
9341   default:
9342      /* XXX this list may be incomplete */
9343      ;
9344   }
9345}
9346
9347
9348/**
9349 * Double-precision absolute value.
9350 */
9351static boolean
9352emit_dabs(struct svga_shader_emitter_v10 *emit,
9353          const struct tgsi_full_instruction *inst)
9354{
9355   assert(emit->version >= 50);
9356
9357   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9358   check_double_dst_writemask(inst);
9359
9360   struct tgsi_full_src_register abs_src = absolute_src(&src);
9361
9362   /* DMOV dst, |src| */
9363   emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9364
9365   free_temp_indexes(emit);
9366   return TRUE;
9367}
9368
9369
9370/**
9371 * Double-precision negation
9372 */
9373static boolean
9374emit_dneg(struct svga_shader_emitter_v10 *emit,
9375          const struct tgsi_full_instruction *inst)
9376{
9377   assert(emit->version >= 50);
9378   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9379   check_double_dst_writemask(inst);
9380
9381   struct tgsi_full_src_register neg_src = negate_src(&src);
9382
9383   /* DMOV dst, -src */
9384   emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9385
9386   free_temp_indexes(emit);
9387   return TRUE;
9388}
9389
9390
9391/**
9392 * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
9393 */
9394static boolean
9395emit_dmad(struct svga_shader_emitter_v10 *emit,
9396          const struct tgsi_full_instruction *inst)
9397{
9398   assert(emit->version >= 50);
9399   struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
9400   struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
9401   struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
9402   check_double_dst_writemask(inst);
9403
9404   unsigned tmp = get_temp_index(emit);
9405   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9406   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9407
9408   /* DMUL tmp, src[0], src[1] */
9409   emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
9410                        &tmp_dst, &src0, &src1, NULL,
9411                        FALSE, inst->Instruction.Precise);
9412
9413   /* DADD dst, tmp, src[2] */
9414   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9415                        &inst->Dst[0], &tmp_src, &src2, NULL,
9416                        inst->Instruction.Saturate, inst->Instruction.Precise);
9417   free_temp_indexes(emit);
9418
9419   return TRUE;
9420}
9421
9422
9423/**
9424 * Double precision reciprocal square root
9425 */
9426static boolean
9427emit_drsq(struct svga_shader_emitter_v10 *emit,
9428          const struct tgsi_full_dst_register *dst,
9429          const struct tgsi_full_src_register *src)
9430{
9431   assert(emit->version >= 50);
9432
9433   VGPU10OpcodeToken0 token0;
9434   struct tgsi_full_src_register dsrc = check_double_src(emit, src);
9435
9436   begin_emit_instruction(emit);
9437
9438   token0.value = 0;
9439   token0.opcodeType = VGPU10_OPCODE_VMWARE;
9440   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
9441   emit_dword(emit, token0.value);
9442   emit_dst_register(emit, dst);
9443   emit_src_register(emit, &dsrc);
9444   end_emit_instruction(emit);
9445
9446   free_temp_indexes(emit);
9447
9448   return TRUE;
9449}
9450
9451
9452/**
9453 * There is no SM5 opcode for double precision square root.
9454 * It will be implemented with DRSQ.
9455 * dst = src * DRSQ(src)
9456 */
9457static boolean
9458emit_dsqrt(struct svga_shader_emitter_v10 *emit,
9459          const struct tgsi_full_instruction *inst)
9460{
9461   assert(emit->version >= 50);
9462
9463   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9464
9465   /* temporary register to hold the source */
9466   unsigned tmp = get_temp_index(emit);
9467   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9468   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9469
9470   /* temporary register to hold the DEQ result */
9471   unsigned tmp_cond = get_temp_index(emit);
9472   struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
9473   struct tgsi_full_dst_register tmp_cond_dst_xy =
9474      writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9475   struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
9476   struct tgsi_full_src_register tmp_cond_src_xy =
9477         swizzle_src(&tmp_cond_src,
9478                     PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9479                     PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9480
9481   /* The reciprocal square root of zero yields INF.
9482    * So if the source is 0, we replace it with 1 in the tmp register.
9483    * The later multiplication of zero in the original source will yield 0
9484    * in the result.
9485    */
9486
9487   /* tmp1 = (src == 0) ? 1 : src;
9488    *   EQ tmp1, 0, src
9489    *   MOVC tmp, tmp1, 1.0, src
9490    */
9491   struct tgsi_full_src_register zero =
9492               make_immediate_reg_double(emit, 0);
9493
9494   struct tgsi_full_src_register one =
9495               make_immediate_reg_double(emit, 1.0);
9496
9497   emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
9498                        &zero, &src);
9499   emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
9500                        &tmp_cond_src_xy, &one, &src);
9501
9502   struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
9503   struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
9504
9505   /* DRSQ tmp_rsq, tmp */
9506   emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
9507
9508   /* DMUL dst, tmp_rsq, src[0] */
9509   emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
9510                        &tmp_rsq_src, &src);
9511
9512   free_temp_indexes(emit);
9513
9514   return TRUE;
9515}
9516
9517
9518/**
9519 * glsl-nir path does not lower DTRUNC, so we need to
9520 * add the translation here.
9521 *
9522 * frac = DFRAC(src)
9523 * tmp = src - frac
9524 * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
9525 */
9526static boolean
9527emit_dtrunc(struct svga_shader_emitter_v10 *emit,
9528            const struct tgsi_full_instruction *inst)
9529{
9530   assert(emit->version >= 50);
9531
9532   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9533
9534   /* frac = DFRAC(src) */
9535   unsigned frac_index = get_temp_index(emit);
9536   struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
9537   struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
9538
9539   VGPU10OpcodeToken0 token0;
9540   begin_emit_instruction(emit);
9541   token0.value = 0;
9542   token0.opcodeType = VGPU10_OPCODE_VMWARE;
9543   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
9544   emit_dword(emit, token0.value);
9545   emit_dst_register(emit, &frac_dst);
9546   emit_src_register(emit, &src);
9547   end_emit_instruction(emit);
9548
9549   /* tmp = src - frac */
9550   unsigned tmp_index = get_temp_index(emit);
9551   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
9552   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
9553   struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
9554   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9555                        &tmp_dst, &src, &negate_frac_src, NULL,
9556                        inst->Instruction.Saturate, inst->Instruction.Precise);
9557
9558   /* cond = frac==0 */
9559   unsigned cond_index = get_temp_index(emit);
9560   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
9561   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
9562   struct tgsi_full_src_register zero =
9563               make_immediate_reg_double(emit, 0);
9564
9565   /* Only use one or two components for double opcode */
9566   cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9567
9568   emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
9569                        &cond_dst, &frac_src, &zero, NULL,
9570                        inst->Instruction.Saturate, inst->Instruction.Precise);
9571
9572   /* tmp2 = cond ? 0 : 1 */
9573   unsigned tmp2_index = get_temp_index(emit);
9574   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
9575   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
9576   struct tgsi_full_src_register cond_src_xy =
9577      swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9578		             PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9579   struct tgsi_full_src_register one =
9580               make_immediate_reg_double(emit, 1.0);
9581
9582   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9583                        &tmp2_dst, &cond_src_xy, &zero, &one,
9584                        inst->Instruction.Saturate, inst->Instruction.Precise);
9585
9586   /* tmp2 = tmp + tmp2 */
9587   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9588                        &tmp2_dst, &tmp_src, &tmp2_src, NULL,
9589                        inst->Instruction.Saturate, inst->Instruction.Precise);
9590
9591   /* cond = src>=0 */
9592   emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
9593                        &cond_dst, &src, &zero, NULL,
9594                        inst->Instruction.Saturate, inst->Instruction.Precise);
9595
9596   /* dst = cond ? tmp : tmp2 */
9597   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9598                        &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
9599                        inst->Instruction.Saturate, inst->Instruction.Precise);
9600
9601   free_temp_indexes(emit);
9602   return TRUE;
9603}
9604
9605
9606static boolean
9607emit_interp_offset(struct svga_shader_emitter_v10 *emit,
9608                   const struct tgsi_full_instruction *inst)
9609{
9610   assert(emit->version >= 50);
9611
9612   /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
9613    * where (0,0) is the center of the pixel.  We need to translate that
9614    * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
9615    * Also need to flip the Y axis (I think).
9616    */
9617   unsigned tmp = get_temp_index(emit);
9618   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9619   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9620   struct tgsi_full_dst_register tmp_dst_xy =
9621      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9622   struct tgsi_full_src_register const16 =
9623      make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
9624
9625   /* MUL tmp.xy, src1, {16, -16, 0, 0} */
9626   emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
9627                        &tmp_dst_xy, &inst->Src[1], &const16);
9628
9629   /* FTOI tmp.xy, tmp */
9630   emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
9631
9632   /* EVAL_SNAPPED dst, src0, tmp */
9633   emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
9634                        &inst->Dst[0], &inst->Src[0], &tmp_src);
9635
9636   free_temp_indexes(emit);
9637
9638   return TRUE;
9639}
9640
9641
9642/**
9643 * Emit a simple instruction (like ADD, MUL, MIN, etc).
9644 */
9645static boolean
9646emit_simple(struct svga_shader_emitter_v10 *emit,
9647            const struct tgsi_full_instruction *inst)
9648{
9649   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9650   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9651   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9652   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9653   unsigned i;
9654
9655   struct tgsi_full_src_register src[3];
9656
9657   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
9658      emit->current_loop_depth++;
9659   }
9660   else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
9661      emit->current_loop_depth--;
9662   }
9663
9664   for (i = 0; i < op->num_src; i++) {
9665      if (dbl_src)
9666         src[i] = check_double_src(emit, &inst->Src[i]);
9667      else
9668         src[i] = inst->Src[i];
9669   }
9670
9671   begin_emit_instruction(emit);
9672   emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
9673                       inst->Instruction.Saturate,
9674                       inst->Instruction.Precise);
9675   for (i = 0; i < op->num_dst; i++) {
9676      if (dbl_dst) {
9677         check_double_dst_writemask(inst);
9678      }
9679      emit_dst_register(emit, &inst->Dst[i]);
9680   }
9681   for (i = 0; i < op->num_src; i++) {
9682      emit_src_register(emit, &src[i]);
9683   }
9684   end_emit_instruction(emit);
9685
9686   free_temp_indexes(emit);
9687   return TRUE;
9688}
9689
9690
9691/**
9692 * Emit MSB instruction (like IMSB, UMSB).
9693 *
9694 * GLSL returns the index starting from the LSB;
9695 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
9696 * To get correct location as per glsl from SM5 device, we should
9697 * return (31 - index) if returned index is not -1.
9698 */
9699static boolean
9700emit_msb(struct svga_shader_emitter_v10 *emit,
9701         const struct tgsi_full_instruction *inst)
9702{
9703   const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
9704
9705   assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
9706
9707   struct tgsi_full_src_register index_src =
9708      make_src_reg(index_dst->Register.File, index_dst->Register.Index);
9709   struct tgsi_full_src_register imm31 =
9710      make_immediate_reg_int(emit, 31);
9711   imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
9712   struct tgsi_full_src_register neg_one =
9713      make_immediate_reg_int(emit, -1);
9714   neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
9715   unsigned tmp = get_temp_index(emit);
9716   const struct tgsi_full_dst_register tmp_dst =
9717      make_dst_temp_reg(tmp);
9718   const struct tgsi_full_dst_register tmp_dst_x =
9719      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
9720   const struct tgsi_full_src_register tmp_src_x =
9721       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
9722   int writemask = TGSI_WRITEMASK_X;
9723   int src_swizzle = TGSI_SWIZZLE_X;
9724   int dst_writemask = index_dst->Register.WriteMask;
9725
9726   emit_simple(emit, inst);
9727
9728   /* index conversion from SM5 to GLSL */
9729   while (writemask & dst_writemask) {
9730      struct tgsi_full_src_register index_src_comp =
9731         scalar_src(&index_src, src_swizzle);
9732      struct tgsi_full_dst_register index_dst_comp =
9733         writemask_dst(index_dst, writemask);
9734
9735      /* check if index_src_comp != -1 */
9736      emit_instruction_op2(emit, VGPU10_OPCODE_INE,
9737                           &tmp_dst_x, &index_src_comp, &neg_one);
9738
9739      /* if */
9740      emit_if(emit, &tmp_src_x);
9741
9742      index_src_comp = negate_src(&index_src_comp);
9743      /* SUB DST, IMM{31}, DST */
9744      emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
9745                           &index_dst_comp, &imm31, &index_src_comp);
9746
9747      /* endif */
9748      emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9749
9750      writemask = writemask << 1;
9751      src_swizzle = src_swizzle + 1;
9752   }
9753   free_temp_indexes(emit);
9754   return TRUE;
9755}
9756
9757
9758/**
9759 * Emit a BFE instruction (like UBFE, IBFE).
9760 * tgsi representation:
9761 * U/IBFE dst, value, offset, width
9762 * SM5 representation:
9763 * U/IBFE dst, width, offset, value
9764 * Note: SM5 has width & offset range (0-31);
9765 *      whereas GLSL has width & offset range (0-32)
9766 */
9767static boolean
9768emit_bfe(struct svga_shader_emitter_v10 *emit,
9769         const struct tgsi_full_instruction *inst)
9770{
9771   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9772   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9773   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9774   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9775   zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9776
9777   unsigned tmp1 = get_temp_index(emit);
9778   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9779   const struct tgsi_full_dst_register cond1_dst_x =
9780      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9781   const struct tgsi_full_src_register cond1_src_x =
9782      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9783
9784   unsigned tmp2 = get_temp_index(emit);
9785   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9786   const struct tgsi_full_dst_register cond2_dst_x =
9787      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9788   const struct tgsi_full_src_register cond2_src_x =
9789      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9790
9791   /**
9792    * In SM5, when width = 32  and offset = 0, it returns 0.
9793    * On the other hand GLSL, expects value to be copied as it is, to dst.
9794    */
9795
9796   /* cond1 = width ! = 32 */
9797   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9798                        &cond1_dst_x, &inst->Src[2], &imm32);
9799
9800   /* cond2 = offset ! = 0 */
9801   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9802                        &cond2_dst_x, &inst->Src[1], &zero);
9803
9804   /* cond 2 = cond1 & cond 2 */
9805   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9806                        &cond2_src_x,
9807                        &cond1_src_x);
9808   /* IF */
9809   emit_if(emit, &cond2_src_x);
9810
9811   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9812                        &inst->Src[0]);
9813
9814   /* ELSE */
9815   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9816
9817   /* U/IBFE dst, width, offset, value */
9818   emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9819                        &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9820
9821   /* ENDIF */
9822   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9823
9824   free_temp_indexes(emit);
9825   return TRUE;
9826}
9827
9828
9829/**
9830 * Emit BFI  instruction
9831 * tgsi representation:
9832 * BFI dst, base, insert, offset, width
9833 * SM5 representation:
9834 * BFI dst, width, offset, insert, base
9835 * Note: SM5 has width & offset range (0-31);
9836 *      whereas GLSL has width & offset range (0-32)
9837 */
9838static boolean
9839emit_bfi(struct svga_shader_emitter_v10 *emit,
9840         const struct tgsi_full_instruction *inst)
9841{
9842   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9843   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9844   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9845
9846   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9847   zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9848
9849   unsigned tmp1 = get_temp_index(emit);
9850   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9851   const struct tgsi_full_dst_register cond1_dst_x =
9852      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9853   const struct tgsi_full_src_register cond1_src_x =
9854      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9855
9856   unsigned tmp2 = get_temp_index(emit);
9857   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9858   const struct tgsi_full_dst_register cond2_dst_x =
9859      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9860   const struct tgsi_full_src_register cond2_src_x =
9861      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9862
9863   /**
9864    * In SM5, when width = 32  and offset = 0, it returns 0.
9865    * On the other hand GLSL, expects insert to be copied as it is, to dst.
9866    */
9867
9868   /* cond1 = width == 32 */
9869   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9870                        &cond1_dst_x, &inst->Src[3], &imm32);
9871
9872   /* cond1 = offset == 0 */
9873   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9874                        &cond2_dst_x, &inst->Src[2], &zero);
9875
9876   /* cond2 = cond1 & cond2 */
9877   emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9878                        &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9879
9880   /* if */
9881   emit_if(emit, &cond2_src_x);
9882
9883   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9884                        &inst->Src[1]);
9885
9886   /* else */
9887   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9888
9889   /* BFI dst, width, offset, insert, base */
9890   begin_emit_instruction(emit);
9891   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9892   emit_dst_register(emit, &inst->Dst[0]);
9893   emit_src_register(emit, &inst->Src[3]);
9894   emit_src_register(emit, &inst->Src[2]);
9895   emit_src_register(emit, &inst->Src[1]);
9896   emit_src_register(emit, &inst->Src[0]);
9897   end_emit_instruction(emit);
9898
9899   /* endif */
9900   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9901
9902   free_temp_indexes(emit);
9903   return TRUE;
9904}
9905
9906
9907/**
9908 * We only special case the MOV instruction to try to detect constant
9909 * color writes in the fragment shader.
9910 */
9911static boolean
9912emit_mov(struct svga_shader_emitter_v10 *emit,
9913         const struct tgsi_full_instruction *inst)
9914{
9915   const struct tgsi_full_src_register *src = &inst->Src[0];
9916   const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9917
9918   if (emit->unit == PIPE_SHADER_FRAGMENT &&
9919       dst->Register.File == TGSI_FILE_OUTPUT &&
9920       dst->Register.Index == 0 &&
9921       src->Register.File == TGSI_FILE_CONSTANT &&
9922       !src->Register.Indirect) {
9923      emit->constant_color_output = TRUE;
9924   }
9925
9926   return emit_simple(emit, inst);
9927}
9928
9929
9930/**
9931 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9932 * where TGSI only uses one dest register.
9933 */
9934static boolean
9935emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9936                 const struct tgsi_full_instruction *inst,
9937                 unsigned dst_count,
9938                 unsigned dst_index)
9939{
9940   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9941   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9942   unsigned i;
9943
9944   begin_emit_instruction(emit);
9945   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9946
9947   for (i = 0; i < dst_count; i++) {
9948      if (i == dst_index) {
9949         emit_dst_register(emit, &inst->Dst[0]);
9950      } else {
9951         emit_null_dst_register(emit);
9952      }
9953   }
9954
9955   for (i = 0; i < op->num_src; i++) {
9956      emit_src_register(emit, &inst->Src[i]);
9957   }
9958   end_emit_instruction(emit);
9959
9960   return TRUE;
9961}
9962
9963
9964/**
9965 * Emit a vmware specific VGPU10 instruction.
9966 */
9967static boolean
9968emit_vmware(struct svga_shader_emitter_v10 *emit,
9969            const struct tgsi_full_instruction *inst,
9970            VGPU10_VMWARE_OPCODE_TYPE subopcode)
9971{
9972   VGPU10OpcodeToken0 token0;
9973   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9974   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9975   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9976   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9977   unsigned i;
9978   struct tgsi_full_src_register src[3];
9979
9980   for (i = 0; i < op->num_src; i++) {
9981      if (dbl_src)
9982         src[i] = check_double_src(emit, &inst->Src[i]);
9983      else
9984         src[i] = inst->Src[i];
9985   }
9986
9987   begin_emit_instruction(emit);
9988
9989   assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9990
9991   token0.value = 0;
9992   token0.opcodeType = VGPU10_OPCODE_VMWARE;
9993   token0.vmwareOpcodeType = subopcode;
9994   emit_dword(emit, token0.value);
9995
9996   if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9997      /* IDIV only uses the first dest register. */
9998      emit_dst_register(emit, &inst->Dst[0]);
9999      emit_null_dst_register(emit);
10000   } else {
10001      for (i = 0; i < op->num_dst; i++) {
10002         if (dbl_dst) {
10003            check_double_dst_writemask(inst);
10004         }
10005         emit_dst_register(emit, &inst->Dst[i]);
10006      }
10007   }
10008
10009   for (i = 0; i < op->num_src; i++) {
10010      emit_src_register(emit, &src[i]);
10011   }
10012   end_emit_instruction(emit);
10013
10014   free_temp_indexes(emit);
10015   return TRUE;
10016}
10017
/**
 * Kind of access performed through a memory register operand.
 * Used by emit_memory_register() to choose the operand encoding.
 */
typedef enum {
   MEM_STORE = 0,           /* operand is a destination, uses a writemask */
   MEM_LOAD = 1,            /* operand is a source, uses a swizzle */
   MEM_ATOMIC_COUNTER = 2   /* operand is emitted with zero components */
} memory_op;
10027
10028static void
10029emit_memory_register(struct svga_shader_emitter_v10 *emit,
10030                     memory_op mem_op,
10031                     const struct tgsi_full_instruction *inst,
10032                     unsigned regIndex, unsigned writemask)
10033{
10034   VGPU10OperandToken0 operand0;
10035   unsigned resIndex = 0;
10036
10037   operand0.value = 0;
10038   operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
10039   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10040   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10041
10042   switch (mem_op) {
10043   case MEM_ATOMIC_COUNTER:
10044   {
10045      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10046      resIndex = inst->Src[regIndex].Register.Index;
10047      break;
10048   }
10049   case MEM_STORE:
10050   {
10051      const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];
10052
10053      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10054      operand0.mask = writemask;
10055      resIndex = reg->Register.Index;
10056      break;
10057   }
10058   case MEM_LOAD:
10059   {
10060      const struct tgsi_full_src_register *reg = &inst->Src[regIndex];
10061
10062      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10063      operand0.swizzleX = reg->Register.SwizzleX;
10064      operand0.swizzleY = reg->Register.SwizzleY;
10065      operand0.swizzleZ = reg->Register.SwizzleZ;
10066      operand0.swizzleW = reg->Register.SwizzleW;
10067      resIndex = reg->Register.Index;
10068      break;
10069   }
10070   default:
10071      assert(!"Unexpected memory opcode");
10072      break;
10073   }
10074
10075   emit_dword(emit, operand0.value);
10076   emit_dword(emit, resIndex);
10077}
10078
10079
/* Kind of access for which emit_uav_register() builds a UAV operand. */
typedef enum {
   UAV_STORE = 0,    /* store destination, emitted with a writemask */
   UAV_LOAD = 1,     /* load source, emitted with an .xyzw swizzle */
   UAV_ATOMIC = 2,   /* atomic target, emitted with no components */
   UAV_RESQ = 3      /* resource query, emitted like UAV_LOAD */
} UAV_OP;
10086
10087
10088/**
10089 * Emit a uav register
10090 * \param uav_index     index of resource register
10091 * \param uav_op        UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode
10092 * \param resourceType  resource file type
10093 * \param writemask     resource writemask
10094 */
10095
10096static void
10097emit_uav_register(struct svga_shader_emitter_v10 *emit,
10098                  unsigned res_index, UAV_OP uav_op,
10099                  enum tgsi_file_type resourceType, unsigned writemask)
10100{
10101   VGPU10OperandToken0 operand0;
10102   unsigned uav_index = INVALID_INDEX;
10103
10104   operand0.value = 0;
10105   operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
10106   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10107   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10108
10109   switch (resourceType) {
10110   case TGSI_FILE_IMAGE:
10111      uav_index = emit->key.images[res_index].uav_index;
10112      break;
10113   case TGSI_FILE_BUFFER:
10114      uav_index = emit->key.shader_buf_uav_index[res_index];
10115      break;
10116   case TGSI_FILE_HW_ATOMIC:
10117      uav_index = emit->key.atomic_buf_uav_index[res_index];
10118      break;
10119   default:
10120      assert(0);
10121   }
10122
10123   switch (uav_op) {
10124   case UAV_ATOMIC:
10125      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10126      break;
10127
10128   case UAV_STORE:
10129      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10130      operand0.mask = writemask;
10131      break;
10132
10133   case UAV_LOAD:
10134   case UAV_RESQ:
10135      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10136      operand0.swizzleX = VGPU10_COMPONENT_X;
10137      operand0.swizzleY = VGPU10_COMPONENT_Y;
10138      operand0.swizzleZ = VGPU10_COMPONENT_Z;
10139      operand0.swizzleW = VGPU10_COMPONENT_W;
10140      break;
10141
10142   default:
10143      break;
10144   }
10145
10146   emit_dword(emit, operand0.value);
10147   emit_dword(emit, uav_index);
10148}
10149
10150
10151/**
10152 * A helper function to emit the uav address.
10153 * For memory, buffer, and image resource, it is set to the specified address.
10154 * For HW atomic counter, the address is the sum of the address offset and the
10155 * offset into the HW atomic buffer as specified by the register index.
10156 * It is also possible to specify the counter index as an indirect address.
10157 * And in this case, the uav address will be the sum of the address offset and the
10158 * counter index specified in the indirect address.
10159 */
10160static
10161struct tgsi_full_src_register
10162emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10163                     enum tgsi_file_type resourceType,
10164                     unsigned resourceIndex,
10165                     unsigned resourceIndirect,
10166                     unsigned resourceIndirectIndex,
10167                     const struct tgsi_full_src_register *addr_reg)
10168{
10169   unsigned addr_tmp;
10170   struct tgsi_full_dst_register addr_dst;
10171   struct tgsi_full_src_register addr_src;
10172   struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10173   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10174
10175   addr_tmp = get_temp_index(emit);
10176   addr_dst = make_dst_temp_reg(addr_tmp);
10177   addr_src = make_src_temp_reg(addr_tmp);
10178
10179   /* specified address offset */
10180   if (addr_reg)
10181      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10182   else
10183      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10184
10185   /* For HW atomic counter, we need to find the index to the
10186    * HW atomic buffer.
10187    */
10188   if (resourceType == TGSI_FILE_HW_ATOMIC) {
10189      if (resourceIndirect) {
10190
10191         /**
10192          * uav addr offset  = counter layout offset +
10193          *                    counter indirect index address + address offset
10194          */
10195
10196         /* counter layout offset */
10197         struct tgsi_full_src_register layout_offset;
10198         layout_offset =
10199            make_immediate_reg_int(emit, resourceIndex);
10200
10201         /* counter layout offset + address offset */
10202         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10203                              &addr_src, &layout_offset);
10204
10205         /* counter indirect index address */
10206         unsigned indirect_addr =
10207            emit->address_reg_index[resourceIndirectIndex];
10208
10209         struct tgsi_full_src_register indirect_addr_src =
10210            make_src_temp_reg(indirect_addr);
10211
10212         indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10213
10214         /* counter layout offset + address offset + counter indirect address */
10215         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10216                              &addr_src, &indirect_addr_src);
10217
10218      } else {
10219         struct tgsi_full_src_register index_src;
10220
10221         index_src = make_immediate_reg_int(emit, resourceIndex);
10222
10223         /* uav addr offset  = counter index address + address offset */
10224         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10225                              &addr_src, &index_src);
10226      }
10227
10228      /* HW atomic buffer is declared as raw buffer, so the buffer address is
10229       * the byte offset, so we need to multiple the counter addr offset by 4.
10230       */
10231      emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10232                           &addr_src, &two);
10233   }
10234   else if (resourceType == TGSI_FILE_IMAGE) {
10235      if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10236             && emit->key.images[resourceIndex].is_single_layer) {
10237
10238         struct tgsi_full_dst_register addr_dst_z =
10239            writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10240
10241         /* For non-layered 3D texture image view, we have to make sure the z
10242          * component of the address offset is set to 0.
10243          */
10244         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10245                              &zero);
10246      }
10247   }
10248
10249   return addr_src;
10250}
10251
10252
10253
10254/**
10255 * A helper function to expand indirect indexing to uav resource
10256 * by looping through the resource array, compare the indirect index and
10257 * emit the instruction for each resource in the array.
10258 */
10259static void
10260loop_instruction(unsigned index, unsigned count,
10261                 struct tgsi_full_src_register *addr_index,
10262                 void (*fb)(struct svga_shader_emitter_v10 *,
10263                            const struct tgsi_full_instruction *, unsigned),
10264                 struct svga_shader_emitter_v10 *emit,
10265                 const struct tgsi_full_instruction *inst)
10266{
10267   if (count == 0)
10268      return;
10269
10270   if (index > 0) {
10271      /* ELSE */
10272      emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10273   }
10274
10275   struct tgsi_full_src_register index_src =
10276                                    make_immediate_reg_int(emit, index);
10277
10278   unsigned tmp_index = get_temp_index(emit);
10279   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10280   struct tgsi_full_src_register tmp_src_x =
10281                scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10282   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
10283
10284   /* IEQ tmp, addr_tmp_index, index */
10285   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
10286                        addr_index, &index_src);
10287
10288   /* IF tmp */
10289   emit_if(emit, &tmp_src_x);
10290
10291   free_temp_indexes(emit);
10292
10293   (*fb)(emit, inst, index);
10294
10295   loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
10296
10297   /* ENDIF */
10298   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10299}
10300
10301
10302/**
10303 * A helper function to emit the load instruction.
10304 */
10305static void
10306emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10307                      const struct tgsi_full_instruction *inst,
10308                      unsigned resourceIndex)
10309{
10310   VGPU10OpcodeToken0 token0;
10311   struct tgsi_full_src_register addr_src;
10312   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10313
10314   /* Resolve the resource address for this resource first */
10315   addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10316                                   inst->Src[0].Register.Indirect,
10317                                   inst->Src[0].Indirect.Index,
10318                                   &inst->Src[1]);
10319
10320   /* LOAD resource, address, src */
10321   begin_emit_instruction(emit);
10322
10323   token0.value = 0;
10324
10325   if (resourceType == TGSI_FILE_MEMORY ||
10326       resourceType == TGSI_FILE_BUFFER ||
10327       resourceType == TGSI_FILE_HW_ATOMIC) {
10328      token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10329      addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10330   }
10331   else {
10332      token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10333   }
10334
10335   token0.saturate = inst->Instruction.Saturate,
10336   emit_dword(emit, token0.value);
10337
10338   emit_dst_register(emit, &inst->Dst[0]);
10339   emit_src_register(emit, &addr_src);
10340
10341   if (resourceType == TGSI_FILE_MEMORY) {
10342      emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10343   } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10344      emit_uav_register(emit, inst->Src[0].Dimension.Index,
10345                        UAV_LOAD, inst->Src[0].Register.File, 0);
10346   } else {
10347      emit_uav_register(emit, resourceIndex,
10348                        UAV_LOAD, inst->Src[0].Register.File, 0);
10349   }
10350
10351   end_emit_instruction(emit);
10352
10353   free_temp_indexes(emit);
10354}
10355
10356
10357/**
10358 * Emit uav / memory load instruction
10359 */
10360static boolean
10361emit_load(struct svga_shader_emitter_v10 *emit,
10362           const struct tgsi_full_instruction *inst)
10363{
10364   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10365   unsigned resourceIndex = inst->Src[0].Register.Index;
10366
10367   /* If the resource register has indirect index, we will need
10368    * to expand it since SM5 device does not support indirect indexing
10369    * for uav.
10370    */
10371   if (inst->Src[0].Register.Indirect &&
10372       (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10373
10374      unsigned indirect_index = inst->Src[0].Indirect.Index;
10375      unsigned num_resources =
10376         resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10377                                            emit->num_images;
10378
10379      /* indirect index tmp register */
10380      unsigned indirect_addr = emit->address_reg_index[indirect_index];
10381      struct tgsi_full_src_register indirect_addr_src =
10382         make_src_temp_reg(indirect_addr);
10383      indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10384
10385      /* Add offset to the indirect index */
10386      if (inst->Src[0].Register.Index != 0) {
10387         struct tgsi_full_src_register offset =
10388            make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10389         struct tgsi_full_dst_register indirect_addr_dst =
10390            make_dst_temp_reg(indirect_addr);
10391         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10392                              &indirect_addr_src, &offset);
10393      }
10394
10395      /* Loop through the resource array to find which resource to use.
10396       */
10397      loop_instruction(0, num_resources, &indirect_addr_src,
10398                       emit_load_instruction, emit, inst);
10399   }
10400   else {
10401      emit_load_instruction(emit, inst, resourceIndex);
10402   }
10403
10404   free_temp_indexes(emit);
10405
10406   return TRUE;
10407}
10408
10409
10410/**
10411 * A helper function to emit a store instruction.
10412 */
10413static void
10414emit_store_instruction(struct svga_shader_emitter_v10 *emit,
10415                       const struct tgsi_full_instruction *inst,
10416                       unsigned resourceIndex)
10417{
10418   VGPU10OpcodeToken0 token0;
10419   enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10420   unsigned writemask = inst->Dst[0].Register.WriteMask;
10421   struct tgsi_full_src_register addr_src;
10422
10423   unsigned tmp_index = get_temp_index(emit);
10424   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10425   struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
10426   struct tgsi_full_dst_register tmp_dst;
10427
10428   struct tgsi_full_src_register src = inst->Src[1];
10429   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
10430
10431   boolean needLoad = FALSE;
10432   boolean needPerComponentStore = FALSE;
10433   unsigned swizzles = 0;
10434
10435   /* Resolve the resource address for this resource first */
10436   addr_src = emit_uav_addr_offset(emit, resourceType,
10437                                   inst->Dst[0].Register.Index,
10438                                   inst->Dst[0].Register.Indirect,
10439                                   inst->Dst[0].Indirect.Index,
10440                                   &inst->Src[0]);
10441
10442   /* First check the writemask to see if it can be supported
10443    * by the store instruction.
10444    * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
10445    * we can adjust the address offset, and do a per-component store.
10446    * store_uav_typed only allows .xyzw. In this case, we need to
10447    * do a load first, update the temporary and then issue the
10448    * store. This does have a small risk that if different threads
10449    * update different components of the same address, data might not be
10450    * in sync.
10451    */
10452   if (resourceType == TGSI_FILE_IMAGE) {
10453      needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? FALSE : TRUE;
10454   }
10455   else if (resourceType == TGSI_FILE_BUFFER ||
10456            resourceType == TGSI_FILE_MEMORY) {
10457      if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
10458            writemask == TGSI_WRITEMASK_XYZ ||
10459            writemask == TGSI_WRITEMASK_XYZW)) {
10460         needPerComponentStore = TRUE;
10461      }
10462   }
10463
10464   if (needLoad) {
10465      assert(resourceType == TGSI_FILE_IMAGE);
10466
10467      /* LOAD resource, address, src */
10468      begin_emit_instruction(emit);
10469
10470      token0.value = 0;
10471      token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10472      token0.saturate = inst->Instruction.Saturate,
10473      emit_dword(emit, token0.value);
10474
10475      emit_dst_register(emit, &tmp_dst_xyzw);
10476      emit_src_register(emit, &addr_src);
10477      emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
10478
10479      end_emit_instruction(emit);
10480
10481      /* MOV tmp(writemask) src */
10482      tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
10483      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
10484
10485      /* Now set the writemask to xyzw for the store_uav_typed instruction */
10486      writemask = TGSI_WRITEMASK_XYZW;
10487   }
10488   else if (needPerComponentStore) {
10489      /* Save the src swizzles */
10490      swizzles = src.Register.SwizzleX |
10491                 src.Register.SwizzleY << 2 |
10492                 src.Register.SwizzleZ << 4 |
10493                 src.Register.SwizzleW << 6;
10494   }
10495
10496   boolean storeDone = FALSE;
10497   unsigned perComponentWritemask = writemask;
10498   unsigned shift = 0;
10499   struct tgsi_full_src_register shift_src;
10500
10501   while (!storeDone) {
10502
10503      if (needPerComponentStore) {
10504         assert(perComponentWritemask);
10505         while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
10506            shift++;
10507            perComponentWritemask >>= 1;
10508         }
10509
10510         /* First adjust the addr_src to the next component */
10511         if (shift != 0) {
10512            struct tgsi_full_dst_register addr_dst =
10513               make_dst_temp_reg(addr_src.Register.Index);
10514            shift_src = make_immediate_reg_int(emit, shift);
10515            emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
10516                                 &shift_src, &addr_src);
10517
10518            /* Adjust the src swizzle as well */
10519            swizzles >>= (shift * 2);
10520         }
10521
10522         /* Now the address offset is set to the next component,
10523          * we can set the writemask to .x and make sure to set
10524          * the src swizzle as well.
10525          */
10526         src.Register.SwizzleX = swizzles & 0x3;
10527         writemask = TGSI_WRITEMASK_X;
10528
10529         /* Shift for the next component check */
10530         perComponentWritemask >>= 1;
10531         shift = 1;
10532      }
10533
10534      /* STORE resource, address, src */
10535      begin_emit_instruction(emit);
10536
10537      token0.value = 0;
10538      token0.saturate = inst->Instruction.Saturate;
10539
10540      if (resourceType == TGSI_FILE_MEMORY) {
10541         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10542         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10543         emit_dword(emit, token0.value);
10544         emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
10545      }
10546      else if (resourceType == TGSI_FILE_BUFFER ||
10547               resourceType == TGSI_FILE_HW_ATOMIC) {
10548         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10549         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10550         emit_dword(emit, token0.value);
10551         emit_uav_register(emit, resourceIndex, UAV_STORE,
10552                           resourceType, writemask);
10553      }
10554      else {
10555         token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
10556         emit_dword(emit, token0.value);
10557         emit_uav_register(emit, resourceIndex, UAV_STORE,
10558                           resourceType, writemask);
10559      }
10560
10561      emit_src_register(emit, &addr_src);
10562
10563      if (needLoad)
10564         emit_src_register(emit, &tmp_src);
10565      else
10566         emit_src_register(emit, &src);
10567
10568      end_emit_instruction(emit);
10569
10570      if (!needPerComponentStore || !perComponentWritemask)
10571         storeDone = TRUE;
10572   }
10573
10574   free_temp_indexes(emit);
10575}
10576
10577
10578/**
10579 * Emit uav / memory store instruction
10580 */
10581static boolean
10582emit_store(struct svga_shader_emitter_v10 *emit,
10583           const struct tgsi_full_instruction *inst)
10584{
10585   enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10586   unsigned resourceIndex = inst->Dst[0].Register.Index;
10587
10588   /* If the resource register has indirect index, we will need
10589    * to expand it since SM5 device does not support indirect indexing
10590    * for uav.
10591    */
10592   if (inst->Dst[0].Register.Indirect &&
10593       (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10594
10595      unsigned indirect_index = inst->Dst[0].Indirect.Index;
10596      unsigned num_resources =
10597         resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10598                                            emit->num_images;
10599
10600      /* Indirect index tmp register */
10601      unsigned indirect_addr = emit->address_reg_index[indirect_index];
10602      struct tgsi_full_src_register indirect_addr_src =
10603         make_src_temp_reg(indirect_addr);
10604      indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10605
10606      /* Add offset to the indirect index */
10607      if (inst->Dst[0].Register.Index != 0) {
10608         struct tgsi_full_src_register offset =
10609            make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10610         struct tgsi_full_dst_register indirect_addr_dst =
10611            make_dst_temp_reg(indirect_addr);
10612         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10613                              &indirect_addr_src, &offset);
10614      }
10615
10616      /* Loop through the resource array to find which resource to use.
10617       */
10618      loop_instruction(0, num_resources, &indirect_addr_src,
10619                       emit_store_instruction, emit, inst);
10620   }
10621   else {
10622      emit_store_instruction(emit, inst, resourceIndex);
10623   }
10624
10625   free_temp_indexes(emit);
10626
10627   return TRUE;
10628}
10629
10630
10631/**
10632 * A helper function to emit an atomic instruction.
10633 */
10634
10635static void
10636emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10637                        const struct tgsi_full_instruction *inst,
10638                        unsigned resourceIndex)
10639{
10640   VGPU10OpcodeToken0 token0;
10641   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10642   struct tgsi_full_src_register addr_src;
10643   VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10644   const struct tgsi_full_src_register *offset;
10645
10646   /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */
10647   offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10648
10649   /* Resolve the resource address */
10650   addr_src = emit_uav_addr_offset(emit, resourceType,
10651                                   inst->Src[0].Register.Index,
10652                                   inst->Src[0].Register.Indirect,
10653                                   inst->Src[0].Indirect.Index,
10654                                   offset);
10655
10656   /* Emit the atomic operation */
10657   begin_emit_instruction(emit);
10658
10659   token0.value = 0;
10660   token0.opcodeType = opcode;
10661   token0.saturate = inst->Instruction.Saturate,
10662   emit_dword(emit, token0.value);
10663
10664   emit_dst_register(emit, &inst->Dst[0]);
10665
10666   if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10667      emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10668   } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10669      assert(inst->Src[0].Register.Dimension == 1);
10670      emit_uav_register(emit, inst->Src[0].Dimension.Index,
10671                        UAV_ATOMIC, inst->Src[0].Register.File, 0);
10672   } else {
10673      emit_uav_register(emit, resourceIndex,
10674                        UAV_ATOMIC, inst->Src[0].Register.File, 0);
10675   }
10676
10677   /* resource address offset */
10678   emit_src_register(emit, &addr_src);
10679
10680   struct tgsi_full_src_register src0_x =
10681         swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10682                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10683   emit_src_register(emit, &src0_x);
10684
10685   if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10686      struct tgsi_full_src_register src1_x =
10687         swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10688                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10689
10690      emit_src_register(emit, &src1_x);
10691   }
10692
10693   end_emit_instruction(emit);
10694
10695   free_temp_indexes(emit);
10696}
10697
10698
10699/**
10700 * Emit atomic instruction
10701 */
10702static boolean
10703emit_atomic(struct svga_shader_emitter_v10 *emit,
10704            const struct tgsi_full_instruction *inst,
10705            VGPU10_OPCODE_TYPE opcode)
10706{
10707   enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10708   unsigned resourceIndex = inst->Src[0].Register.Index;
10709
10710   emit->cur_atomic_opcode = opcode;
10711
10712   /* If the resource register has indirect index, we will need
10713    * to expand it since SM5 device does not support indirect indexing
10714    * for uav.
10715    */
10716   if (inst->Dst[0].Register.Indirect &&
10717       (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10718
10719      unsigned indirect_index = inst->Dst[0].Indirect.Index;
10720      unsigned num_resources =
10721         resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10722                                            emit->num_images;
10723
10724      /* indirect index tmp register */
10725      unsigned indirect_addr = emit->address_reg_index[indirect_index];
10726      struct tgsi_full_src_register indirect_addr_src =
10727         make_src_temp_reg(indirect_addr);
10728      indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10729
10730      /* Loop through the resource array to find which resource to use.
10731       */
10732      loop_instruction(0, num_resources, &indirect_addr_src,
10733                       emit_atomic_instruction, emit, inst);
10734   }
10735   else {
10736      emit_atomic_instruction(emit, inst, resourceIndex);
10737   }
10738
10739   free_temp_indexes(emit);
10740
10741   return TRUE;
10742}
10743
10744
10745/**
10746 * Emit barrier instruction
10747 */
10748static boolean
10749emit_barrier(struct svga_shader_emitter_v10 *emit,
10750             const struct tgsi_full_instruction *inst)
10751{
10752   VGPU10OpcodeToken0 token0;
10753
10754   assert(emit->version >= 50);
10755
10756   token0.value = 0;
10757   token0.opcodeType = VGPU10_OPCODE_SYNC;
10758
10759   if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10760      /* SM5 device doesn't support BARRIER in tcs . If barrier is used
10761       * in shader, don't do anything for this opcode and continue rest
10762       * of shader translation
10763       */
10764      util_debug_message(&emit->svga_debug_callback, INFO,
10765                         "barrier instruction is not supported in tessellation control shader\n");
10766      return TRUE;
10767   }
10768   else if (emit->unit == PIPE_SHADER_COMPUTE) {
10769      if (emit->cs.shared_memory_declared)
10770         token0.syncThreadGroupShared = 1;
10771
10772      if (emit->uav_declared)
10773         token0.syncUAVMemoryGroup = 1;
10774
10775      token0.syncThreadsInGroup = 1;
10776   } else {
10777      token0.syncUAVMemoryGlobal = 1;
10778   }
10779
10780   assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10781          token0.syncThreadGroupShared);
10782
10783   begin_emit_instruction(emit);
10784   emit_dword(emit, token0.value);
10785   end_emit_instruction(emit);
10786
10787   return TRUE;
10788}
10789
10790/**
10791 * Emit memory barrier instruction
10792 */
10793static boolean
10794emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10795                    const struct tgsi_full_instruction *inst)
10796{
10797   unsigned index = inst->Src[0].Register.Index;
10798   unsigned swizzle = inst->Src[0].Register.SwizzleX;
10799   unsigned bartype = emit->immediates[index][swizzle].Int;
10800   VGPU10OpcodeToken0 token0;
10801
10802   token0.value = 0;
10803   token0.opcodeType = VGPU10_OPCODE_SYNC;
10804
10805   if (emit->unit == PIPE_SHADER_COMPUTE) {
10806
10807      /* For compute shader, issue sync opcode with different options
10808       * depending on the memory barrier type.
10809       *
10810       * Bit 0: Shader storage buffers
10811       * Bit 1: Atomic buffers
10812       * Bit 2: Images
10813       * Bit 3: Shared memory
10814       * Bit 4: Thread group
10815       */
10816
10817      if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10818                     TGSI_MEMBAR_SHADER_IMAGE))
10819         token0.syncUAVMemoryGlobal = 1;
10820      else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10821         token0.syncUAVMemoryGroup = 1;
10822
10823      if (bartype & TGSI_MEMBAR_SHARED)
10824         token0.syncThreadGroupShared = 1;
10825   }
10826   else {
10827      /**
10828       * For graphics stages, only sync_uglobal is available.
10829       */
10830      if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10831                     TGSI_MEMBAR_SHADER_IMAGE))
10832         token0.syncUAVMemoryGlobal = 1;
10833   }
10834
10835   assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10836          token0.syncThreadGroupShared);
10837
10838   begin_emit_instruction(emit);
10839   emit_dword(emit, token0.value);
10840   end_emit_instruction(emit);
10841
10842   return TRUE;
10843}
10844
10845
10846/**
10847 * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10848 */
10849static boolean
10850emit_resq(struct svga_shader_emitter_v10 *emit,
10851         const struct tgsi_full_instruction *inst)
10852{
10853   struct tgsi_full_src_register zero =
10854      make_immediate_reg_int(emit, 0);
10855
10856   unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10857
10858   if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10859      struct tgsi_full_src_register image_src;
10860
10861      image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10862
10863      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10864      return TRUE;
10865   }
10866
10867   begin_emit_instruction(emit);
10868   if (uav_resource == TGSI_TEXTURE_BUFFER) {
10869      emit_opcode(emit, VGPU10_OPCODE_BUFINFO, FALSE);
10870      emit_dst_register(emit, &inst->Dst[0]);
10871   }
10872   else {
10873      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10874      emit_dst_register(emit, &inst->Dst[0]);
10875      emit_src_register(emit, &zero);
10876   }
10877   emit_uav_register(emit, inst->Src[0].Register.Index,
10878                     UAV_RESQ, inst->Src[0].Register.File, 0);
10879   end_emit_instruction(emit);
10880
10881   return TRUE;
10882}
10883
10884
10885static boolean
10886emit_instruction(struct svga_shader_emitter_v10 *emit,
10887                 unsigned inst_number,
10888                 const struct tgsi_full_instruction *inst)
10889{
10890   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10891
10892   switch (opcode) {
10893   case TGSI_OPCODE_ADD:
10894   case TGSI_OPCODE_AND:
10895   case TGSI_OPCODE_BGNLOOP:
10896   case TGSI_OPCODE_BRK:
10897   case TGSI_OPCODE_CEIL:
10898   case TGSI_OPCODE_CONT:
10899   case TGSI_OPCODE_DDX:
10900   case TGSI_OPCODE_DDY:
10901   case TGSI_OPCODE_DIV:
10902   case TGSI_OPCODE_DP2:
10903   case TGSI_OPCODE_DP3:
10904   case TGSI_OPCODE_DP4:
10905   case TGSI_OPCODE_ELSE:
10906   case TGSI_OPCODE_ENDIF:
10907   case TGSI_OPCODE_ENDLOOP:
10908   case TGSI_OPCODE_ENDSUB:
10909   case TGSI_OPCODE_F2I:
10910   case TGSI_OPCODE_F2U:
10911   case TGSI_OPCODE_FLR:
10912   case TGSI_OPCODE_FRC:
10913   case TGSI_OPCODE_FSEQ:
10914   case TGSI_OPCODE_FSGE:
10915   case TGSI_OPCODE_FSLT:
10916   case TGSI_OPCODE_FSNE:
10917   case TGSI_OPCODE_I2F:
10918   case TGSI_OPCODE_IMAX:
10919   case TGSI_OPCODE_IMIN:
10920   case TGSI_OPCODE_INEG:
10921   case TGSI_OPCODE_ISGE:
10922   case TGSI_OPCODE_ISHR:
10923   case TGSI_OPCODE_ISLT:
10924   case TGSI_OPCODE_MAD:
10925   case TGSI_OPCODE_MAX:
10926   case TGSI_OPCODE_MIN:
10927   case TGSI_OPCODE_MUL:
10928   case TGSI_OPCODE_NOP:
10929   case TGSI_OPCODE_NOT:
10930   case TGSI_OPCODE_OR:
10931   case TGSI_OPCODE_UADD:
10932   case TGSI_OPCODE_USEQ:
10933   case TGSI_OPCODE_USGE:
10934   case TGSI_OPCODE_USLT:
10935   case TGSI_OPCODE_UMIN:
10936   case TGSI_OPCODE_UMAD:
10937   case TGSI_OPCODE_UMAX:
10938   case TGSI_OPCODE_ROUND:
10939   case TGSI_OPCODE_SQRT:
10940   case TGSI_OPCODE_SHL:
10941   case TGSI_OPCODE_TRUNC:
10942   case TGSI_OPCODE_U2F:
10943   case TGSI_OPCODE_UCMP:
10944   case TGSI_OPCODE_USHR:
10945   case TGSI_OPCODE_USNE:
10946   case TGSI_OPCODE_XOR:
10947   /* Begin SM5 opcodes */
10948   case TGSI_OPCODE_F2D:
10949   case TGSI_OPCODE_D2F:
10950   case TGSI_OPCODE_DADD:
10951   case TGSI_OPCODE_DMUL:
10952   case TGSI_OPCODE_DMAX:
10953   case TGSI_OPCODE_DMIN:
10954   case TGSI_OPCODE_DSGE:
10955   case TGSI_OPCODE_DSLT:
10956   case TGSI_OPCODE_DSEQ:
10957   case TGSI_OPCODE_DSNE:
10958   case TGSI_OPCODE_BREV:
10959   case TGSI_OPCODE_POPC:
10960   case TGSI_OPCODE_LSB:
10961   case TGSI_OPCODE_INTERP_CENTROID:
10962   case TGSI_OPCODE_INTERP_SAMPLE:
10963      /* simple instructions */
10964      return emit_simple(emit, inst);
10965   case TGSI_OPCODE_RET:
10966      if (emit->unit == PIPE_SHADER_TESS_CTRL &&
10967          !emit->tcs.control_point_phase) {
10968
10969         /* store the tessellation levels in the patch constant phase only */
10970         store_tesslevels(emit);
10971      }
10972      return emit_simple(emit, inst);
10973
10974   case TGSI_OPCODE_IMSB:
10975   case TGSI_OPCODE_UMSB:
10976      return emit_msb(emit, inst);
10977   case TGSI_OPCODE_IBFE:
10978   case TGSI_OPCODE_UBFE:
10979      return emit_bfe(emit, inst);
10980   case TGSI_OPCODE_BFI:
10981      return emit_bfi(emit, inst);
10982   case TGSI_OPCODE_MOV:
10983      return emit_mov(emit, inst);
10984   case TGSI_OPCODE_EMIT:
10985      return emit_vertex(emit, inst);
10986   case TGSI_OPCODE_ENDPRIM:
10987      return emit_endprim(emit, inst);
10988   case TGSI_OPCODE_IABS:
10989      return emit_iabs(emit, inst);
10990   case TGSI_OPCODE_ARL:
10991      FALLTHROUGH;
10992   case TGSI_OPCODE_UARL:
10993      return emit_arl_uarl(emit, inst);
10994   case TGSI_OPCODE_BGNSUB:
10995      /* no-op */
10996      return TRUE;
10997   case TGSI_OPCODE_CAL:
10998      return emit_cal(emit, inst);
10999   case TGSI_OPCODE_CMP:
11000      return emit_cmp(emit, inst);
11001   case TGSI_OPCODE_COS:
11002      return emit_sincos(emit, inst);
11003   case TGSI_OPCODE_DST:
11004      return emit_dst(emit, inst);
11005   case TGSI_OPCODE_EX2:
11006      return emit_ex2(emit, inst);
11007   case TGSI_OPCODE_EXP:
11008      return emit_exp(emit, inst);
11009   case TGSI_OPCODE_IF:
11010      return emit_if(emit, &inst->Src[0]);
11011   case TGSI_OPCODE_KILL:
11012      return emit_discard(emit, inst);
11013   case TGSI_OPCODE_KILL_IF:
11014      return emit_cond_discard(emit, inst);
11015   case TGSI_OPCODE_LG2:
11016      return emit_lg2(emit, inst);
11017   case TGSI_OPCODE_LIT:
11018      return emit_lit(emit, inst);
11019   case TGSI_OPCODE_LODQ:
11020      return emit_lodq(emit, inst);
11021   case TGSI_OPCODE_LOG:
11022      return emit_log(emit, inst);
11023   case TGSI_OPCODE_LRP:
11024      return emit_lrp(emit, inst);
11025   case TGSI_OPCODE_POW:
11026      return emit_pow(emit, inst);
11027   case TGSI_OPCODE_RCP:
11028      return emit_rcp(emit, inst);
11029   case TGSI_OPCODE_RSQ:
11030      return emit_rsq(emit, inst);
11031   case TGSI_OPCODE_SAMPLE:
11032      return emit_sample(emit, inst);
11033   case TGSI_OPCODE_SEQ:
11034      return emit_seq(emit, inst);
11035   case TGSI_OPCODE_SGE:
11036      return emit_sge(emit, inst);
11037   case TGSI_OPCODE_SGT:
11038      return emit_sgt(emit, inst);
11039   case TGSI_OPCODE_SIN:
11040      return emit_sincos(emit, inst);
11041   case TGSI_OPCODE_SLE:
11042      return emit_sle(emit, inst);
11043   case TGSI_OPCODE_SLT:
11044      return emit_slt(emit, inst);
11045   case TGSI_OPCODE_SNE:
11046      return emit_sne(emit, inst);
11047   case TGSI_OPCODE_SSG:
11048      return emit_ssg(emit, inst);
11049   case TGSI_OPCODE_ISSG:
11050      return emit_issg(emit, inst);
11051   case TGSI_OPCODE_TEX:
11052      return emit_tex(emit, inst);
11053   case TGSI_OPCODE_TG4:
11054      return emit_tg4(emit, inst);
11055   case TGSI_OPCODE_TEX2:
11056      return emit_tex2(emit, inst);
11057   case TGSI_OPCODE_TXP:
11058      return emit_txp(emit, inst);
11059   case TGSI_OPCODE_TXB:
11060   case TGSI_OPCODE_TXB2:
11061   case TGSI_OPCODE_TXL:
11062      return emit_txl_txb(emit, inst);
11063   case TGSI_OPCODE_TXD:
11064      return emit_txd(emit, inst);
11065   case TGSI_OPCODE_TXF:
11066      return emit_txf(emit, inst);
11067   case TGSI_OPCODE_TXL2:
11068      return emit_txl2(emit, inst);
11069   case TGSI_OPCODE_TXQ:
11070      return emit_txq(emit, inst);
11071   case TGSI_OPCODE_UIF:
11072      return emit_if(emit, &inst->Src[0]);
11073   case TGSI_OPCODE_UMUL_HI:
11074   case TGSI_OPCODE_IMUL_HI:
11075   case TGSI_OPCODE_UDIV:
11076      /* These cases use only the FIRST of two destination registers */
11077      return emit_simple_1dst(emit, inst, 2, 0);
11078   case TGSI_OPCODE_IDIV:
11079      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
11080   case TGSI_OPCODE_UMUL:
11081   case TGSI_OPCODE_UMOD:
11082   case TGSI_OPCODE_MOD:
11083      /* These cases use only the SECOND of two destination registers */
11084      return emit_simple_1dst(emit, inst, 2, 1);
11085
11086   /* Begin SM5 opcodes */
11087   case TGSI_OPCODE_DABS:
11088      return emit_dabs(emit, inst);
11089   case TGSI_OPCODE_DNEG:
11090      return emit_dneg(emit, inst);
11091   case TGSI_OPCODE_DRCP:
11092      return emit_simple(emit, inst);
11093   case TGSI_OPCODE_DSQRT:
11094      return emit_dsqrt(emit, inst);
11095   case TGSI_OPCODE_DMAD:
11096      return emit_dmad(emit, inst);
11097   case TGSI_OPCODE_DFRAC:
11098      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
11099   case TGSI_OPCODE_D2I:
11100   case TGSI_OPCODE_D2U:
11101      return emit_simple(emit, inst);
11102   case TGSI_OPCODE_I2D:
11103   case TGSI_OPCODE_U2D:
11104      return emit_simple(emit, inst);
11105   case TGSI_OPCODE_DRSQ:
11106      return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
11107   case TGSI_OPCODE_DDIV:
11108      return emit_simple(emit, inst);
11109   case TGSI_OPCODE_INTERP_OFFSET:
11110      return emit_interp_offset(emit, inst);
11111   case TGSI_OPCODE_FMA:
11112   case TGSI_OPCODE_DFMA:
11113      return emit_simple(emit, inst);
11114
11115   case TGSI_OPCODE_DTRUNC:
11116      return emit_dtrunc(emit, inst);
11117
11118   /* The following opcodes should never be seen here.  We return zero
11119    * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
11120    * LDEXP_SUPPORTED queries.
11121    */
11122   case TGSI_OPCODE_LDEXP:
11123   case TGSI_OPCODE_DSSG:
11124   case TGSI_OPCODE_DFRACEXP:
11125   case TGSI_OPCODE_DLDEXP:
11126   case TGSI_OPCODE_DCEIL:
11127   case TGSI_OPCODE_DFLR:
11128      debug_printf("Unexpected TGSI opcode %s.  "
11129                   "Should have been translated away by the GLSL compiler.\n",
11130                   tgsi_get_opcode_name(opcode));
11131      return FALSE;
11132
11133   case TGSI_OPCODE_LOAD:
11134      return emit_load(emit, inst);
11135
11136   case TGSI_OPCODE_STORE:
11137      return emit_store(emit, inst);
11138
11139   case TGSI_OPCODE_ATOMAND:
11140      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);
11141
11142   case TGSI_OPCODE_ATOMCAS:
11143      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
11144
11145   case TGSI_OPCODE_ATOMIMAX:
11146      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);
11147
11148   case TGSI_OPCODE_ATOMIMIN:
11149      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);
11150
11151   case TGSI_OPCODE_ATOMOR:
11152      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);
11153
11154   case TGSI_OPCODE_ATOMUADD:
11155      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);
11156
11157   case TGSI_OPCODE_ATOMUMAX:
11158      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);
11159
11160   case TGSI_OPCODE_ATOMUMIN:
11161      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);
11162
11163   case TGSI_OPCODE_ATOMXCHG:
11164      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);
11165
11166   case TGSI_OPCODE_ATOMXOR:
11167      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);
11168
11169   case TGSI_OPCODE_BARRIER:
11170      return emit_barrier(emit, inst);
11171
11172   case TGSI_OPCODE_MEMBAR:
11173      return emit_memory_barrier(emit, inst);
11174
11175   case TGSI_OPCODE_RESQ:
11176      return emit_resq(emit, inst);
11177
11178   case TGSI_OPCODE_END:
11179      if (!emit_post_helpers(emit))
11180         return FALSE;
11181      return emit_simple(emit, inst);
11182
11183   default:
11184      debug_printf("Unimplemented tgsi instruction %s\n",
11185                   tgsi_get_opcode_name(opcode));
11186      return FALSE;
11187   }
11188
11189   return TRUE;
11190}
11191
11192
11193/**
11194 * Translate a single TGSI instruction to VGPU10.
11195 */
11196static boolean
11197emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11198                        unsigned inst_number,
11199                        const struct tgsi_full_instruction *inst)
11200{
11201   if (emit->skip_instruction)
11202      return TRUE;
11203
11204   boolean ret = TRUE;
11205   unsigned start_token = emit_get_num_tokens(emit);
11206
11207   emit->reemit_tgsi_instruction = FALSE;
11208
11209   ret = emit_instruction(emit, inst_number, inst);
11210
11211   if (emit->reemit_tgsi_instruction) {
11212      /**
11213       * Reset emit->ptr to where the translation of this tgsi instruction
11214       * started.
11215       */
11216      VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11217      emit->ptr = (char *) (tokens + start_token);
11218
11219      emit->reemit_tgsi_instruction = FALSE;
11220   }
11221   return ret;
11222}
11223
11224
11225/**
11226 * Emit the extra instructions to adjust the vertex position.
11227 * There are two possible adjustments:
11228 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11229 *    "prescale" and "pretranslate" values.
11230 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
11231 * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
11232 */
11233static void
11234emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11235{
11236   struct tgsi_full_src_register tmp_pos_src;
11237   struct tgsi_full_dst_register pos_dst;
11238   const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11239
11240   /* Don't bother to emit any extra vertex instructions if vertex position is
11241    * not written out
11242    */
11243   if (emit->vposition.out_index == INVALID_INDEX)
11244      return;
11245
11246   /**
11247    * Reset the temporary vertex position register index
11248    * so that emit_dst_register() will use the real vertex position output
11249    */
11250   emit->vposition.tmp_index = INVALID_INDEX;
11251
11252   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11253   pos_dst = make_dst_output_reg(emit->vposition.out_index);
11254
11255   /* If non-adjusted vertex position register index
11256    * is valid, copy the vertex position from the temporary
11257    * vertex position register before it is modified by the
11258    * prescale computation.
11259    */
11260   if (emit->vposition.so_index != INVALID_INDEX) {
11261      struct tgsi_full_dst_register pos_so_dst =
11262         make_dst_output_reg(emit->vposition.so_index);
11263
11264      /* MOV pos_so, tmp_pos */
11265      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11266   }
11267
11268   if (emit->vposition.need_prescale) {
11269      /* This code adjusts the vertex position to match the VGPU10 convention.
11270       * If p is the position computed by the shader (usually by applying the
11271       * modelview and projection matrices), the new position q is computed by:
11272       *
11273       * q.x = p.w * trans.x + p.x * scale.x
11274       * q.y = p.w * trans.y + p.y * scale.y
11275       * q.z = p.w * trans.z + p.z * scale.z;
11276       * q.w = p.w * trans.w + p.w;
11277       */
11278      struct tgsi_full_src_register tmp_pos_src_w =
11279         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11280      struct tgsi_full_dst_register tmp_pos_dst =
11281         make_dst_temp_reg(vs_pos_tmp_index);
11282      struct tgsi_full_dst_register tmp_pos_dst_xyz =
11283         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11284
11285      struct tgsi_full_src_register prescale_scale =
11286         make_src_temp_reg(emit->vposition.prescale_scale_index);
11287      struct tgsi_full_src_register prescale_trans =
11288         make_src_temp_reg(emit->vposition.prescale_trans_index);
11289
11290      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11291      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11292                           &tmp_pos_src, &prescale_scale);
11293
11294      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11295      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11296                           &prescale_trans, &tmp_pos_src);
11297   }
11298   else if (emit->key.vs.undo_viewport) {
11299      /* This code computes the final vertex position from the temporary
11300       * vertex position by undoing the viewport transformation and the
11301       * divide-by-W operation (we convert window coords back to clip coords).
11302       * This is needed when we use the 'draw' module for fallbacks.
11303       * If p is the temp pos in window coords, then the NDC coord q is:
11304       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11305       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11306       *   q.z = p.z * p.w
11307       *   q.w = p.w
11308       * CONST[vs_viewport_index] contains:
11309       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11310       */
11311      struct tgsi_full_dst_register tmp_pos_dst =
11312         make_dst_temp_reg(vs_pos_tmp_index);
11313      struct tgsi_full_dst_register tmp_pos_dst_xy =
11314         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11315      struct tgsi_full_src_register tmp_pos_src_wwww =
11316         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11317
11318      struct tgsi_full_dst_register pos_dst_xyz =
11319         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11320      struct tgsi_full_dst_register pos_dst_w =
11321         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11322
11323      struct tgsi_full_src_register vp_xyzw =
11324         make_src_const_reg(emit->vs.viewport_index);
11325      struct tgsi_full_src_register vp_zwww =
11326         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11327                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11328
11329      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11330      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11331                           &tmp_pos_src, &vp_zwww);
11332
11333      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11334      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11335                           &tmp_pos_src, &vp_xyzw);
11336
11337      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11338      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11339                           &tmp_pos_src, &tmp_pos_src_wwww);
11340
11341      /* MOV pos.w, tmp_pos.w */
11342      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11343   }
11344   else if (vs_pos_tmp_index != INVALID_INDEX) {
11345      /* This code is to handle the case where the temporary vertex
11346       * position register is created when the vertex shader has stream
11347       * output and prescale is disabled because rasterization is to be
11348       * discarded.
11349       */
11350      struct tgsi_full_dst_register pos_dst =
11351         make_dst_output_reg(emit->vposition.out_index);
11352
11353      /* MOV pos, tmp_pos */
11354      begin_emit_instruction(emit);
11355      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
11356      emit_dst_register(emit, &pos_dst);
11357      emit_src_register(emit, &tmp_pos_src);
11358      end_emit_instruction(emit);
11359   }
11360
11361   /* Restore original vposition.tmp_index value for the next GS vertex.
11362    * It doesn't matter for VS.
11363    */
11364   emit->vposition.tmp_index = vs_pos_tmp_index;
11365}
11366
11367static void
11368emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11369{
11370   if (emit->clip_mode == CLIP_DISTANCE) {
11371      /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
11372      emit_clip_distance_instructions(emit);
11373
11374   } else if (emit->clip_mode == CLIP_VERTEX &&
11375              emit->key.last_vertex_stage) {
11376      /* Convert TGSI CLIPVERTEX to CLIPDIST */
11377      emit_clip_vertex_instructions(emit);
11378   }
11379
11380   /**
11381    * Emit vertex position and take care of legacy user planes only if
11382    * there is a valid vertex position register index.
11383    * This is to take care of the case
11384    * where the shader doesn't output vertex position. Then in
11385    * this case, don't bother to emit more vertex instructions.
11386    */
11387   if (emit->vposition.out_index == INVALID_INDEX)
11388      return;
11389
11390   /**
11391    * Emit per-vertex clipping instructions for legacy user defined clip planes.
11392    * NOTE: we must emit the clip distance instructions before the
11393    * emit_vpos_instructions() call since the later function will change
11394    * the TEMP[vs_pos_tmp_index] value.
11395    */
11396   if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11397      /* Emit CLIPDIST for legacy user defined clip planes */
11398      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11399   }
11400}
11401
11402
/**
 * Emit extra per-vertex instructions.  This includes clip-coordinate
 * space conversion and computing clip distances.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
 *
 * The clipping instructions are emitted first and the vertex position
 * instructions second.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Step 1: clipping, according to the current clipping mode. */
   emit_clipping_instructions(emit);

   /* Step 2: final vertex position. */
   emit_vpos_instructions(emit);
}
11417
11418
11419/**
11420 * Translate the TGSI_OPCODE_EMIT GS instruction.
11421 */
11422static boolean
11423emit_vertex(struct svga_shader_emitter_v10 *emit,
11424            const struct tgsi_full_instruction *inst)
11425{
11426   unsigned ret = TRUE;
11427
11428   assert(emit->unit == PIPE_SHADER_GEOMETRY);
11429
11430   /**
11431    * Emit the viewport array index for the first vertex.
11432    */
11433   if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11434      struct tgsi_full_dst_register viewport_index_out =
11435         make_dst_output_reg(emit->gs.viewport_index_out_index);
11436      struct tgsi_full_dst_register viewport_index_out_x =
11437         writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11438      struct tgsi_full_src_register viewport_index_tmp =
11439         make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11440
11441      /* Set the out index to INVALID_INDEX, so it will not
11442       * be assigned to a temp again in emit_dst_register, and
11443       * the viewport index will not be assigned again in the
11444       * subsequent vertices.
11445       */
11446      emit->gs.viewport_index_out_index = INVALID_INDEX;
11447      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11448                           &viewport_index_out_x, &viewport_index_tmp);
11449   }
11450
11451   /**
11452    * Find the stream index associated with this emit vertex instruction.
11453    */
11454   assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11455   unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11456
11457   /**
11458    * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11459    * outputs are always associated with vertex stream zero.
11460    * So emit the extra vertex instructions for position or clip distance
11461    * for stream zero only.
11462    */
11463   if (streamIndex == 0) {
11464      /**
11465       * Before emitting vertex instructions, emit the temporaries for
11466       * the prescale constants based on the viewport index if needed.
11467       */
11468      if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11469         emit_temp_prescale_instructions(emit);
11470
11471      emit_vertex_instructions(emit);
11472   }
11473
11474   begin_emit_instruction(emit);
11475   if (emit->version >= 50) {
11476      if (emit->info.num_stream_output_components[streamIndex] == 0) {
11477         /**
11478          * If there is no output for this stream, discard this instruction.
11479          */
11480         emit->discard_instruction = TRUE;
11481      }
11482      else {
11483         emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
11484         emit_stream_register(emit, streamIndex);
11485      }
11486   }
11487   else {
11488      emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
11489   }
11490   end_emit_instruction(emit);
11491
11492   return ret;
11493}
11494
11495
11496/**
11497 * Emit the extra code to convert from VGPU10's boolean front-face
11498 * register to TGSI's signed front-face register.
11499 *
11500 * TODO: Make temporary front-face register a scalar.
11501 */
11502static void
11503emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11504{
11505   assert(emit->unit == PIPE_SHADER_FRAGMENT);
11506
11507   if (emit->fs.face_input_index != INVALID_INDEX) {
11508      /* convert vgpu10 boolean face register to gallium +/-1 value */
11509      struct tgsi_full_dst_register tmp_dst =
11510         make_dst_temp_reg(emit->fs.face_tmp_index);
11511      struct tgsi_full_src_register one =
11512         make_immediate_reg_float(emit, 1.0f);
11513      struct tgsi_full_src_register neg_one =
11514         make_immediate_reg_float(emit, -1.0f);
11515
11516      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11517      begin_emit_instruction(emit);
11518      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
11519      emit_dst_register(emit, &tmp_dst);
11520      emit_face_register(emit);
11521      emit_src_register(emit, &one);
11522      emit_src_register(emit, &neg_one);
11523      end_emit_instruction(emit);
11524   }
11525}
11526
11527
11528/**
11529 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11530 */
11531static void
11532emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11533{
11534   assert(emit->unit == PIPE_SHADER_FRAGMENT);
11535
11536   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11537      struct tgsi_full_dst_register tmp_dst =
11538         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11539      struct tgsi_full_dst_register tmp_dst_xyz =
11540         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11541      struct tgsi_full_dst_register tmp_dst_w =
11542         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11543      struct tgsi_full_src_register one =
11544         make_immediate_reg_float(emit, 1.0f);
11545      struct tgsi_full_src_register fragcoord =
11546         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11547
11548      /* save the input index */
11549      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11550      /* set to invalid to prevent substitution in emit_src_register() */
11551      emit->fs.fragcoord_input_index = INVALID_INDEX;
11552
11553      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11554      begin_emit_instruction(emit);
11555      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
11556      emit_dst_register(emit, &tmp_dst_xyz);
11557      emit_src_register(emit, &fragcoord);
11558      end_emit_instruction(emit);
11559
11560      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11561      begin_emit_instruction(emit);
11562      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
11563      emit_dst_register(emit, &tmp_dst_w);
11564      emit_src_register(emit, &one);
11565      emit_src_register(emit, &fragcoord);
11566      end_emit_instruction(emit);
11567
11568      /* restore saved value */
11569      emit->fs.fragcoord_input_index = fragcoord_input_index;
11570   }
11571}
11572
11573
11574/**
11575 * Emit the extra code to get the current sample position value and
11576 * put it into a temp register.
11577 */
11578static void
11579emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11580{
11581   assert(emit->unit == PIPE_SHADER_FRAGMENT);
11582
11583   if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11584      assert(emit->version >= 41);
11585
11586      struct tgsi_full_dst_register tmp_dst =
11587         make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11588      struct tgsi_full_src_register half =
11589         make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11590
11591      struct tgsi_full_src_register tmp_src =
11592         make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11593      struct tgsi_full_src_register sample_index_reg =
11594         make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11595                             emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11596
11597      /* The first src register is a shader resource (if we want a
11598       * multisampled resource sample position) or the rasterizer register
11599       * (if we want the current sample position in the color buffer).  We
11600       * want the later.
11601       */
11602
11603      /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11604      begin_emit_instruction(emit);
11605      emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
11606      emit_dst_register(emit, &tmp_dst);
11607      emit_rasterizer_register(emit);
11608      emit_src_register(emit, &sample_index_reg);
11609      end_emit_instruction(emit);
11610
11611      /* Convert from D3D coords to GL coords by adding 0.5 bias */
11612      /* ADD dst, dst, half */
11613      begin_emit_instruction(emit);
11614      emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
11615      emit_dst_register(emit, &tmp_dst);
11616      emit_src_register(emit, &tmp_src);
11617      emit_src_register(emit, &half);
11618      end_emit_instruction(emit);
11619   }
11620}
11621
11622
11623/**
11624 * Emit extra instructions to adjust VS inputs/attributes.  This can
11625 * mean casting a vertex attribute from int to float or setting the
11626 * W component to 1, or both.
11627 */
11628static void
11629emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
11630{
11631   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
11632   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
11633   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
11634   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
11635   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
11636   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
11637   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
11638
11639   unsigned adjust_mask = (save_w_1_mask |
11640                           save_itof_mask |
11641                           save_utof_mask |
11642                           save_is_bgra_mask |
11643                           save_puint_to_snorm_mask |
11644                           save_puint_to_uscaled_mask |
11645                           save_puint_to_sscaled_mask);
11646
11647   assert(emit->unit == PIPE_SHADER_VERTEX);
11648
11649   if (adjust_mask) {
11650      struct tgsi_full_src_register one =
11651         make_immediate_reg_float(emit, 1.0f);
11652
11653      struct tgsi_full_src_register one_int =
11654         make_immediate_reg_int(emit, 1);
11655
11656      /* We need to turn off these bitmasks while emitting the
11657       * instructions below, then restore them afterward.
11658       */
11659      emit->key.vs.adjust_attrib_w_1 = 0;
11660      emit->key.vs.adjust_attrib_itof = 0;
11661      emit->key.vs.adjust_attrib_utof = 0;
11662      emit->key.vs.attrib_is_bgra = 0;
11663      emit->key.vs.attrib_puint_to_snorm = 0;
11664      emit->key.vs.attrib_puint_to_uscaled = 0;
11665      emit->key.vs.attrib_puint_to_sscaled = 0;
11666
11667      while (adjust_mask) {
11668         unsigned index = u_bit_scan(&adjust_mask);
11669
11670         /* skip the instruction if this vertex attribute is not being used */
11671         if (emit->info.input_usage_mask[index] == 0)
11672            continue;
11673
11674         unsigned tmp = emit->vs.adjusted_input[index];
11675         struct tgsi_full_src_register input_src =
11676            make_src_reg(TGSI_FILE_INPUT, index);
11677
11678         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11679         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11680         struct tgsi_full_dst_register tmp_dst_w =
11681            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11682
11683         /* ITOF/UTOF/MOV tmp, input[index] */
11684         if (save_itof_mask & (1 << index)) {
11685            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
11686                                 &tmp_dst, &input_src);
11687         }
11688         else if (save_utof_mask & (1 << index)) {
11689            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
11690                                 &tmp_dst, &input_src);
11691         }
11692         else if (save_puint_to_snorm_mask & (1 << index)) {
11693            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
11694         }
11695         else if (save_puint_to_uscaled_mask & (1 << index)) {
11696            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
11697         }
11698         else if (save_puint_to_sscaled_mask & (1 << index)) {
11699            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
11700         }
11701         else {
11702            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
11703            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11704                                 &tmp_dst, &input_src);
11705         }
11706
11707         if (save_is_bgra_mask & (1 << index)) {
11708            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
11709         }
11710
11711         if (save_w_1_mask & (1 << index)) {
11712            /* MOV tmp.w, 1.0 */
11713            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
11714               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11715                                    &tmp_dst_w, &one_int);
11716            }
11717            else {
11718               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11719                                    &tmp_dst_w, &one);
11720            }
11721         }
11722      }
11723
11724      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
11725      emit->key.vs.adjust_attrib_itof = save_itof_mask;
11726      emit->key.vs.adjust_attrib_utof = save_utof_mask;
11727      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
11728      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
11729      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
11730      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
11731   }
11732}
11733
11734
11735/* Find zero-value immedate for default layer index */
11736static void
11737emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11738{
11739   assert(emit->unit == PIPE_SHADER_FRAGMENT);
11740
11741   /* immediate for default layer index 0 */
11742   if (emit->fs.layer_input_index != INVALID_INDEX) {
11743      union tgsi_immediate_data imm;
11744      imm.Int = 0;
11745      emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11746   }
11747}
11748
11749
11750static void
11751emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11752                             unsigned cbuf_index,
11753                             struct tgsi_full_dst_register *scale,
11754                             struct tgsi_full_dst_register *translate)
11755{
11756   struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11757   struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11758
11759   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11760   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11761}
11762
11763
11764/**
11765 * A recursive helper function to find the prescale from the constant buffer
11766 */
11767static void
11768find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11769                        unsigned index, unsigned num_prescale,
11770                        struct tgsi_full_src_register *vp_index,
11771                        struct tgsi_full_dst_register *scale,
11772                        struct tgsi_full_dst_register *translate,
11773                        struct tgsi_full_src_register *tmp_src,
11774                        struct tgsi_full_dst_register *tmp_dst)
11775{
11776   if (num_prescale == 0)
11777      return;
11778
11779   if (index > 0) {
11780      /* ELSE */
11781      emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11782   }
11783
11784   struct tgsi_full_src_register index_src =
11785	                            make_immediate_reg_int(emit, index);
11786
11787   if (index == 0) {
11788      /* GE tmp, vp_index, index */
11789      emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11790                           vp_index, &index_src);
11791   } else {
11792      /* EQ tmp, vp_index, index */
11793      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11794                           vp_index, &index_src);
11795   }
11796
11797   /* IF tmp */
11798   emit_if(emit, tmp_src);
11799   emit_temp_prescale_from_cbuf(emit,
11800                                emit->vposition.prescale_cbuf_index + 2 * index,
11801                                scale, translate);
11802
11803   find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11804                           vp_index, scale, translate,
11805                           tmp_src, tmp_dst);
11806
11807   /* ENDIF */
11808   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11809}
11810
11811
11812/**
11813 * This helper function emits instructions to set the prescale
11814 * and translate temporaries to the correct constants from the
11815 * constant buffer according to the designated viewport.
11816 */
11817static void
11818emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11819{
11820   struct tgsi_full_dst_register prescale_scale =
11821         make_dst_temp_reg(emit->vposition.prescale_scale_index);
11822   struct tgsi_full_dst_register prescale_translate =
11823         make_dst_temp_reg(emit->vposition.prescale_trans_index);
11824
11825   unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11826
11827   if (emit->vposition.num_prescale == 1) {
11828      emit_temp_prescale_from_cbuf(emit,
11829                                   prescale_cbuf_index,
11830                                   &prescale_scale, &prescale_translate);
11831   } else {
11832      /**
11833       * Since SM5 device does not support dynamic indexing, we need
11834       * to do the if-else to find the prescale constants for the
11835       * specified viewport.
11836       */
11837      struct tgsi_full_src_register vp_index_src =
11838         make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11839
11840      struct tgsi_full_src_register vp_index_src_x =
11841         scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11842
11843      unsigned tmp = get_temp_index(emit);
11844      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11845      struct tgsi_full_src_register tmp_src_x =
11846                scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11847      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11848
11849      find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11850                              &vp_index_src_x,
11851		              &prescale_scale, &prescale_translate,
11852                              &tmp_src_x, &tmp_dst);
11853   }
11854
11855   /* Mark prescale temporaries are emitted */
11856   emit->vposition.have_prescale = 1;
11857}
11858
11859
11860/**
11861 * A helper function to emit an instruction in a vertex shader to add a bias
11862 * to the VertexID system value. This patches the VertexID in the SVGA vertex
11863 * shader to include the base vertex of an indexed primitive or the start index
11864 * of a non-indexed primitive.
11865 */
11866static void
11867emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
11868{
11869   struct tgsi_full_src_register vertex_id_bias_index =
11870      make_src_const_reg(emit->vs.vertex_id_bias_index);
11871   struct tgsi_full_src_register vertex_id_sys_src =
11872      make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
11873   struct tgsi_full_src_register vertex_id_sys_src_x =
11874      scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
11875   struct tgsi_full_dst_register vertex_id_tmp_dst =
11876      make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
11877
11878   /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
11879   unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
11880   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11881   emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
11882                        &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
11883                        FALSE);
11884   emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
11885}
11886
11887/**
11888 * Hull Shader must have control point outputs. But tessellation
11889 * control shader can return without writing to control point output.
11890 * In this case, the control point output is assumed to be passthrough
11891 * from the control point input.
11892 * This helper function is to write out a control point output first in case
11893 * the tessellation control shader returns before writing a
11894 * control point output.
11895 */
11896static void
11897emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
11898{
11899   assert(emit->unit == PIPE_SHADER_TESS_CTRL);
11900   assert(emit->tcs.control_point_phase);
11901   assert(emit->tcs.control_point_out_index != INVALID_INDEX);
11902   assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
11903
11904   struct tgsi_full_dst_register output_control_point;
11905   output_control_point =
11906      make_dst_output_reg(emit->tcs.control_point_out_index);
11907
11908   if (emit->tcs.control_point_input_index == INVALID_INDEX) {
11909      /* MOV OUTPUT 0.0f */
11910      struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
11911      begin_emit_instruction(emit);
11912      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11913      emit_dst_register(emit, &output_control_point);
11914      emit_src_register(emit, &zero);
11915      end_emit_instruction(emit);
11916   }
11917   else {
11918      /* UARL ADDR[INDEX].x INVOCATION.xxxx */
11919
11920      struct tgsi_full_src_register invocation_src;
11921      struct tgsi_full_dst_register addr_dst;
11922      struct tgsi_full_dst_register addr_dst_x;
11923      unsigned addr_tmp;
11924
11925      addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
11926      addr_dst = make_dst_temp_reg(addr_tmp);
11927      addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
11928
11929      invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
11930                                    emit->tcs.invocation_id_sys_index);
11931
11932      begin_emit_instruction(emit);
11933      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11934      emit_dst_register(emit, &addr_dst_x);
11935      emit_src_register(emit, &invocation_src);
11936      end_emit_instruction(emit);
11937
11938
11939      /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
11940
11941      struct tgsi_full_src_register input_control_point;
11942      input_control_point = make_src_reg(TGSI_FILE_INPUT,
11943                                         emit->tcs.control_point_input_index);
11944      input_control_point.Register.Dimension = 1;
11945      input_control_point.Dimension.Indirect = 1;
11946      input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
11947      input_control_point.DimIndirect.Index =
11948         emit->tcs.control_point_addr_index;
11949
11950      begin_emit_instruction(emit);
11951      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11952      emit_dst_register(emit, &output_control_point);
11953      emit_src_register(emit, &input_control_point);
11954      end_emit_instruction(emit);
11955   }
11956}
11957
11958/**
11959 * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
11960 * values in domain shader. SM5 has tessfactors as floating point values where
11961 * as tgsi emit them as vector. This function allows to construct temp
11962 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
11963 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
11964 * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
11965 */
11966static void
11967emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
11968{
11969   struct tgsi_full_src_register src;
11970   struct tgsi_full_dst_register dst;
11971
11972   if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
11973      dst = make_dst_temp_reg(emit->tes.inner.temp_index);
11974
11975      switch (emit->tes.prim_mode) {
11976      case PIPE_PRIM_QUADS:
11977         src = make_src_scalar_reg(TGSI_FILE_INPUT,
11978                  emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
11979         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
11980         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11981         FALLTHROUGH;
11982      case PIPE_PRIM_TRIANGLES:
11983         src = make_src_scalar_reg(TGSI_FILE_INPUT,
11984                  emit->tes.inner.in_index, TGSI_SWIZZLE_X);
11985         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
11986         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11987         break;
11988      case PIPE_PRIM_LINES:
11989         /**
11990          * As per SM5 spec, InsideTessFactor for isolines are unused.
11991          * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
11992          * any application try to read tessInnerLevel in TES when primitive type
11993          * is isolines, then instead of driver throwing segfault for accesing it,
11994          * return atleast vec(1.0f)
11995          */
11996         src = make_immediate_reg_float(emit, 1.0f);
11997         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11998         break;
11999      default:
12000         break;
12001      }
12002   }
12003
12004   if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
12005      dst = make_dst_temp_reg(emit->tes.outer.temp_index);
12006
12007      switch (emit->tes.prim_mode) {
12008      case PIPE_PRIM_QUADS:
12009         src = make_src_scalar_reg(TGSI_FILE_INPUT,
12010                  emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
12011         dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
12012         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12013         FALLTHROUGH;
12014      case PIPE_PRIM_TRIANGLES:
12015         src = make_src_scalar_reg(TGSI_FILE_INPUT,
12016                  emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
12017         dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
12018         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12019         FALLTHROUGH;
12020      case PIPE_PRIM_LINES:
12021         src = make_src_scalar_reg(TGSI_FILE_INPUT,
12022                  emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
12023         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12024         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12025
12026         src = make_src_scalar_reg(TGSI_FILE_INPUT,
12027                  emit->tes.outer.in_index , TGSI_SWIZZLE_X);
12028         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12029         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12030
12031         break;
12032      default:
12033         break;
12034      }
12035   }
12036}
12037
12038
12039static void
12040emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12041{
12042   struct tgsi_full_src_register src;
12043   struct tgsi_full_dst_register dst;
12044   unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12045                                                 emit->initialize_temp_index);
12046   src = make_immediate_reg_float(emit, 0.0f);
12047   dst = make_dst_temp_reg(vgpu10_temp_index);
12048   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12049   emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
12050   emit->initialize_temp_index = INVALID_INDEX;
12051}
12052
12053
12054/**
12055 * Emit any extra/helper declarations/code that we might need between
12056 * the declaration section and code section.
12057 */
12058static boolean
12059emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
12060{
12061   /* Properties */
12062   if (emit->unit == PIPE_SHADER_GEOMETRY)
12063      emit_property_instructions(emit);
12064   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12065      emit_hull_shader_declarations(emit);
12066
12067      /* Save the position of the first instruction token so that we can
12068       * do a second pass of the instructions for the patch constant phase.
12069       */
12070      emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
12071      emit->tcs.fork_phase_add_signature = FALSE;
12072
12073      if (!emit_hull_shader_control_point_phase(emit)) {
12074         emit->skip_instruction = TRUE;
12075         return TRUE;
12076      }
12077
12078      /* Set the current tcs phase to control point phase */
12079      emit->tcs.control_point_phase = TRUE;
12080   }
12081   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12082      emit_domain_shader_declarations(emit);
12083   }
12084   else if (emit->unit == PIPE_SHADER_COMPUTE) {
12085      emit_compute_shader_declarations(emit);
12086   }
12087
12088   /* Declare inputs */
12089   if (!emit_input_declarations(emit))
12090      return FALSE;
12091
12092   /* Declare outputs */
12093   if (!emit_output_declarations(emit))
12094      return FALSE;
12095
12096   /* Declare temporary registers */
12097   emit_temporaries_declaration(emit);
12098
12099   /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
12100    * will already be declared in hs_decls (emit_hull_shader_declarations)
12101    */
12102   if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12103
12104      alloc_common_immediates(emit);
12105
12106      /* Declare constant registers */
12107      emit_constant_declaration(emit);
12108
12109      /* Declare samplers and resources */
12110      emit_sampler_declarations(emit);
12111      emit_resource_declarations(emit);
12112
12113      /* Declare images */
12114      emit_image_declarations(emit);
12115
12116      /* Declare shader buffers */
12117      emit_shader_buf_declarations(emit);
12118
12119      /* Declare atomic buffers */
12120      emit_atomic_buf_declarations(emit);
12121   }
12122
12123   if (emit->unit != PIPE_SHADER_FRAGMENT &&
12124       emit->unit != PIPE_SHADER_COMPUTE) {
12125      /*
12126       * Declare clip distance output registers for ClipVertex or
12127       * user defined planes
12128       */
12129      emit_clip_distance_declarations(emit);
12130   }
12131
12132   if (emit->unit == PIPE_SHADER_COMPUTE) {
12133      emit_memory_declarations(emit);
12134
12135      if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
12136         emit->cs.grid_size.imm_index =
12137            alloc_immediate_int4(emit,
12138                                 emit->key.cs.grid_size[0],
12139                                 emit->key.cs.grid_size[1],
12140                                 emit->key.cs.grid_size[2], 0);
12141      }
12142   }
12143
12144   if (emit->unit == PIPE_SHADER_FRAGMENT &&
12145       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12146      float alpha = emit->key.fs.alpha_ref;
12147      emit->fs.alpha_ref_index =
12148         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
12149   }
12150
12151   if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12152      /**
12153       * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
12154       * hs_decls
12155       */
12156      emit_vgpu10_immediates_block(emit);
12157   }
12158   else {
12159      emit_tcs_default_control_point_output(emit);
12160   }
12161
12162   if (emit->unit == PIPE_SHADER_FRAGMENT) {
12163      emit_frontface_instructions(emit);
12164      emit_fragcoord_instructions(emit);
12165      emit_sample_position_instructions(emit);
12166      emit_default_layer_instructions(emit);
12167   }
12168   else if (emit->unit == PIPE_SHADER_VERTEX) {
12169      emit_vertex_attrib_instructions(emit);
12170
12171      if (emit->info.uses_vertexid)
12172         emit_vertex_id_nobase_instruction(emit);
12173   }
12174   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12175      emit_temp_tessfactor_instructions(emit);
12176   }
12177
12178   /**
12179    * For geometry shader that writes to viewport index, the prescale
12180    * temporaries will be done at the first vertex emission.
12181    */
12182   if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
12183      emit_temp_prescale_instructions(emit);
12184
12185   return TRUE;
12186}
12187
12188
12189/**
12190 * The device has no direct support for the pipe_blend_state::alpha_to_one
12191 * option so we implement it here with shader code.
12192 *
12193 * Note that this is kind of pointless, actually.  Here we're clobbering
12194 * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
12195 * up with 100% coverage.  That's almost certainly not what the user wants.
12196 * The work-around is to add extra shader code to compute coverage from alpha
12197 * and write it to the coverage output register (if the user's shader doesn't
12198 * do so already).  We'll probably do that in the future.
12199 */
12200static void
12201emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12202                               unsigned fs_color_tmp_index)
12203{
12204   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12205   unsigned i;
12206
12207   /* Note: it's not 100% clear from the spec if we're supposed to clobber
12208    * the alpha for all render targets.  But that's what NVIDIA does and
12209    * that's what Piglit tests.
12210    */
12211   for (i = 0; i < emit->fs.num_color_outputs; i++) {
12212      struct tgsi_full_dst_register color_dst;
12213
12214      if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12215         /* write to the temp color register */
12216         color_dst = make_dst_temp_reg(fs_color_tmp_index);
12217      }
12218      else {
12219         /* write directly to the color[i] output */
12220         color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12221      }
12222
12223      color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12224
12225      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12226   }
12227}
12228
12229
12230/**
12231 * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
12232 * against the alpha reference value and discards the fragment if the
12233 * comparison fails.
12234 */
12235static void
12236emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
12237                             unsigned fs_color_tmp_index)
12238{
12239   /* compare output color's alpha to alpha ref and discard if comparison
12240    * fails.
12241    */
12242   unsigned tmp = get_temp_index(emit);
12243   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
12244   struct tgsi_full_src_register tmp_src_x =
12245      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
12246   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
12247   struct tgsi_full_src_register color_src =
12248      make_src_temp_reg(fs_color_tmp_index);
12249   struct tgsi_full_src_register color_src_w =
12250      scalar_src(&color_src, TGSI_SWIZZLE_W);
12251   struct tgsi_full_src_register ref_src =
12252      make_src_immediate_reg(emit->fs.alpha_ref_index);
12253   struct tgsi_full_dst_register color_dst =
12254      make_dst_output_reg(emit->fs.color_out_index[0]);
12255
12256   assert(emit->unit == PIPE_SHADER_FRAGMENT);
12257
12258   /* dst = src0 'alpha_func' src1 */
12259   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
12260                   &color_src_w, &ref_src);
12261
12262   /* DISCARD if dst.x == 0 */
12263   begin_emit_instruction(emit);
12264   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
12265   emit_src_register(emit, &tmp_src_x);
12266   end_emit_instruction(emit);
12267
12268   /* If we don't need to broadcast the color below, emit the final color here.
12269    */
12270   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
12271      /* MOV output.color, tempcolor */
12272      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12273   }
12274
12275   free_temp_indexes(emit);
12276}
12277
12278
12279/**
12280 * Emit instructions for writing a single color output to multiple
12281 * color buffers.
12282 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
12283 * when key.fs.white_fragments is true).
12284 * property is set and the number of render targets is greater than one.
12285 * \param fs_color_tmp_index  index of the temp register that holds the
12286 *                            color to broadcast.
12287 */
12288static void
12289emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12290                                 unsigned fs_color_tmp_index)
12291{
12292   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12293   unsigned i;
12294   struct tgsi_full_src_register color_src;
12295
12296   if (emit->key.fs.white_fragments) {
12297      /* set all color outputs to white */
12298      color_src = make_immediate_reg_float(emit, 1.0f);
12299   }
12300   else {
12301      /* set all color outputs to TEMP[fs_color_tmp_index] */
12302      assert(fs_color_tmp_index != INVALID_INDEX);
12303      color_src = make_src_temp_reg(fs_color_tmp_index);
12304   }
12305
12306   assert(emit->unit == PIPE_SHADER_FRAGMENT);
12307
12308   for (i = 0; i < n; i++) {
12309      unsigned output_reg = emit->fs.color_out_index[i];
12310      struct tgsi_full_dst_register color_dst =
12311         make_dst_output_reg(output_reg);
12312
12313      /* Fill in this semantic here since we'll use it later in
12314       * emit_dst_register().
12315       */
12316      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12317
12318      /* MOV output.color[i], tempcolor */
12319      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12320   }
12321}
12322
12323
12324/**
12325 * Emit extra helper code after the original shader code, but before the
12326 * last END/RET instruction.
12327 * For vertex shaders this means emitting the extra code to apply the
12328 * prescale scale/translation.
12329 */
12330static boolean
12331emit_post_helpers(struct svga_shader_emitter_v10 *emit)
12332{
12333   if (emit->unit == PIPE_SHADER_VERTEX) {
12334      emit_vertex_instructions(emit);
12335   }
12336   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
12337      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
12338
12339      assert(!(emit->key.fs.white_fragments &&
12340               emit->key.fs.write_color0_to_n_cbufs == 0));
12341
12342      /* We no longer want emit_dst_register() to substitute the
12343       * temporary fragment color register for the real color output.
12344       */
12345      emit->fs.color_tmp_index = INVALID_INDEX;
12346
12347      if (emit->key.fs.alpha_to_one) {
12348         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
12349      }
12350      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12351         emit_alpha_test_instructions(emit, fs_color_tmp_index);
12352      }
12353      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
12354          emit->key.fs.white_fragments) {
12355         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
12356      }
12357   }
12358   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12359      if (!emit->tcs.control_point_phase) {
12360         /* store the tessellation levels in the patch constant phase only */
12361         store_tesslevels(emit);
12362      }
12363      else {
12364         emit_clipping_instructions(emit);
12365      }
12366   }
12367   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12368      emit_vertex_instructions(emit);
12369   }
12370
12371   return TRUE;
12372}
12373
12374
12375/**
12376 * Reemit rawbuf instruction
12377 */
12378static boolean
12379emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
12380                        unsigned inst_number,
12381                        const struct tgsi_full_instruction *inst)
12382{
12383   boolean ret;
12384
12385   /* For all the rawbuf references in this instruction,
12386    * load the rawbuf reference and assign to the designated temporary.
12387    * Then reeemit the instruction.
12388    */
12389   emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
12390
12391   unsigned offset_tmp = get_temp_index(emit);
12392   struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
12393   struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
12394   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
12395
12396   for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
12397      struct tgsi_full_src_register element_src;
12398
12399      /* First get the element index register. */
12400
12401      if (emit->raw_buf_tmp[i].indirect) {
12402         unsigned tmp = get_temp_index(emit);
12403         struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
12404         struct tgsi_full_src_register element_index =
12405            make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12406         struct tgsi_full_src_register element_rel =
12407            make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
12408
12409         element_src = make_src_temp_reg(tmp);
12410         element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
12411         element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
12412
12413         /* element index from the indirect register */
12414         element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12415         element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
12416
12417         /* IADD element_src element_index element_index_relative */
12418         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
12419                              &element_index, &element_rel);
12420      }
12421      else {
12422         element_src =
12423            make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_index);
12424      }
12425
12426      /* byte offset = element index << 4 */
12427      emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
12428                           &element_src, &four);
12429
12430      struct tgsi_full_dst_register dst_tmp =
12431         make_dst_temp_reg(i + emit->raw_buf_tmp_index);
12432
12433      /* LD_RAW tmp, rawbuf byte offset, rawbuf */
12434
12435      begin_emit_instruction(emit);
12436      emit_opcode(emit, VGPU10_OPCODE_LD_RAW, FALSE);
12437      emit_dst_register(emit, &dst_tmp);
12438
12439      struct tgsi_full_src_register offset_x =
12440            scalar_src(&offset_src, TGSI_SWIZZLE_X);
12441      emit_src_register(emit, &offset_x);
12442
12443      emit_resource_register(emit,
12444         emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
12445      end_emit_instruction(emit);
12446   }
12447
12448   emit->raw_buf_cur_tmp_index = 0;
12449
12450   ret = emit_vgpu10_instruction(emit, inst_number, inst);
12451
12452   /* reset raw buf state */
12453   emit->raw_buf_cur_tmp_index = 0;
12454   emit->reemit_rawbuf_instruction = REEMIT_FALSE;
12455
12456   free_temp_indexes(emit);
12457
12458   return ret;
12459}
12460
12461
12462/**
12463 * Translate the TGSI tokens into VGPU10 tokens.
12464 */
12465static boolean
12466emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
12467                         const struct tgsi_token *tokens)
12468{
12469   struct tgsi_parse_context parse;
12470   boolean ret = TRUE;
12471   boolean pre_helpers_emitted = FALSE;
12472   unsigned inst_number = 0;
12473
12474   tgsi_parse_init(&parse, tokens);
12475
12476   while (!tgsi_parse_end_of_tokens(&parse)) {
12477
12478      /* Save the current tgsi token starting position */
12479      emit->cur_tgsi_token = parse.Position;
12480
12481      tgsi_parse_token(&parse);
12482
12483      switch (parse.FullToken.Token.Type) {
12484      case TGSI_TOKEN_TYPE_IMMEDIATE:
12485         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
12486         if (!ret)
12487            goto done;
12488         break;
12489
12490      case TGSI_TOKEN_TYPE_DECLARATION:
12491         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
12492         if (!ret)
12493            goto done;
12494         break;
12495
12496      case TGSI_TOKEN_TYPE_INSTRUCTION:
12497         if (!pre_helpers_emitted) {
12498            ret = emit_pre_helpers(emit);
12499            if (!ret)
12500               goto done;
12501            pre_helpers_emitted = TRUE;
12502         }
12503         ret = emit_vgpu10_instruction(emit, inst_number++,
12504                                       &parse.FullToken.FullInstruction);
12505
12506         /* Usually this applies to TCS only. If shader is reading control
12507          * point outputs in control point phase, we should reemit all
12508          * instructions which are writting into control point output in
12509          * control phase to store results into temporaries.
12510          */
12511         if (emit->reemit_instruction) {
12512            assert(emit->unit == PIPE_SHADER_TESS_CTRL);
12513            ret = emit_vgpu10_instruction(emit, inst_number,
12514                                          &parse.FullToken.FullInstruction);
12515         }
12516         else if (emit->initialize_temp_index != INVALID_INDEX) {
12517            emit_initialize_temp_instruction(emit);
12518            emit->initialize_temp_index = INVALID_INDEX;
12519            ret = emit_vgpu10_instruction(emit, inst_number - 1,
12520                                          &parse.FullToken.FullInstruction);
12521         }
12522         else if (emit->reemit_rawbuf_instruction) {
12523            ret = emit_rawbuf_instruction(emit, inst_number - 1,
12524                                          &parse.FullToken.FullInstruction);
12525         }
12526
12527         if (!ret)
12528            goto done;
12529         break;
12530
12531      case TGSI_TOKEN_TYPE_PROPERTY:
12532         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
12533         if (!ret)
12534            goto done;
12535         break;
12536
12537      default:
12538         break;
12539      }
12540   }
12541
12542   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12543      ret = emit_hull_shader_patch_constant_phase(emit, &parse);
12544   }
12545
12546done:
12547   tgsi_parse_free(&parse);
12548   return ret;
12549}
12550
12551
12552/**
12553 * Emit the first VGPU10 shader tokens.
12554 */
12555static boolean
12556emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
12557{
12558   VGPU10ProgramToken ptoken;
12559
12560   /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
12561
12562   /* Maximum supported shader version is 50 */
12563   unsigned version = MIN2(emit->version, 50);
12564
12565   ptoken.value = 0; /* init whole token to zero */
12566   ptoken.majorVersion = version / 10;
12567   ptoken.minorVersion = version % 10;
12568   ptoken.programType = translate_shader_type(emit->unit);
12569   if (!emit_dword(emit, ptoken.value))
12570      return FALSE;
12571
12572   /* Second token: total length of shader, in tokens.  We can't fill this
12573    * in until we're all done.  Emit zero for now.
12574    */
12575   if (!emit_dword(emit, 0))
12576      return FALSE;
12577
12578   if (emit->version >= 50) {
12579      VGPU10OpcodeToken0 token;
12580
12581      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12582         /* For hull shader, we need to start the declarations phase first before
12583          * emitting any declarations including the global flags.
12584          */
12585         token.value = 0;
12586         token.opcodeType = VGPU10_OPCODE_HS_DECLS;
12587         begin_emit_instruction(emit);
12588         emit_dword(emit, token.value);
12589         end_emit_instruction(emit);
12590      }
12591
12592      /* Emit global flags */
12593      token.value = 0;    /* init whole token to zero */
12594      token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12595      token.enableDoublePrecisionFloatOps = 1;  /* set bit */
12596      token.instructionLength = 1;
12597      if (!emit_dword(emit, token.value))
12598         return FALSE;
12599   }
12600
12601   if (emit->version >= 40) {
12602      VGPU10OpcodeToken0 token;
12603
12604      /* Reserved for global flag such as refactoringAllowed.
12605       * If the shader does not use the precise qualifier, we will set the
12606       * refactoringAllowed global flag; otherwise, we will leave the reserved
12607       * token to NOP.
12608       */
12609      emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
12610      token.value = 0;
12611      token.opcodeType = VGPU10_OPCODE_NOP;
12612      token.instructionLength = 1;
12613      if (!emit_dword(emit, token.value))
12614         return FALSE;
12615   }
12616
12617   return TRUE;
12618}
12619
12620
12621static boolean
12622emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12623{
12624   VGPU10ProgramToken *tokens;
12625
12626   /* Replace the second token with total shader length */
12627   tokens = (VGPU10ProgramToken *) emit->buf;
12628   tokens[1].value = emit_get_num_tokens(emit);
12629
12630   if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12631      /* Replace the reserved token with the RefactoringAllowed global flag */
12632      VGPU10OpcodeToken0 *ptoken;
12633
12634      ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12635      assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12636      ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12637      ptoken->refactoringAllowed = 1;
12638   }
12639
12640   if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12641      /* Replace the reserved token with the forceEarlyDepthStencil  global flag */
12642      VGPU10OpcodeToken0 *ptoken;
12643
12644      ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12645      ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12646      ptoken->forceEarlyDepthStencil = 1;
12647   }
12648
12649   return TRUE;
12650}
12651
12652
/**
 * Run the fragment shader through tgsi_add_two_side(), which rewrites it
 * to read the BCOLORs and pick the front or back color via the FACE
 * register.
 *
 * \return the token string returned by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const int dump_tokens = 0;   /* set to 1 to dump the shader before/after */
   const struct tgsi_token *two_sided;

   if (dump_tokens) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }

   two_sided = tgsi_add_two_side(tokens);

   if (dump_tokens) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(two_sided, 0);
   }

   return two_sided;
}
12671
12672
12673/**
12674 * Modify the FS to do polygon stipple.
12675 */
12676static const struct tgsi_token *
12677transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12678                      const struct tgsi_token *tokens)
12679{
12680   const struct tgsi_token *new_tokens;
12681   unsigned unit;
12682
12683   if (0) {
12684      debug_printf("Before pstipple ------------------\n");
12685      tgsi_dump(tokens,0);
12686   }
12687
12688   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12689                                                     TGSI_FILE_INPUT);
12690
12691   emit->fs.pstipple_sampler_unit = unit;
12692
12693   /* The new sampler state is appended to the end of the samplers list */
12694   emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12695
12696   /* Setup texture state for stipple */
12697   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12698   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12699   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12700   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12701   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12702   emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12703   emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12704
12705   if (0) {
12706      debug_printf("After pstipple ------------------\n");
12707      tgsi_dump(new_tokens, 0);
12708   }
12709
12710   return new_tokens;
12711}
12712
12713/**
12714 * Modify the FS to support anti-aliasing point.
12715 */
12716static const struct tgsi_token *
12717transform_fs_aapoint(struct svga_context *svga,
12718		     const struct tgsi_token *tokens,
12719                     int aa_coord_index)
12720{
12721   bool need_texcoord_semantic =
12722      svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12723
12724   if (0) {
12725      debug_printf("Before tgsi_add_aa_point ------------------\n");
12726      tgsi_dump(tokens,0);
12727   }
12728   tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12729   if (0) {
12730      debug_printf("After tgsi_add_aa_point ------------------\n");
12731      tgsi_dump(tokens, 0);
12732   }
12733   return tokens;
12734}
12735
12736
12737/**
12738 * A helper function to determine the shader in the previous stage and
12739 * then call the linker function to determine the input mapping for this
12740 * shader to match the output indices from the shader in the previous stage.
12741 */
12742static void
12743compute_input_mapping(struct svga_context *svga,
12744                      struct svga_shader_emitter_v10 *emit,
12745                      enum pipe_shader_type unit)
12746{
12747   struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
12748
12749   if (unit == PIPE_SHADER_FRAGMENT) {
12750      prevShader = svga->curr.gs ?
12751         &svga->curr.gs->base : (svga->curr.tes ?
12752         &svga->curr.tes->base : &svga->curr.vs->base);
12753   } else if (unit == PIPE_SHADER_GEOMETRY) {
12754      prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12755   } else if (unit == PIPE_SHADER_TESS_EVAL) {
12756      assert(svga->curr.tcs);
12757      prevShader = &svga->curr.tcs->base;
12758   } else if (unit == PIPE_SHADER_TESS_CTRL) {
12759      assert(svga->curr.vs);
12760      prevShader = &svga->curr.vs->base;
12761   }
12762
12763   if (prevShader != NULL) {
12764      svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12765      emit->prevShaderInfo = &prevShader->tgsi_info;
12766   }
12767   else {
12768      /**
12769       * Since vertex shader does not need to go through the linker to
12770       * establish the input map, we need to make sure the highest index
12771       * of input registers is set properly here.
12772       */
12773      emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12774                                         emit->info.file_max[TGSI_FILE_INPUT]);
12775   }
12776}
12777
12778
12779/**
12780 * Copies the shader signature info to the shader variant
12781 */
12782static void
12783copy_shader_signature(struct svga_shader_signature *sgn,
12784                      struct svga_shader_variant *variant)
12785{
12786   SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12787
12788   /* Calculate the signature length */
12789   variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12790                           (header->numInputSignatures +
12791                            header->numOutputSignatures +
12792                            header->numPatchConstantSignatures) *
12793                           sizeof(SVGA3dDXShaderSignatureEntry);
12794
12795   /* Allocate buffer for the signature info */
12796   variant->signature =
12797      (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12798
12799   char *sgnBuf = (char *)variant->signature;
12800   unsigned sgnLen;
12801
12802   /* Copy the signature info to the shader variant structure */
12803   memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12804   sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12805
12806   if (header->numInputSignatures) {
12807      sgnLen =
12808         header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12809      memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12810      sgnBuf += sgnLen;
12811   }
12812
12813   if (header->numOutputSignatures) {
12814      sgnLen =
12815         header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12816      memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12817      sgnBuf += sgnLen;
12818   }
12819
12820   if (header->numPatchConstantSignatures) {
12821      sgnLen =
12822         header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12823      memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12824   }
12825}
12826
12827
12828/**
12829 * This is the main entrypoint for the TGSI -> VPGU10 translator.
12830 */
12831struct svga_shader_variant *
12832svga_tgsi_vgpu10_translate(struct svga_context *svga,
12833                           const struct svga_shader *shader,
12834                           const struct svga_compile_key *key,
12835                           enum pipe_shader_type unit)
12836{
12837   struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12838   struct svga_shader_variant *variant = NULL;
12839   struct svga_shader_emitter_v10 *emit;
12840   const struct tgsi_token *tokens = shader->tokens;
12841
12842   (void) make_immediate_reg_double;   /* unused at this time */
12843
12844   assert(unit == PIPE_SHADER_VERTEX ||
12845          unit == PIPE_SHADER_GEOMETRY ||
12846          unit == PIPE_SHADER_FRAGMENT ||
12847          unit == PIPE_SHADER_TESS_CTRL ||
12848          unit == PIPE_SHADER_TESS_EVAL ||
12849          unit == PIPE_SHADER_COMPUTE);
12850
12851   /* These two flags cannot be used together */
12852   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12853
12854   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12855   /*
12856    * Setup the code emitter
12857    */
12858   emit = alloc_emitter();
12859   if (!emit)
12860      goto done;
12861
12862   emit->unit = unit;
12863   if (svga_have_gl43(svga)) {
12864      emit->version = 51;
12865   } else if (svga_have_sm5(svga)) {
12866      emit->version = 50;
12867   } else if (svga_have_sm4_1(svga)) {
12868      emit->version = 41;
12869   } else {
12870      emit->version = 40;
12871   }
12872
12873   emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12874
12875   emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12876
12877   emit->key = *key;
12878
12879   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12880                                    emit->key.gs.need_prescale ||
12881                                    emit->key.tes.need_prescale);
12882
12883   /* Determine how many prescale factors in the constant buffer */
12884   emit->vposition.num_prescale = 1;
12885   if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12886      assert(emit->unit == PIPE_SHADER_GEOMETRY);
12887      emit->vposition.num_prescale = emit->key.gs.num_prescale;
12888   }
12889
12890   emit->vposition.tmp_index = INVALID_INDEX;
12891   emit->vposition.so_index = INVALID_INDEX;
12892   emit->vposition.out_index = INVALID_INDEX;
12893
12894   emit->vs.vertex_id_sys_index = INVALID_INDEX;
12895   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12896   emit->vs.vertex_id_bias_index = INVALID_INDEX;
12897
12898   emit->fs.color_tmp_index = INVALID_INDEX;
12899   emit->fs.face_input_index = INVALID_INDEX;
12900   emit->fs.fragcoord_input_index = INVALID_INDEX;
12901   emit->fs.sample_id_sys_index = INVALID_INDEX;
12902   emit->fs.sample_pos_sys_index = INVALID_INDEX;
12903   emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
12904   emit->fs.layer_input_index = INVALID_INDEX;
12905   emit->fs.layer_imm_index = INVALID_INDEX;
12906
12907   emit->gs.prim_id_index = INVALID_INDEX;
12908   emit->gs.invocation_id_sys_index = INVALID_INDEX;
12909   emit->gs.viewport_index_out_index = INVALID_INDEX;
12910   emit->gs.viewport_index_tmp_index = INVALID_INDEX;
12911
12912   emit->tcs.vertices_per_patch_index = INVALID_INDEX;
12913   emit->tcs.invocation_id_sys_index = INVALID_INDEX;
12914   emit->tcs.control_point_input_index = INVALID_INDEX;
12915   emit->tcs.control_point_addr_index = INVALID_INDEX;
12916   emit->tcs.control_point_out_index = INVALID_INDEX;
12917   emit->tcs.control_point_tmp_index = INVALID_INDEX;
12918   emit->tcs.control_point_out_count = 0;
12919   emit->tcs.inner.out_index = INVALID_INDEX;
12920   emit->tcs.inner.temp_index = INVALID_INDEX;
12921   emit->tcs.inner.tgsi_index = INVALID_INDEX;
12922   emit->tcs.outer.out_index = INVALID_INDEX;
12923   emit->tcs.outer.temp_index = INVALID_INDEX;
12924   emit->tcs.outer.tgsi_index = INVALID_INDEX;
12925   emit->tcs.patch_generic_out_count = 0;
12926   emit->tcs.patch_generic_out_index = INVALID_INDEX;
12927   emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
12928   emit->tcs.prim_id_index = INVALID_INDEX;
12929
12930   emit->tes.tesscoord_sys_index = INVALID_INDEX;
12931   emit->tes.inner.in_index = INVALID_INDEX;
12932   emit->tes.inner.temp_index = INVALID_INDEX;
12933   emit->tes.inner.tgsi_index = INVALID_INDEX;
12934   emit->tes.outer.in_index = INVALID_INDEX;
12935   emit->tes.outer.temp_index = INVALID_INDEX;
12936   emit->tes.outer.tgsi_index = INVALID_INDEX;
12937   emit->tes.prim_id_index = INVALID_INDEX;
12938
12939   emit->cs.thread_id_index = INVALID_INDEX;
12940   emit->cs.block_id_index = INVALID_INDEX;
12941   emit->cs.grid_size.tgsi_index = INVALID_INDEX;
12942   emit->cs.grid_size.imm_index = INVALID_INDEX;
12943   emit->cs.block_width = 1;
12944   emit->cs.block_height = 1;
12945   emit->cs.block_depth = 1;
12946
12947   emit->clip_dist_out_index = INVALID_INDEX;
12948   emit->clip_dist_tmp_index = INVALID_INDEX;
12949   emit->clip_dist_so_index = INVALID_INDEX;
12950   emit->clip_vertex_out_index = INVALID_INDEX;
12951   emit->clip_vertex_tmp_index = INVALID_INDEX;
12952   emit->svga_debug_callback = svga->debug.callback;
12953
12954   emit->index_range.start_index = INVALID_INDEX;
12955   emit->index_range.count = 0;
12956   emit->index_range.required = FALSE;
12957   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
12958   emit->index_range.dim = 0;
12959   emit->index_range.size = 0;
12960
12961   emit->current_loop_depth = 0;
12962
12963   emit->initialize_temp_index = INVALID_INDEX;
12964   emit->image_size_index = INVALID_INDEX;
12965
12966   emit->max_vs_inputs  = svgascreen->max_vs_inputs;
12967   emit->max_vs_outputs = svgascreen->max_vs_outputs;
12968   emit->max_gs_inputs  = svgascreen->max_gs_inputs;
12969
12970   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
12971      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
12972   }
12973
12974   if (unit == PIPE_SHADER_FRAGMENT) {
12975      if (key->fs.light_twoside) {
12976         tokens = transform_fs_twoside(tokens);
12977      }
12978      if (key->fs.pstipple) {
12979         const struct tgsi_token *new_tokens =
12980            transform_fs_pstipple(emit, tokens);
12981         if (tokens != shader->tokens) {
12982            /* free the two-sided shader tokens */
12983            tgsi_free_tokens(tokens);
12984         }
12985         tokens = new_tokens;
12986      }
12987      if (key->fs.aa_point) {
12988         tokens = transform_fs_aapoint(svga, tokens,
12989			               key->fs.aa_point_coord_index);
12990      }
12991   }
12992
12993   if (SVGA_DEBUG & DEBUG_TGSI) {
12994      debug_printf("#####################################\n");
12995      debug_printf("### TGSI Shader %u\n", shader->id);
12996      tgsi_dump(tokens, 0);
12997   }
12998
12999   /**
13000    * Rescan the header if the token string is different from the one
13001    * included in the shader; otherwise, the header info is already up-to-date
13002    */
13003   if (tokens != shader->tokens) {
13004      tgsi_scan_shader(tokens, &emit->info);
13005   } else {
13006      emit->info = shader->tgsi_info;
13007   }
13008
13009   emit->num_outputs = emit->info.num_outputs;
13010
13011   /**
13012    * Compute input mapping to match the outputs from shader
13013    * in the previous stage
13014    */
13015   compute_input_mapping(svga, emit, unit);
13016
13017   determine_clipping_mode(emit);
13018
13019   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13020       unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13021      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13022         /* if there is stream output declarations associated
13023          * with this shader or the shader writes to ClipDistance
13024          * then reserve extra registers for the non-adjusted vertex position
13025          * and the ClipDistance shadow copy.
13026          */
13027         emit->vposition.so_index = emit->num_outputs++;
13028
13029         if (emit->clip_mode == CLIP_DISTANCE) {
13030            emit->clip_dist_so_index = emit->num_outputs++;
13031            if (emit->info.num_written_clipdistance > 4)
13032               emit->num_outputs++;
13033         }
13034      }
13035   }
13036
13037   /* Determine if constbuf to rawbuf translation is needed */
13038   if (emit->info.const_buffers_declared) {
13039      emit->raw_bufs = emit->key.raw_buffers;
13040      emit->raw_buf_srv_start_index = emit->key.srv_raw_buf_index;
13041   }
13042
13043   /*
13044    * Do actual shader translation.
13045    */
13046   if (!emit_vgpu10_header(emit)) {
13047      debug_printf("svga: emit VGPU10 header failed\n");
13048      goto cleanup;
13049   }
13050
13051   if (!emit_vgpu10_instructions(emit, tokens)) {
13052      debug_printf("svga: emit VGPU10 instructions failed\n");
13053      goto cleanup;
13054   }
13055
13056   if (!emit_vgpu10_tail(emit)) {
13057      debug_printf("svga: emit VGPU10 tail failed\n");
13058      goto cleanup;
13059   }
13060
13061   if (emit->register_overflow) {
13062      goto cleanup;
13063   }
13064
13065   /*
13066    * Create, initialize the 'variant' object.
13067    */
13068   variant = svga_new_shader_variant(svga, unit);
13069   if (!variant)
13070      goto cleanup;
13071
13072   variant->shader = shader;
13073   variant->nr_tokens = emit_get_num_tokens(emit);
13074   variant->tokens = (const unsigned *)emit->buf;
13075
13076   /* Copy shader signature info to the shader variant */
13077   if (svga_have_sm5(svga)) {
13078      copy_shader_signature(&emit->signature, variant);
13079   }
13080
13081   emit->buf = NULL;  /* buffer is no longer owed by emitter context */
13082   memcpy(&variant->key, key, sizeof(*key));
13083   variant->id = UTIL_BITMASK_INVALID_INDEX;
13084
13085   /* The extra constant starting offset starts with the number of
13086    * shader constants declared in the shader.
13087    */
13088   variant->extra_const_start = emit->num_shader_consts[0];
13089   if (key->gs.wide_point) {
13090      /**
13091       * The extra constant added in the transformed shader
13092       * for inverse viewport scale is to be supplied by the driver.
13093       * So the extra constant starting offset needs to be reduced by 1.
13094       */
13095      assert(variant->extra_const_start > 0);
13096      variant->extra_const_start--;
13097   }
13098
13099   if (unit == PIPE_SHADER_FRAGMENT) {
13100      struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13101
13102      fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13103      fs_variant->pstipple_sampler_state_index =
13104         emit->fs.pstipple_sampler_state_index;
13105
13106      /* If there was exactly one write to a fragment shader output register
13107       * and it came from a constant buffer, we know all fragments will have
13108       * the same color (except for blending).
13109       */
13110      fs_variant->constant_color_output =
13111         emit->constant_color_output && emit->num_output_writes == 1;
13112
13113      /** keep track in the variant if flat interpolation is used
13114       *  for any of the varyings.
13115       */
13116      fs_variant->uses_flat_interp = emit->uses_flat_interp;
13117
13118      fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13119   }
13120   else if (unit == PIPE_SHADER_TESS_EVAL) {
13121      struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13122
13123      /* Keep track in the tes variant some of the layout parameters.
13124       * These parameters will be referenced by the tcs to emit
13125       * the necessary declarations for the hull shader.
13126       */
13127      tes_variant->prim_mode = emit->tes.prim_mode;
13128      tes_variant->spacing = emit->tes.spacing;
13129      tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13130      tes_variant->point_mode = emit->tes.point_mode;
13131   }
13132
13133
13134   if (tokens != shader->tokens) {
13135      tgsi_free_tokens(tokens);
13136   }
13137
13138cleanup:
13139   free_emitter(emit);
13140
13141done:
13142   SVGA_STATS_TIME_POP(svga_sws(svga));
13143   return variant;
13144}
13145