1/**************************************************************************
2 *
3 * Copyright 2012-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28/*
29 * ShaderTGSI.c --
30 *    Functions for translating shaders.
31 */
32
33#include "Debug.h"
34#include "ShaderParse.h"
35
36#include "pipe/p_state.h"
37#include "tgsi/tgsi_ureg.h"
38#include "tgsi/tgsi_dump.h"
39#include "util/u_memory.h"
40
41#include "ShaderDump.h"
42
43
/*
 * Operand format tag recorded for each D3D10 opcode in opcode_xlate[].
 *
 * NOTE(review): presumably the data type (float / signed int / unsigned
 * int) in which the opcode's operands are interpreted; the code that
 * consumes this tag is not in this part of the file -- confirm there.
 */
enum dx10_opcode_format {
   OF_FLOAT,
   OF_INT,
   OF_UINT
};
49
/*
 * One row of the D3D10 -> TGSI opcode translation table (opcode_xlate[]).
 */
struct dx10_opcode_xlate {
   /* D3D10 opcode this row describes. */
   D3D10_SB_OPCODE_TYPE type;
   /* Operand format tag for the opcode. */
   enum dx10_opcode_format format;
   /* A TGSI_OPCODE_x value, or one of the TGSI_EXPAND /
    * TGSI_LOG_UNSUPPORTED markers defined in this file.
    */
   uint tgsi_opcode;
};
55
/* Marker values stored in dx10_opcode_xlate.tgsi_opcode in place of a
 * real TGSI opcode.
 *
 * Opcodes that we have not even attempted to implement:
 */
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST

/* Opcodes which do not translate directly to a TGSI opcode, but which
 * have at least a partial implementation coded below:
 */
#define TGSI_EXPAND          (TGSI_OPCODE_LAST+1)
64
/*
 * D3D10 -> TGSI opcode translation table, D3D10_SB_NUM_OPCODES rows.
 *
 * Each row names a D3D10 opcode, its operand format tag, and either the
 * TGSI opcode it maps to directly or one of the TGSI_EXPAND /
 * TGSI_LOG_UNSUPPORTED markers.
 *
 * NOTE(review): the rows use positional initialisation, so the table is
 * presumably indexed by D3D10 opcode value and the row order must then
 * match the D3D10_SB_OPCODE_TYPE enumeration -- confirm against the
 * lookup site before reordering or inserting rows.
 */
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
   {D3D10_SB_OPCODE_ADD,                              OF_FLOAT, TGSI_OPCODE_ADD},
   {D3D10_SB_OPCODE_AND,                              OF_UINT,  TGSI_OPCODE_AND},
   {D3D10_SB_OPCODE_BREAK,                            OF_FLOAT, TGSI_OPCODE_BRK},
   {D3D10_SB_OPCODE_BREAKC,                           OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALL,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALLC,                            OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CASE,                             OF_UINT,  TGSI_OPCODE_CASE},
   {D3D10_SB_OPCODE_CONTINUE,                         OF_FLOAT, TGSI_OPCODE_CONT},
   {D3D10_SB_OPCODE_CONTINUEC,                        OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CUT,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DEFAULT,                          OF_FLOAT, TGSI_OPCODE_DEFAULT},
   {D3D10_SB_OPCODE_DERIV_RTX,                        OF_FLOAT, TGSI_OPCODE_DDX},
   {D3D10_SB_OPCODE_DERIV_RTY,                        OF_FLOAT, TGSI_OPCODE_DDY},
   {D3D10_SB_OPCODE_DISCARD,                          OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_DIV,                              OF_FLOAT, TGSI_OPCODE_DIV},
   {D3D10_SB_OPCODE_DP2,                              OF_FLOAT, TGSI_OPCODE_DP2},
   {D3D10_SB_OPCODE_DP3,                              OF_FLOAT, TGSI_OPCODE_DP3},
   {D3D10_SB_OPCODE_DP4,                              OF_FLOAT, TGSI_OPCODE_DP4},
   {D3D10_SB_OPCODE_ELSE,                             OF_FLOAT, TGSI_OPCODE_ELSE},
   {D3D10_SB_OPCODE_EMIT,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_EMITTHENCUT,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_ENDIF,                            OF_FLOAT, TGSI_OPCODE_ENDIF},
   {D3D10_SB_OPCODE_ENDLOOP,                          OF_FLOAT, TGSI_OPCODE_ENDLOOP},
   {D3D10_SB_OPCODE_ENDSWITCH,                        OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
   {D3D10_SB_OPCODE_EQ,                               OF_FLOAT, TGSI_OPCODE_FSEQ},
   {D3D10_SB_OPCODE_EXP,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FRC,                              OF_FLOAT, TGSI_OPCODE_FRC},
   {D3D10_SB_OPCODE_FTOI,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FTOU,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_GE,                               OF_FLOAT, TGSI_OPCODE_FSGE},
   {D3D10_SB_OPCODE_IADD,                             OF_INT,   TGSI_OPCODE_UADD},
   {D3D10_SB_OPCODE_IF,                               OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_IEQ,                              OF_INT,   TGSI_OPCODE_USEQ},
   {D3D10_SB_OPCODE_IGE,                              OF_INT,   TGSI_OPCODE_ISGE},
   {D3D10_SB_OPCODE_ILT,                              OF_INT,   TGSI_OPCODE_ISLT},
   {D3D10_SB_OPCODE_IMAD,                             OF_INT,   TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_IMAX,                             OF_INT,   TGSI_OPCODE_IMAX},
   {D3D10_SB_OPCODE_IMIN,                             OF_INT,   TGSI_OPCODE_IMIN},
   {D3D10_SB_OPCODE_IMUL,                             OF_INT,   TGSI_EXPAND},
   {D3D10_SB_OPCODE_INE,                              OF_INT,   TGSI_OPCODE_USNE},
   {D3D10_SB_OPCODE_INEG,                             OF_INT,   TGSI_OPCODE_INEG},
   {D3D10_SB_OPCODE_ISHL,                             OF_INT,   TGSI_OPCODE_SHL},
   {D3D10_SB_OPCODE_ISHR,                             OF_INT,   TGSI_OPCODE_ISHR},
   {D3D10_SB_OPCODE_ITOF,                             OF_INT,   TGSI_OPCODE_I2F},
   {D3D10_SB_OPCODE_LABEL,                            OF_INT,   TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD,                               OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD_MS,                            OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOG,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOOP,                             OF_FLOAT, TGSI_OPCODE_BGNLOOP},
   {D3D10_SB_OPCODE_LT,                               OF_FLOAT, TGSI_OPCODE_FSLT},
   {D3D10_SB_OPCODE_MAD,                              OF_FLOAT, TGSI_OPCODE_MAD},
   {D3D10_SB_OPCODE_MIN,                              OF_FLOAT, TGSI_OPCODE_MIN},
   {D3D10_SB_OPCODE_MAX,                              OF_FLOAT, TGSI_OPCODE_MAX},
   {D3D10_SB_OPCODE_CUSTOMDATA,                       OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_MOV,                              OF_UINT,  TGSI_OPCODE_MOV},
   {D3D10_SB_OPCODE_MOVC,                             OF_UINT,  TGSI_OPCODE_UCMP},
   {D3D10_SB_OPCODE_MUL,                              OF_FLOAT, TGSI_OPCODE_MUL},
   {D3D10_SB_OPCODE_NE,                               OF_FLOAT, TGSI_OPCODE_FSNE},
   {D3D10_SB_OPCODE_NOP,                              OF_FLOAT, TGSI_OPCODE_NOP},
   {D3D10_SB_OPCODE_NOT,                              OF_UINT,  TGSI_OPCODE_NOT},
   {D3D10_SB_OPCODE_OR,                               OF_UINT,  TGSI_OPCODE_OR},
   {D3D10_SB_OPCODE_RESINFO,                          OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_RET,                              OF_FLOAT, TGSI_OPCODE_RET},
   {D3D10_SB_OPCODE_RETC,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_ROUND_NE,                         OF_FLOAT, TGSI_OPCODE_ROUND},
   {D3D10_SB_OPCODE_ROUND_NI,                         OF_FLOAT, TGSI_OPCODE_FLR},
   {D3D10_SB_OPCODE_ROUND_PI,                         OF_FLOAT, TGSI_OPCODE_CEIL},
   {D3D10_SB_OPCODE_ROUND_Z,                          OF_FLOAT, TGSI_OPCODE_TRUNC},
   {D3D10_SB_OPCODE_RSQ,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE,                           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C_LZ,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_L,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_D,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_B,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SQRT,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SWITCH,                           OF_UINT,  TGSI_OPCODE_SWITCH},
   {D3D10_SB_OPCODE_SINCOS,                           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_UDIV,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_ULT,                              OF_UINT,  TGSI_OPCODE_USLT},
   {D3D10_SB_OPCODE_UGE,                              OF_UINT,  TGSI_OPCODE_USGE},
   {D3D10_SB_OPCODE_UMUL,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_UMAD,                             OF_UINT,  TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_UMAX,                             OF_UINT,  TGSI_OPCODE_UMAX},
   {D3D10_SB_OPCODE_UMIN,                             OF_UINT,  TGSI_OPCODE_UMIN},
   {D3D10_SB_OPCODE_USHR,                             OF_UINT,  TGSI_OPCODE_USHR},
   {D3D10_SB_OPCODE_UTOF,                             OF_UINT,  TGSI_OPCODE_U2F},
   {D3D10_SB_OPCODE_XOR,                              OF_UINT,  TGSI_OPCODE_XOR},
   {D3D10_SB_OPCODE_DCL_RESOURCE,                     OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER,              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_SAMPLER,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEX_RANGE,                  OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE,           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT,                        OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SGV,                    OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SIV,                    OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS,                     OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SGV,                 OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SIV,                 OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT,                       OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SGV,                   OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SIV,                   OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_TEMPS,                        OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP,               OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS,                 OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_RESERVED0,                        OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_LOD,                            OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_GATHER4,                        OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_POS,                     OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_INFO,                    OF_FLOAT, TGSI_LOG_UNSUPPORTED}
};
179
/* Capacity limits.  These size the fixed arrays in struct Shader_xlate
 * and are the bounds used by the index assertions in this file.
 */
#define SHADER_MAX_TEMPS 4096
#define SHADER_MAX_INPUTS 32
#define SHADER_MAX_OUTPUTS 32
#define SHADER_MAX_CONSTS 4096
#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS
#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS
#define SHADER_MAX_INDEXABLE_TEMPS 4096
187
/*
 * Entry of the Shader_xlate.calls array.
 *
 * NOTE(review): presumably records a call site -- the D3D10 label being
 * called and the TGSI token that has to be patched once the label's
 * location is known; the code using it is outside this chunk.
 */
struct Shader_call {
   unsigned d3d_label;
   unsigned tgsi_label_token;
};
192
/*
 * Entry of the Shader_xlate.labels array.
 *
 * NOTE(review): presumably pairs a D3D10 label number with the TGSI
 * instruction number where that label was emitted; the code using it is
 * outside this chunk.
 */
struct Shader_label {
   unsigned d3d_label;
   unsigned tgsi_insn_no;
};
197
/*
 * Per-resource information kept in Shader_xlate.resources[].
 */
struct Shader_resource {
   uint target;   /* TGSI_TEXTURE_x */
};
201
/*
 * Translation state shared by the helpers in this file.
 */
struct Shader_xlate {
   /* ureg program that declarations and instructions are emitted into. */
   struct ureg_program *ureg;

   /* Value recorded by declare_vertices_in(); 0 means "not set yet". */
   uint vertices_in;
   uint declared_temps;

   /* TGSI temporaries.  Plain D3D10 temps are looked up at
    * temp_offset + index; indexable temps at
    * indexable_temp_offsets[array] + element.
    */
   struct ureg_dst temps[SHADER_MAX_TEMPS];
   /* Register returned for D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH operands. */
   struct ureg_dst output_depth;
   struct Shader_resource resources[SHADER_MAX_RESOURCES];
   struct ureg_src sv[SHADER_MAX_RESOURCES];
   struct ureg_src samplers[SHADER_MAX_SAMPLERS];
   struct ureg_src imms;
   /* TGSI_SEMANTIC_PRIMID system value declared by dcl_gs_input(). */
   struct ureg_src prim_id;

   /* Offset added to a D3D10 temp index when indexing temps[]. */
   uint temp_offset;
   /* Start of each indexable temp array within temps[]. */
   uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];

   /* Per input register state, maintained by dcl_base_input(). */
   struct {
      boolean declared;     /* a declaration has been recorded */
      uint    writemask;    /* union of the declared component masks */
      uint    siv_name;     /* system name of the first declaration */
      boolean overloaded;   /* several declarations were merged into a
                             * temporary, which 'reg' then refers to */
      struct ureg_src reg;
   } inputs[SHADER_MAX_INPUTS];

   /* Per output register state: one destination register per component,
    * filled in by dcl_base_output().
    */
   struct {
      struct ureg_dst reg[4];
   } outputs[SHADER_MAX_OUTPUTS];

   /* Maps a D3D10 register index ('d3d') to a TGSI semantic index
    * ('tgsi'); the clip mapping is read by translate_semantic_index().
    */
   struct {
      uint d3d;
      uint tgsi;
   } clip_distance_mapping[2], cull_distance_mapping[2];
   uint num_clip_distances_declared;
   uint num_cull_distances_declared;

   /* NOTE(review): calls/labels look like arrays with a used count
    * (num_*) and a capacity (max_*); allocation and growth happen
    * outside this chunk -- confirm there.
    */
   struct Shader_call *calls;
   uint num_calls;
   uint max_calls;
   struct Shader_label *labels;
   uint num_labels;
   uint max_labels;
};
245
246static uint
247translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
248{
249   switch (interpolation) {
250   case D3D10_SB_INTERPOLATION_UNDEFINED:
251      assert(0);
252      return TGSI_INTERPOLATE_LINEAR;
253
254   case D3D10_SB_INTERPOLATION_CONSTANT:
255      return TGSI_INTERPOLATE_CONSTANT;
256   case D3D10_SB_INTERPOLATION_LINEAR:
257      return TGSI_INTERPOLATE_PERSPECTIVE;
258   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
259      return TGSI_INTERPOLATE_LINEAR;
260
261   case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
262   case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1
263      LOG_UNSUPPORTED(TRUE);
264      return TGSI_INTERPOLATE_PERSPECTIVE;
265
266   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
267   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1
268      LOG_UNSUPPORTED(TRUE);
269      return TGSI_INTERPOLATE_LINEAR;
270   }
271
272   assert(0);
273   return TGSI_INTERPOLATE_LINEAR;
274}
275
276static uint
277translate_system_name(D3D10_SB_NAME name)
278{
279   switch (name) {
280   case D3D10_SB_NAME_UNDEFINED:
281      assert(0);                /* should not happen */
282      return TGSI_SEMANTIC_GENERIC;
283   case D3D10_SB_NAME_POSITION:
284      return TGSI_SEMANTIC_POSITION;
285   case D3D10_SB_NAME_CLIP_DISTANCE:
286   case D3D10_SB_NAME_CULL_DISTANCE:
287      return TGSI_SEMANTIC_CLIPDIST;
288   case D3D10_SB_NAME_PRIMITIVE_ID:
289      return TGSI_SEMANTIC_PRIMID;
290   case D3D10_SB_NAME_INSTANCE_ID:
291      return TGSI_SEMANTIC_INSTANCEID;
292   case D3D10_SB_NAME_VERTEX_ID:
293      return TGSI_SEMANTIC_VERTEXID_NOBASE;
294   case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
295      return TGSI_SEMANTIC_VIEWPORT_INDEX;
296   case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
297      return TGSI_SEMANTIC_LAYER;
298   case D3D10_SB_NAME_IS_FRONT_FACE:
299      return TGSI_SEMANTIC_FACE;
300   case D3D10_SB_NAME_SAMPLE_INDEX:
301      LOG_UNSUPPORTED(TRUE);
302      return TGSI_SEMANTIC_GENERIC;
303   }
304
305   assert(0);
306   return TGSI_SEMANTIC_GENERIC;
307}
308
309static uint
310translate_semantic_index(struct Shader_xlate *sx,
311                         D3D10_SB_NAME name,
312                         const struct Shader_dst_operand *operand)
313{
314   unsigned idx;
315   switch (name) {
316   case D3D10_SB_NAME_CLIP_DISTANCE:
317   case D3D10_SB_NAME_CULL_DISTANCE:
318      if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
319         idx = sx->clip_distance_mapping[0].tgsi;
320      } else {
321         assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
322         idx = sx->clip_distance_mapping[1].tgsi;
323      }
324      break;
325/*   case D3D10_SB_NAME_CULL_DISTANCE:
326      if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {
327         idx = sx->cull_distance_mapping[0].tgsi;
328      } else {
329         assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);
330         idx = sx->cull_distance_mapping[1].tgsi;
331      }
332      break;*/
333   default:
334      idx = 0;
335   }
336   return idx;
337}
338
339static enum tgsi_return_type
340trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
341   switch (d3drettype) {
342   case D3D10_SB_RETURN_TYPE_UNORM:
343      return TGSI_RETURN_TYPE_UNORM;
344   case D3D10_SB_RETURN_TYPE_SNORM:
345      return TGSI_RETURN_TYPE_SNORM;
346   case D3D10_SB_RETURN_TYPE_SINT:
347      return TGSI_RETURN_TYPE_SINT;
348   case D3D10_SB_RETURN_TYPE_UINT:
349      return TGSI_RETURN_TYPE_UINT;
350   case D3D10_SB_RETURN_TYPE_FLOAT:
351      return TGSI_RETURN_TYPE_FLOAT;
352   case D3D10_SB_RETURN_TYPE_MIXED:
353   default:
354      LOG_UNSUPPORTED(TRUE);
355      return TGSI_RETURN_TYPE_FLOAT;
356   }
357}
358
359static void
360declare_vertices_in(struct Shader_xlate *sx,
361                    unsigned in)
362{
363   /* Make sure vertices_in is consistent with input primitive
364    * and other input declarations.
365    */
366   if (sx->vertices_in) {
367      assert(sx->vertices_in == in);
368   } else {
369      sx->vertices_in = in;
370   }
371}
372
/*
 * A four component source swizzle: the TGSI_SWIZZLE_x selector used for
 * each of the x, y, z and w channels.
 */
struct swizzle_mapping {
   unsigned x;
   unsigned y;
   unsigned z;
   unsigned w;
};
379
/* Mapping of writemask to swizzles.
 *
 * Indexed by a 4-bit TGSI writemask (0..15); swizzle_reg() applies the
 * selected row to the source register of a masked MOV.
 *
 * NOTE(review): in the XZ, YZ and YW rows an enabled channel does not
 * select itself (e.g. XZ reads x for the z channel, YZ swaps y and z,
 * YW reads w for the y channel), unlike every other row -- confirm
 * whether that is intended before relying on these entries.
 */
static const struct swizzle_mapping writemask_to_swizzle[] = {
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
   { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
   { TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
   { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
};
399
400static struct ureg_src
401swizzle_reg(struct ureg_src src, uint writemask,
402            unsigned siv_name)
403{
404   switch (siv_name) {
405   case D3D10_SB_NAME_PRIMITIVE_ID:
406   case D3D10_SB_NAME_INSTANCE_ID:
407   case D3D10_SB_NAME_VERTEX_ID:
408   case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
409   case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
410   case D3D10_SB_NAME_IS_FRONT_FACE:
411      return ureg_scalar(src, TGSI_SWIZZLE_X);
412   default: {
413      const struct swizzle_mapping *swizzle =
414         &writemask_to_swizzle[writemask];
415      return ureg_swizzle(src, swizzle->x, swizzle->y,
416                          swizzle->z, swizzle->w);
417   }
418   }
419}
420
421static void
422dcl_base_output(struct Shader_xlate *sx,
423                struct ureg_program *ureg,
424                struct ureg_dst reg,
425                const struct Shader_dst_operand *operand)
426{
427   unsigned writemask =
428      operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
429   unsigned idx = operand->base.index[0].imm;
430   unsigned i;
431
432   if (!writemask) {
433      sx->outputs[idx].reg[0] = reg;
434      sx->outputs[idx].reg[1] = reg;
435      sx->outputs[idx].reg[2] = reg;
436      sx->outputs[idx].reg[3] = reg;
437      return;
438   }
439
440   for (i = 0; i < 4; ++i) {
441      unsigned mask = 1 << i;
442      if ((writemask & mask)) {
443         sx->outputs[idx].reg[i] = reg;
444      }
445   }
446}
447
448static void
449dcl_base_input(struct Shader_xlate *sx,
450               struct ureg_program *ureg,
451               const struct Shader_dst_operand *operand,
452               struct ureg_src dcl_reg,
453               uint index,
454               uint siv_name)
455{
456   unsigned writemask =
457      operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
458
459   if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {
460      struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);
461
462      ureg_MOV(ureg,
463               ureg_writemask(temp, sx->inputs[index].writemask),
464               swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,
465                           sx->inputs[index].siv_name));
466      ureg_MOV(ureg, ureg_writemask(temp, writemask),
467               swizzle_reg(dcl_reg, writemask, siv_name));
468      sx->inputs[index].reg = ureg_src(temp);
469      sx->inputs[index].overloaded = TRUE;
470      sx->inputs[index].writemask |= writemask;
471   } else if (sx->inputs[index].overloaded) {
472      struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);
473      ureg_MOV(ureg, ureg_writemask(temp, writemask),
474               swizzle_reg(dcl_reg, writemask, siv_name));
475      sx->inputs[index].writemask |= writemask;
476   } else {
477      assert(!sx->inputs[index].declared);
478
479      sx->inputs[index].reg = dcl_reg;
480      sx->inputs[index].declared = TRUE;
481      sx->inputs[index].writemask = writemask;
482      sx->inputs[index].siv_name = siv_name;
483   }
484}
485
486static void
487dcl_vs_input(struct Shader_xlate *sx,
488             struct ureg_program *ureg,
489             const struct Shader_dst_operand *dst)
490{
491   struct ureg_src reg;
492   assert(dst->base.index_dim == 1);
493   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
494
495   reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
496
497   dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
498                  D3D10_SB_NAME_UNDEFINED);
499}
500
501static void
502dcl_gs_input(struct Shader_xlate *sx,
503             struct ureg_program *ureg,
504             const struct Shader_dst_operand *dst)
505{
506   if (dst->base.index_dim == 2) {
507      assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
508
509      declare_vertices_in(sx, dst->base.index[0].imm);
510
511      /* XXX: Implement declaration masks in gallium.
512       */
513      if (!sx->inputs[dst->base.index[1].imm].reg.File) {
514         struct ureg_src reg =
515            ureg_DECL_input(ureg,
516                            TGSI_SEMANTIC_GENERIC,
517                            dst->base.index[1].imm,
518                            0, 1);
519         dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
520                        D3D10_SB_NAME_UNDEFINED);
521      }
522   } else {
523      assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
524      assert(dst->base.index_dim == 0);
525
526      sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
527   }
528}
529
530static void
531dcl_sgv_input(struct Shader_xlate *sx,
532              struct ureg_program *ureg,
533              const struct Shader_dst_operand *dst,
534              uint dcl_siv_name)
535{
536   struct ureg_src reg;
537   assert(dst->base.index_dim == 1);
538   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
539
540   reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);
541
542   dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
543                  dcl_siv_name);
544}
545
546static void
547dcl_siv_input(struct Shader_xlate *sx,
548              struct ureg_program *ureg,
549              const struct Shader_dst_operand *dst,
550              uint dcl_siv_name)
551{
552   struct ureg_src reg;
553   assert(dst->base.index_dim == 2);
554   assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
555
556   declare_vertices_in(sx, dst->base.index[0].imm);
557
558   reg = ureg_DECL_input(ureg,
559                         translate_system_name(dcl_siv_name), 0,
560                         0, 1);
561
562   dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
563                  dcl_siv_name);
564}
565
566static void
567dcl_ps_input(struct Shader_xlate *sx,
568             struct ureg_program *ureg,
569             const struct Shader_dst_operand *dst,
570             uint dcl_in_ps_interp)
571{
572   struct ureg_src reg;
573   assert(dst->base.index_dim == 1);
574   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
575
576   reg = ureg_DECL_fs_input(ureg,
577                            TGSI_SEMANTIC_GENERIC,
578                            dst->base.index[0].imm,
579                            translate_interpolation(dcl_in_ps_interp));
580
581   dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
582                  D3D10_SB_NAME_UNDEFINED);
583}
584
585static void
586dcl_ps_sgv_input(struct Shader_xlate *sx,
587                 struct ureg_program *ureg,
588                 const struct Shader_dst_operand *dst,
589                 uint dcl_siv_name)
590{
591   struct ureg_src reg;
592   assert(dst->base.index_dim == 1);
593   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
594
595   if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
596      ureg_property(ureg,
597                    TGSI_PROPERTY_FS_COORD_ORIGIN,
598                    TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
599      ureg_property(ureg,
600                    TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
601                    TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
602   }
603
604   reg = ureg_DECL_fs_input(ureg,
605                            translate_system_name(dcl_siv_name),
606                            0,
607                            TGSI_INTERPOLATE_CONSTANT);
608
609   if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
610      /* We need to map gallium's front_face to the one expected
611       * by D3D10 */
612      struct ureg_dst tmp = ureg_DECL_temporary(ureg);
613
614      tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);
615
616      ureg_CMP(ureg, tmp, reg,
617               ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
618
619      reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);
620   }
621
622   dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
623                  dcl_siv_name);
624}
625
626static void
627dcl_ps_siv_input(struct Shader_xlate *sx,
628                 struct ureg_program *ureg,
629                 const struct Shader_dst_operand *dst,
630                 uint dcl_siv_name, uint dcl_in_ps_interp)
631{
632   struct ureg_src reg;
633   assert(dst->base.index_dim == 1);
634   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
635
636   reg = ureg_DECL_fs_input(ureg,
637                            translate_system_name(dcl_siv_name),
638                            0,
639                            translate_interpolation(dcl_in_ps_interp));
640
641   if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
642      /* D3D10 expects reciprocal of interpolated 1/w as 4th component,
643       * gallium/GL just interpolated 1/w */
644      struct ureg_dst tmp = ureg_DECL_temporary(ureg);
645
646      ureg_MOV(ureg, tmp, reg);
647      ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),
648               ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));
649      reg = ureg_src(tmp);
650   }
651
652   dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
653                  dcl_siv_name);
654}
655
656static struct ureg_src
657translate_relative_operand(struct Shader_xlate *sx,
658                           const struct Shader_relative_operand *operand)
659{
660   struct ureg_src reg;
661
662   switch (operand->type) {
663   case D3D10_SB_OPERAND_TYPE_TEMP:
664      assert(operand->index[0].imm < SHADER_MAX_TEMPS);
665
666      reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
667      break;
668
669   case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
670      reg = sx->prim_id;
671      break;
672
673   case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
674      assert(operand->index[1].imm < SHADER_MAX_TEMPS);
675
676      reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
677            operand->index[1].imm]);
678      break;
679
680   case D3D10_SB_OPERAND_TYPE_INPUT:
681   case D3D10_SB_OPERAND_TYPE_OUTPUT:
682   case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
683   case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
684   case D3D10_SB_OPERAND_TYPE_SAMPLER:
685   case D3D10_SB_OPERAND_TYPE_RESOURCE:
686   case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
687   case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
688   case D3D10_SB_OPERAND_TYPE_LABEL:
689   case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
690   case D3D10_SB_OPERAND_TYPE_NULL:
691   case D3D10_SB_OPERAND_TYPE_RASTERIZER:
692   case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
693      LOG_UNSUPPORTED(TRUE);
694      reg = ureg_src(ureg_DECL_temporary(sx->ureg));
695      break;
696
697   default:
698      assert(0);                /* should never happen */
699      reg = ureg_src(ureg_DECL_temporary(sx->ureg));
700   }
701
702   reg = ureg_scalar(reg, operand->comp);
703   return reg;
704}
705
706static struct ureg_dst
707translate_operand(struct Shader_xlate *sx,
708                  const struct Shader_operand *operand,
709                  unsigned writemask)
710{
711   struct ureg_dst reg;
712
713   switch (operand->type) {
714   case D3D10_SB_OPERAND_TYPE_TEMP:
715      assert(operand->index_dim == 1);
716      assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
717      assert(operand->index[0].imm < SHADER_MAX_TEMPS);
718
719      reg = sx->temps[sx->temp_offset + operand->index[0].imm];
720      break;
721
722   case D3D10_SB_OPERAND_TYPE_OUTPUT:
723      assert(operand->index_dim == 1);
724      assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
725
726      if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
727         if (!writemask) {
728            reg = sx->outputs[operand->index[0].imm].reg[0];
729         } else {
730            unsigned i;
731            for (i = 0; i < 4; ++i) {
732               unsigned mask = 1 << i;
733               if ((writemask & mask)) {
734                  reg = sx->outputs[operand->index[0].imm].reg[i];
735                  break;
736               }
737            }
738         }
739      } else {
740         struct ureg_src addr =
741            translate_relative_operand(sx, &operand->index[0].rel);
742         assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
743         reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
744      }
745      break;
746
747   case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
748      assert(operand->index_dim == 0);
749
750      reg = sx->output_depth;
751      break;
752
753   case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
754      assert(operand->index_dim == 0);
755
756      reg = ureg_dst(sx->prim_id);
757      break;
758
759   case D3D10_SB_OPERAND_TYPE_INPUT:
760   case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
761   case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
762   case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
763   case D3D10_SB_OPERAND_TYPE_SAMPLER:
764   case D3D10_SB_OPERAND_TYPE_RESOURCE:
765   case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
766   case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
767   case D3D10_SB_OPERAND_TYPE_LABEL:
768   case D3D10_SB_OPERAND_TYPE_NULL:
769   case D3D10_SB_OPERAND_TYPE_RASTERIZER:
770   case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
771      /* XXX: Translate more operands types.
772       */
773      LOG_UNSUPPORTED(TRUE);
774      reg = ureg_DECL_temporary(sx->ureg);
775   }
776
777   return reg;
778}
779
780static struct ureg_src
781translate_indexable_temp(struct Shader_xlate *sx,
782                         const struct Shader_operand *operand)
783{
784   struct ureg_src reg;
785   switch (operand->index[1].index_rep) {
786   case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
787      reg = ureg_src(
788         sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
789                   operand->index[1].imm]);
790      break;
791   case D3D10_SB_OPERAND_INDEX_RELATIVE:
792      reg = ureg_src_indirect(
793         ureg_src(sx->temps[
794                     sx->indexable_temp_offsets[operand->index[0].imm]]),
795         translate_relative_operand(sx,
796                                    &operand->index[1].rel));
797      break;
798   case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
799      reg = ureg_src_indirect(
800         ureg_src(sx->temps[
801                     operand->index[1].imm +
802                     sx->indexable_temp_offsets[operand->index[0].imm]]),
803         translate_relative_operand(sx,
804                                    &operand->index[1].rel));
805      break;
806   default:
807      /* XXX: Other index representations.
808       */
809      LOG_UNSUPPORTED(TRUE);
810      reg = ureg_src(ureg_DECL_temporary(sx->ureg));
811   }
812   return reg;
813}
814
815static struct ureg_dst
816translate_dst_operand(struct Shader_xlate *sx,
817                      const struct Shader_dst_operand *operand,
818                      boolean saturate)
819{
820   struct ureg_dst reg;
821   unsigned writemask =
822      operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
823
824   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
825   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
826   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
827   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
828   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);
829
830   switch (operand->base.type) {
831   case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
832      assert(operand->base.index_dim == 2);
833      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
834      assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
835
836      reg = ureg_dst(translate_indexable_temp(sx, &operand->base));
837      break;
838
839   default:
840      reg = translate_operand(sx, &operand->base, writemask);
841   }
842
843   /* oDepth often has an empty writemask */
844   if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
845      reg = ureg_writemask(reg, writemask);
846   }
847
848   if (saturate) {
849      reg = ureg_saturate(reg);
850   }
851
852   return reg;
853}
854
855static struct ureg_src
856translate_src_operand(struct Shader_xlate *sx,
857                      const struct Shader_src_operand *operand,
858                      const enum dx10_opcode_format format)
859{
860   struct ureg_src reg;
861
862   switch (operand->base.type) {
863   case D3D10_SB_OPERAND_TYPE_INPUT:
864      if (operand->base.index_dim == 1) {
865         switch (operand->base.index[0].index_rep) {
866         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
867            assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
868            reg = sx->inputs[operand->base.index[0].imm].reg;
869            break;
870         case D3D10_SB_OPERAND_INDEX_RELATIVE: {
871            struct ureg_src tmp =
872               translate_relative_operand(sx, &operand->base.index[0].rel);
873            reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
874         }
875            break;
876         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
877            struct ureg_src tmp =
878               translate_relative_operand(sx, &operand->base.index[0].rel);
879            reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
880         }
881            break;
882         default:
883            /* XXX: Other index representations.
884             */
885            LOG_UNSUPPORTED(TRUE);
886
887         }
888      } else {
889         assert(operand->base.index_dim == 2);
890         assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);
891
892         switch (operand->base.index[1].index_rep) {
893         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
894            reg = sx->inputs[operand->base.index[1].imm].reg;
895            break;
896         case D3D10_SB_OPERAND_INDEX_RELATIVE: {
897            struct ureg_src tmp =
898               translate_relative_operand(sx, &operand->base.index[1].rel);
899            reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
900         }
901            break;
902         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
903            struct ureg_src tmp =
904               translate_relative_operand(sx, &operand->base.index[1].rel);
905            reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
906         }
907            break;
908         default:
909            /* XXX: Other index representations.
910             */
911            LOG_UNSUPPORTED(TRUE);
912         }
913
914         switch (operand->base.index[0].index_rep) {
915         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
916            reg = ureg_src_dimension(reg, operand->base.index[0].imm);
917            break;
918         case D3D10_SB_OPERAND_INDEX_RELATIVE:{
919            struct ureg_src tmp =
920               translate_relative_operand(sx, &operand->base.index[0].rel);
921            reg = ureg_src_dimension_indirect(reg, tmp, 0);
922         }
923            break;
924         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
925            struct ureg_src tmp =
926               translate_relative_operand(sx, &operand->base.index[0].rel);
927            reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
928         }
929            break;
930         default:
931            /* XXX: Other index representations.
932             */
933            LOG_UNSUPPORTED(TRUE);
934         }
935      }
936      break;
937
938   case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
939      assert(operand->base.index_dim == 2);
940      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
941      assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
942
943      reg = translate_indexable_temp(sx, &operand->base);
944      break;
945
946   case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
947      switch (format) {
948      case OF_FLOAT:
949         reg = ureg_imm4f(sx->ureg,
950                          operand->imm[0].f32,
951                          operand->imm[1].f32,
952                          operand->imm[2].f32,
953                          operand->imm[3].f32);
954         break;
955      case OF_INT:
956         reg = ureg_imm4i(sx->ureg,
957                          operand->imm[0].i32,
958                          operand->imm[1].i32,
959                          operand->imm[2].i32,
960                          operand->imm[3].i32);
961         break;
962      case OF_UINT:
963         reg = ureg_imm4u(sx->ureg,
964                          operand->imm[0].u32,
965                          operand->imm[1].u32,
966                          operand->imm[2].u32,
967                          operand->imm[3].u32);
968         break;
969      default:
970         assert(0);
971         reg = ureg_src(ureg_DECL_temporary(sx->ureg));
972      }
973      break;
974
975   case D3D10_SB_OPERAND_TYPE_SAMPLER:
976      assert(operand->base.index_dim == 1);
977      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
978      assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);
979
980      reg = sx->samplers[operand->base.index[0].imm];
981      break;
982
983   case D3D10_SB_OPERAND_TYPE_RESOURCE:
984      assert(operand->base.index_dim == 1);
985      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
986      assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);
987
988      reg = sx->sv[operand->base.index[0].imm];
989      break;
990
991   case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
992      assert(operand->base.index_dim == 2);
993
994      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
995      assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
996
997      switch (operand->base.index[1].index_rep) {
998      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
999         assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);
1000
1001         reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1002         reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1003         break;
1004      case D3D10_SB_OPERAND_INDEX_RELATIVE:
1005      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1006         reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1007         reg = ureg_src_indirect(
1008            reg,
1009            translate_relative_operand(sx, &operand->base.index[1].rel));
1010         reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1011         break;
1012      default:
1013         /* XXX: Other index representations.
1014          */
1015         LOG_UNSUPPORTED(TRUE);
1016      }
1017
1018      break;
1019
1020   case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
1021      assert(operand->base.index_dim == 1);
1022
1023      switch (operand->base.index[0].index_rep) {
1024      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
1025         reg = sx->imms;
1026         reg.Index += operand->base.index[0].imm;
1027         break;
1028      case D3D10_SB_OPERAND_INDEX_RELATIVE:
1029      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1030         reg = sx->imms;
1031         reg.Index += operand->base.index[0].imm;
1032         reg = ureg_src_indirect(
1033            sx->imms,
1034            translate_relative_operand(sx, &operand->base.index[0].rel));
1035         break;
1036      default:
1037         /* XXX: Other index representations.
1038          */
1039         LOG_UNSUPPORTED(TRUE);
1040      }
1041      break;
1042
1043   case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
1044      reg = sx->prim_id;
1045      break;
1046
1047   default:
1048      reg = ureg_src(translate_operand(sx, &operand->base, 0));
1049   }
1050
1051   reg = ureg_swizzle(reg,
1052                      operand->swizzle[0],
1053                      operand->swizzle[1],
1054                      operand->swizzle[2],
1055                      operand->swizzle[3]);
1056
1057   switch (operand->modifier) {
1058   case D3D10_SB_OPERAND_MODIFIER_NONE:
1059      break;
1060   case D3D10_SB_OPERAND_MODIFIER_NEG:
1061      reg = ureg_negate(reg);
1062      break;
1063   case D3D10_SB_OPERAND_MODIFIER_ABS:
1064      reg = ureg_abs(reg);
1065      break;
1066   case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
1067      reg = ureg_negate(ureg_abs(reg));
1068      break;
1069   default:
1070      assert(0);
1071   }
1072
1073   return reg;
1074}
1075
1076static uint
1077translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
1078{
1079   switch (dim) {
1080   case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
1081      return TGSI_TEXTURE_UNKNOWN;
1082   case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
1083      return TGSI_TEXTURE_BUFFER;
1084   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
1085      return TGSI_TEXTURE_1D;
1086   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
1087      return TGSI_TEXTURE_2D;
1088   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
1089      return TGSI_TEXTURE_2D_MSAA;
1090   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
1091      return TGSI_TEXTURE_3D;
1092   case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
1093      return TGSI_TEXTURE_CUBE;
1094   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
1095      return TGSI_TEXTURE_1D_ARRAY;
1096   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
1097      return TGSI_TEXTURE_2D_ARRAY;
1098   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
1099      return TGSI_TEXTURE_2D_ARRAY_MSAA;
1100   case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
1101      return TGSI_TEXTURE_CUBE_ARRAY;
1102   default:
1103      assert(0);
1104      return TGSI_TEXTURE_UNKNOWN;
1105   }
1106}
1107
1108static uint
1109texture_dim_from_tgsi_target(unsigned tgsi_target)
1110{
1111   switch (tgsi_target) {
1112   case TGSI_TEXTURE_BUFFER:
1113   case TGSI_TEXTURE_1D:
1114   case TGSI_TEXTURE_1D_ARRAY:
1115      return 1;
1116   case TGSI_TEXTURE_2D:
1117   case TGSI_TEXTURE_2D_MSAA:
1118   case TGSI_TEXTURE_CUBE:
1119   case TGSI_TEXTURE_2D_ARRAY:
1120   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1121      return 2;
1122   case TGSI_TEXTURE_3D:
1123      return 3;
1124   case TGSI_TEXTURE_UNKNOWN:
1125   default:
1126      assert(0);
1127      return 1;
1128   }
1129}
1130
1131static boolean
1132operand_is_scalar(const struct Shader_src_operand *operand)
1133{
1134   return operand->swizzle[0] == operand->swizzle[1] &&
1135          operand->swizzle[1] == operand->swizzle[2] &&
1136          operand->swizzle[2] == operand->swizzle[3];
1137}
1138
1139static void
1140Shader_add_call(struct Shader_xlate *sx,
1141                unsigned d3d_label,
1142                unsigned tgsi_label_token)
1143{
1144   ASSERT(sx->num_calls < sx->max_calls);
1145
1146   sx->calls[sx->num_calls].d3d_label = d3d_label;
1147   sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;
1148   sx->num_calls++;
1149}
1150
1151static void
1152Shader_add_label(struct Shader_xlate *sx,
1153                 unsigned d3d_label,
1154                 unsigned tgsi_insn_no)
1155{
1156   ASSERT(sx->num_labels < sx->max_labels);
1157
1158   sx->labels[sx->num_labels].d3d_label = d3d_label;
1159   sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;
1160   sx->num_labels++;
1161}
1162
1163
1164static void
1165sample_ureg_emit(struct ureg_program *ureg,
1166                 unsigned tgsi_opcode,
1167                 unsigned num_src,
1168                 struct Shader_opcode *opcode,
1169                 struct ureg_dst dst,
1170                 struct ureg_src *src)
1171{
1172   unsigned num_offsets = 0;
1173   struct tgsi_texture_offset texoffsets;
1174
1175   memset(&texoffsets, 0, sizeof texoffsets);
1176
1177   if (opcode->imm_texel_offset.u ||
1178       opcode->imm_texel_offset.v ||
1179       opcode->imm_texel_offset.w) {
1180      struct ureg_src offsetreg;
1181      num_offsets = 1;
1182      /* don't actually always need all 3 values */
1183      offsetreg = ureg_imm3i(ureg,
1184                             opcode->imm_texel_offset.u,
1185                             opcode->imm_texel_offset.v,
1186                             opcode->imm_texel_offset.w);
1187      texoffsets.File = offsetreg.File;
1188      texoffsets.Index = offsetreg.Index;
1189      texoffsets.SwizzleX = offsetreg.SwizzleX;
1190      texoffsets.SwizzleY = offsetreg.SwizzleY;
1191      texoffsets.SwizzleZ = offsetreg.SwizzleZ;
1192   }
1193
1194   ureg_tex_insn(ureg,
1195                 tgsi_opcode,
1196                 &dst, 1,
1197                 TGSI_TEXTURE_UNKNOWN,
1198                 TGSI_RETURN_TYPE_UNKNOWN,
1199                 &texoffsets, num_offsets,
1200                 src, num_src);
1201}
1202
1203typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
1204                                struct ureg_src src);
1205static void
1206expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
1207                        struct Shader_xlate *sx, struct Shader_opcode *opcode)
1208{
1209   struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1210   struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
1211                                               opcode->saturate);
1212   struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
1213   struct ureg_dst scalar_dst;
1214   ureg_MOV(ureg, tmp, src);
1215   src = ureg_src(tmp);
1216
1217   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
1218   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1219      func(ureg, scalar_dst,
1220           ureg_scalar(src, TGSI_SWIZZLE_X));
1221   }
1222   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
1223   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1224      func(ureg, scalar_dst,
1225           ureg_scalar(src, TGSI_SWIZZLE_Y));
1226   }
1227   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
1228   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1229      func(ureg, scalar_dst,
1230           ureg_scalar(src, TGSI_SWIZZLE_Z));
1231   }
1232   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
1233   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1234      func(ureg, scalar_dst,
1235           ureg_scalar(src, TGSI_SWIZZLE_W));
1236   }
1237   ureg_release_temporary(ureg, tmp);
1238}
1239
1240const struct tgsi_token *
1241Shader_tgsi_translate(const unsigned *code,
1242                      unsigned *output_mapping)
1243{
1244   struct Shader_xlate sx;
1245   struct Shader_parser parser;
1246   struct ureg_program *ureg = NULL;
1247   struct Shader_opcode opcode;
1248   const struct tgsi_token *tokens = NULL;
1249   uint nr_tokens;
1250   boolean shader_dumped = FALSE;
1251   boolean inside_sub = FALSE;
1252   uint i, j;
1253
1254   memset(&sx, 0, sizeof sx);
1255
1256   Shader_parse_init(&parser, code);
1257
1258   if (st_debug & ST_DEBUG_TGSI) {
1259      dx10_shader_dump_tokens(code);
1260      shader_dumped = TRUE;
1261   }
1262
1263   sx.max_calls = 64;
1264   sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
1265                                           sizeof(struct Shader_call));
1266   sx.num_calls = 0;
1267
1268   sx.max_labels = 64;
1269   sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
1270                                             sizeof(struct Shader_call));
1271   sx.num_labels = 0;
1272
1273
1274
1275   /* Header. */
1276   switch (parser.header.type) {
1277   case D3D10_SB_PIXEL_SHADER:
1278      ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1279      break;
1280   case D3D10_SB_VERTEX_SHADER:
1281      ureg = ureg_create(PIPE_SHADER_VERTEX);
1282      break;
1283   case D3D10_SB_GEOMETRY_SHADER:
1284      ureg = ureg_create(PIPE_SHADER_GEOMETRY);
1285      break;
1286   }
1287
1288   assert(ureg);
1289   sx.ureg = ureg;
1290
1291   while (Shader_parse_opcode(&parser, &opcode)) {
1292      const struct dx10_opcode_xlate *ox;
1293
1294      assert(opcode.type < D3D10_SB_NUM_OPCODES);
1295      ox = &opcode_xlate[opcode.type];
1296
1297      switch (opcode.type) {
1298      case D3D10_SB_OPCODE_EXP:
1299         expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
1300         break;
1301      case D3D10_SB_OPCODE_SQRT:
1302         expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
1303         break;
1304      case D3D10_SB_OPCODE_RSQ:
1305         expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
1306         break;
1307      case D3D10_SB_OPCODE_LOG:
1308         expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
1309         break;
1310      case D3D10_SB_OPCODE_IMUL:
1311         if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1312            ureg_IMUL_HI(ureg,
1313                        translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1314                        translate_src_operand(&sx, &opcode.src[0], OF_INT),
1315                        translate_src_operand(&sx, &opcode.src[1], OF_INT));
1316         }
1317
1318         if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1319            ureg_UMUL(ureg,
1320                      translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
1321                      translate_src_operand(&sx, &opcode.src[0], OF_INT),
1322                      translate_src_operand(&sx, &opcode.src[1], OF_INT));
1323         }
1324
1325         break;
1326
1327      case D3D10_SB_OPCODE_FTOI: {
1328         /* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
1329          * out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
1330          * requires clamping to min and max representable value (as well as 0
1331          * for NaNs) (this applies to both ftoi and ftou). At least the online
1332          * docs state that - this is consistent with generic d3d10 conversion
1333          * rules.
1334          * For FTOI, we cheat a bit here - in particular depending on noone
1335          * caring about NaNs, and depending on the (undefined!) behavior of
1336          * F2I returning 0x80000000 for too negative values (which works with
1337          * x86 sse). Hence only need to clamp too positive values.
1338          * Note that it is impossible to clamp using a float, since 2^31 - 1
1339          * is not exactly representable with a float.
1340          */
1341         struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1342         struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1343         ureg_FSGE(ureg, too_large,
1344                   translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1345                   ureg_imm1f(ureg, 2147483648.0f));
1346         ureg_F2I(ureg, tmp,
1347                  translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1348         ureg_UCMP(ureg,
1349                   translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1350                   ureg_src(too_large),
1351                   ureg_imm1i(ureg, 0x7fffffff),
1352                   ureg_src(tmp));
1353         ureg_release_temporary(ureg, too_large);
1354         ureg_release_temporary(ureg, tmp);
1355      }
1356         break;
1357
1358      case D3D10_SB_OPCODE_FTOU: {
1359         /* For ftou, we need to do both clamps, which as a bonus also
1360          * gets us correct NaN behavior.
1361          * Note that it is impossible to clamp using a float against the upper
1362          * limit, since 2^32 - 1 is not exactly representable with a float,
1363          * but the clamp against 0.0 certainly works just fine.
1364          */
1365         struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1366         struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1367         ureg_FSGE(ureg, too_large,
1368                   translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1369                   ureg_imm1f(ureg, 4294967296.0f));
1370         /* clamp negative values + NaN to zero.
1371          * (Could be done slightly more efficient in llvmpipe due to
1372          * MAX NaN behavior handling.)
1373          */
1374         ureg_MAX(ureg, tmp,
1375                  ureg_imm1f(ureg, 0.0f),
1376                  translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1377         ureg_F2U(ureg, tmp,
1378                  ureg_src(tmp));
1379         ureg_UCMP(ureg,
1380                   translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1381                   ureg_src(too_large),
1382                   ureg_imm1u(ureg, 0xffffffff),
1383                   ureg_src(tmp));
1384         ureg_release_temporary(ureg, too_large);
1385         ureg_release_temporary(ureg, tmp);
1386      }
1387         break;
1388
1389      case D3D10_SB_OPCODE_LD_MS:
1390         /* XXX: We don't support multi-sampling yet, but we need to parse
1391          * this opcode regardless, so we just ignore sample index operand
1392          * for now */
1393      case D3D10_SB_OPCODE_LD:
1394         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1395            unsigned resource = opcode.src[1].base.index[0].imm;
1396            assert(opcode.src[1].base.index_dim == 1);
1397            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1398
1399            if (ureg_src_is_undef(sx.samplers[resource])) {
1400               sx.samplers[resource] =
1401                  ureg_DECL_sampler(ureg, resource);
1402            }
1403
1404            ureg_TXF(ureg,
1405                     translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1406                     sx.resources[resource].target,
1407                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1408                     sx.samplers[resource]);
1409         }
1410         else {
1411            struct ureg_src srcreg[2];
1412            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
1413            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
1414
1415            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
1416                             translate_dst_operand(&sx, &opcode.dst[0],
1417                                                   opcode.saturate),
1418                             srcreg);
1419         }
1420         break;
1421
1422      case D3D10_SB_OPCODE_CUSTOMDATA:
1423         if (opcode.customdata._class ==
1424             D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
1425            sx.imms =
1426               ureg_DECL_immediate_block_uint(ureg,
1427                                              opcode.customdata.u.constbuf.data,
1428                                              opcode.customdata.u.constbuf.count);
1429         } else {
1430            assert(0);
1431         }
1432         break;
1433
1434      case D3D10_SB_OPCODE_RESINFO:
1435         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1436            unsigned resource = opcode.src[1].base.index[0].imm;
1437            assert(opcode.src[1].base.index_dim == 1);
1438            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1439
1440            if (ureg_src_is_undef(sx.samplers[resource])) {
1441               sx.samplers[resource] =
1442                  ureg_DECL_sampler(ureg, resource);
1443            }
1444            /* don't bother with swizzle, ret type etc. */
1445            ureg_TXQ(ureg,
1446                     translate_dst_operand(&sx, &opcode.dst[0],
1447                                           opcode.saturate),
1448                     sx.resources[resource].target,
1449                     translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1450                     sx.samplers[resource]);
1451         }
1452         else {
1453            struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1454            struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1455            struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
1456                                                           opcode.saturate);
1457
1458            /* while specs say swizzle is ignored better safe than sorry */
1459            tsrc.SwizzleX = TGSI_SWIZZLE_X;
1460            tsrc.SwizzleY = TGSI_SWIZZLE_Y;
1461            tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
1462            tsrc.SwizzleW = TGSI_SWIZZLE_W;
1463
1464            ureg_SVIEWINFO(ureg, r0,
1465                           translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1466                           tsrc);
1467
1468            tsrc = ureg_src(r0);
1469            tsrc.SwizzleX = opcode.src[1].swizzle[0];
1470            tsrc.SwizzleY = opcode.src[1].swizzle[1];
1471            tsrc.SwizzleZ = opcode.src[1].swizzle[2];
1472            tsrc.SwizzleW = opcode.src[1].swizzle[3];
1473
1474            if (opcode.specific.resinfo_ret_type ==
1475                D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
1476               ureg_MOV(ureg, dstreg, tsrc);
1477            }
1478            else if (opcode.specific.resinfo_ret_type ==
1479                     D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
1480                ureg_I2F(ureg, dstreg, tsrc);
1481            }
1482            else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
1483               unsigned i;
1484               /*
1485                * Must apply rcp only to parts determined by dims,
1486                * (width/height/depth) but NOT to array size nor mip levels
1487                * hence need to figure that out here.
1488                * This is one sick modifier if you ask me!
1489                */
1490               unsigned res_index = opcode.src[1].base.index[0].imm;
1491               unsigned target = sx.resources[res_index].target;
1492               unsigned dims = texture_dim_from_tgsi_target(target);
1493
1494               ureg_I2F(ureg, r0, ureg_src(r0));
1495               tsrc = ureg_src(r0);
1496               for (i = 0; i < 4; i++) {
1497                  unsigned dst_swizzle = opcode.src[1].swizzle[i];
1498                  struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
1499                  /*
1500                   * could do one mov with multiple write mask bits set
1501                   * but rcp is scalar anyway.
1502                   */
1503                  if (dst_swizzle < dims) {
1504                     ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1505                  }
1506                  else {
1507                     ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1508                  }
1509               }
1510            }
1511            ureg_release_temporary(ureg, r0);
1512         }
1513         break;
1514
1515      case D3D10_SB_OPCODE_SAMPLE:
1516         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1517            assert(opcode.src[1].base.index_dim == 1);
1518            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1519
1520            LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);
1521
1522            ureg_TEX(ureg,
1523                     translate_dst_operand(&sx, &opcode.dst[0],
1524                                           opcode.saturate),
1525                     sx.resources[opcode.src[1].base.index[0].imm].target,
1526                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1527                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1528         }
1529         else {
1530            struct ureg_src srcreg[3];
1531            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1532            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1533            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1534
1535            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
1536                             translate_dst_operand(&sx, &opcode.dst[0],
1537                                                   opcode.saturate),
1538                             srcreg);
1539         }
1540         break;
1541
1542      case D3D10_SB_OPCODE_SAMPLE_C:
1543         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1544            struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1545
1546            /* XXX: Support only 2D texture targets for now.
1547             *      Need to figure out how to pack the compare value
1548             *      for other dimensions and if there is enough space
1549             *      in a single operand for all possible cases.
1550             */
1551            LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1552                            TGSI_TEXTURE_2D);
1553
1554            assert(opcode.src[1].base.index_dim == 1);
1555            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1556
1557            /* Insert the compare value into .z component.
1558             */
1559            ureg_MOV(ureg,
1560                     ureg_writemask(r0, TGSI_WRITEMASK_XYW),
1561                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1562            ureg_MOV(ureg,
1563                     ureg_writemask(r0, TGSI_WRITEMASK_Z),
1564                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1565
1566            /* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
1567             */
1568
1569            ureg_TEX(ureg,
1570                     translate_dst_operand(&sx, &opcode.dst[0],
1571                                           opcode.saturate),
1572                     sx.resources[opcode.src[1].base.index[0].imm].target,
1573                     ureg_src(r0),
1574                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1575
1576            ureg_release_temporary(ureg, r0);
1577         }
1578         else {
1579            struct ureg_src srcreg[4];
1580            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1581            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1582            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1583            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1584
1585            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
1586                             translate_dst_operand(&sx, &opcode.dst[0],
1587                                                   opcode.saturate),
1588                             srcreg);
1589         }
1590         break;
1591
1592      case D3D10_SB_OPCODE_SAMPLE_C_LZ:
1593         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1594            struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1595
1596            assert(opcode.src[1].base.index_dim == 1);
1597            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1598
1599            /* XXX: Support only 2D texture targets for now.
1600             *      Need to figure out how to pack the compare value
1601             *      for other dimensions and if there is enough space
1602             *      in a single operand for all possible cases.
1603             */
1604            LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1605                            TGSI_TEXTURE_2D);
1606
1607            /* Insert the compare value into .z component.
1608             * Insert 0 into .w component.
1609             */
1610            ureg_MOV(ureg,
1611                     ureg_writemask(r0, TGSI_WRITEMASK_XY),
1612                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1613            ureg_MOV(ureg,
1614                     ureg_writemask(r0, TGSI_WRITEMASK_Z),
1615                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1616            ureg_MOV(ureg,
1617                     ureg_writemask(r0, TGSI_WRITEMASK_W),
1618                     ureg_imm1f(ureg, 0.0f));
1619
1620            ureg_TXL(ureg,
1621                     translate_dst_operand(&sx, &opcode.dst[0],
1622                                           opcode.saturate),
1623                     sx.resources[opcode.src[1].base.index[0].imm].target,
1624                     ureg_src(r0),
1625                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1626
1627            ureg_release_temporary(ureg, r0);
1628         }
1629         else {
1630            struct ureg_src srcreg[4];
1631            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1632            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1633            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1634            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1635
1636            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
1637                             translate_dst_operand(&sx, &opcode.dst[0],
1638                                                   opcode.saturate),
1639                             srcreg);
1640         }
1641         break;
1642
1643      case D3D10_SB_OPCODE_SAMPLE_L:
1644         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1645            struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1646
1647            assert(opcode.src[1].base.index_dim == 1);
1648            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1649
1650            /* Insert LOD into .w component.
1651             */
1652            ureg_MOV(ureg,
1653                     ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1654                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1655            ureg_MOV(ureg,
1656                     ureg_writemask(r0, TGSI_WRITEMASK_W),
1657                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1658
1659            ureg_TXL(ureg,
1660                     translate_dst_operand(&sx, &opcode.dst[0],
1661                                           opcode.saturate),
1662                     sx.resources[opcode.src[1].base.index[0].imm].target,
1663                     ureg_src(r0),
1664                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1665
1666            ureg_release_temporary(ureg, r0);
1667         }
1668         else {
1669            struct ureg_src srcreg[4];
1670            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1671            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1672            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1673            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1674
1675            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
1676                             translate_dst_operand(&sx, &opcode.dst[0],
1677                                                   opcode.saturate),
1678                             srcreg);
1679         }
1680         break;
1681
1682      case D3D10_SB_OPCODE_SAMPLE_D:
1683         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1684            assert(opcode.src[1].base.index_dim == 1);
1685            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1686
1687            ureg_TXD(ureg,
1688                     translate_dst_operand(&sx, &opcode.dst[0],
1689                                           opcode.saturate),
1690                     sx.resources[opcode.src[1].base.index[0].imm].target,
1691                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1692                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
1693                     translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
1694                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1695         }
1696         else {
1697            struct ureg_src srcreg[5];
1698            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1699            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1700            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1701            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1702            srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);
1703
1704            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
1705                             translate_dst_operand(&sx, &opcode.dst[0],
1706                                                   opcode.saturate),
1707                             srcreg);
1708         }
1709         break;
1710
1711      case D3D10_SB_OPCODE_SAMPLE_B:
1712         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1713            struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1714
1715            assert(opcode.src[1].base.index_dim == 1);
1716            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1717
1718            /* Insert LOD bias into .w component.
1719             */
1720            ureg_MOV(ureg,
1721                     ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1722                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1723            ureg_MOV(ureg,
1724                     ureg_writemask(r0, TGSI_WRITEMASK_W),
1725                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1726
1727            ureg_TXB(ureg,
1728                     translate_dst_operand(&sx, &opcode.dst[0],
1729                                           opcode.saturate),
1730                     sx.resources[opcode.src[1].base.index[0].imm].target,
1731                     ureg_src(r0),
1732                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1733
1734            ureg_release_temporary(ureg, r0);
1735         }
1736         else {
1737            struct ureg_src srcreg[4];
1738            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1739            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1740            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1741            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1742
1743            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
1744                             translate_dst_operand(&sx, &opcode.dst[0],
1745                                                   opcode.saturate),
1746                             srcreg);
1747         }
1748         break;
1749
1750      case D3D10_SB_OPCODE_SINCOS: {
1751         struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1752         ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1753         if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1754            struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
1755                                                        opcode.saturate);
1756            struct ureg_src src = ureg_src(src0);
1757            ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1758                     ureg_scalar(src, TGSI_SWIZZLE_X));
1759            ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1760                     ureg_scalar(src, TGSI_SWIZZLE_Y));
1761            ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1762                     ureg_scalar(src, TGSI_SWIZZLE_Z));
1763            ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1764                     ureg_scalar(src, TGSI_SWIZZLE_W));
1765         }
1766         if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1767            struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
1768                                                        opcode.saturate);
1769            struct ureg_src src = ureg_src(src0);
1770            ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1771                     ureg_scalar(src, TGSI_SWIZZLE_X));
1772            ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1773                     ureg_scalar(src, TGSI_SWIZZLE_Y));
1774            ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1775                     ureg_scalar(src, TGSI_SWIZZLE_Z));
1776            ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1777                     ureg_scalar(src, TGSI_SWIZZLE_W));
1778         }
1779         ureg_release_temporary(ureg, src0);
1780      }
1781         break;
1782
1783      case D3D10_SB_OPCODE_UDIV: {
1784         struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1785         struct ureg_dst src1 = ureg_DECL_temporary(ureg);
1786         ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
1787         ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1788         if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1789            ureg_UDIV(ureg,
1790                      translate_dst_operand(&sx, &opcode.dst[0],
1791                                            opcode.saturate),
1792                      ureg_src(src0), ureg_src(src1));
1793         }
1794         if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1795            ureg_UMOD(ureg,
1796                      translate_dst_operand(&sx, &opcode.dst[1],
1797                                            opcode.saturate),
1798                      ureg_src(src0), ureg_src(src1));
1799         }
1800         ureg_release_temporary(ureg, src0);
1801         ureg_release_temporary(ureg, src1);
1802      }
1803         break;
1804      case D3D10_SB_OPCODE_UMUL: {
1805         if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1806            ureg_UMUL_HI(ureg,
1807                         translate_dst_operand(&sx, &opcode.dst[0],
1808                                               opcode.saturate),
1809                         translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1810                         translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1811         }
1812         if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1813            ureg_UMUL(ureg,
1814                      translate_dst_operand(&sx, &opcode.dst[1],
1815                                            opcode.saturate),
1816                      translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1817                      translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1818         }
1819      }
1820         break;
1821
1822      case D3D10_SB_OPCODE_DCL_RESOURCE:
1823      {
1824         unsigned target;
1825         unsigned res_index = opcode.dst[0].base.index[0].imm;
1826         assert(opcode.dst[0].base.index_dim == 1);
1827         assert(res_index < SHADER_MAX_RESOURCES);
1828
1829         target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
1830         sx.resources[res_index].target = target;
1831         if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
1832            sx.sv[res_index] =
1833               ureg_DECL_sampler_view(ureg, res_index, target,
1834                                      trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
1835                                      trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
1836                                      trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
1837                                      trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
1838         }
1839         break;
1840      }
1841
1842      case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
1843         unsigned num_constants = opcode.src[0].base.index[1].imm;
1844
1845         assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
1846
1847         if (num_constants == 0) {
1848            num_constants = SHADER_MAX_CONSTS;
1849         } else {
1850            assert(num_constants <= SHADER_MAX_CONSTS);
1851         }
1852
1853         ureg_DECL_constant2D(ureg,
1854                              0,
1855                              num_constants - 1,
1856                              opcode.src[0].base.index[0].imm);
1857         break;
1858      }
1859
1860      case D3D10_SB_OPCODE_DCL_SAMPLER:
1861         assert(opcode.dst[0].base.index_dim == 1);
1862         assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
1863
1864         sx.samplers[opcode.dst[0].base.index[0].imm] =
1865            ureg_DECL_sampler(ureg,
1866                              opcode.dst[0].base.index[0].imm);
1867         break;
1868
1869      case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
1870         assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1871
1872         switch (opcode.specific.dcl_gs_output_primitive_topology) {
1873         case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
1874            ureg_property(sx.ureg,
1875                          TGSI_PROPERTY_GS_OUTPUT_PRIM,
1876                          PIPE_PRIM_POINTS);
1877            break;
1878
1879         case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
1880            ureg_property(sx.ureg,
1881                          TGSI_PROPERTY_GS_OUTPUT_PRIM,
1882                          PIPE_PRIM_LINE_STRIP);
1883            break;
1884
1885         case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
1886            ureg_property(sx.ureg,
1887                          TGSI_PROPERTY_GS_OUTPUT_PRIM,
1888                          PIPE_PRIM_TRIANGLE_STRIP);
1889            break;
1890
1891         default:
1892            assert(0);
1893         }
1894         break;
1895
1896      case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
1897         assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1898
1899         /* Figure out the second dimension of GS inputs.
1900          */
1901         switch (opcode.specific.dcl_gs_input_primitive) {
1902         case D3D10_SB_PRIMITIVE_POINT:
1903            declare_vertices_in(&sx, 1);
1904            ureg_property(sx.ureg,
1905                          TGSI_PROPERTY_GS_INPUT_PRIM,
1906                          PIPE_PRIM_POINTS);
1907            break;
1908
1909         case D3D10_SB_PRIMITIVE_LINE:
1910            declare_vertices_in(&sx, 2);
1911            ureg_property(sx.ureg,
1912                          TGSI_PROPERTY_GS_INPUT_PRIM,
1913                          PIPE_PRIM_LINES);
1914            break;
1915
1916         case D3D10_SB_PRIMITIVE_TRIANGLE:
1917            declare_vertices_in(&sx, 3);
1918            ureg_property(sx.ureg,
1919                          TGSI_PROPERTY_GS_INPUT_PRIM,
1920                          PIPE_PRIM_TRIANGLES);
1921            break;
1922
1923         case D3D10_SB_PRIMITIVE_LINE_ADJ:
1924            declare_vertices_in(&sx, 4);
1925            ureg_property(sx.ureg,
1926                          TGSI_PROPERTY_GS_INPUT_PRIM,
1927                          PIPE_PRIM_LINES_ADJACENCY);
1928            break;
1929
1930         case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
1931            declare_vertices_in(&sx, 6);
1932            ureg_property(sx.ureg,
1933                          TGSI_PROPERTY_GS_INPUT_PRIM,
1934                          PIPE_PRIM_TRIANGLES_ADJACENCY);
1935            break;
1936
1937         default:
1938            assert(0);
1939         }
1940         break;
1941
1942      case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1943         assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1944
1945         ureg_property(sx.ureg,
1946                       TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1947                       opcode.specific.dcl_max_output_vertex_count);
1948         break;
1949
1950      case D3D10_SB_OPCODE_DCL_INPUT:
1951         if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
1952            dcl_vs_input(&sx, ureg, &opcode.dst[0]);
1953         } else {
1954            assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1955            dcl_gs_input(&sx, ureg, &opcode.dst[0]);
1956         }
1957         break;
1958
1959      case D3D10_SB_OPCODE_DCL_INPUT_SGV:
1960         assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
1961         dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1962         break;
1963
1964      case D3D10_SB_OPCODE_DCL_INPUT_SIV:
1965         assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1966         dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1967         break;
1968
1969      case D3D10_SB_OPCODE_DCL_INPUT_PS:
1970         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1971         dcl_ps_input(&sx, ureg, &opcode.dst[0],
1972                      opcode.specific.dcl_in_ps_interp);
1973         break;
1974
1975      case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
1976         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1977         dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
1978                          opcode.dcl_siv_name);
1979         break;
1980
1981      case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
1982         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1983         dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
1984                          opcode.dcl_siv_name,
1985                          opcode.specific.dcl_in_ps_interp);
1986         break;
1987
1988      case D3D10_SB_OPCODE_DCL_OUTPUT:
1989         if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
1990            /* Pixel shader outputs. */
1991            if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
1992               /* Depth output. */
1993               assert(opcode.dst[0].base.index_dim == 0);
1994
1995               sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
1996               sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
1997            } else {
1998               /* Color outputs. */
1999               assert(opcode.dst[0].base.index_dim == 1);
2000               assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2001
2002               dcl_base_output(&sx, ureg,
2003                               ureg_DECL_output(ureg,
2004                                                TGSI_SEMANTIC_COLOR,
2005                                                opcode.dst[0].base.index[0].imm),
2006                               &opcode.dst[0]);
2007            }
2008         } else {
2009            assert(opcode.dst[0].base.index_dim == 1);
2010            assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2011
2012            if (output_mapping) {
2013               unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2014               output_mapping[nr_outputs]
2015                  = opcode.dst[0].base.index[0].imm;
2016            }
2017            dcl_base_output(&sx, ureg,
2018                            ureg_DECL_output(ureg,
2019                                             TGSI_SEMANTIC_GENERIC,
2020                                             opcode.dst[0].base.index[0].imm),
2021                            &opcode.dst[0]);
2022         }
2023         break;
2024
2025      case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
2026         assert(opcode.dst[0].base.index_dim == 1);
2027         assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2028
2029         if (output_mapping) {
2030            unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2031            output_mapping[nr_outputs]
2032               = opcode.dst[0].base.index[0].imm;
2033         }
2034         if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
2035             opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2036            /*
2037             * FIXME: this is quite broken. gallium no longer has separate
2038             * clip/cull dists, using (max 2) combined clipdist/culldist regs
2039             * instead. Unlike d3d10 though, which is clip and which cull is
2040             * simply determined by the number of clip/cull dists (that is,
2041             * all clip dists must come first).
2042             */
2043            unsigned numcliporcull = sx.num_clip_distances_declared +
2044                                     sx.num_cull_distances_declared;
2045            sx.clip_distance_mapping[numcliporcull].d3d =
2046               opcode.dst[0].base.index[0].imm;
2047            sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
2048            if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
2049               ++sx.num_clip_distances_declared;
2050               /* re-emit should be safe... */
2051               ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2052                             sx.num_clip_distances_declared);
2053            } else {
2054               ++sx.num_cull_distances_declared;
2055               ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2056                             sx.num_cull_distances_declared);
2057            }
2058         } else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2059            sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
2060               opcode.dst[0].base.index[0].imm;
2061            sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
2062               sx.num_cull_distances_declared;
2063            ++sx.num_cull_distances_declared;
2064            ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2065                          sx.num_cull_distances_declared);
2066         }
2067
2068         dcl_base_output(&sx, ureg,
2069                         ureg_DECL_output_masked(
2070                            ureg,
2071                            translate_system_name(opcode.dcl_siv_name),
2072                            translate_semantic_index(&sx, opcode.dcl_siv_name,
2073                                                     &opcode.dst[0]),
2074                            opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
2075                            0, 1),
2076                         &opcode.dst[0]);
2077         break;
2078
2079      case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
2080         assert(opcode.dst[0].base.index_dim == 1);
2081         assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2082
2083         if (output_mapping) {
2084            unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2085            output_mapping[nr_outputs]
2086               = opcode.dst[0].base.index[0].imm;
2087         }
2088         dcl_base_output(&sx, ureg,
2089                         ureg_DECL_output(ureg,
2090                                          translate_system_name(opcode.dcl_siv_name),
2091                                          0),
2092                         &opcode.dst[0]);
2093         break;
2094
2095      case D3D10_SB_OPCODE_DCL_TEMPS:
2096         {
2097            uint i;
2098
2099            assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
2100                   SHADER_MAX_TEMPS);
2101
2102            sx.temp_offset = sx.declared_temps;
2103
2104            for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
2105               sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2106            }
2107            sx.declared_temps += opcode.specific.dcl_num_temps;
2108         }
2109         break;
2110
2111      case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
2112         {
2113            uint i;
2114
2115            /* XXX: Add true indexable temps to gallium.
2116             */
2117
2118            assert(opcode.specific.dcl_indexable_temp.index <
2119                   SHADER_MAX_INDEXABLE_TEMPS);
2120            assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
2121                   SHADER_MAX_TEMPS);
2122
2123            sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
2124               sx.declared_temps;
2125
2126            for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
2127               sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2128            }
2129            sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
2130         }
2131         break;
2132      case D3D10_SB_OPCODE_IF: {
2133         unsigned label = 0;
2134         if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2135            struct ureg_src src =
2136               translate_src_operand(&sx, &opcode.src[0], OF_INT);
2137            struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2138            ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2139            ureg_UIF(ureg, ureg_src(src_nz), &label);
2140            ureg_release_temporary(ureg, src_nz);;
2141         } else {
2142            ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2143         }
2144      }
2145         break;
2146      case D3D10_SB_OPCODE_RETC:
2147      case D3D10_SB_OPCODE_CONTINUEC:
2148      case D3D10_SB_OPCODE_CALLC:
2149      case D3D10_SB_OPCODE_DISCARD:
2150      case D3D10_SB_OPCODE_BREAKC:
2151      {
2152         unsigned label = 0;
2153         assert(operand_is_scalar(&opcode.src[0]));
2154         if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2155            struct ureg_src src =
2156               translate_src_operand(&sx, &opcode.src[0], OF_INT);
2157            struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2158            ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2159            ureg_UIF(ureg, ureg_src(src_nz), &label);
2160            ureg_release_temporary(ureg, src_nz);
2161         }
2162         else {
2163            ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2164         }
2165         switch (opcode.type) {
2166         case D3D10_SB_OPCODE_RETC:
2167            ureg_RET(ureg);
2168            break;
2169         case D3D10_SB_OPCODE_CONTINUEC:
2170            ureg_CONT(ureg);
2171            break;
2172         case D3D10_SB_OPCODE_CALLC: {
2173            unsigned label = opcode.src[1].base.index[0].imm;
2174            unsigned tgsi_token_label = 0;
2175            ureg_CAL(ureg, &tgsi_token_label);
2176            Shader_add_call(&sx, label, tgsi_token_label);
2177         }
2178            break;
2179         case D3D10_SB_OPCODE_DISCARD:
2180            ureg_KILL(ureg);
2181            break;
2182         case D3D10_SB_OPCODE_BREAKC:
2183            ureg_BRK(ureg);
2184            break;
2185         default:
2186            assert(0);
2187            break;
2188         }
2189         ureg_ENDIF(ureg);
2190      }
2191         break;
2192      case D3D10_SB_OPCODE_LABEL: {
2193         unsigned label = opcode.src[0].base.index[0].imm;
2194         unsigned tgsi_inst_no = 0;
2195         if (inside_sub) {
2196            ureg_ENDSUB(ureg);
2197         }
2198         tgsi_inst_no = ureg_get_instruction_number(ureg);
2199         ureg_BGNSUB(ureg);
2200         inside_sub = TRUE;
2201         Shader_add_label(&sx, label, tgsi_inst_no);
2202      }
2203         break;
2204      case D3D10_SB_OPCODE_CALL: {
2205         unsigned label = opcode.src[0].base.index[0].imm;
2206         unsigned tgsi_token_label = 0;
2207         ureg_CAL(ureg, &tgsi_token_label);
2208         Shader_add_call(&sx, label, tgsi_token_label);
2209      }
2210         break;
2211      case D3D10_SB_OPCODE_EMIT:
2212         ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2213         break;
2214      case D3D10_SB_OPCODE_CUT:
2215         ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2216         break;
2217      case D3D10_SB_OPCODE_EMITTHENCUT:
2218         ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2219         ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2220         break;
2221      case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
2222      case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
2223         /* Ignore */
2224         break;
2225      default:
2226         {
2227            uint i;
2228            struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
2229            struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
2230
2231            assert(ox->tgsi_opcode != TGSI_EXPAND);
2232
2233            if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
2234               if (!shader_dumped) {
2235                  dx10_shader_dump_tokens(code);
2236                  shader_dumped = TRUE;
2237               }
2238               debug_printf("%s: unsupported opcode %i\n",
2239                            __FUNCTION__, ox->type);
2240               assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
2241            }
2242
2243            /* Destination operands. */
2244            for (i = 0; i < opcode.num_dst; i++) {
2245               dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
2246                                              opcode.saturate);
2247            }
2248
2249            /* Source operands. */
2250            for (i = 0; i < opcode.num_src; i++) {
2251               src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
2252            }
2253
2254            /* Try to re-route output depth to Z channel. */
2255            if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
2256               LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
2257               dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
2258               src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
2259            }
2260
2261            ureg_insn(ureg,
2262                      ox->tgsi_opcode,
2263                      dst,
2264                      opcode.num_dst,
2265                      src,
2266                      opcode.num_src, 0);
2267         }
2268      }
2269
2270      Shader_opcode_free(&opcode);
2271   }
2272
2273   if (inside_sub) {
2274      ureg_ENDSUB(ureg);
2275   }
2276
2277   ureg_END(ureg);
2278
2279   for (i = 0; i < sx.num_calls; ++i) {
2280      for (j = 0; j < sx.num_labels; ++j) {
2281         if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
2282            ureg_fixup_label(sx.ureg,
2283                             sx.calls[i].tgsi_label_token,
2284                             sx.labels[j].tgsi_insn_no);
2285            break;
2286         }
2287      }
2288      ASSERT(j < sx.num_labels);
2289   }
2290   FREE(sx.labels);
2291   FREE(sx.calls);
2292
2293   tokens = ureg_get_tokens(ureg, &nr_tokens);
2294   assert(tokens);
2295   ureg_destroy(ureg);
2296
2297   if (st_debug & ST_DEBUG_TGSI) {
2298      tgsi_dump(tokens, 0);
2299   }
2300
2301   return tokens;
2302}
2303