1/************************************************************************** 2 * 3 * Copyright 2012-2021 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28/* 29 * ShaderTGSI.c -- 30 * Functions for translating shaders. 31 */ 32 33#include "Debug.h" 34#include "ShaderParse.h" 35 36#include "pipe/p_state.h" 37#include "tgsi/tgsi_ureg.h" 38#include "tgsi/tgsi_dump.h" 39#include "util/u_memory.h" 40 41#include "ShaderDump.h" 42 43 44enum dx10_opcode_format { 45 OF_FLOAT, 46 OF_INT, 47 OF_UINT 48}; 49 50struct dx10_opcode_xlate { 51 D3D10_SB_OPCODE_TYPE type; 52 enum dx10_opcode_format format; 53 uint tgsi_opcode; 54}; 55 56/* Opcodes that we have not even attempted to implement: 57 */ 58#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST 59 60/* Opcodes which do not translate directly to a TGSI opcode, but which 61 * have at least a partial implemention coded below: 62 */ 63#define TGSI_EXPAND (TGSI_OPCODE_LAST+1) 64 65static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = { 66 {D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD}, 67 {D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND}, 68 {D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK}, 69 {D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND}, 70 {D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND}, 71 {D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND}, 72 {D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE}, 73 {D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT}, 74 {D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND}, 75 {D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND}, 76 {D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT}, 77 {D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX}, 78 {D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY}, 79 {D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND}, 80 {D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV}, 81 {D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2}, 82 {D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3}, 83 {D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4}, 84 {D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE}, 85 {D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND}, 86 {D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND}, 87 {D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF}, 88 {D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP}, 89 {D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH}, 90 {D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ}, 91 {D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND}, 92 {D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC}, 93 {D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND}, 94 {D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND}, 95 {D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE}, 96 {D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD}, 97 {D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND}, 98 {D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ}, 99 {D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE}, 100 {D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT}, 101 {D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD}, 102 {D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX}, 103 {D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN}, 104 {D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND}, 105 {D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE}, 106 {D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG}, 107 {D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL}, 108 {D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR}, 109 {D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F}, 110 {D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND}, 111 {D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND}, 112 {D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND}, 113 {D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND}, 114 {D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP}, 115 {D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT}, 116 {D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD}, 117 {D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN}, 118 {D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX}, 119 {D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND}, 120 {D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV}, 121 {D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP}, 122 {D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL}, 123 {D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE}, 124 {D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP}, 125 {D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT}, 126 {D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR}, 127 {D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND}, 128 {D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET}, 129 {D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND}, 130 {D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND}, 131 {D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR}, 132 {D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL}, 133 {D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC}, 134 {D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND}, 135 {D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND}, 136 {D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND}, 137 {D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND}, 138 {D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND}, 139 {D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND}, 140 {D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND}, 141 {D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND}, 142 {D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH}, 143 {D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND}, 144 {D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND}, 145 {D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT}, 146 {D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE}, 147 {D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND}, 148 {D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD}, 149 {D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX}, 150 {D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN}, 151 {D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR}, 152 {D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F}, 153 {D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR}, 154 {D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND}, 155 {D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND}, 156 {D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND}, 157 {D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 158 {D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND}, 159 {D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND}, 160 {D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND}, 161 {D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND}, 162 {D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND}, 163 {D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND}, 164 {D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND}, 165 {D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND}, 166 {D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND}, 167 {D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND}, 168 {D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND}, 169 {D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND}, 170 {D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND}, 171 {D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND}, 172 {D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 173 {D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 174 {D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 175 {D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 176 {D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, 177 {D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED} 178}; 179 180#define SHADER_MAX_TEMPS 4096 181#define SHADER_MAX_INPUTS 32 182#define SHADER_MAX_OUTPUTS 32 183#define SHADER_MAX_CONSTS 4096 184#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS 185#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS 186#define SHADER_MAX_INDEXABLE_TEMPS 4096 187 188struct Shader_call { 189 unsigned d3d_label; 190 unsigned tgsi_label_token; 191}; 192 193struct Shader_label { 194 unsigned d3d_label; 195 unsigned tgsi_insn_no; 196}; 197 198struct Shader_resource { 199 uint target; /* TGSI_TEXTURE_x */ 200}; 201 202struct Shader_xlate { 203 struct ureg_program *ureg; 204 205 uint vertices_in; 206 uint declared_temps; 207 208 struct ureg_dst temps[SHADER_MAX_TEMPS]; 209 struct ureg_dst output_depth; 210 struct Shader_resource resources[SHADER_MAX_RESOURCES]; 211 struct ureg_src sv[SHADER_MAX_RESOURCES]; 212 struct ureg_src samplers[SHADER_MAX_SAMPLERS]; 213 struct ureg_src imms; 214 struct ureg_src prim_id; 215 216 uint temp_offset; 217 uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS]; 218 219 struct { 220 boolean declared; 221 uint writemask; 222 uint siv_name; 223 boolean overloaded; 224 struct ureg_src reg; 225 } inputs[SHADER_MAX_INPUTS]; 226 227 struct { 228 struct ureg_dst reg[4]; 229 } outputs[SHADER_MAX_OUTPUTS]; 230 231 struct { 232 uint d3d; 233 uint tgsi; 234 } clip_distance_mapping[2], cull_distance_mapping[2]; 235 uint num_clip_distances_declared; 236 uint num_cull_distances_declared; 237 238 struct Shader_call *calls; 239 uint num_calls; 240 uint max_calls; 241 struct Shader_label *labels; 242 uint num_labels; 243 uint max_labels; 244}; 245 246static uint 247translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation) 248{ 249 switch (interpolation) { 250 case D3D10_SB_INTERPOLATION_UNDEFINED: 251 assert(0); 252 return TGSI_INTERPOLATE_LINEAR; 253 254 case D3D10_SB_INTERPOLATION_CONSTANT: 255 return TGSI_INTERPOLATE_CONSTANT; 256 case D3D10_SB_INTERPOLATION_LINEAR: 257 return TGSI_INTERPOLATE_PERSPECTIVE; 258 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE: 259 return TGSI_INTERPOLATE_LINEAR; 260 261 case D3D10_SB_INTERPOLATION_LINEAR_CENTROID: 262 case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1 263 LOG_UNSUPPORTED(TRUE); 264 return TGSI_INTERPOLATE_PERSPECTIVE; 265 266 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: 267 case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1 268 LOG_UNSUPPORTED(TRUE); 269 return TGSI_INTERPOLATE_LINEAR; 270 } 271 272 assert(0); 273 return TGSI_INTERPOLATE_LINEAR; 274} 275 276static uint 277translate_system_name(D3D10_SB_NAME name) 278{ 279 switch (name) { 280 case D3D10_SB_NAME_UNDEFINED: 281 assert(0); /* should not happen */ 282 return TGSI_SEMANTIC_GENERIC; 283 case D3D10_SB_NAME_POSITION: 284 return TGSI_SEMANTIC_POSITION; 285 case D3D10_SB_NAME_CLIP_DISTANCE: 286 case D3D10_SB_NAME_CULL_DISTANCE: 287 return TGSI_SEMANTIC_CLIPDIST; 288 case D3D10_SB_NAME_PRIMITIVE_ID: 289 return TGSI_SEMANTIC_PRIMID; 290 case D3D10_SB_NAME_INSTANCE_ID: 291 return TGSI_SEMANTIC_INSTANCEID; 292 case D3D10_SB_NAME_VERTEX_ID: 293 return TGSI_SEMANTIC_VERTEXID_NOBASE; 294 case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX: 295 return TGSI_SEMANTIC_VIEWPORT_INDEX; 296 case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX: 297 return TGSI_SEMANTIC_LAYER; 298 case D3D10_SB_NAME_IS_FRONT_FACE: 299 return TGSI_SEMANTIC_FACE; 300 case D3D10_SB_NAME_SAMPLE_INDEX: 301 LOG_UNSUPPORTED(TRUE); 302 return TGSI_SEMANTIC_GENERIC; 303 } 304 305 assert(0); 306 return TGSI_SEMANTIC_GENERIC; 307} 308 309static uint 310translate_semantic_index(struct Shader_xlate *sx, 311 D3D10_SB_NAME name, 312 const struct Shader_dst_operand *operand) 313{ 314 unsigned idx; 315 switch (name) { 316 case D3D10_SB_NAME_CLIP_DISTANCE: 317 case D3D10_SB_NAME_CULL_DISTANCE: 318 if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) { 319 idx = sx->clip_distance_mapping[0].tgsi; 320 } else { 321 assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm); 322 idx = sx->clip_distance_mapping[1].tgsi; 323 } 324 break; 325/* case D3D10_SB_NAME_CULL_DISTANCE: 326 if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) { 327 idx = sx->cull_distance_mapping[0].tgsi; 328 } else { 329 assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm); 330 idx = sx->cull_distance_mapping[1].tgsi; 331 } 332 break;*/ 333 default: 334 idx = 0; 335 } 336 return idx; 337} 338 339static enum tgsi_return_type 340trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) { 341 switch (d3drettype) { 342 case D3D10_SB_RETURN_TYPE_UNORM: 343 return TGSI_RETURN_TYPE_UNORM; 344 case D3D10_SB_RETURN_TYPE_SNORM: 345 return TGSI_RETURN_TYPE_SNORM; 346 case D3D10_SB_RETURN_TYPE_SINT: 347 return TGSI_RETURN_TYPE_SINT; 348 case D3D10_SB_RETURN_TYPE_UINT: 349 return TGSI_RETURN_TYPE_UINT; 350 case D3D10_SB_RETURN_TYPE_FLOAT: 351 return TGSI_RETURN_TYPE_FLOAT; 352 case D3D10_SB_RETURN_TYPE_MIXED: 353 default: 354 LOG_UNSUPPORTED(TRUE); 355 return TGSI_RETURN_TYPE_FLOAT; 356 } 357} 358 359static void 360declare_vertices_in(struct Shader_xlate *sx, 361 unsigned in) 362{ 363 /* Make sure vertices_in is consistent with input primitive 364 * and other input declarations. 365 */ 366 if (sx->vertices_in) { 367 assert(sx->vertices_in == in); 368 } else { 369 sx->vertices_in = in; 370 } 371} 372 373struct swizzle_mapping { 374 unsigned x; 375 unsigned y; 376 unsigned z; 377 unsigned w; 378}; 379 380/* mapping of writmask to swizzles */ 381static const struct swizzle_mapping writemask_to_swizzle[] = { 382 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE 383 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X 384 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y 385 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY 386 { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z 387 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ 388 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ 389 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ 390 { TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W 391 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW 392 { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW 393 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW 394 { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW 395 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW 396 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW 397 { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW 398}; 399 400static struct ureg_src 401swizzle_reg(struct ureg_src src, uint writemask, 402 unsigned siv_name) 403{ 404 switch (siv_name) { 405 case D3D10_SB_NAME_PRIMITIVE_ID: 406 case D3D10_SB_NAME_INSTANCE_ID: 407 case D3D10_SB_NAME_VERTEX_ID: 408 case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX: 409 case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX: 410 case D3D10_SB_NAME_IS_FRONT_FACE: 411 return ureg_scalar(src, TGSI_SWIZZLE_X); 412 default: { 413 const struct swizzle_mapping *swizzle = 414 &writemask_to_swizzle[writemask]; 415 return ureg_swizzle(src, swizzle->x, swizzle->y, 416 swizzle->z, swizzle->w); 417 } 418 } 419} 420 421static void 422dcl_base_output(struct Shader_xlate *sx, 423 struct ureg_program *ureg, 424 struct ureg_dst reg, 425 const struct Shader_dst_operand *operand) 426{ 427 unsigned writemask = 428 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; 429 unsigned idx = operand->base.index[0].imm; 430 unsigned i; 431 432 if (!writemask) { 433 sx->outputs[idx].reg[0] = reg; 434 sx->outputs[idx].reg[1] = reg; 435 sx->outputs[idx].reg[2] = reg; 436 sx->outputs[idx].reg[3] = reg; 437 return; 438 } 439 440 for (i = 0; i < 4; ++i) { 441 unsigned mask = 1 << i; 442 if ((writemask & mask)) { 443 sx->outputs[idx].reg[i] = reg; 444 } 445 } 446} 447 448static void 449dcl_base_input(struct Shader_xlate *sx, 450 struct ureg_program *ureg, 451 const struct Shader_dst_operand *operand, 452 struct ureg_src dcl_reg, 453 uint index, 454 uint siv_name) 455{ 456 unsigned writemask = 457 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; 458 459 if (sx->inputs[index].declared && !sx->inputs[index].overloaded) { 460 struct ureg_dst temp = ureg_DECL_temporary(sx->ureg); 461 462 ureg_MOV(ureg, 463 ureg_writemask(temp, sx->inputs[index].writemask), 464 swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask, 465 sx->inputs[index].siv_name)); 466 ureg_MOV(ureg, ureg_writemask(temp, writemask), 467 swizzle_reg(dcl_reg, writemask, siv_name)); 468 sx->inputs[index].reg = ureg_src(temp); 469 sx->inputs[index].overloaded = TRUE; 470 sx->inputs[index].writemask |= writemask; 471 } else if (sx->inputs[index].overloaded) { 472 struct ureg_dst temp = ureg_dst(sx->inputs[index].reg); 473 ureg_MOV(ureg, ureg_writemask(temp, writemask), 474 swizzle_reg(dcl_reg, writemask, siv_name)); 475 sx->inputs[index].writemask |= writemask; 476 } else { 477 assert(!sx->inputs[index].declared); 478 479 sx->inputs[index].reg = dcl_reg; 480 sx->inputs[index].declared = TRUE; 481 sx->inputs[index].writemask = writemask; 482 sx->inputs[index].siv_name = siv_name; 483 } 484} 485 486static void 487dcl_vs_input(struct Shader_xlate *sx, 488 struct ureg_program *ureg, 489 const struct Shader_dst_operand *dst) 490{ 491 struct ureg_src reg; 492 assert(dst->base.index_dim == 1); 493 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); 494 495 reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm); 496 497 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, 498 D3D10_SB_NAME_UNDEFINED); 499} 500 501static void 502dcl_gs_input(struct Shader_xlate *sx, 503 struct ureg_program *ureg, 504 const struct Shader_dst_operand *dst) 505{ 506 if (dst->base.index_dim == 2) { 507 assert(dst->base.index[1].imm < SHADER_MAX_INPUTS); 508 509 declare_vertices_in(sx, dst->base.index[0].imm); 510 511 /* XXX: Implement declaration masks in gallium. 512 */ 513 if (!sx->inputs[dst->base.index[1].imm].reg.File) { 514 struct ureg_src reg = 515 ureg_DECL_input(ureg, 516 TGSI_SEMANTIC_GENERIC, 517 dst->base.index[1].imm, 518 0, 1); 519 dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm, 520 D3D10_SB_NAME_UNDEFINED); 521 } 522 } else { 523 assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID); 524 assert(dst->base.index_dim == 0); 525 526 sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0); 527 } 528} 529 530static void 531dcl_sgv_input(struct Shader_xlate *sx, 532 struct ureg_program *ureg, 533 const struct Shader_dst_operand *dst, 534 uint dcl_siv_name) 535{ 536 struct ureg_src reg; 537 assert(dst->base.index_dim == 1); 538 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); 539 540 reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0); 541 542 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, 543 dcl_siv_name); 544} 545 546static void 547dcl_siv_input(struct Shader_xlate *sx, 548 struct ureg_program *ureg, 549 const struct Shader_dst_operand *dst, 550 uint dcl_siv_name) 551{ 552 struct ureg_src reg; 553 assert(dst->base.index_dim == 2); 554 assert(dst->base.index[1].imm < SHADER_MAX_INPUTS); 555 556 declare_vertices_in(sx, dst->base.index[0].imm); 557 558 reg = ureg_DECL_input(ureg, 559 translate_system_name(dcl_siv_name), 0, 560 0, 1); 561 562 dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm, 563 dcl_siv_name); 564} 565 566static void 567dcl_ps_input(struct Shader_xlate *sx, 568 struct ureg_program *ureg, 569 const struct Shader_dst_operand *dst, 570 uint dcl_in_ps_interp) 571{ 572 struct ureg_src reg; 573 assert(dst->base.index_dim == 1); 574 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); 575 576 reg = ureg_DECL_fs_input(ureg, 577 TGSI_SEMANTIC_GENERIC, 578 dst->base.index[0].imm, 579 translate_interpolation(dcl_in_ps_interp)); 580 581 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, 582 D3D10_SB_NAME_UNDEFINED); 583} 584 585static void 586dcl_ps_sgv_input(struct Shader_xlate *sx, 587 struct ureg_program *ureg, 588 const struct Shader_dst_operand *dst, 589 uint dcl_siv_name) 590{ 591 struct ureg_src reg; 592 assert(dst->base.index_dim == 1); 593 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); 594 595 if (dcl_siv_name == D3D10_SB_NAME_POSITION) { 596 ureg_property(ureg, 597 TGSI_PROPERTY_FS_COORD_ORIGIN, 598 TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 599 ureg_property(ureg, 600 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 601 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); 602 } 603 604 reg = ureg_DECL_fs_input(ureg, 605 translate_system_name(dcl_siv_name), 606 0, 607 TGSI_INTERPOLATE_CONSTANT); 608 609 if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) { 610 /* We need to map gallium's front_face to the one expected 611 * by D3D10 */ 612 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 613 614 tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X); 615 616 ureg_CMP(ureg, tmp, reg, 617 ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1)); 618 619 reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X); 620 } 621 622 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, 623 dcl_siv_name); 624} 625 626static void 627dcl_ps_siv_input(struct Shader_xlate *sx, 628 struct ureg_program *ureg, 629 const struct Shader_dst_operand *dst, 630 uint dcl_siv_name, uint dcl_in_ps_interp) 631{ 632 struct ureg_src reg; 633 assert(dst->base.index_dim == 1); 634 assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); 635 636 reg = ureg_DECL_fs_input(ureg, 637 translate_system_name(dcl_siv_name), 638 0, 639 translate_interpolation(dcl_in_ps_interp)); 640 641 if (dcl_siv_name == D3D10_SB_NAME_POSITION) { 642 /* D3D10 expects reciprocal of interpolated 1/w as 4th component, 643 * gallium/GL just interpolated 1/w */ 644 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 645 646 ureg_MOV(ureg, tmp, reg); 647 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), 648 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W)); 649 reg = ureg_src(tmp); 650 } 651 652 dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, 653 dcl_siv_name); 654} 655 656static struct ureg_src 657translate_relative_operand(struct Shader_xlate *sx, 658 const struct Shader_relative_operand *operand) 659{ 660 struct ureg_src reg; 661 662 switch (operand->type) { 663 case D3D10_SB_OPERAND_TYPE_TEMP: 664 assert(operand->index[0].imm < SHADER_MAX_TEMPS); 665 666 reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]); 667 break; 668 669 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: 670 reg = sx->prim_id; 671 break; 672 673 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: 674 assert(operand->index[1].imm < SHADER_MAX_TEMPS); 675 676 reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] + 677 operand->index[1].imm]); 678 break; 679 680 case D3D10_SB_OPERAND_TYPE_INPUT: 681 case D3D10_SB_OPERAND_TYPE_OUTPUT: 682 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: 683 case D3D10_SB_OPERAND_TYPE_IMMEDIATE64: 684 case D3D10_SB_OPERAND_TYPE_SAMPLER: 685 case D3D10_SB_OPERAND_TYPE_RESOURCE: 686 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: 687 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 688 case D3D10_SB_OPERAND_TYPE_LABEL: 689 case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH: 690 case D3D10_SB_OPERAND_TYPE_NULL: 691 case D3D10_SB_OPERAND_TYPE_RASTERIZER: 692 case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: 693 LOG_UNSUPPORTED(TRUE); 694 reg = ureg_src(ureg_DECL_temporary(sx->ureg)); 695 break; 696 697 default: 698 assert(0); /* should never happen */ 699 reg = ureg_src(ureg_DECL_temporary(sx->ureg)); 700 } 701 702 reg = ureg_scalar(reg, operand->comp); 703 return reg; 704} 705 706static struct ureg_dst 707translate_operand(struct Shader_xlate *sx, 708 const struct Shader_operand *operand, 709 unsigned writemask) 710{ 711 struct ureg_dst reg; 712 713 switch (operand->type) { 714 case D3D10_SB_OPERAND_TYPE_TEMP: 715 assert(operand->index_dim == 1); 716 assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 717 assert(operand->index[0].imm < SHADER_MAX_TEMPS); 718 719 reg = sx->temps[sx->temp_offset + operand->index[0].imm]; 720 break; 721 722 case D3D10_SB_OPERAND_TYPE_OUTPUT: 723 assert(operand->index_dim == 1); 724 assert(operand->index[0].imm < SHADER_MAX_OUTPUTS); 725 726 if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) { 727 if (!writemask) { 728 reg = sx->outputs[operand->index[0].imm].reg[0]; 729 } else { 730 unsigned i; 731 for (i = 0; i < 4; ++i) { 732 unsigned mask = 1 << i; 733 if ((writemask & mask)) { 734 reg = sx->outputs[operand->index[0].imm].reg[i]; 735 break; 736 } 737 } 738 } 739 } else { 740 struct ureg_src addr = 741 translate_relative_operand(sx, &operand->index[0].rel); 742 assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE); 743 reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr); 744 } 745 break; 746 747 case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH: 748 assert(operand->index_dim == 0); 749 750 reg = sx->output_depth; 751 break; 752 753 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: 754 assert(operand->index_dim == 0); 755 756 reg = ureg_dst(sx->prim_id); 757 break; 758 759 case D3D10_SB_OPERAND_TYPE_INPUT: 760 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: 761 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: 762 case D3D10_SB_OPERAND_TYPE_IMMEDIATE64: 763 case D3D10_SB_OPERAND_TYPE_SAMPLER: 764 case D3D10_SB_OPERAND_TYPE_RESOURCE: 765 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: 766 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 767 case D3D10_SB_OPERAND_TYPE_LABEL: 768 case D3D10_SB_OPERAND_TYPE_NULL: 769 case D3D10_SB_OPERAND_TYPE_RASTERIZER: 770 case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: 771 /* XXX: Translate more operands types. 772 */ 773 LOG_UNSUPPORTED(TRUE); 774 reg = ureg_DECL_temporary(sx->ureg); 775 } 776 777 return reg; 778} 779 780static struct ureg_src 781translate_indexable_temp(struct Shader_xlate *sx, 782 const struct Shader_operand *operand) 783{ 784 struct ureg_src reg; 785 switch (operand->index[1].index_rep) { 786 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 787 reg = ureg_src( 788 sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] + 789 operand->index[1].imm]); 790 break; 791 case D3D10_SB_OPERAND_INDEX_RELATIVE: 792 reg = ureg_src_indirect( 793 ureg_src(sx->temps[ 794 sx->indexable_temp_offsets[operand->index[0].imm]]), 795 translate_relative_operand(sx, 796 &operand->index[1].rel)); 797 break; 798 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: 799 reg = ureg_src_indirect( 800 ureg_src(sx->temps[ 801 operand->index[1].imm + 802 sx->indexable_temp_offsets[operand->index[0].imm]]), 803 translate_relative_operand(sx, 804 &operand->index[1].rel)); 805 break; 806 default: 807 /* XXX: Other index representations. 808 */ 809 LOG_UNSUPPORTED(TRUE); 810 reg = ureg_src(ureg_DECL_temporary(sx->ureg)); 811 } 812 return reg; 813} 814 815static struct ureg_dst 816translate_dst_operand(struct Shader_xlate *sx, 817 const struct Shader_dst_operand *operand, 818 boolean saturate) 819{ 820 struct ureg_dst reg; 821 unsigned writemask = 822 operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; 823 824 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4); 825 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X); 826 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y); 827 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z); 828 assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W); 829 830 switch (operand->base.type) { 831 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: 832 assert(operand->base.index_dim == 2); 833 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 834 assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS); 835 836 reg = ureg_dst(translate_indexable_temp(sx, &operand->base)); 837 break; 838 839 default: 840 reg = translate_operand(sx, &operand->base, writemask); 841 } 842 843 /* oDepth often has an empty writemask */ 844 if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) { 845 reg = ureg_writemask(reg, writemask); 846 } 847 848 if (saturate) { 849 reg = ureg_saturate(reg); 850 } 851 852 return reg; 853} 854 855static struct ureg_src 856translate_src_operand(struct Shader_xlate *sx, 857 const struct Shader_src_operand *operand, 858 const enum dx10_opcode_format format) 859{ 860 struct ureg_src reg; 861 862 switch (operand->base.type) { 863 case D3D10_SB_OPERAND_TYPE_INPUT: 864 if (operand->base.index_dim == 1) { 865 switch (operand->base.index[0].index_rep) { 866 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 867 assert(operand->base.index[0].imm < SHADER_MAX_INPUTS); 868 reg = sx->inputs[operand->base.index[0].imm].reg; 869 break; 870 case D3D10_SB_OPERAND_INDEX_RELATIVE: { 871 struct ureg_src tmp = 872 translate_relative_operand(sx, &operand->base.index[0].rel); 873 reg = ureg_src_indirect(sx->inputs[0].reg, tmp); 874 } 875 break; 876 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { 877 struct ureg_src tmp = 878 translate_relative_operand(sx, &operand->base.index[0].rel); 879 reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp); 880 } 881 break; 882 default: 883 /* XXX: Other index representations. 884 */ 885 LOG_UNSUPPORTED(TRUE); 886 887 } 888 } else { 889 assert(operand->base.index_dim == 2); 890 assert(operand->base.index[1].imm < SHADER_MAX_INPUTS); 891 892 switch (operand->base.index[1].index_rep) { 893 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 894 reg = sx->inputs[operand->base.index[1].imm].reg; 895 break; 896 case D3D10_SB_OPERAND_INDEX_RELATIVE: { 897 struct ureg_src tmp = 898 translate_relative_operand(sx, &operand->base.index[1].rel); 899 reg = ureg_src_indirect(sx->inputs[0].reg, tmp); 900 } 901 break; 902 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { 903 struct ureg_src tmp = 904 translate_relative_operand(sx, &operand->base.index[1].rel); 905 reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp); 906 } 907 break; 908 default: 909 /* XXX: Other index representations. 910 */ 911 LOG_UNSUPPORTED(TRUE); 912 } 913 914 switch (operand->base.index[0].index_rep) { 915 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 916 reg = ureg_src_dimension(reg, operand->base.index[0].imm); 917 break; 918 case D3D10_SB_OPERAND_INDEX_RELATIVE:{ 919 struct ureg_src tmp = 920 translate_relative_operand(sx, &operand->base.index[0].rel); 921 reg = ureg_src_dimension_indirect(reg, tmp, 0); 922 } 923 break; 924 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { 925 struct ureg_src tmp = 926 translate_relative_operand(sx, &operand->base.index[0].rel); 927 reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm); 928 } 929 break; 930 default: 931 /* XXX: Other index representations. 932 */ 933 LOG_UNSUPPORTED(TRUE); 934 } 935 } 936 break; 937 938 case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: 939 assert(operand->base.index_dim == 2); 940 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 941 assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS); 942 943 reg = translate_indexable_temp(sx, &operand->base); 944 break; 945 946 case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: 947 switch (format) { 948 case OF_FLOAT: 949 reg = ureg_imm4f(sx->ureg, 950 operand->imm[0].f32, 951 operand->imm[1].f32, 952 operand->imm[2].f32, 953 operand->imm[3].f32); 954 break; 955 case OF_INT: 956 reg = ureg_imm4i(sx->ureg, 957 operand->imm[0].i32, 958 operand->imm[1].i32, 959 operand->imm[2].i32, 960 operand->imm[3].i32); 961 break; 962 case OF_UINT: 963 reg = ureg_imm4u(sx->ureg, 964 operand->imm[0].u32, 965 operand->imm[1].u32, 966 operand->imm[2].u32, 967 operand->imm[3].u32); 968 break; 969 default: 970 assert(0); 971 reg = ureg_src(ureg_DECL_temporary(sx->ureg)); 972 } 973 break; 974 975 case D3D10_SB_OPERAND_TYPE_SAMPLER: 976 assert(operand->base.index_dim == 1); 977 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 978 assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS); 979 980 reg = sx->samplers[operand->base.index[0].imm]; 981 break; 982 983 case D3D10_SB_OPERAND_TYPE_RESOURCE: 984 assert(operand->base.index_dim == 1); 985 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 986 assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES); 987 988 reg = sx->sv[operand->base.index[0].imm]; 989 break; 990 991 case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: 992 assert(operand->base.index_dim == 2); 993 994 assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); 995 assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS); 996 997 switch (operand->base.index[1].index_rep) { 998 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 999 assert(operand->base.index[1].imm < SHADER_MAX_CONSTS); 1000 1001 reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm); 1002 reg = ureg_src_dimension(reg, operand->base.index[0].imm); 1003 break; 1004 case D3D10_SB_OPERAND_INDEX_RELATIVE: 1005 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: 1006 reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm); 1007 reg = ureg_src_indirect( 1008 reg, 1009 translate_relative_operand(sx, &operand->base.index[1].rel)); 1010 reg = ureg_src_dimension(reg, operand->base.index[0].imm); 1011 break; 1012 default: 1013 /* XXX: Other index representations. 1014 */ 1015 LOG_UNSUPPORTED(TRUE); 1016 } 1017 1018 break; 1019 1020 case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 1021 assert(operand->base.index_dim == 1); 1022 1023 switch (operand->base.index[0].index_rep) { 1024 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: 1025 reg = sx->imms; 1026 reg.Index += operand->base.index[0].imm; 1027 break; 1028 case D3D10_SB_OPERAND_INDEX_RELATIVE: 1029 case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: 1030 reg = sx->imms; 1031 reg.Index += operand->base.index[0].imm; 1032 reg = ureg_src_indirect( 1033 sx->imms, 1034 translate_relative_operand(sx, &operand->base.index[0].rel)); 1035 break; 1036 default: 1037 /* XXX: Other index representations. 1038 */ 1039 LOG_UNSUPPORTED(TRUE); 1040 } 1041 break; 1042 1043 case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: 1044 reg = sx->prim_id; 1045 break; 1046 1047 default: 1048 reg = ureg_src(translate_operand(sx, &operand->base, 0)); 1049 } 1050 1051 reg = ureg_swizzle(reg, 1052 operand->swizzle[0], 1053 operand->swizzle[1], 1054 operand->swizzle[2], 1055 operand->swizzle[3]); 1056 1057 switch (operand->modifier) { 1058 case D3D10_SB_OPERAND_MODIFIER_NONE: 1059 break; 1060 case D3D10_SB_OPERAND_MODIFIER_NEG: 1061 reg = ureg_negate(reg); 1062 break; 1063 case D3D10_SB_OPERAND_MODIFIER_ABS: 1064 reg = ureg_abs(reg); 1065 break; 1066 case D3D10_SB_OPERAND_MODIFIER_ABSNEG: 1067 reg = ureg_negate(ureg_abs(reg)); 1068 break; 1069 default: 1070 assert(0); 1071 } 1072 1073 return reg; 1074} 1075 1076static uint 1077translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim) 1078{ 1079 switch (dim) { 1080 case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN: 1081 return TGSI_TEXTURE_UNKNOWN; 1082 case D3D10_SB_RESOURCE_DIMENSION_BUFFER: 1083 return TGSI_TEXTURE_BUFFER; 1084 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D: 1085 return TGSI_TEXTURE_1D; 1086 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D: 1087 return TGSI_TEXTURE_2D; 1088 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS: 1089 return TGSI_TEXTURE_2D_MSAA; 1090 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D: 1091 return TGSI_TEXTURE_3D; 1092 case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE: 1093 return TGSI_TEXTURE_CUBE; 1094 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY: 1095 return TGSI_TEXTURE_1D_ARRAY; 1096 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY: 1097 return TGSI_TEXTURE_2D_ARRAY; 1098 case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: 1099 return TGSI_TEXTURE_2D_ARRAY_MSAA; 1100 case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY: 1101 return TGSI_TEXTURE_CUBE_ARRAY; 1102 default: 1103 assert(0); 1104 return TGSI_TEXTURE_UNKNOWN; 1105 } 1106} 1107 1108static uint 1109texture_dim_from_tgsi_target(unsigned tgsi_target) 1110{ 1111 switch (tgsi_target) { 1112 case TGSI_TEXTURE_BUFFER: 1113 case TGSI_TEXTURE_1D: 1114 case TGSI_TEXTURE_1D_ARRAY: 1115 return 1; 1116 case TGSI_TEXTURE_2D: 1117 case TGSI_TEXTURE_2D_MSAA: 1118 case TGSI_TEXTURE_CUBE: 1119 case TGSI_TEXTURE_2D_ARRAY: 1120 case TGSI_TEXTURE_2D_ARRAY_MSAA: 1121 return 2; 1122 case TGSI_TEXTURE_3D: 1123 return 3; 1124 case TGSI_TEXTURE_UNKNOWN: 1125 default: 1126 assert(0); 1127 return 1; 1128 } 1129} 1130 1131static boolean 1132operand_is_scalar(const struct Shader_src_operand *operand) 1133{ 1134 return operand->swizzle[0] == operand->swizzle[1] && 1135 operand->swizzle[1] == operand->swizzle[2] && 1136 operand->swizzle[2] == operand->swizzle[3]; 1137} 1138 1139static void 1140Shader_add_call(struct Shader_xlate *sx, 1141 unsigned d3d_label, 1142 unsigned tgsi_label_token) 1143{ 1144 ASSERT(sx->num_calls < sx->max_calls); 1145 1146 sx->calls[sx->num_calls].d3d_label = d3d_label; 1147 sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token; 1148 sx->num_calls++; 1149} 1150 1151static void 1152Shader_add_label(struct Shader_xlate *sx, 1153 unsigned d3d_label, 1154 unsigned tgsi_insn_no) 1155{ 1156 ASSERT(sx->num_labels < sx->max_labels); 1157 1158 sx->labels[sx->num_labels].d3d_label = d3d_label; 1159 sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no; 1160 sx->num_labels++; 1161} 1162 1163 1164static void 1165sample_ureg_emit(struct ureg_program *ureg, 1166 unsigned tgsi_opcode, 1167 unsigned num_src, 1168 struct Shader_opcode *opcode, 1169 struct ureg_dst dst, 1170 struct ureg_src *src) 1171{ 1172 unsigned num_offsets = 0; 1173 struct tgsi_texture_offset texoffsets; 1174 1175 memset(&texoffsets, 0, sizeof texoffsets); 1176 1177 if (opcode->imm_texel_offset.u || 1178 opcode->imm_texel_offset.v || 1179 opcode->imm_texel_offset.w) { 1180 struct ureg_src offsetreg; 1181 num_offsets = 1; 1182 /* don't actually always need all 3 values */ 1183 offsetreg = ureg_imm3i(ureg, 1184 opcode->imm_texel_offset.u, 1185 opcode->imm_texel_offset.v, 1186 opcode->imm_texel_offset.w); 1187 texoffsets.File = offsetreg.File; 1188 texoffsets.Index = offsetreg.Index; 1189 texoffsets.SwizzleX = offsetreg.SwizzleX; 1190 texoffsets.SwizzleY = offsetreg.SwizzleY; 1191 texoffsets.SwizzleZ = offsetreg.SwizzleZ; 1192 } 1193 1194 ureg_tex_insn(ureg, 1195 tgsi_opcode, 1196 &dst, 1, 1197 TGSI_TEXTURE_UNKNOWN, 1198 TGSI_RETURN_TYPE_UNKNOWN, 1199 &texoffsets, num_offsets, 1200 src, num_src); 1201} 1202 1203typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst, 1204 struct ureg_src src); 1205static void 1206expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func, 1207 struct Shader_xlate *sx, struct Shader_opcode *opcode) 1208{ 1209 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1210 struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0], 1211 opcode->saturate); 1212 struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT); 1213 struct ureg_dst scalar_dst; 1214 ureg_MOV(ureg, tmp, src); 1215 src = ureg_src(tmp); 1216 1217 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X); 1218 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) { 1219 func(ureg, scalar_dst, 1220 ureg_scalar(src, TGSI_SWIZZLE_X)); 1221 } 1222 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y); 1223 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) { 1224 func(ureg, scalar_dst, 1225 ureg_scalar(src, TGSI_SWIZZLE_Y)); 1226 } 1227 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z); 1228 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) { 1229 func(ureg, scalar_dst, 1230 ureg_scalar(src, TGSI_SWIZZLE_Z)); 1231 } 1232 scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W); 1233 if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) { 1234 func(ureg, scalar_dst, 1235 ureg_scalar(src, TGSI_SWIZZLE_W)); 1236 } 1237 ureg_release_temporary(ureg, tmp); 1238} 1239 1240const struct tgsi_token * 1241Shader_tgsi_translate(const unsigned *code, 1242 unsigned *output_mapping) 1243{ 1244 struct Shader_xlate sx; 1245 struct Shader_parser parser; 1246 struct ureg_program *ureg = NULL; 1247 struct Shader_opcode opcode; 1248 const struct tgsi_token *tokens = NULL; 1249 uint nr_tokens; 1250 boolean shader_dumped = FALSE; 1251 boolean inside_sub = FALSE; 1252 uint i, j; 1253 1254 memset(&sx, 0, sizeof sx); 1255 1256 Shader_parse_init(&parser, code); 1257 1258 if (st_debug & ST_DEBUG_TGSI) { 1259 dx10_shader_dump_tokens(code); 1260 shader_dumped = TRUE; 1261 } 1262 1263 sx.max_calls = 64; 1264 sx.calls = (struct Shader_call *)MALLOC(sx.max_calls * 1265 sizeof(struct Shader_call)); 1266 sx.num_calls = 0; 1267 1268 sx.max_labels = 64; 1269 sx.labels = (struct Shader_label *)MALLOC(sx.max_labels * 1270 sizeof(struct Shader_call)); 1271 sx.num_labels = 0; 1272 1273 1274 1275 /* Header. */ 1276 switch (parser.header.type) { 1277 case D3D10_SB_PIXEL_SHADER: 1278 ureg = ureg_create(PIPE_SHADER_FRAGMENT); 1279 break; 1280 case D3D10_SB_VERTEX_SHADER: 1281 ureg = ureg_create(PIPE_SHADER_VERTEX); 1282 break; 1283 case D3D10_SB_GEOMETRY_SHADER: 1284 ureg = ureg_create(PIPE_SHADER_GEOMETRY); 1285 break; 1286 } 1287 1288 assert(ureg); 1289 sx.ureg = ureg; 1290 1291 while (Shader_parse_opcode(&parser, &opcode)) { 1292 const struct dx10_opcode_xlate *ox; 1293 1294 assert(opcode.type < D3D10_SB_NUM_OPCODES); 1295 ox = &opcode_xlate[opcode.type]; 1296 1297 switch (opcode.type) { 1298 case D3D10_SB_OPCODE_EXP: 1299 expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode); 1300 break; 1301 case D3D10_SB_OPCODE_SQRT: 1302 expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode); 1303 break; 1304 case D3D10_SB_OPCODE_RSQ: 1305 expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode); 1306 break; 1307 case D3D10_SB_OPCODE_LOG: 1308 expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode); 1309 break; 1310 case D3D10_SB_OPCODE_IMUL: 1311 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1312 ureg_IMUL_HI(ureg, 1313 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), 1314 translate_src_operand(&sx, &opcode.src[0], OF_INT), 1315 translate_src_operand(&sx, &opcode.src[1], OF_INT)); 1316 } 1317 1318 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1319 ureg_UMUL(ureg, 1320 translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate), 1321 translate_src_operand(&sx, &opcode.src[0], OF_INT), 1322 translate_src_operand(&sx, &opcode.src[1], OF_INT)); 1323 } 1324 1325 break; 1326 1327 case D3D10_SB_OPCODE_FTOI: { 1328 /* XXX: tgsi (and just about everybody else, c, opencl, glsl) has 1329 * out-of-range (and NaN) values undefined for f2i/f2u, but d3d10 1330 * requires clamping to min and max representable value (as well as 0 1331 * for NaNs) (this applies to both ftoi and ftou). At least the online 1332 * docs state that - this is consistent with generic d3d10 conversion 1333 * rules. 1334 * For FTOI, we cheat a bit here - in particular depending on noone 1335 * caring about NaNs, and depending on the (undefined!) behavior of 1336 * F2I returning 0x80000000 for too negative values (which works with 1337 * x86 sse). Hence only need to clamp too positive values. 1338 * Note that it is impossible to clamp using a float, since 2^31 - 1 1339 * is not exactly representable with a float. 1340 */ 1341 struct ureg_dst too_large = ureg_DECL_temporary(ureg); 1342 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1343 ureg_FSGE(ureg, too_large, 1344 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), 1345 ureg_imm1f(ureg, 2147483648.0f)); 1346 ureg_F2I(ureg, tmp, 1347 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1348 ureg_UCMP(ureg, 1349 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), 1350 ureg_src(too_large), 1351 ureg_imm1i(ureg, 0x7fffffff), 1352 ureg_src(tmp)); 1353 ureg_release_temporary(ureg, too_large); 1354 ureg_release_temporary(ureg, tmp); 1355 } 1356 break; 1357 1358 case D3D10_SB_OPCODE_FTOU: { 1359 /* For ftou, we need to do both clamps, which as a bonus also 1360 * gets us correct NaN behavior. 1361 * Note that it is impossible to clamp using a float against the upper 1362 * limit, since 2^32 - 1 is not exactly representable with a float, 1363 * but the clamp against 0.0 certainly works just fine. 1364 */ 1365 struct ureg_dst too_large = ureg_DECL_temporary(ureg); 1366 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1367 ureg_FSGE(ureg, too_large, 1368 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), 1369 ureg_imm1f(ureg, 4294967296.0f)); 1370 /* clamp negative values + NaN to zero. 1371 * (Could be done slightly more efficient in llvmpipe due to 1372 * MAX NaN behavior handling.) 1373 */ 1374 ureg_MAX(ureg, tmp, 1375 ureg_imm1f(ureg, 0.0f), 1376 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1377 ureg_F2U(ureg, tmp, 1378 ureg_src(tmp)); 1379 ureg_UCMP(ureg, 1380 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), 1381 ureg_src(too_large), 1382 ureg_imm1u(ureg, 0xffffffff), 1383 ureg_src(tmp)); 1384 ureg_release_temporary(ureg, too_large); 1385 ureg_release_temporary(ureg, tmp); 1386 } 1387 break; 1388 1389 case D3D10_SB_OPCODE_LD_MS: 1390 /* XXX: We don't support multi-sampling yet, but we need to parse 1391 * this opcode regardless, so we just ignore sample index operand 1392 * for now */ 1393 case D3D10_SB_OPCODE_LD: 1394 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1395 unsigned resource = opcode.src[1].base.index[0].imm; 1396 assert(opcode.src[1].base.index_dim == 1); 1397 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1398 1399 if (ureg_src_is_undef(sx.samplers[resource])) { 1400 sx.samplers[resource] = 1401 ureg_DECL_sampler(ureg, resource); 1402 } 1403 1404 ureg_TXF(ureg, 1405 translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), 1406 sx.resources[resource].target, 1407 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), 1408 sx.samplers[resource]); 1409 } 1410 else { 1411 struct ureg_src srcreg[2]; 1412 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT); 1413 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT); 1414 1415 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode, 1416 translate_dst_operand(&sx, &opcode.dst[0], 1417 opcode.saturate), 1418 srcreg); 1419 } 1420 break; 1421 1422 case D3D10_SB_OPCODE_CUSTOMDATA: 1423 if (opcode.customdata._class == 1424 D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) { 1425 sx.imms = 1426 ureg_DECL_immediate_block_uint(ureg, 1427 opcode.customdata.u.constbuf.data, 1428 opcode.customdata.u.constbuf.count); 1429 } else { 1430 assert(0); 1431 } 1432 break; 1433 1434 case D3D10_SB_OPCODE_RESINFO: 1435 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1436 unsigned resource = opcode.src[1].base.index[0].imm; 1437 assert(opcode.src[1].base.index_dim == 1); 1438 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1439 1440 if (ureg_src_is_undef(sx.samplers[resource])) { 1441 sx.samplers[resource] = 1442 ureg_DECL_sampler(ureg, resource); 1443 } 1444 /* don't bother with swizzle, ret type etc. */ 1445 ureg_TXQ(ureg, 1446 translate_dst_operand(&sx, &opcode.dst[0], 1447 opcode.saturate), 1448 sx.resources[resource].target, 1449 translate_src_operand(&sx, &opcode.src[0], OF_UINT), 1450 sx.samplers[resource]); 1451 } 1452 else { 1453 struct ureg_dst r0 = ureg_DECL_temporary(ureg); 1454 struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1455 struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0], 1456 opcode.saturate); 1457 1458 /* while specs say swizzle is ignored better safe than sorry */ 1459 tsrc.SwizzleX = TGSI_SWIZZLE_X; 1460 tsrc.SwizzleY = TGSI_SWIZZLE_Y; 1461 tsrc.SwizzleZ = TGSI_SWIZZLE_Z; 1462 tsrc.SwizzleW = TGSI_SWIZZLE_W; 1463 1464 ureg_SVIEWINFO(ureg, r0, 1465 translate_src_operand(&sx, &opcode.src[0], OF_UINT), 1466 tsrc); 1467 1468 tsrc = ureg_src(r0); 1469 tsrc.SwizzleX = opcode.src[1].swizzle[0]; 1470 tsrc.SwizzleY = opcode.src[1].swizzle[1]; 1471 tsrc.SwizzleZ = opcode.src[1].swizzle[2]; 1472 tsrc.SwizzleW = opcode.src[1].swizzle[3]; 1473 1474 if (opcode.specific.resinfo_ret_type == 1475 D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) { 1476 ureg_MOV(ureg, dstreg, tsrc); 1477 } 1478 else if (opcode.specific.resinfo_ret_type == 1479 D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) { 1480 ureg_I2F(ureg, dstreg, tsrc); 1481 } 1482 else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */ 1483 unsigned i; 1484 /* 1485 * Must apply rcp only to parts determined by dims, 1486 * (width/height/depth) but NOT to array size nor mip levels 1487 * hence need to figure that out here. 1488 * This is one sick modifier if you ask me! 1489 */ 1490 unsigned res_index = opcode.src[1].base.index[0].imm; 1491 unsigned target = sx.resources[res_index].target; 1492 unsigned dims = texture_dim_from_tgsi_target(target); 1493 1494 ureg_I2F(ureg, r0, ureg_src(r0)); 1495 tsrc = ureg_src(r0); 1496 for (i = 0; i < 4; i++) { 1497 unsigned dst_swizzle = opcode.src[1].swizzle[i]; 1498 struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i); 1499 /* 1500 * could do one mov with multiple write mask bits set 1501 * but rcp is scalar anyway. 1502 */ 1503 if (dst_swizzle < dims) { 1504 ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle)); 1505 } 1506 else { 1507 ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle)); 1508 } 1509 } 1510 } 1511 ureg_release_temporary(ureg, r0); 1512 } 1513 break; 1514 1515 case D3D10_SB_OPCODE_SAMPLE: 1516 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1517 assert(opcode.src[1].base.index_dim == 1); 1518 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1519 1520 LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm); 1521 1522 ureg_TEX(ureg, 1523 translate_dst_operand(&sx, &opcode.dst[0], 1524 opcode.saturate), 1525 sx.resources[opcode.src[1].base.index[0].imm].target, 1526 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), 1527 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1528 } 1529 else { 1530 struct ureg_src srcreg[3]; 1531 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1532 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1533 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1534 1535 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode, 1536 translate_dst_operand(&sx, &opcode.dst[0], 1537 opcode.saturate), 1538 srcreg); 1539 } 1540 break; 1541 1542 case D3D10_SB_OPCODE_SAMPLE_C: 1543 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1544 struct ureg_dst r0 = ureg_DECL_temporary(ureg); 1545 1546 /* XXX: Support only 2D texture targets for now. 1547 * Need to figure out how to pack the compare value 1548 * for other dimensions and if there is enough space 1549 * in a single operand for all possible cases. 1550 */ 1551 LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target != 1552 TGSI_TEXTURE_2D); 1553 1554 assert(opcode.src[1].base.index_dim == 1); 1555 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1556 1557 /* Insert the compare value into .z component. 1558 */ 1559 ureg_MOV(ureg, 1560 ureg_writemask(r0, TGSI_WRITEMASK_XYW), 1561 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1562 ureg_MOV(ureg, 1563 ureg_writemask(r0, TGSI_WRITEMASK_Z), 1564 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); 1565 1566 /* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case. 1567 */ 1568 1569 ureg_TEX(ureg, 1570 translate_dst_operand(&sx, &opcode.dst[0], 1571 opcode.saturate), 1572 sx.resources[opcode.src[1].base.index[0].imm].target, 1573 ureg_src(r0), 1574 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1575 1576 ureg_release_temporary(ureg, r0); 1577 } 1578 else { 1579 struct ureg_src srcreg[4]; 1580 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1581 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1582 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1583 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); 1584 1585 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode, 1586 translate_dst_operand(&sx, &opcode.dst[0], 1587 opcode.saturate), 1588 srcreg); 1589 } 1590 break; 1591 1592 case D3D10_SB_OPCODE_SAMPLE_C_LZ: 1593 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1594 struct ureg_dst r0 = ureg_DECL_temporary(ureg); 1595 1596 assert(opcode.src[1].base.index_dim == 1); 1597 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1598 1599 /* XXX: Support only 2D texture targets for now. 1600 * Need to figure out how to pack the compare value 1601 * for other dimensions and if there is enough space 1602 * in a single operand for all possible cases. 1603 */ 1604 LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target != 1605 TGSI_TEXTURE_2D); 1606 1607 /* Insert the compare value into .z component. 1608 * Insert 0 into .w component. 1609 */ 1610 ureg_MOV(ureg, 1611 ureg_writemask(r0, TGSI_WRITEMASK_XY), 1612 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1613 ureg_MOV(ureg, 1614 ureg_writemask(r0, TGSI_WRITEMASK_Z), 1615 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); 1616 ureg_MOV(ureg, 1617 ureg_writemask(r0, TGSI_WRITEMASK_W), 1618 ureg_imm1f(ureg, 0.0f)); 1619 1620 ureg_TXL(ureg, 1621 translate_dst_operand(&sx, &opcode.dst[0], 1622 opcode.saturate), 1623 sx.resources[opcode.src[1].base.index[0].imm].target, 1624 ureg_src(r0), 1625 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1626 1627 ureg_release_temporary(ureg, r0); 1628 } 1629 else { 1630 struct ureg_src srcreg[4]; 1631 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1632 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1633 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1634 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); 1635 1636 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode, 1637 translate_dst_operand(&sx, &opcode.dst[0], 1638 opcode.saturate), 1639 srcreg); 1640 } 1641 break; 1642 1643 case D3D10_SB_OPCODE_SAMPLE_L: 1644 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1645 struct ureg_dst r0 = ureg_DECL_temporary(ureg); 1646 1647 assert(opcode.src[1].base.index_dim == 1); 1648 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1649 1650 /* Insert LOD into .w component. 1651 */ 1652 ureg_MOV(ureg, 1653 ureg_writemask(r0, TGSI_WRITEMASK_XYZ), 1654 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1655 ureg_MOV(ureg, 1656 ureg_writemask(r0, TGSI_WRITEMASK_W), 1657 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); 1658 1659 ureg_TXL(ureg, 1660 translate_dst_operand(&sx, &opcode.dst[0], 1661 opcode.saturate), 1662 sx.resources[opcode.src[1].base.index[0].imm].target, 1663 ureg_src(r0), 1664 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1665 1666 ureg_release_temporary(ureg, r0); 1667 } 1668 else { 1669 struct ureg_src srcreg[4]; 1670 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1671 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1672 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1673 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); 1674 1675 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode, 1676 translate_dst_operand(&sx, &opcode.dst[0], 1677 opcode.saturate), 1678 srcreg); 1679 } 1680 break; 1681 1682 case D3D10_SB_OPCODE_SAMPLE_D: 1683 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1684 assert(opcode.src[1].base.index_dim == 1); 1685 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1686 1687 ureg_TXD(ureg, 1688 translate_dst_operand(&sx, &opcode.dst[0], 1689 opcode.saturate), 1690 sx.resources[opcode.src[1].base.index[0].imm].target, 1691 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), 1692 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT), 1693 translate_src_operand(&sx, &opcode.src[4], OF_FLOAT), 1694 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1695 } 1696 else { 1697 struct ureg_src srcreg[5]; 1698 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1699 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1700 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1701 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); 1702 srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT); 1703 1704 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode, 1705 translate_dst_operand(&sx, &opcode.dst[0], 1706 opcode.saturate), 1707 srcreg); 1708 } 1709 break; 1710 1711 case D3D10_SB_OPCODE_SAMPLE_B: 1712 if (st_debug & ST_DEBUG_OLD_TEX_OPS) { 1713 struct ureg_dst r0 = ureg_DECL_temporary(ureg); 1714 1715 assert(opcode.src[1].base.index_dim == 1); 1716 assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); 1717 1718 /* Insert LOD bias into .w component. 1719 */ 1720 ureg_MOV(ureg, 1721 ureg_writemask(r0, TGSI_WRITEMASK_XYZ), 1722 translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1723 ureg_MOV(ureg, 1724 ureg_writemask(r0, TGSI_WRITEMASK_W), 1725 translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); 1726 1727 ureg_TXB(ureg, 1728 translate_dst_operand(&sx, &opcode.dst[0], 1729 opcode.saturate), 1730 sx.resources[opcode.src[1].base.index[0].imm].target, 1731 ureg_src(r0), 1732 translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); 1733 1734 ureg_release_temporary(ureg, r0); 1735 } 1736 else { 1737 struct ureg_src srcreg[4]; 1738 srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); 1739 srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); 1740 srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); 1741 srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); 1742 1743 sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode, 1744 translate_dst_operand(&sx, &opcode.dst[0], 1745 opcode.saturate), 1746 srcreg); 1747 } 1748 break; 1749 1750 case D3D10_SB_OPCODE_SINCOS: { 1751 struct ureg_dst src0 = ureg_DECL_temporary(ureg); 1752 ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); 1753 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1754 struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0], 1755 opcode.saturate); 1756 struct ureg_src src = ureg_src(src0); 1757 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), 1758 ureg_scalar(src, TGSI_SWIZZLE_X)); 1759 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), 1760 ureg_scalar(src, TGSI_SWIZZLE_Y)); 1761 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 1762 ureg_scalar(src, TGSI_SWIZZLE_Z)); 1763 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), 1764 ureg_scalar(src, TGSI_SWIZZLE_W)); 1765 } 1766 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1767 struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1], 1768 opcode.saturate); 1769 struct ureg_src src = ureg_src(src0); 1770 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), 1771 ureg_scalar(src, TGSI_SWIZZLE_X)); 1772 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), 1773 ureg_scalar(src, TGSI_SWIZZLE_Y)); 1774 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 1775 ureg_scalar(src, TGSI_SWIZZLE_Z)); 1776 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), 1777 ureg_scalar(src, TGSI_SWIZZLE_W)); 1778 } 1779 ureg_release_temporary(ureg, src0); 1780 } 1781 break; 1782 1783 case D3D10_SB_OPCODE_UDIV: { 1784 struct ureg_dst src0 = ureg_DECL_temporary(ureg); 1785 struct ureg_dst src1 = ureg_DECL_temporary(ureg); 1786 ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT)); 1787 ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT)); 1788 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1789 ureg_UDIV(ureg, 1790 translate_dst_operand(&sx, &opcode.dst[0], 1791 opcode.saturate), 1792 ureg_src(src0), ureg_src(src1)); 1793 } 1794 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1795 ureg_UMOD(ureg, 1796 translate_dst_operand(&sx, &opcode.dst[1], 1797 opcode.saturate), 1798 ureg_src(src0), ureg_src(src1)); 1799 } 1800 ureg_release_temporary(ureg, src0); 1801 ureg_release_temporary(ureg, src1); 1802 } 1803 break; 1804 case D3D10_SB_OPCODE_UMUL: { 1805 if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1806 ureg_UMUL_HI(ureg, 1807 translate_dst_operand(&sx, &opcode.dst[0], 1808 opcode.saturate), 1809 translate_src_operand(&sx, &opcode.src[0], OF_UINT), 1810 translate_src_operand(&sx, &opcode.src[1], OF_UINT)); 1811 } 1812 if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { 1813 ureg_UMUL(ureg, 1814 translate_dst_operand(&sx, &opcode.dst[1], 1815 opcode.saturate), 1816 translate_src_operand(&sx, &opcode.src[0], OF_UINT), 1817 translate_src_operand(&sx, &opcode.src[1], OF_UINT)); 1818 } 1819 } 1820 break; 1821 1822 case D3D10_SB_OPCODE_DCL_RESOURCE: 1823 { 1824 unsigned target; 1825 unsigned res_index = opcode.dst[0].base.index[0].imm; 1826 assert(opcode.dst[0].base.index_dim == 1); 1827 assert(res_index < SHADER_MAX_RESOURCES); 1828 1829 target = translate_resource_dimension(opcode.specific.dcl_resource_dimension); 1830 sx.resources[res_index].target = target; 1831 if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) { 1832 sx.sv[res_index] = 1833 ureg_DECL_sampler_view(ureg, res_index, target, 1834 trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]), 1835 trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]), 1836 trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]), 1837 trans_dcl_ret_type(opcode.dcl_resource_ret_type[3])); 1838 } 1839 break; 1840 } 1841 1842 case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: { 1843 unsigned num_constants = opcode.src[0].base.index[1].imm; 1844 1845 assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS); 1846 1847 if (num_constants == 0) { 1848 num_constants = SHADER_MAX_CONSTS; 1849 } else { 1850 assert(num_constants <= SHADER_MAX_CONSTS); 1851 } 1852 1853 ureg_DECL_constant2D(ureg, 1854 0, 1855 num_constants - 1, 1856 opcode.src[0].base.index[0].imm); 1857 break; 1858 } 1859 1860 case D3D10_SB_OPCODE_DCL_SAMPLER: 1861 assert(opcode.dst[0].base.index_dim == 1); 1862 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS); 1863 1864 sx.samplers[opcode.dst[0].base.index[0].imm] = 1865 ureg_DECL_sampler(ureg, 1866 opcode.dst[0].base.index[0].imm); 1867 break; 1868 1869 case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: 1870 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); 1871 1872 switch (opcode.specific.dcl_gs_output_primitive_topology) { 1873 case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST: 1874 ureg_property(sx.ureg, 1875 TGSI_PROPERTY_GS_OUTPUT_PRIM, 1876 PIPE_PRIM_POINTS); 1877 break; 1878 1879 case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP: 1880 ureg_property(sx.ureg, 1881 TGSI_PROPERTY_GS_OUTPUT_PRIM, 1882 PIPE_PRIM_LINE_STRIP); 1883 break; 1884 1885 case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: 1886 ureg_property(sx.ureg, 1887 TGSI_PROPERTY_GS_OUTPUT_PRIM, 1888 PIPE_PRIM_TRIANGLE_STRIP); 1889 break; 1890 1891 default: 1892 assert(0); 1893 } 1894 break; 1895 1896 case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE: 1897 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); 1898 1899 /* Figure out the second dimension of GS inputs. 1900 */ 1901 switch (opcode.specific.dcl_gs_input_primitive) { 1902 case D3D10_SB_PRIMITIVE_POINT: 1903 declare_vertices_in(&sx, 1); 1904 ureg_property(sx.ureg, 1905 TGSI_PROPERTY_GS_INPUT_PRIM, 1906 PIPE_PRIM_POINTS); 1907 break; 1908 1909 case D3D10_SB_PRIMITIVE_LINE: 1910 declare_vertices_in(&sx, 2); 1911 ureg_property(sx.ureg, 1912 TGSI_PROPERTY_GS_INPUT_PRIM, 1913 PIPE_PRIM_LINES); 1914 break; 1915 1916 case D3D10_SB_PRIMITIVE_TRIANGLE: 1917 declare_vertices_in(&sx, 3); 1918 ureg_property(sx.ureg, 1919 TGSI_PROPERTY_GS_INPUT_PRIM, 1920 PIPE_PRIM_TRIANGLES); 1921 break; 1922 1923 case D3D10_SB_PRIMITIVE_LINE_ADJ: 1924 declare_vertices_in(&sx, 4); 1925 ureg_property(sx.ureg, 1926 TGSI_PROPERTY_GS_INPUT_PRIM, 1927 PIPE_PRIM_LINES_ADJACENCY); 1928 break; 1929 1930 case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ: 1931 declare_vertices_in(&sx, 6); 1932 ureg_property(sx.ureg, 1933 TGSI_PROPERTY_GS_INPUT_PRIM, 1934 PIPE_PRIM_TRIANGLES_ADJACENCY); 1935 break; 1936 1937 default: 1938 assert(0); 1939 } 1940 break; 1941 1942 case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: 1943 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); 1944 1945 ureg_property(sx.ureg, 1946 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1947 opcode.specific.dcl_max_output_vertex_count); 1948 break; 1949 1950 case D3D10_SB_OPCODE_DCL_INPUT: 1951 if (parser.header.type == D3D10_SB_VERTEX_SHADER) { 1952 dcl_vs_input(&sx, ureg, &opcode.dst[0]); 1953 } else { 1954 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); 1955 dcl_gs_input(&sx, ureg, &opcode.dst[0]); 1956 } 1957 break; 1958 1959 case D3D10_SB_OPCODE_DCL_INPUT_SGV: 1960 assert(parser.header.type == D3D10_SB_VERTEX_SHADER); 1961 dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name); 1962 break; 1963 1964 case D3D10_SB_OPCODE_DCL_INPUT_SIV: 1965 assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); 1966 dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name); 1967 break; 1968 1969 case D3D10_SB_OPCODE_DCL_INPUT_PS: 1970 assert(parser.header.type == D3D10_SB_PIXEL_SHADER); 1971 dcl_ps_input(&sx, ureg, &opcode.dst[0], 1972 opcode.specific.dcl_in_ps_interp); 1973 break; 1974 1975 case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV: 1976 assert(parser.header.type == D3D10_SB_PIXEL_SHADER); 1977 dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0], 1978 opcode.dcl_siv_name); 1979 break; 1980 1981 case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV: 1982 assert(parser.header.type == D3D10_SB_PIXEL_SHADER); 1983 dcl_ps_siv_input(&sx, ureg, &opcode.dst[0], 1984 opcode.dcl_siv_name, 1985 opcode.specific.dcl_in_ps_interp); 1986 break; 1987 1988 case D3D10_SB_OPCODE_DCL_OUTPUT: 1989 if (parser.header.type == D3D10_SB_PIXEL_SHADER) { 1990 /* Pixel shader outputs. */ 1991 if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) { 1992 /* Depth output. */ 1993 assert(opcode.dst[0].base.index_dim == 0); 1994 1995 sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1); 1996 sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z); 1997 } else { 1998 /* Color outputs. */ 1999 assert(opcode.dst[0].base.index_dim == 1); 2000 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS); 2001 2002 dcl_base_output(&sx, ureg, 2003 ureg_DECL_output(ureg, 2004 TGSI_SEMANTIC_COLOR, 2005 opcode.dst[0].base.index[0].imm), 2006 &opcode.dst[0]); 2007 } 2008 } else { 2009 assert(opcode.dst[0].base.index_dim == 1); 2010 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS); 2011 2012 if (output_mapping) { 2013 unsigned nr_outputs = ureg_get_nr_outputs(ureg); 2014 output_mapping[nr_outputs] 2015 = opcode.dst[0].base.index[0].imm; 2016 } 2017 dcl_base_output(&sx, ureg, 2018 ureg_DECL_output(ureg, 2019 TGSI_SEMANTIC_GENERIC, 2020 opcode.dst[0].base.index[0].imm), 2021 &opcode.dst[0]); 2022 } 2023 break; 2024 2025 case D3D10_SB_OPCODE_DCL_OUTPUT_SIV: 2026 assert(opcode.dst[0].base.index_dim == 1); 2027 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS); 2028 2029 if (output_mapping) { 2030 unsigned nr_outputs = ureg_get_nr_outputs(ureg); 2031 output_mapping[nr_outputs] 2032 = opcode.dst[0].base.index[0].imm; 2033 } 2034 if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE || 2035 opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) { 2036 /* 2037 * FIXME: this is quite broken. gallium no longer has separate 2038 * clip/cull dists, using (max 2) combined clipdist/culldist regs 2039 * instead. Unlike d3d10 though, which is clip and which cull is 2040 * simply determined by by number of clip/cull dists (that is, 2041 * all clip dists must come first). 2042 */ 2043 unsigned numcliporcull = sx.num_clip_distances_declared + 2044 sx.num_cull_distances_declared; 2045 sx.clip_distance_mapping[numcliporcull].d3d = 2046 opcode.dst[0].base.index[0].imm; 2047 sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull; 2048 if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) { 2049 ++sx.num_clip_distances_declared; 2050 /* re-emit should be safe... */ 2051 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, 2052 sx.num_clip_distances_declared); 2053 } else { 2054 ++sx.num_cull_distances_declared; 2055 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED, 2056 sx.num_cull_distances_declared); 2057 } 2058 } else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) { 2059 sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d = 2060 opcode.dst[0].base.index[0].imm; 2061 sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi = 2062 sx.num_cull_distances_declared; 2063 ++sx.num_cull_distances_declared; 2064 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED, 2065 sx.num_cull_distances_declared); 2066 } 2067 2068 dcl_base_output(&sx, ureg, 2069 ureg_DECL_output_masked( 2070 ureg, 2071 translate_system_name(opcode.dcl_siv_name), 2072 translate_semantic_index(&sx, opcode.dcl_siv_name, 2073 &opcode.dst[0]), 2074 opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT, 2075 0, 1), 2076 &opcode.dst[0]); 2077 break; 2078 2079 case D3D10_SB_OPCODE_DCL_OUTPUT_SGV: 2080 assert(opcode.dst[0].base.index_dim == 1); 2081 assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS); 2082 2083 if (output_mapping) { 2084 unsigned nr_outputs = ureg_get_nr_outputs(ureg); 2085 output_mapping[nr_outputs] 2086 = opcode.dst[0].base.index[0].imm; 2087 } 2088 dcl_base_output(&sx, ureg, 2089 ureg_DECL_output(ureg, 2090 translate_system_name(opcode.dcl_siv_name), 2091 0), 2092 &opcode.dst[0]); 2093 break; 2094 2095 case D3D10_SB_OPCODE_DCL_TEMPS: 2096 { 2097 uint i; 2098 2099 assert(opcode.specific.dcl_num_temps + sx.declared_temps <= 2100 SHADER_MAX_TEMPS); 2101 2102 sx.temp_offset = sx.declared_temps; 2103 2104 for (i = 0; i < opcode.specific.dcl_num_temps; i++) { 2105 sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg); 2106 } 2107 sx.declared_temps += opcode.specific.dcl_num_temps; 2108 } 2109 break; 2110 2111 case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP: 2112 { 2113 uint i; 2114 2115 /* XXX: Add true indexable temps to gallium. 2116 */ 2117 2118 assert(opcode.specific.dcl_indexable_temp.index < 2119 SHADER_MAX_INDEXABLE_TEMPS); 2120 assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <= 2121 SHADER_MAX_TEMPS); 2122 2123 sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] = 2124 sx.declared_temps; 2125 2126 for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) { 2127 sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg); 2128 } 2129 sx.declared_temps += opcode.specific.dcl_indexable_temp.count; 2130 } 2131 break; 2132 case D3D10_SB_OPCODE_IF: { 2133 unsigned label = 0; 2134 if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) { 2135 struct ureg_src src = 2136 translate_src_operand(&sx, &opcode.src[0], OF_INT); 2137 struct ureg_dst src_nz = ureg_DECL_temporary(ureg); 2138 ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0)); 2139 ureg_UIF(ureg, ureg_src(src_nz), &label); 2140 ureg_release_temporary(ureg, src_nz);; 2141 } else { 2142 ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label); 2143 } 2144 } 2145 break; 2146 case D3D10_SB_OPCODE_RETC: 2147 case D3D10_SB_OPCODE_CONTINUEC: 2148 case D3D10_SB_OPCODE_CALLC: 2149 case D3D10_SB_OPCODE_DISCARD: 2150 case D3D10_SB_OPCODE_BREAKC: 2151 { 2152 unsigned label = 0; 2153 assert(operand_is_scalar(&opcode.src[0])); 2154 if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) { 2155 struct ureg_src src = 2156 translate_src_operand(&sx, &opcode.src[0], OF_INT); 2157 struct ureg_dst src_nz = ureg_DECL_temporary(ureg); 2158 ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0)); 2159 ureg_UIF(ureg, ureg_src(src_nz), &label); 2160 ureg_release_temporary(ureg, src_nz); 2161 } 2162 else { 2163 ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label); 2164 } 2165 switch (opcode.type) { 2166 case D3D10_SB_OPCODE_RETC: 2167 ureg_RET(ureg); 2168 break; 2169 case D3D10_SB_OPCODE_CONTINUEC: 2170 ureg_CONT(ureg); 2171 break; 2172 case D3D10_SB_OPCODE_CALLC: { 2173 unsigned label = opcode.src[1].base.index[0].imm; 2174 unsigned tgsi_token_label = 0; 2175 ureg_CAL(ureg, &tgsi_token_label); 2176 Shader_add_call(&sx, label, tgsi_token_label); 2177 } 2178 break; 2179 case D3D10_SB_OPCODE_DISCARD: 2180 ureg_KILL(ureg); 2181 break; 2182 case D3D10_SB_OPCODE_BREAKC: 2183 ureg_BRK(ureg); 2184 break; 2185 default: 2186 assert(0); 2187 break; 2188 } 2189 ureg_ENDIF(ureg); 2190 } 2191 break; 2192 case D3D10_SB_OPCODE_LABEL: { 2193 unsigned label = opcode.src[0].base.index[0].imm; 2194 unsigned tgsi_inst_no = 0; 2195 if (inside_sub) { 2196 ureg_ENDSUB(ureg); 2197 } 2198 tgsi_inst_no = ureg_get_instruction_number(ureg); 2199 ureg_BGNSUB(ureg); 2200 inside_sub = TRUE; 2201 Shader_add_label(&sx, label, tgsi_inst_no); 2202 } 2203 break; 2204 case D3D10_SB_OPCODE_CALL: { 2205 unsigned label = opcode.src[0].base.index[0].imm; 2206 unsigned tgsi_token_label = 0; 2207 ureg_CAL(ureg, &tgsi_token_label); 2208 Shader_add_call(&sx, label, tgsi_token_label); 2209 } 2210 break; 2211 case D3D10_SB_OPCODE_EMIT: 2212 ureg_EMIT(ureg, ureg_imm1u(ureg, 0)); 2213 break; 2214 case D3D10_SB_OPCODE_CUT: 2215 ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0)); 2216 break; 2217 case D3D10_SB_OPCODE_EMITTHENCUT: 2218 ureg_EMIT(ureg, ureg_imm1u(ureg, 0)); 2219 ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0)); 2220 break; 2221 case D3D10_SB_OPCODE_DCL_INDEX_RANGE: 2222 case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS: 2223 /* Ignore */ 2224 break; 2225 default: 2226 { 2227 uint i; 2228 struct ureg_dst dst[SHADER_MAX_DST_OPERANDS]; 2229 struct ureg_src src[SHADER_MAX_SRC_OPERANDS]; 2230 2231 assert(ox->tgsi_opcode != TGSI_EXPAND); 2232 2233 if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) { 2234 if (!shader_dumped) { 2235 dx10_shader_dump_tokens(code); 2236 shader_dumped = TRUE; 2237 } 2238 debug_printf("%s: unsupported opcode %i\n", 2239 __FUNCTION__, ox->type); 2240 assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED); 2241 } 2242 2243 /* Destination operands. */ 2244 for (i = 0; i < opcode.num_dst; i++) { 2245 dst[i] = translate_dst_operand(&sx, &opcode.dst[i], 2246 opcode.saturate); 2247 } 2248 2249 /* Source operands. */ 2250 for (i = 0; i < opcode.num_src; i++) { 2251 src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format); 2252 } 2253 2254 /* Try to re-route output depth to Z channel. */ 2255 if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) { 2256 LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV); 2257 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z); 2258 src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X); 2259 } 2260 2261 ureg_insn(ureg, 2262 ox->tgsi_opcode, 2263 dst, 2264 opcode.num_dst, 2265 src, 2266 opcode.num_src, 0); 2267 } 2268 } 2269 2270 Shader_opcode_free(&opcode); 2271 } 2272 2273 if (inside_sub) { 2274 ureg_ENDSUB(ureg); 2275 } 2276 2277 ureg_END(ureg); 2278 2279 for (i = 0; i < sx.num_calls; ++i) { 2280 for (j = 0; j < sx.num_labels; ++j) { 2281 if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) { 2282 ureg_fixup_label(sx.ureg, 2283 sx.calls[i].tgsi_label_token, 2284 sx.labels[j].tgsi_insn_no); 2285 break; 2286 } 2287 } 2288 ASSERT(j < sx.num_labels); 2289 } 2290 FREE(sx.labels); 2291 FREE(sx.calls); 2292 2293 tokens = ureg_get_tokens(ureg, &nr_tokens); 2294 assert(tokens); 2295 ureg_destroy(ureg); 2296 2297 if (st_debug & ST_DEBUG_TGSI) { 2298 tgsi_dump(tokens, 0); 2299 } 2300 2301 return tokens; 2302} 2303