1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_to_dxil.h" 25 26#include "dxil_container.h" 27#include "dxil_dump.h" 28#include "dxil_enums.h" 29#include "dxil_function.h" 30#include "dxil_module.h" 31#include "dxil_nir.h" 32#include "dxil_signature.h" 33 34#include "nir/nir_builder.h" 35#include "util/u_debug.h" 36#include "util/u_dynarray.h" 37#include "util/u_math.h" 38 39#include "git_sha1.h" 40 41#include "vulkan/vulkan_core.h" 42 43#include <stdint.h> 44 45int debug_dxil = 0; 46 47static const struct debug_named_value 48dxil_debug_options[] = { 49 { "verbose", DXIL_DEBUG_VERBOSE, NULL }, 50 { "dump_blob", DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" }, 51 { "trace", DXIL_DEBUG_TRACE , "Trace instruction conversion" }, 52 { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"}, 53 DEBUG_NAMED_VALUE_END 54}; 55 56DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0) 57 58#define NIR_INSTR_UNSUPPORTED(instr) \ 59 if (debug_dxil & DXIL_DEBUG_VERBOSE) \ 60 do { \ 61 fprintf(stderr, "Unsupported instruction:"); \ 62 nir_print_instr(instr, stderr); \ 63 fprintf(stderr, "\n"); \ 64 } while (0) 65 66#define TRACE_CONVERSION(instr) \ 67 if (debug_dxil & DXIL_DEBUG_TRACE) \ 68 do { \ 69 fprintf(stderr, "Convert '"); \ 70 nir_print_instr(instr, stderr); \ 71 fprintf(stderr, "'\n"); \ 72 } while (0) 73 74static const nir_shader_compiler_options 75nir_options = { 76 .lower_ineg = true, 77 .lower_fneg = true, 78 .lower_ffma16 = true, 79 .lower_ffma32 = true, 80 .lower_isign = true, 81 .lower_fsign = true, 82 .lower_iabs = true, 83 .lower_fmod = true, 84 .lower_fpow = true, 85 .lower_scmp = true, 86 .lower_ldexp = true, 87 .lower_flrp16 = true, 88 .lower_flrp32 = true, 89 .lower_flrp64 = true, 90 .lower_bitfield_extract = true, 91 .lower_find_msb_to_reverse = true, 92 .lower_extract_word = true, 93 .lower_extract_byte = true, 94 .lower_insert_word = true, 95 .lower_insert_byte = true, 96 .lower_all_io_to_elements = true, 97 .lower_all_io_to_temps = true, 98 .lower_hadd = true, 99 .lower_uadd_sat = true, 100 .lower_usub_sat = true, 101 .lower_iadd_sat = true, 102 .lower_uadd_carry = true, 103 .lower_mul_high = true, 104 .lower_rotate = true, 105 .lower_pack_64_2x32_split = true, 106 .lower_pack_32_2x16_split = true, 107 .lower_unpack_64_2x32_split = true, 108 .lower_unpack_32_2x16_split = true, 109 .lower_unpack_half_2x16 = true, 110 .lower_unpack_snorm_2x16 = true, 111 .lower_unpack_snorm_4x8 = true, 112 .lower_unpack_unorm_2x16 = true, 113 .lower_unpack_unorm_4x8 = true, 114 .lower_interpolate_at = true, 115 .has_fsub = true, 116 .has_isub = true, 117 .use_scoped_barrier = true, 118 .vertex_id_zero_based = true, 119 .lower_base_vertex = true, 120 .lower_helper_invocation = true, 121 .has_cs_global_id = true, 122 .has_txs = true, 123 .lower_mul_2x32_64 = true, 124 .lower_doubles_options = 125 nir_lower_drcp | 126 nir_lower_dsqrt | 127 nir_lower_drsq | 128 nir_lower_dfract | 129 nir_lower_dtrunc | 130 nir_lower_dfloor | 131 nir_lower_dceil | 132 nir_lower_dround_even, 133 .max_unroll_iterations = 32, /* arbitrary */ 134 .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), 135}; 136 137const nir_shader_compiler_options* 138dxil_get_nir_compiler_options(void) 139{ 140 return &nir_options; 141} 142 143static bool 144emit_llvm_ident(struct dxil_module *m) 145{ 146 const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1); 147 if (!compiler) 148 return false; 149 150 const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1); 151 return llvm_ident && 152 dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1); 153} 154 155static bool 156emit_named_version(struct dxil_module *m, const char *name, 157 int major, int minor) 158{ 159 const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major); 160 const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor); 161 const struct dxil_mdnode *version_nodes[] = { major_node, minor_node }; 162 const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes, 163 ARRAY_SIZE(version_nodes)); 164 return dxil_add_metadata_named_node(m, name, &version, 1); 165} 166 167static const char * 168get_shader_kind_str(enum dxil_shader_kind kind) 169{ 170 switch (kind) { 171 case DXIL_PIXEL_SHADER: 172 return "ps"; 173 case DXIL_VERTEX_SHADER: 174 return "vs"; 175 case DXIL_GEOMETRY_SHADER: 176 return "gs"; 177 case DXIL_HULL_SHADER: 178 return "hs"; 179 case DXIL_DOMAIN_SHADER: 180 return "ds"; 181 case DXIL_COMPUTE_SHADER: 182 return "cs"; 183 default: 184 unreachable("invalid shader kind"); 185 } 186} 187 188static bool 189emit_dx_shader_model(struct dxil_module *m) 190{ 191 const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind)); 192 const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version); 193 const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version); 194 const struct dxil_mdnode *shader_model[] = { type_node, major_node, 195 minor_node }; 196 const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model)); 197 198 return dxil_add_metadata_named_node(m, "dx.shaderModel", 199 &dx_shader_model, 1); 200} 201 202enum { 203 DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0, 204 DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1 205}; 206 207enum dxil_intr { 208 DXIL_INTR_LOAD_INPUT = 4, 209 DXIL_INTR_STORE_OUTPUT = 5, 210 DXIL_INTR_FABS = 6, 211 DXIL_INTR_SATURATE = 7, 212 213 DXIL_INTR_ISFINITE = 10, 214 DXIL_INTR_ISNORMAL = 11, 215 216 DXIL_INTR_FCOS = 12, 217 DXIL_INTR_FSIN = 13, 218 219 DXIL_INTR_FEXP2 = 21, 220 DXIL_INTR_FRC = 22, 221 DXIL_INTR_FLOG2 = 23, 222 223 DXIL_INTR_SQRT = 24, 224 DXIL_INTR_RSQRT = 25, 225 DXIL_INTR_ROUND_NE = 26, 226 DXIL_INTR_ROUND_NI = 27, 227 DXIL_INTR_ROUND_PI = 28, 228 DXIL_INTR_ROUND_Z = 29, 229 230 DXIL_INTR_BFREV = 30, 231 DXIL_INTR_COUNTBITS = 31, 232 DXIL_INTR_FIRSTBIT_LO = 32, 233 DXIL_INTR_FIRSTBIT_HI = 33, 234 DXIL_INTR_FIRSTBIT_SHI = 34, 235 236 DXIL_INTR_FMAX = 35, 237 DXIL_INTR_FMIN = 36, 238 DXIL_INTR_IMAX = 37, 239 DXIL_INTR_IMIN = 38, 240 DXIL_INTR_UMAX = 39, 241 DXIL_INTR_UMIN = 40, 242 243 DXIL_INTR_FMA = 47, 244 245 DXIL_INTR_IBFE = 51, 246 DXIL_INTR_UBFE = 52, 247 DXIL_INTR_BFI = 53, 248 249 DXIL_INTR_CREATE_HANDLE = 57, 250 DXIL_INTR_CBUFFER_LOAD_LEGACY = 59, 251 252 DXIL_INTR_SAMPLE = 60, 253 DXIL_INTR_SAMPLE_BIAS = 61, 254 DXIL_INTR_SAMPLE_LEVEL = 62, 255 DXIL_INTR_SAMPLE_GRAD = 63, 256 DXIL_INTR_SAMPLE_CMP = 64, 257 DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65, 258 259 DXIL_INTR_TEXTURE_LOAD = 66, 260 DXIL_INTR_TEXTURE_STORE = 67, 261 262 DXIL_INTR_BUFFER_LOAD = 68, 263 DXIL_INTR_BUFFER_STORE = 69, 264 265 DXIL_INTR_TEXTURE_SIZE = 72, 266 DXIL_INTR_TEXTURE_GATHER = 73, 267 DXIL_INTR_TEXTURE_GATHER_CMP = 74, 268 269 DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75, 270 DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76, 271 DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77, 272 273 DXIL_INTR_ATOMIC_BINOP = 78, 274 DXIL_INTR_ATOMIC_CMPXCHG = 79, 275 DXIL_INTR_BARRIER = 80, 276 DXIL_INTR_TEXTURE_LOD = 81, 277 278 DXIL_INTR_DISCARD = 82, 279 DXIL_INTR_DDX_COARSE = 83, 280 DXIL_INTR_DDY_COARSE = 84, 281 DXIL_INTR_DDX_FINE = 85, 282 DXIL_INTR_DDY_FINE = 86, 283 284 DXIL_INTR_EVAL_SNAPPED = 87, 285 DXIL_INTR_EVAL_SAMPLE_INDEX = 88, 286 DXIL_INTR_EVAL_CENTROID = 89, 287 288 DXIL_INTR_SAMPLE_INDEX = 90, 289 DXIL_INTR_COVERAGE = 91, 290 291 DXIL_INTR_THREAD_ID = 93, 292 DXIL_INTR_GROUP_ID = 94, 293 DXIL_INTR_THREAD_ID_IN_GROUP = 95, 294 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96, 295 296 DXIL_INTR_EMIT_STREAM = 97, 297 DXIL_INTR_CUT_STREAM = 98, 298 299 DXIL_INTR_GS_INSTANCE_ID = 100, 300 301 DXIL_INTR_MAKE_DOUBLE = 101, 302 DXIL_INTR_SPLIT_DOUBLE = 102, 303 304 DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103, 305 DXIL_INTR_LOAD_PATCH_CONSTANT = 104, 306 DXIL_INTR_DOMAIN_LOCATION = 105, 307 DXIL_INTR_STORE_PATCH_CONSTANT = 106, 308 DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107, 309 DXIL_INTR_PRIMITIVE_ID = 108, 310 311 DXIL_INTR_LEGACY_F32TOF16 = 130, 312 DXIL_INTR_LEGACY_F16TOF32 = 131, 313 314 DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137, 315}; 316 317enum dxil_atomic_op { 318 DXIL_ATOMIC_ADD = 0, 319 DXIL_ATOMIC_AND = 1, 320 DXIL_ATOMIC_OR = 2, 321 DXIL_ATOMIC_XOR = 3, 322 DXIL_ATOMIC_IMIN = 4, 323 DXIL_ATOMIC_IMAX = 5, 324 DXIL_ATOMIC_UMIN = 6, 325 DXIL_ATOMIC_UMAX = 7, 326 DXIL_ATOMIC_EXCHANGE = 8, 327}; 328 329typedef struct { 330 unsigned id; 331 unsigned binding; 332 unsigned size; 333 unsigned space; 334} resource_array_layout; 335 336static void 337fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields, 338 const struct dxil_type *struct_type, 339 const char *name, const resource_array_layout *layout) 340{ 341 const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type); 342 const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type); 343 344 fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID 345 fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol 346 fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name 347 fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID 348 fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound 349 fields[5] = dxil_get_metadata_int32(m, layout->size); // range size 350} 351 352static const struct dxil_mdnode * 353emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type, 354 const char *name, const resource_array_layout *layout, 355 enum dxil_component_type comp_type, 356 enum dxil_resource_kind res_kind) 357{ 358 const struct dxil_mdnode *fields[9]; 359 360 const struct dxil_mdnode *metadata_tag_nodes[2]; 361 362 fill_resource_metadata(m, fields, elem_type, name, layout); 363 fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape 364 fields[7] = dxil_get_metadata_int1(m, 0); // sample count 365 if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && 366 res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { 367 metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); 368 metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); 369 fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata 370 } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 371 fields[8] = NULL; 372 else 373 unreachable("Structured buffers not supported yet"); 374 375 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 376} 377 378static const struct dxil_mdnode * 379emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 380 const char *name, const resource_array_layout *layout, 381 enum dxil_component_type comp_type, 382 enum dxil_resource_kind res_kind) 383{ 384 const struct dxil_mdnode *fields[11]; 385 386 const struct dxil_mdnode *metadata_tag_nodes[2]; 387 388 fill_resource_metadata(m, fields, struct_type, name, layout); 389 fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape 390 fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent 391 fields[8] = dxil_get_metadata_int1(m, false); // has counter 392 fields[9] = dxil_get_metadata_int1(m, false); // is ROV 393 if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && 394 res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { 395 metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); 396 metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); 397 fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata 398 } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 399 fields[10] = NULL; 400 else 401 unreachable("Structured buffers not supported yet"); 402 403 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 404} 405 406static const struct dxil_mdnode * 407emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 408 const char *name, const resource_array_layout *layout, 409 unsigned size) 410{ 411 const struct dxil_mdnode *fields[8]; 412 413 fill_resource_metadata(m, fields, struct_type, name, layout); 414 fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size 415 fields[7] = NULL; // metadata 416 417 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 418} 419 420static const struct dxil_mdnode * 421emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type, 422 nir_variable *var, const resource_array_layout *layout) 423{ 424 const struct dxil_mdnode *fields[8]; 425 const struct glsl_type *type = glsl_without_array(var->type); 426 427 fill_resource_metadata(m, fields, struct_type, var->name, layout); 428 fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind 429 enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ? 430 DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT; 431 fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind 432 fields[7] = NULL; // metadata 433 434 return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); 435} 436 437 438#define MAX_SRVS 128 439#define MAX_UAVS 64 440#define MAX_CBVS 64 // ?? 441#define MAX_SAMPLERS 64 // ?? 442 443struct dxil_def { 444 const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS]; 445}; 446 447struct ntd_context { 448 void *ralloc_ctx; 449 const struct nir_to_dxil_options *opts; 450 struct nir_shader *shader; 451 452 struct dxil_module mod; 453 454 struct util_dynarray srv_metadata_nodes; 455 const struct dxil_value *srv_handles[MAX_SRVS]; 456 457 struct util_dynarray uav_metadata_nodes; 458 const struct dxil_value *ssbo_handles[MAX_UAVS]; 459 const struct dxil_value *image_handles[MAX_UAVS]; 460 uint32_t num_uavs; 461 462 struct util_dynarray cbv_metadata_nodes; 463 const struct dxil_value *cbv_handles[MAX_CBVS]; 464 465 struct util_dynarray sampler_metadata_nodes; 466 const struct dxil_value *sampler_handles[MAX_SAMPLERS]; 467 468 struct util_dynarray resources; 469 470 const struct dxil_mdnode *shader_property_nodes[6]; 471 size_t num_shader_property_nodes; 472 473 struct dxil_def *defs; 474 unsigned num_defs; 475 struct hash_table *phis; 476 477 const struct dxil_value *sharedvars; 478 const struct dxil_value *scratchvars; 479 struct hash_table *consts; 480 481 nir_variable *ps_front_face; 482 nir_variable *system_value[SYSTEM_VALUE_MAX]; 483 484 nir_function *tess_ctrl_patch_constant_func; 485 unsigned tess_input_control_point_count; 486 487 struct dxil_func_def *main_func_def; 488 struct dxil_func_def *tess_ctrl_patch_constant_func_def; 489 unsigned unnamed_ubo_count; 490}; 491 492static const char* 493unary_func_name(enum dxil_intr intr) 494{ 495 switch (intr) { 496 case DXIL_INTR_COUNTBITS: 497 case DXIL_INTR_FIRSTBIT_HI: 498 case DXIL_INTR_FIRSTBIT_SHI: 499 case DXIL_INTR_FIRSTBIT_LO: 500 return "dx.op.unaryBits"; 501 case DXIL_INTR_ISFINITE: 502 case DXIL_INTR_ISNORMAL: 503 return "dx.op.isSpecialFloat"; 504 default: 505 return "dx.op.unary"; 506 } 507} 508 509static const struct dxil_value * 510emit_unary_call(struct ntd_context *ctx, enum overload_type overload, 511 enum dxil_intr intr, 512 const struct dxil_value *op0) 513{ 514 const struct dxil_func *func = dxil_get_function(&ctx->mod, 515 unary_func_name(intr), 516 overload); 517 if (!func) 518 return NULL; 519 520 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 521 if (!opcode) 522 return NULL; 523 524 const struct dxil_value *args[] = { 525 opcode, 526 op0 527 }; 528 529 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 530} 531 532static const struct dxil_value * 533emit_binary_call(struct ntd_context *ctx, enum overload_type overload, 534 enum dxil_intr intr, 535 const struct dxil_value *op0, const struct dxil_value *op1) 536{ 537 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload); 538 if (!func) 539 return NULL; 540 541 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 542 if (!opcode) 543 return NULL; 544 545 const struct dxil_value *args[] = { 546 opcode, 547 op0, 548 op1 549 }; 550 551 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 552} 553 554static const struct dxil_value * 555emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload, 556 enum dxil_intr intr, 557 const struct dxil_value *op0, 558 const struct dxil_value *op1, 559 const struct dxil_value *op2) 560{ 561 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload); 562 if (!func) 563 return NULL; 564 565 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 566 if (!opcode) 567 return NULL; 568 569 const struct dxil_value *args[] = { 570 opcode, 571 op0, 572 op1, 573 op2 574 }; 575 576 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 577} 578 579static const struct dxil_value * 580emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload, 581 enum dxil_intr intr, 582 const struct dxil_value *op0, 583 const struct dxil_value *op1, 584 const struct dxil_value *op2, 585 const struct dxil_value *op3) 586{ 587 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload); 588 if (!func) 589 return NULL; 590 591 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); 592 if (!opcode) 593 return NULL; 594 595 const struct dxil_value *args[] = { 596 opcode, 597 op0, 598 op1, 599 op2, 600 op3 601 }; 602 603 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 604} 605 606static const struct dxil_value * 607emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp) 608{ 609 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32); 610 if (!func) 611 return NULL; 612 613 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 614 DXIL_INTR_THREAD_ID); 615 if (!opcode) 616 return NULL; 617 618 const struct dxil_value *args[] = { 619 opcode, 620 comp 621 }; 622 623 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 624} 625 626static const struct dxil_value * 627emit_threadidingroup_call(struct ntd_context *ctx, 628 const struct dxil_value *comp) 629{ 630 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32); 631 632 if (!func) 633 return NULL; 634 635 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 636 DXIL_INTR_THREAD_ID_IN_GROUP); 637 if (!opcode) 638 return NULL; 639 640 const struct dxil_value *args[] = { 641 opcode, 642 comp 643 }; 644 645 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 646} 647 648static const struct dxil_value * 649emit_flattenedthreadidingroup_call(struct ntd_context *ctx) 650{ 651 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32); 652 653 if (!func) 654 return NULL; 655 656 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 657 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP); 658 if (!opcode) 659 return NULL; 660 661 const struct dxil_value *args[] = { 662 opcode 663 }; 664 665 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 666} 667 668static const struct dxil_value * 669emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp) 670{ 671 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32); 672 673 if (!func) 674 return NULL; 675 676 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 677 DXIL_INTR_GROUP_ID); 678 if (!opcode) 679 return NULL; 680 681 const struct dxil_value *args[] = { 682 opcode, 683 comp 684 }; 685 686 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 687} 688 689static const struct dxil_value * 690emit_bufferload_call(struct ntd_context *ctx, 691 const struct dxil_value *handle, 692 const struct dxil_value *coord[2], 693 enum overload_type overload) 694{ 695 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload); 696 if (!func) 697 return NULL; 698 699 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 700 DXIL_INTR_BUFFER_LOAD); 701 const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] }; 702 703 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 704} 705 706static bool 707emit_bufferstore_call(struct ntd_context *ctx, 708 const struct dxil_value *handle, 709 const struct dxil_value *coord[2], 710 const struct dxil_value *value[4], 711 const struct dxil_value *write_mask, 712 enum overload_type overload) 713{ 714 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload); 715 716 if (!func) 717 return false; 718 719 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 720 DXIL_INTR_BUFFER_STORE); 721 const struct dxil_value *args[] = { 722 opcode, handle, coord[0], coord[1], 723 value[0], value[1], value[2], value[3], 724 write_mask 725 }; 726 727 return dxil_emit_call_void(&ctx->mod, func, 728 args, ARRAY_SIZE(args)); 729} 730 731static const struct dxil_value * 732emit_textureload_call(struct ntd_context *ctx, 733 const struct dxil_value *handle, 734 const struct dxil_value *coord[3], 735 enum overload_type overload) 736{ 737 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload); 738 if (!func) 739 return NULL; 740 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); 741 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); 742 743 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 744 DXIL_INTR_TEXTURE_LOAD); 745 const struct dxil_value *args[] = { opcode, handle, 746 /*lod_or_sample*/ int_undef, 747 coord[0], coord[1], coord[2], 748 /* offsets */ int_undef, int_undef, int_undef}; 749 750 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 751} 752 753static bool 754emit_texturestore_call(struct ntd_context *ctx, 755 const struct dxil_value *handle, 756 const struct dxil_value *coord[3], 757 const struct dxil_value *value[4], 758 const struct dxil_value *write_mask, 759 enum overload_type overload) 760{ 761 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload); 762 763 if (!func) 764 return false; 765 766 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, 767 DXIL_INTR_TEXTURE_STORE); 768 const struct dxil_value *args[] = { 769 opcode, handle, coord[0], coord[1], coord[2], 770 value[0], value[1], value[2], value[3], 771 write_mask 772 }; 773 774 return dxil_emit_call_void(&ctx->mod, func, 775 args, ARRAY_SIZE(args)); 776} 777 778static const struct dxil_value * 779emit_atomic_binop(struct ntd_context *ctx, 780 const struct dxil_value *handle, 781 enum dxil_atomic_op atomic_op, 782 const struct dxil_value *coord[3], 783 const struct dxil_value *value) 784{ 785 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32); 786 787 if (!func) 788 return false; 789 790 const struct dxil_value *opcode = 791 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP); 792 const struct dxil_value *atomic_op_value = 793 dxil_module_get_int32_const(&ctx->mod, atomic_op); 794 const struct dxil_value *args[] = { 795 opcode, handle, atomic_op_value, 796 coord[0], coord[1], coord[2], value 797 }; 798 799 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 800} 801 802static const struct dxil_value * 803emit_atomic_cmpxchg(struct ntd_context *ctx, 804 const struct dxil_value *handle, 805 const struct dxil_value *coord[3], 806 const struct dxil_value *cmpval, 807 const struct dxil_value *newval) 808{ 809 const struct dxil_func *func = 810 dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32); 811 812 if (!func) 813 return false; 814 815 const struct dxil_value *opcode = 816 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG); 817 const struct dxil_value *args[] = { 818 opcode, handle, coord[0], coord[1], coord[2], cmpval, newval 819 }; 820 821 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 822} 823 824static const struct dxil_value * 825emit_createhandle_call(struct ntd_context *ctx, 826 enum dxil_resource_class resource_class, 827 unsigned resource_range_id, 828 const struct dxil_value *resource_range_index, 829 bool non_uniform_resource_index) 830{ 831 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE); 832 const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class); 833 const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id); 834 const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); 835 if (!opcode || !resource_class_value || !resource_range_id_value || 836 !non_uniform_resource_index_value) 837 return NULL; 838 839 const struct dxil_value *args[] = { 840 opcode, 841 resource_class_value, 842 resource_range_id_value, 843 resource_range_index, 844 non_uniform_resource_index_value 845 }; 846 847 const struct dxil_func *func = 848 dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE); 849 850 if (!func) 851 return NULL; 852 853 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 854} 855 856static const struct dxil_value * 857emit_createhandle_call_const_index(struct ntd_context *ctx, 858 enum dxil_resource_class resource_class, 859 unsigned resource_range_id, 860 unsigned resource_range_index, 861 bool non_uniform_resource_index) 862{ 863 864 const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index); 865 if (!resource_range_index_value) 866 return NULL; 867 868 return emit_createhandle_call(ctx, resource_class, resource_range_id, 869 resource_range_index_value, 870 non_uniform_resource_index); 871} 872 873static void 874add_resource(struct ntd_context *ctx, enum dxil_resource_type type, 875 enum dxil_resource_kind kind, 876 const resource_array_layout *layout) 877{ 878 struct dxil_resource_v0 *resource_v0 = NULL; 879 struct dxil_resource_v1 *resource_v1 = NULL; 880 if (ctx->mod.minor_validator >= 6) { 881 resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1); 882 resource_v0 = &resource_v1->v0; 883 } else { 884 resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1); 885 } 886 resource_v0->resource_type = type; 887 resource_v0->space = layout->space; 888 resource_v0->lower_bound = layout->binding; 889 if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX) 890 resource_v0->upper_bound = UINT_MAX; 891 else 892 resource_v0->upper_bound = layout->binding + layout->size - 1; 893 if (type == DXIL_RES_UAV_TYPED || 894 type == DXIL_RES_UAV_RAW || 895 type == DXIL_RES_UAV_STRUCTURED) { 896 uint32_t new_uav_count = ctx->num_uavs + layout->size; 897 if (layout->size == 0 || new_uav_count < ctx->num_uavs) 898 ctx->num_uavs = UINT_MAX; 899 else 900 ctx->num_uavs = new_uav_count; 901 if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8) 902 ctx->mod.feats.use_64uavs = 1; 903 } 904 905 if (resource_v1) { 906 resource_v1->resource_kind = kind; 907 /* No flags supported yet */ 908 resource_v1->resource_flags = 0; 909 } 910} 911 912static unsigned 913get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class, 914 unsigned space, unsigned binding) 915{ 916 unsigned offset = 0; 917 unsigned count = 0; 918 919 unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); 920 unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); 921 unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); 922 unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); 923 924 switch (class) { 925 case DXIL_RESOURCE_CLASS_UAV: 926 offset = num_srvs + num_samplers + num_cbvs; 927 count = num_uavs; 928 break; 929 case DXIL_RESOURCE_CLASS_SRV: 930 offset = num_samplers + num_cbvs; 931 count = num_srvs; 932 break; 933 case DXIL_RESOURCE_CLASS_SAMPLER: 934 offset = num_cbvs; 935 count = num_samplers; 936 break; 937 case DXIL_RESOURCE_CLASS_CBV: 938 offset = 0; 939 count = num_cbvs; 940 break; 941 } 942 943 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? 944 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); 945 assert(offset + count <= ctx->resources.size / resource_element_size); 946 for (unsigned i = offset; i < offset + count; ++i) { 947 const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i); 948 if (resource->space == space && 949 resource->lower_bound <= binding && 950 resource->upper_bound >= binding) { 951 return i - offset; 952 } 953 } 954 955 unreachable("Resource access for undeclared range"); 956 return 0; 957} 958 959static bool 960emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count) 961{ 962 unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *); 963 unsigned binding = var->data.binding; 964 resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; 965 966 enum dxil_component_type comp_type; 967 enum dxil_resource_kind res_kind; 968 enum dxil_resource_type res_type; 969 if (var->data.mode == nir_var_mem_ssbo) { 970 comp_type = DXIL_COMP_TYPE_INVALID; 971 res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER; 972 res_type = DXIL_RES_SRV_RAW; 973 } else { 974 comp_type = dxil_get_comp_type(var->type); 975 res_kind = dxil_get_resource_kind(var->type); 976 res_type = DXIL_RES_SRV_TYPED; 977 } 978 const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */); 979 980 if (glsl_type_is_array(var->type)) 981 res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count); 982 983 const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name, 984 &layout, comp_type, res_kind); 985 986 if (!srv_meta) 987 return false; 988 989 util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta); 990 add_resource(ctx, res_type, res_kind, &layout); 991 if (res_type == DXIL_RES_SRV_RAW) 992 ctx->mod.raw_and_structured_buffers = true; 993 994 return true; 995} 996 997static bool 998emit_globals(struct ntd_context *ctx, unsigned size) 999{ 1000 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) 1001 size++; 1002 1003 if (!size) 1004 return true; 1005 1006 const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod, 1007 DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */); 1008 if (!struct_type) 1009 return false; 1010 1011 const struct dxil_type *array_type = 1012 dxil_module_get_array_type(&ctx->mod, struct_type, size); 1013 if (!array_type) 1014 return false; 1015 1016 resource_array_layout layout = {0, 0, size, 0}; 1017 const struct dxil_mdnode *uav_meta = 1018 emit_uav_metadata(&ctx->mod, array_type, 1019 "globals", &layout, 1020 DXIL_COMP_TYPE_INVALID, 1021 DXIL_RESOURCE_KIND_RAW_BUFFER); 1022 if (!uav_meta) 1023 return false; 1024 1025 util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta); 1026 if (ctx->mod.minor_validator < 6 && 1027 util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8) 1028 ctx->mod.feats.use_64uavs = 1; 1029 /* Handles to UAVs used for kernel globals are created on-demand */ 1030 add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout); 1031 ctx->mod.raw_and_structured_buffers = true; 1032 return true; 1033} 1034 1035static bool 1036emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count, 1037 enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name) 1038{ 1039 unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *); 1040 resource_array_layout layout = { id, binding, count, space }; 1041 1042 const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */); 1043 res_type = dxil_module_get_array_type(&ctx->mod, res_type, count); 1044 const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name, 1045 &layout, comp_type, res_kind); 1046 1047 if (!uav_meta) 1048 return false; 1049 1050 util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta); 1051 if (ctx->mod.minor_validator < 6 && 1052 util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8) 1053 ctx->mod.feats.use_64uavs = 1; 1054 1055 add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout); 1056 if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 1057 ctx->mod.raw_and_structured_buffers = true; 1058 if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER && 1059 ctx->mod.shader_kind != DXIL_COMPUTE_SHADER) 1060 ctx->mod.feats.uavs_at_every_stage = true; 1061 1062 return true; 1063} 1064 1065static bool 1066emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count) 1067{ 1068 unsigned binding, space; 1069 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) { 1070 /* For GL, the image intrinsics are already lowered, using driver_location 1071 * as the 0-based image index. Use space 1 so that we can keep using these 1072 * NIR constants without having to remap them, and so they don't overlap 1073 * SSBOs, which are also 0-based UAV bindings. 1074 */ 1075 binding = var->data.driver_location; 1076 space = 1; 1077 } else { 1078 binding = var->data.binding; 1079 space = var->data.descriptor_set; 1080 } 1081 enum dxil_component_type comp_type = dxil_get_comp_type(var->type); 1082 enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type); 1083 const char *name = var->name; 1084 1085 return emit_uav(ctx, binding, space, count, comp_type, res_kind, name); 1086} 1087 1088static void 1089var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx, 1090 const struct nir_constant *c, 1091 const struct glsl_type *type, 1092 void *const_vals, 1093 unsigned int offset) 1094{ 1095 assert(glsl_type_is_vector_or_scalar(type)); 1096 unsigned int components = glsl_get_vector_elements(type); 1097 unsigned bit_size = glsl_get_bit_size(type); 1098 unsigned int increment = bit_size / 8; 1099 1100 for (unsigned int comp = 0; comp < components; comp++) { 1101 uint8_t *dst = (uint8_t *)const_vals + offset; 1102 1103 switch (bit_size) { 1104 case 64: 1105 memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64)); 1106 break; 1107 case 32: 1108 memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32)); 1109 break; 1110 case 16: 1111 memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16)); 1112 break; 1113 case 8: 1114 assert(glsl_base_type_is_integer(glsl_get_base_type(type))); 1115 memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8)); 1116 break; 1117 default: 1118 unreachable("unexpeted bit-size"); 1119 } 1120 1121 offset += increment; 1122 } 1123} 1124 1125static void 1126var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c, 1127 const struct glsl_type *type, void *const_vals, 1128 unsigned int offset) 1129{ 1130 assert(!glsl_type_is_interface(type)); 1131 1132 if (glsl_type_is_vector_or_scalar(type)) { 1133 var_fill_const_array_with_vector_or_scalar(ctx, c, type, 1134 const_vals, 1135 offset); 1136 } else if (glsl_type_is_array(type)) { 1137 assert(!glsl_type_is_unsized_array(type)); 1138 const struct glsl_type *without = glsl_without_array(type); 1139 unsigned stride = glsl_get_explicit_stride(without); 1140 1141 for (unsigned elt = 0; elt < glsl_get_length(type); elt++) { 1142 var_fill_const_array(ctx, c->elements[elt], without, 1143 const_vals, offset + (elt * stride)); 1144 offset += glsl_get_cl_size(without); 1145 } 1146 } else if (glsl_type_is_struct(type)) { 1147 for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) { 1148 const struct glsl_type *elt_type = glsl_get_struct_field(type, elt); 1149 unsigned field_offset = glsl_get_struct_field_offset(type, elt); 1150 1151 var_fill_const_array(ctx, c->elements[elt], 1152 elt_type, const_vals, 1153 offset + field_offset); 1154 } 1155 } else 1156 unreachable("unknown GLSL type in var_fill_const_array"); 1157} 1158 1159static bool 1160emit_global_consts(struct ntd_context *ctx) 1161{ 1162 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) { 1163 assert(var->constant_initializer); 1164 1165 unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4); 1166 uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members); 1167 var_fill_const_array(ctx, var->constant_initializer, var->type, 1168 const_ints, 0); 1169 const struct dxil_value **const_vals = 1170 ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members); 1171 if (!const_vals) 1172 return false; 1173 for (int i = 0; i < num_members; i++) 1174 const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]); 1175 1176 const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32); 1177 if (!elt_type) 1178 return false; 1179 const struct dxil_type *type = 1180 dxil_module_get_array_type(&ctx->mod, elt_type, num_members); 1181 if (!type) 1182 return false; 1183 const struct dxil_value *agg_vals = 1184 dxil_module_get_array_const(&ctx->mod, type, const_vals); 1185 if (!agg_vals) 1186 return false; 1187 1188 const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type, 1189 DXIL_AS_DEFAULT, 4, 1190 agg_vals); 1191 if (!gvar) 1192 return false; 1193 1194 if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar)) 1195 return false; 1196 } 1197 1198 return true; 1199} 1200 1201static bool 1202emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space, 1203 unsigned size, unsigned count, char *name) 1204{ 1205 assert(count != 0); 1206 1207 unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *); 1208 1209 const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32); 1210 const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size); 1211 const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name, 1212 &array_type, 1); 1213 // All ubo[1]s should have been lowered to ubo with static indexing 1214 const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type; 1215 resource_array_layout layout = {idx, binding, count, space}; 1216 const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type, 1217 name, &layout, 4 * size); 1218 1219 if (!cbv_meta) 1220 return false; 1221 1222 util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta); 1223 add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout); 1224 1225 return true; 1226} 1227 1228static bool 1229emit_ubo_var(struct ntd_context *ctx, nir_variable *var) 1230{ 1231 unsigned count = 1; 1232 if (glsl_type_is_array(var->type)) 1233 count = glsl_get_length(var->type); 1234 1235 char *name = var->name; 1236 char temp_name[30]; 1237 if (name && strlen(name) == 0) { 1238 snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d", 1239 ctx->unnamed_ubo_count++); 1240 name = temp_name; 1241 } 1242 1243 const struct glsl_type *type = glsl_without_array(var->type); 1244 assert(glsl_type_is_struct(type) || glsl_type_is_interface(type)); 1245 unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4; 1246 1247 return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, 1248 dwords, count, name); 1249} 1250 1251static bool 1252emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count) 1253{ 1254 unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *); 1255 unsigned binding = var->data.binding; 1256 resource_array_layout layout = {id, binding, count, var->data.descriptor_set}; 1257 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 1258 const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1); 1259 1260 if (glsl_type_is_array(var->type)) 1261 sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count); 1262 1263 const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout); 1264 1265 if (!sampler_meta) 1266 return false; 1267 1268 util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta); 1269 add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout); 1270 1271 return true; 1272} 1273 1274static bool 1275emit_static_indexing_handles(struct ntd_context *ctx) 1276{ 1277 /* Vulkan always uses dynamic handles, from instructions in the NIR */ 1278 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) 1279 return true; 1280 1281 unsigned last_res_class = -1; 1282 unsigned id = 0; 1283 1284 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? 1285 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); 1286 for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data; 1287 res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size); 1288 res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) { 1289 enum dxil_resource_class res_class; 1290 const struct dxil_value **handle_array; 1291 switch (res->resource_type) { 1292 case DXIL_RES_SRV_TYPED: 1293 case DXIL_RES_SRV_RAW: 1294 case DXIL_RES_SRV_STRUCTURED: 1295 res_class = DXIL_RESOURCE_CLASS_SRV; 1296 handle_array = ctx->srv_handles; 1297 break; 1298 case DXIL_RES_CBV: 1299 res_class = DXIL_RESOURCE_CLASS_CBV; 1300 handle_array = ctx->cbv_handles; 1301 break; 1302 case DXIL_RES_SAMPLER: 1303 res_class = DXIL_RESOURCE_CLASS_SAMPLER; 1304 handle_array = ctx->sampler_handles; 1305 break; 1306 case DXIL_RES_UAV_RAW: 1307 res_class = DXIL_RESOURCE_CLASS_UAV; 1308 handle_array = ctx->ssbo_handles; 1309 break; 1310 case DXIL_RES_UAV_TYPED: 1311 case DXIL_RES_UAV_STRUCTURED: 1312 case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER: 1313 res_class = DXIL_RESOURCE_CLASS_UAV; 1314 handle_array = ctx->image_handles; 1315 break; 1316 default: 1317 unreachable("Unexpected resource type"); 1318 } 1319 1320 if (last_res_class != res_class) 1321 id = 0; 1322 else 1323 id++; 1324 last_res_class = res_class; 1325 1326 if (res->space > 1) 1327 continue; 1328 assert(res->space == 0 || 1329 (res->space == 1 && 1330 res->resource_type != DXIL_RES_UAV_RAW && 1331 ctx->opts->environment == DXIL_ENVIRONMENT_GL)); 1332 1333 /* CL uses dynamic handles for the "globals" UAV array, but uses static 1334 * handles for UBOs, textures, and samplers. 1335 */ 1336 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL && 1337 res->resource_type == DXIL_RES_UAV_RAW) 1338 continue; 1339 1340 for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) { 1341 handle_array[i] = emit_createhandle_call_const_index(ctx, res_class, id, i, false); 1342 if (!handle_array[i]) 1343 return false; 1344 } 1345 } 1346 return true; 1347} 1348 1349static const struct dxil_mdnode * 1350emit_gs_state(struct ntd_context *ctx) 1351{ 1352 const struct dxil_mdnode *gs_state_nodes[5]; 1353 const nir_shader *s = ctx->shader; 1354 1355 gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive)); 1356 gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out); 1357 gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1)); 1358 gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive)); 1359 gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations); 1360 1361 for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) { 1362 if (!gs_state_nodes[i]) 1363 return NULL; 1364 } 1365 1366 return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes)); 1367} 1368 1369static enum dxil_tessellator_domain 1370get_tessellator_domain(enum tess_primitive_mode primitive_mode) 1371{ 1372 switch (primitive_mode) { 1373 case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD; 1374 case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI; 1375 case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE; 1376 default: 1377 unreachable("Invalid tessellator primitive mode"); 1378 } 1379} 1380 1381static enum dxil_tessellator_partitioning 1382get_tessellator_partitioning(enum gl_tess_spacing spacing) 1383{ 1384 switch (spacing) { 1385 default: 1386 case TESS_SPACING_EQUAL: 1387 return DXIL_TESSELLATOR_PARTITIONING_INTEGER; 1388 case TESS_SPACING_FRACTIONAL_EVEN: 1389 return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; 1390 case TESS_SPACING_FRACTIONAL_ODD: 1391 return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; 1392 } 1393} 1394 1395static enum dxil_tessellator_output_primitive 1396get_tessellator_output_primitive(const struct shader_info *info) 1397{ 1398 if (info->tess.point_mode) 1399 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT; 1400 if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) 1401 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE; 1402 /* Note: GL tessellation domain is inverted from D3D, which means triangle 1403 * winding needs to be inverted. 1404 */ 1405 if (info->tess.ccw) 1406 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW; 1407 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW; 1408} 1409 1410static const struct dxil_mdnode * 1411emit_hs_state(struct ntd_context *ctx) 1412{ 1413 const struct dxil_mdnode *hs_state_nodes[7]; 1414 1415 hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func); 1416 hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count); 1417 hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out); 1418 hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode)); 1419 hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing)); 1420 hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info)); 1421 hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f); 1422 1423 return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes)); 1424} 1425 1426static const struct dxil_mdnode * 1427emit_ds_state(struct ntd_context *ctx) 1428{ 1429 const struct dxil_mdnode *ds_state_nodes[2]; 1430 1431 ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode)); 1432 ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out); 1433 1434 return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes)); 1435} 1436 1437static const struct dxil_mdnode * 1438emit_threads(struct ntd_context *ctx) 1439{ 1440 const nir_shader *s = ctx->shader; 1441 const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1)); 1442 const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1)); 1443 const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1)); 1444 if (!threads_x || !threads_y || !threads_z) 1445 return false; 1446 1447 const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z }; 1448 return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes)); 1449} 1450 1451static int64_t 1452get_module_flags(struct ntd_context *ctx) 1453{ 1454 /* See the DXIL documentation for the definition of these flags: 1455 * 1456 * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags 1457 */ 1458 1459 uint64_t flags = 0; 1460 if (ctx->mod.feats.doubles) 1461 flags |= (1 << 2); 1462 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT && 1463 ctx->shader->info.fs.early_fragment_tests) 1464 flags |= (1 << 3); 1465 if (ctx->mod.raw_and_structured_buffers) 1466 flags |= (1 << 4); 1467 if (ctx->mod.feats.min_precision) 1468 flags |= (1 << 5); 1469 if (ctx->mod.feats.dx11_1_double_extensions) 1470 flags |= (1 << 6); 1471 if (ctx->mod.feats.array_layer_from_vs_or_ds) 1472 flags |= (1 << 9); 1473 if (ctx->mod.feats.inner_coverage) 1474 flags |= (1 << 10); 1475 if (ctx->mod.feats.typed_uav_load_additional_formats) 1476 flags |= (1 << 13); 1477 if (ctx->mod.feats.use_64uavs) 1478 flags |= (1 << 15); 1479 if (ctx->mod.feats.uavs_at_every_stage) 1480 flags |= (1 << 16); 1481 if (ctx->mod.feats.cs_4x_raw_sb) 1482 flags |= (1 << 17); 1483 if (ctx->mod.feats.wave_ops) 1484 flags |= (1 << 19); 1485 if (ctx->mod.feats.int64_ops) 1486 flags |= (1 << 20); 1487 if (ctx->mod.feats.barycentrics) 1488 flags |= (1 << 22); 1489 if (ctx->mod.feats.stencil_ref) 1490 flags |= (1 << 11); 1491 if (ctx->mod.feats.native_low_precision) 1492 flags |= (1 << 23) | (1 << 5); 1493 1494 if (ctx->opts->disable_math_refactoring) 1495 flags |= (1 << 1); 1496 1497 return flags; 1498} 1499 1500static const struct dxil_mdnode * 1501emit_entrypoint(struct ntd_context *ctx, 1502 const struct dxil_func *func, const char *name, 1503 const struct dxil_mdnode *signatures, 1504 const struct dxil_mdnode *resources, 1505 const struct dxil_mdnode *shader_props) 1506{ 1507 char truncated_name[254] = { 0 }; 1508 strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1); 1509 1510 const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func); 1511 const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name); 1512 const struct dxil_mdnode *nodes[] = { 1513 func_md, 1514 name_md, 1515 signatures, 1516 resources, 1517 shader_props 1518 }; 1519 return dxil_get_metadata_node(&ctx->mod, nodes, 1520 ARRAY_SIZE(nodes)); 1521} 1522 1523static const struct dxil_mdnode * 1524emit_resources(struct ntd_context *ctx) 1525{ 1526 bool emit_resources = false; 1527 const struct dxil_mdnode *resources_nodes[] = { 1528 NULL, NULL, NULL, NULL 1529 }; 1530 1531#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *) 1532 1533 if (ctx->srv_metadata_nodes.size) { 1534 resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes)); 1535 emit_resources = true; 1536 } 1537 1538 if (ctx->uav_metadata_nodes.size) { 1539 resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes)); 1540 emit_resources = true; 1541 } 1542 1543 if (ctx->cbv_metadata_nodes.size) { 1544 resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes)); 1545 emit_resources = true; 1546 } 1547 1548 if (ctx->sampler_metadata_nodes.size) { 1549 resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes)); 1550 emit_resources = true; 1551 } 1552 1553#undef ARRAY_AND_SIZE 1554 1555 return emit_resources ? 1556 dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL; 1557} 1558 1559static boolean 1560emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag, 1561 const struct dxil_mdnode *value_node) 1562{ 1563 const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag); 1564 if (!tag_node || !value_node) 1565 return false; 1566 assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2); 1567 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node; 1568 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node; 1569 1570 return true; 1571} 1572 1573static bool 1574emit_metadata(struct ntd_context *ctx) 1575{ 1576 /* DXIL versions are 1.x for shader model 6.x */ 1577 assert(ctx->mod.major_version == 6); 1578 unsigned dxilMajor = 1; 1579 unsigned dxilMinor = ctx->mod.minor_version; 1580 unsigned valMajor = ctx->mod.major_validator; 1581 unsigned valMinor = ctx->mod.minor_validator; 1582 if (!emit_llvm_ident(&ctx->mod) || 1583 !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) || 1584 !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) || 1585 !emit_dx_shader_model(&ctx->mod)) 1586 return false; 1587 1588 const struct dxil_func_def *main_func_def = ctx->main_func_def; 1589 if (!main_func_def) 1590 return false; 1591 const struct dxil_func *main_func = main_func_def->func; 1592 1593 const struct dxil_mdnode *resources_node = emit_resources(ctx); 1594 1595 const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func); 1596 const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0); 1597 1598 const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0); 1599 const struct dxil_mdnode *nodes_4_27_27[] = { 1600 node4, node27, node27 1601 }; 1602 const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27, 1603 ARRAY_SIZE(nodes_4_27_27)); 1604 1605 const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1); 1606 1607 const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1); 1608 const struct dxil_mdnode *main_type_annotation_nodes[] = { 1609 node3, main_entrypoint, node29 1610 }; 1611 const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes, 1612 ARRAY_SIZE(main_type_annotation_nodes)); 1613 1614 if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { 1615 if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx))) 1616 return false; 1617 } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) { 1618 ctx->tess_input_control_point_count = 32; 1619 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { 1620 if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) { 1621 ctx->tess_input_control_point_count = glsl_array_size(var->type); 1622 break; 1623 } 1624 } 1625 1626 if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx))) 1627 return false; 1628 } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) { 1629 if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx))) 1630 return false; 1631 } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) { 1632 if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx))) 1633 return false; 1634 } 1635 1636 uint64_t flags = get_module_flags(ctx); 1637 if (flags != 0) { 1638 if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags))) 1639 return false; 1640 } 1641 const struct dxil_mdnode *shader_properties = NULL; 1642 if (ctx->num_shader_property_nodes > 0) { 1643 shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes, 1644 ctx->num_shader_property_nodes); 1645 if (!shader_properties) 1646 return false; 1647 } 1648 1649 nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader); 1650 const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func, 1651 entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties); 1652 if (!dx_entry_point) 1653 return false; 1654 1655 if (resources_node) { 1656 const struct dxil_mdnode *dx_resources = resources_node; 1657 dxil_add_metadata_named_node(&ctx->mod, "dx.resources", 1658 &dx_resources, 1); 1659 } 1660 1661 const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation }; 1662 return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations", 1663 dx_type_annotations, 1664 ARRAY_SIZE(dx_type_annotations)) && 1665 dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints", 1666 &dx_entry_point, 1); 1667} 1668 1669static const struct dxil_value * 1670bitcast_to_int(struct ntd_context *ctx, unsigned bit_size, 1671 const struct dxil_value *value) 1672{ 1673 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size); 1674 if (!type) 1675 return NULL; 1676 1677 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); 1678} 1679 1680static const struct dxil_value * 1681bitcast_to_float(struct ntd_context *ctx, unsigned bit_size, 1682 const struct dxil_value *value) 1683{ 1684 const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size); 1685 if (!type) 1686 return NULL; 1687 1688 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); 1689} 1690 1691static void 1692store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan, 1693 const struct dxil_value *value) 1694{ 1695 assert(ssa->index < ctx->num_defs); 1696 assert(chan < ssa->num_components); 1697 /* We pre-defined the dest value because of a phi node, so bitcast while storing if the 1698 * base type differs */ 1699 if (ctx->defs[ssa->index].chans[chan]) { 1700 const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]); 1701 const struct dxil_type *value_type = dxil_value_get_type(value); 1702 if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type)) 1703 value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value); 1704 } 1705 ctx->defs[ssa->index].chans[chan] = value; 1706} 1707 1708static void 1709store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan, 1710 const struct dxil_value *value) 1711{ 1712 assert(dest->is_ssa); 1713 assert(value); 1714 store_ssa_def(ctx, &dest->ssa, chan, value); 1715} 1716 1717static void 1718store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan, 1719 const struct dxil_value *value, nir_alu_type type) 1720{ 1721 switch (nir_alu_type_get_base_type(type)) { 1722 case nir_type_float: 1723 if (nir_dest_bit_size(*dest) == 64) 1724 ctx->mod.feats.doubles = true; 1725 store_dest_value(ctx, dest, chan, value); 1726 break; 1727 case nir_type_uint: 1728 case nir_type_int: 1729 if (nir_dest_bit_size(*dest) == 16) 1730 ctx->mod.feats.native_low_precision = true; 1731 if (nir_dest_bit_size(*dest) == 64) 1732 ctx->mod.feats.int64_ops = true; 1733 FALLTHROUGH; 1734 case nir_type_bool: 1735 store_dest_value(ctx, dest, chan, value); 1736 break; 1737 default: 1738 unreachable("unexpected nir_alu_type"); 1739 } 1740} 1741 1742static void 1743store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan, 1744 const struct dxil_value *value) 1745{ 1746 assert(!alu->dest.saturate); 1747 store_dest(ctx, &alu->dest.dest, chan, value, 1748 nir_op_infos[alu->op].output_type); 1749} 1750 1751static const struct dxil_value * 1752get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan) 1753{ 1754 assert(ssa->index < ctx->num_defs); 1755 assert(chan < ssa->num_components); 1756 assert(ctx->defs[ssa->index].chans[chan]); 1757 return ctx->defs[ssa->index].chans[chan]; 1758} 1759 1760static const struct dxil_value * 1761get_src(struct ntd_context *ctx, nir_src *src, unsigned chan, 1762 nir_alu_type type) 1763{ 1764 assert(src->is_ssa); 1765 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan); 1766 1767 const int bit_size = nir_src_bit_size(*src); 1768 1769 switch (nir_alu_type_get_base_type(type)) { 1770 case nir_type_int: 1771 case nir_type_uint: { 1772 assert(bit_size != 64 || ctx->mod.feats.int64_ops); 1773 const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size); 1774 /* nohing to do */ 1775 if (dxil_value_type_equal_to(value, expect_type)) 1776 return value; 1777 assert(dxil_value_type_bitsize_equal_to(value, bit_size)); 1778 return bitcast_to_int(ctx, bit_size, value); 1779 } 1780 1781 case nir_type_float: 1782 assert(nir_src_bit_size(*src) >= 16); 1783 assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles); 1784 if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) 1785 return value; 1786 assert(dxil_value_type_bitsize_equal_to(value, bit_size)); 1787 return bitcast_to_float(ctx, bit_size, value); 1788 1789 case nir_type_bool: 1790 if (!dxil_value_type_bitsize_equal_to(value, 1)) { 1791 return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC, 1792 dxil_module_get_int_type(&ctx->mod, 1), value); 1793 } 1794 return value; 1795 1796 default: 1797 unreachable("unexpected nir_alu_type"); 1798 } 1799} 1800 1801static const struct dxil_type * 1802get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) 1803{ 1804 assert(!alu->src[src].abs); 1805 assert(!alu->src[src].negate); 1806 nir_ssa_def *ssa_src = alu->src[src].src.ssa; 1807 unsigned chan = alu->src[src].swizzle[0]; 1808 const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan); 1809 return dxil_value_get_type(value); 1810} 1811 1812static const struct dxil_value * 1813get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) 1814{ 1815 assert(!alu->src[src].abs); 1816 assert(!alu->src[src].negate); 1817 1818 unsigned chan = alu->src[src].swizzle[0]; 1819 return get_src(ctx, &alu->src[src].src, chan, 1820 nir_op_infos[alu->op].input_types[src]); 1821} 1822 1823static bool 1824emit_binop(struct ntd_context *ctx, nir_alu_instr *alu, 1825 enum dxil_bin_opcode opcode, 1826 const struct dxil_value *op0, const struct dxil_value *op1) 1827{ 1828 bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float; 1829 1830 enum dxil_opt_flags flags = 0; 1831 if (is_float_op && !alu->exact) 1832 flags |= DXIL_UNSAFE_ALGEBRA; 1833 1834 const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags); 1835 if (!v) 1836 return false; 1837 store_alu_dest(ctx, alu, 0, v); 1838 return true; 1839} 1840 1841static bool 1842emit_shift(struct ntd_context *ctx, nir_alu_instr *alu, 1843 enum dxil_bin_opcode opcode, 1844 const struct dxil_value *op0, const struct dxil_value *op1) 1845{ 1846 unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src); 1847 unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src); 1848 if (op0_bit_size != op1_bit_size) { 1849 const struct dxil_type *type = 1850 dxil_module_get_int_type(&ctx->mod, op0_bit_size); 1851 enum dxil_cast_opcode cast_op = 1852 op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC; 1853 op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1); 1854 } 1855 1856 const struct dxil_value *v = 1857 dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0); 1858 if (!v) 1859 return false; 1860 store_alu_dest(ctx, alu, 0, v); 1861 return true; 1862} 1863 1864static bool 1865emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu, 1866 enum dxil_cmp_pred pred, 1867 const struct dxil_value *op0, const struct dxil_value *op1) 1868{ 1869 const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1); 1870 if (!v) 1871 return false; 1872 store_alu_dest(ctx, alu, 0, v); 1873 return true; 1874} 1875 1876static enum dxil_cast_opcode 1877get_cast_op(nir_alu_instr *alu) 1878{ 1879 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1880 unsigned src_bits = nir_src_bit_size(alu->src[0].src); 1881 1882 switch (alu->op) { 1883 /* bool -> int */ 1884 case nir_op_b2i16: 1885 case nir_op_b2i32: 1886 case nir_op_b2i64: 1887 return DXIL_CAST_ZEXT; 1888 1889 /* float -> float */ 1890 case nir_op_f2f16_rtz: 1891 case nir_op_f2f32: 1892 case nir_op_f2f64: 1893 assert(dst_bits != src_bits); 1894 if (dst_bits < src_bits) 1895 return DXIL_CAST_FPTRUNC; 1896 else 1897 return DXIL_CAST_FPEXT; 1898 1899 /* int -> int */ 1900 case nir_op_i2i16: 1901 case nir_op_i2i32: 1902 case nir_op_i2i64: 1903 assert(dst_bits != src_bits); 1904 if (dst_bits < src_bits) 1905 return DXIL_CAST_TRUNC; 1906 else 1907 return DXIL_CAST_SEXT; 1908 1909 /* uint -> uint */ 1910 case nir_op_u2u16: 1911 case nir_op_u2u32: 1912 case nir_op_u2u64: 1913 assert(dst_bits != src_bits); 1914 if (dst_bits < src_bits) 1915 return DXIL_CAST_TRUNC; 1916 else 1917 return DXIL_CAST_ZEXT; 1918 1919 /* float -> int */ 1920 case nir_op_f2i16: 1921 case nir_op_f2i32: 1922 case nir_op_f2i64: 1923 return DXIL_CAST_FPTOSI; 1924 1925 /* float -> uint */ 1926 case nir_op_f2u16: 1927 case nir_op_f2u32: 1928 case nir_op_f2u64: 1929 return DXIL_CAST_FPTOUI; 1930 1931 /* int -> float */ 1932 case nir_op_i2f16: 1933 case nir_op_i2f32: 1934 case nir_op_i2f64: 1935 return DXIL_CAST_SITOFP; 1936 1937 /* uint -> float */ 1938 case nir_op_u2f16: 1939 case nir_op_u2f32: 1940 case nir_op_u2f64: 1941 return DXIL_CAST_UITOFP; 1942 1943 default: 1944 unreachable("unexpected cast op"); 1945 } 1946} 1947 1948static const struct dxil_type * 1949get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu) 1950{ 1951 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 1952 switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) { 1953 case nir_type_bool: 1954 assert(dst_bits == 1); 1955 FALLTHROUGH; 1956 case nir_type_int: 1957 case nir_type_uint: 1958 return dxil_module_get_int_type(&ctx->mod, dst_bits); 1959 1960 case nir_type_float: 1961 return dxil_module_get_float_type(&ctx->mod, dst_bits); 1962 1963 default: 1964 unreachable("unknown nir_alu_type"); 1965 } 1966} 1967 1968static bool 1969is_double(nir_alu_type alu_type, unsigned bit_size) 1970{ 1971 return nir_alu_type_get_base_type(alu_type) == nir_type_float && 1972 bit_size == 64; 1973} 1974 1975static bool 1976emit_cast(struct ntd_context *ctx, nir_alu_instr *alu, 1977 const struct dxil_value *value) 1978{ 1979 enum dxil_cast_opcode opcode = get_cast_op(alu); 1980 const struct dxil_type *type = get_cast_dest_type(ctx, alu); 1981 if (!type) 1982 return false; 1983 1984 const nir_op_info *info = &nir_op_infos[alu->op]; 1985 switch (opcode) { 1986 case DXIL_CAST_UITOFP: 1987 case DXIL_CAST_SITOFP: 1988 if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest))) 1989 ctx->mod.feats.dx11_1_double_extensions = true; 1990 break; 1991 case DXIL_CAST_FPTOUI: 1992 case DXIL_CAST_FPTOSI: 1993 if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src))) 1994 ctx->mod.feats.dx11_1_double_extensions = true; 1995 break; 1996 default: 1997 break; 1998 } 1999 2000 const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type, 2001 value); 2002 if (!v) 2003 return false; 2004 store_alu_dest(ctx, alu, 0, v); 2005 return true; 2006} 2007 2008static enum overload_type 2009get_overload(nir_alu_type alu_type, unsigned bit_size) 2010{ 2011 switch (nir_alu_type_get_base_type(alu_type)) { 2012 case nir_type_int: 2013 case nir_type_uint: 2014 switch (bit_size) { 2015 case 16: return DXIL_I16; 2016 case 32: return DXIL_I32; 2017 case 64: return DXIL_I64; 2018 default: 2019 unreachable("unexpected bit_size"); 2020 } 2021 case nir_type_float: 2022 switch (bit_size) { 2023 case 16: return DXIL_F16; 2024 case 32: return DXIL_F32; 2025 case 64: return DXIL_F64; 2026 default: 2027 unreachable("unexpected bit_size"); 2028 } 2029 default: 2030 unreachable("unexpected output type"); 2031 } 2032} 2033 2034static bool 2035emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 2036 enum dxil_intr intr, const struct dxil_value *op) 2037{ 2038 const nir_op_info *info = &nir_op_infos[alu->op]; 2039 unsigned src_bits = nir_src_bit_size(alu->src[0].src); 2040 enum overload_type overload = get_overload(info->input_types[0], src_bits); 2041 2042 const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op); 2043 if (!v) 2044 return false; 2045 store_alu_dest(ctx, alu, 0, v); 2046 return true; 2047} 2048 2049static bool 2050emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 2051 enum dxil_intr intr, 2052 const struct dxil_value *op0, const struct dxil_value *op1) 2053{ 2054 const nir_op_info *info = &nir_op_infos[alu->op]; 2055 assert(info->output_type == info->input_types[0]); 2056 assert(info->output_type == info->input_types[1]); 2057 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 2058 assert(nir_src_bit_size(alu->src[0].src) == dst_bits); 2059 assert(nir_src_bit_size(alu->src[1].src) == dst_bits); 2060 enum overload_type overload = get_overload(info->output_type, dst_bits); 2061 2062 const struct dxil_value *v = emit_binary_call(ctx, overload, intr, 2063 op0, op1); 2064 if (!v) 2065 return false; 2066 store_alu_dest(ctx, alu, 0, v); 2067 return true; 2068} 2069 2070static bool 2071emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu, 2072 enum dxil_intr intr, 2073 const struct dxil_value *op0, 2074 const struct dxil_value *op1, 2075 const struct dxil_value *op2) 2076{ 2077 const nir_op_info *info = &nir_op_infos[alu->op]; 2078 unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); 2079 assert(nir_src_bit_size(alu->src[0].src) == dst_bits); 2080 assert(nir_src_bit_size(alu->src[1].src) == dst_bits); 2081 assert(nir_src_bit_size(alu->src[2].src) == dst_bits); 2082 2083 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits)); 2084 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits)); 2085 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits)); 2086 2087 enum overload_type overload = get_overload(info->output_type, dst_bits); 2088 2089 const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr, 2090 op0, op1, op2); 2091 if (!v) 2092 return false; 2093 store_alu_dest(ctx, alu, 0, v); 2094 return true; 2095} 2096 2097static bool 2098emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu, 2099 const struct dxil_value *base, 2100 const struct dxil_value *insert, 2101 const struct dxil_value *offset, 2102 const struct dxil_value *width) 2103{ 2104 /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */ 2105 const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI, 2106 width, offset, insert, base); 2107 if (!v) 2108 return false; 2109 2110 /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */ 2111 const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE, 2112 width, dxil_module_get_int32_const(&ctx->mod, 32)); 2113 v = dxil_emit_select(&ctx->mod, compare_width, insert, v); 2114 store_alu_dest(ctx, alu, 0, v); 2115 return true; 2116} 2117 2118static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu, 2119 const struct dxil_value *sel, 2120 const struct dxil_value *val_true, 2121 const struct dxil_value *val_false) 2122{ 2123 assert(sel); 2124 assert(val_true); 2125 assert(val_false); 2126 2127 const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false); 2128 if (!v) 2129 return false; 2130 2131 store_alu_dest(ctx, alu, 0, v); 2132 return true; 2133} 2134 2135static bool 2136emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 2137{ 2138 assert(val); 2139 2140 struct dxil_module *m = &ctx->mod; 2141 2142 const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); 2143 const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0); 2144 2145 if (!c0 || !c1) 2146 return false; 2147 2148 return emit_select(ctx, alu, val, c1, c0); 2149} 2150 2151static bool 2152emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 2153{ 2154 assert(val); 2155 2156 struct dxil_module *m = &ctx->mod; 2157 2158 const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f); 2159 const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f); 2160 2161 if (!c0 || !c1) 2162 return false; 2163 2164 return emit_select(ctx, alu, val, c1, c0); 2165} 2166 2167static bool 2168emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 2169{ 2170 assert(val); 2171 2172 struct dxil_module *m = &ctx->mod; 2173 2174 const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0); 2175 const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0); 2176 2177 if (!c0 || !c1) 2178 return false; 2179 2180 ctx->mod.feats.doubles = 1; 2181 return emit_select(ctx, alu, val, c1, c0); 2182} 2183 2184static bool 2185emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) 2186{ 2187 assert(val); 2188 2189 const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f); 2190 return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero); 2191} 2192 2193static bool 2194emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift) 2195{ 2196 if (shift) { 2197 val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val, 2198 dxil_module_get_int32_const(&ctx->mod, 16), 0); 2199 if (!val) 2200 return false; 2201 } 2202 2203 const struct dxil_func *func = dxil_get_function(&ctx->mod, 2204 "dx.op.legacyF16ToF32", 2205 DXIL_NONE); 2206 if (!func) 2207 return false; 2208 2209 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32); 2210 if (!opcode) 2211 return false; 2212 2213 const struct dxil_value *args[] = { 2214 opcode, 2215 val 2216 }; 2217 2218 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2219 if (!v) 2220 return false; 2221 store_alu_dest(ctx, alu, 0, v); 2222 return true; 2223} 2224 2225static bool 2226emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1) 2227{ 2228 const struct dxil_func *func = dxil_get_function(&ctx->mod, 2229 "dx.op.legacyF32ToF16", 2230 DXIL_NONE); 2231 if (!func) 2232 return false; 2233 2234 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16); 2235 if (!opcode) 2236 return false; 2237 2238 const struct dxil_value *args[] = { 2239 opcode, 2240 val0 2241 }; 2242 2243 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2244 if (!v) 2245 return false; 2246 2247 if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) { 2248 args[1] = val1; 2249 const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2250 if (!v_high) 2251 return false; 2252 2253 v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high, 2254 dxil_module_get_int32_const(&ctx->mod, 16), 0); 2255 if (!v_high) 2256 return false; 2257 2258 v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0); 2259 if (!v) 2260 return false; 2261 } 2262 2263 store_alu_dest(ctx, alu, 0, v); 2264 return true; 2265} 2266 2267static bool 2268emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs) 2269{ 2270 const struct dxil_type *type = get_alu_src_type(ctx, alu, 0); 2271 nir_alu_type t = dxil_type_to_nir_type(type); 2272 2273 for (unsigned i = 0; i < num_inputs; i++) { 2274 const struct dxil_value *src = 2275 get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t); 2276 if (!src) 2277 return false; 2278 2279 store_alu_dest(ctx, alu, i, src); 2280 } 2281 return true; 2282} 2283 2284static bool 2285emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu) 2286{ 2287 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64); 2288 if (!func) 2289 return false; 2290 2291 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE); 2292 if (!opcode) 2293 return false; 2294 2295 const struct dxil_value *args[3] = { 2296 opcode, 2297 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32), 2298 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32), 2299 }; 2300 if (!args[1] || !args[2]) 2301 return false; 2302 2303 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2304 if (!v) 2305 return false; 2306 store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64); 2307 return true; 2308} 2309 2310static bool 2311emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu) 2312{ 2313 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64); 2314 if (!func) 2315 return false; 2316 2317 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE); 2318 if (!opcode) 2319 return false; 2320 2321 const struct dxil_value *args[] = { 2322 opcode, 2323 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64) 2324 }; 2325 if (!args[1]) 2326 return false; 2327 2328 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2329 if (!v) 2330 return false; 2331 2332 const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0); 2333 const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1); 2334 if (!hi || !lo) 2335 return false; 2336 2337 store_dest_value(ctx, &alu->dest.dest, 0, hi); 2338 store_dest_value(ctx, &alu->dest.dest, 1, lo); 2339 return true; 2340} 2341 2342static bool 2343emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) 2344{ 2345 /* handle vec-instructions first; they are the only ones that produce 2346 * vector results. 2347 */ 2348 switch (alu->op) { 2349 case nir_op_vec2: 2350 case nir_op_vec3: 2351 case nir_op_vec4: 2352 case nir_op_vec8: 2353 case nir_op_vec16: 2354 return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs); 2355 case nir_op_mov: { 2356 assert(nir_dest_num_components(alu->dest.dest) == 1); 2357 store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx, 2358 alu->src->src.ssa, alu->src->swizzle[0])); 2359 return true; 2360 } 2361 case nir_op_pack_double_2x32_dxil: 2362 return emit_make_double(ctx, alu); 2363 case nir_op_unpack_double_2x32_dxil: 2364 return emit_split_double(ctx, alu); 2365 default: 2366 /* silence warnings */ 2367 ; 2368 } 2369 2370 /* other ops should be scalar */ 2371 assert(alu->dest.write_mask == 1); 2372 const struct dxil_value *src[4]; 2373 assert(nir_op_infos[alu->op].num_inputs <= 4); 2374 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 2375 src[i] = get_alu_src(ctx, alu, i); 2376 if (!src[i]) 2377 return false; 2378 } 2379 2380 switch (alu->op) { 2381 case nir_op_iadd: 2382 case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]); 2383 2384 case nir_op_isub: 2385 case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]); 2386 2387 case nir_op_imul: 2388 case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]); 2389 2390 case nir_op_fdiv: 2391 if (alu->dest.dest.ssa.bit_size == 64) 2392 ctx->mod.feats.dx11_1_double_extensions = 1; 2393 FALLTHROUGH; 2394 case nir_op_idiv: 2395 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]); 2396 2397 case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]); 2398 case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]); 2399 case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); 2400 case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); 2401 case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]); 2402 case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]); 2403 case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]); 2404 case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); 2405 case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); 2406 case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); 2407 case nir_op_inot: { 2408 unsigned bit_size = alu->dest.dest.ssa.bit_size; 2409 intmax_t val = bit_size == 1 ? 1 : -1; 2410 const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size); 2411 return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one); 2412 } 2413 case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); 2414 case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); 2415 case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]); 2416 case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]); 2417 case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]); 2418 case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]); 2419 case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]); 2420 case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]); 2421 case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]); 2422 case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]); 2423 case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]); 2424 case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]); 2425 case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]); 2426 case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]); 2427 case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]); 2428 case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]); 2429 case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]); 2430 case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]); 2431 case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]); 2432 case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]); 2433 case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]); 2434 case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]); 2435 2436 case nir_op_fddx: 2437 case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]); 2438 case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]); 2439 case nir_op_fddy: 2440 case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]); 2441 case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]); 2442 2443 case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]); 2444 case nir_op_frcp: { 2445 const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f); 2446 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]); 2447 } 2448 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]); 2449 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]); 2450 case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]); 2451 case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]); 2452 case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]); 2453 case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]); 2454 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]); 2455 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]); 2456 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]); 2457 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]); 2458 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]); 2459 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); 2460 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); 2461 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); 2462 case nir_op_ffma: 2463 if (alu->dest.dest.ssa.bit_size == 64) 2464 ctx->mod.feats.dx11_1_double_extensions = 1; 2465 return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); 2466 2467 case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]); 2468 case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]); 2469 case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]); 2470 2471 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false); 2472 case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true); 2473 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]); 2474 2475 case nir_op_b2i16: 2476 case nir_op_i2i16: 2477 case nir_op_f2i16: 2478 case nir_op_f2u16: 2479 case nir_op_u2u16: 2480 case nir_op_u2f16: 2481 case nir_op_i2f16: 2482 case nir_op_f2f16_rtz: 2483 case nir_op_b2i32: 2484 case nir_op_f2f32: 2485 case nir_op_f2i32: 2486 case nir_op_f2u32: 2487 case nir_op_i2f32: 2488 case nir_op_i2i32: 2489 case nir_op_u2f32: 2490 case nir_op_u2u32: 2491 case nir_op_b2i64: 2492 case nir_op_f2f64: 2493 case nir_op_f2i64: 2494 case nir_op_f2u64: 2495 case nir_op_i2f64: 2496 case nir_op_i2i64: 2497 case nir_op_u2f64: 2498 case nir_op_u2u64: 2499 return emit_cast(ctx, alu, src[0]); 2500 2501 case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]); 2502 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]); 2503 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]); 2504 case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]); 2505 default: 2506 NIR_INSTR_UNSUPPORTED(&alu->instr); 2507 assert("Unimplemented ALU instruction"); 2508 return false; 2509 } 2510} 2511 2512static const struct dxil_value * 2513load_ubo(struct ntd_context *ctx, const struct dxil_value *handle, 2514 const struct dxil_value *offset, enum overload_type overload) 2515{ 2516 assert(handle && offset); 2517 2518 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY); 2519 if (!opcode) 2520 return NULL; 2521 2522 const struct dxil_value *args[] = { 2523 opcode, handle, offset 2524 }; 2525 2526 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload); 2527 if (!func) 2528 return NULL; 2529 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2530} 2531 2532static bool 2533emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, nir_scope execution_scope, nir_scope mem_scope) 2534{ 2535 const struct dxil_value *opcode, *mode; 2536 const struct dxil_func *func; 2537 uint32_t flags = 0; 2538 2539 if (execution_scope == NIR_SCOPE_WORKGROUP) 2540 flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP; 2541 2542 if (modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) { 2543 if (mem_scope > NIR_SCOPE_WORKGROUP) 2544 flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL; 2545 else 2546 flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP; 2547 } 2548 2549 if (modes & nir_var_mem_shared) 2550 flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE; 2551 2552 func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE); 2553 if (!func) 2554 return false; 2555 2556 opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER); 2557 if (!opcode) 2558 return false; 2559 2560 mode = dxil_module_get_int32_const(&ctx->mod, flags); 2561 if (!mode) 2562 return false; 2563 2564 const struct dxil_value *args[] = { opcode, mode }; 2565 2566 return dxil_emit_call_void(&ctx->mod, func, 2567 args, ARRAY_SIZE(args)); 2568} 2569 2570static bool 2571emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2572{ 2573 return emit_barrier_impl(ctx, 2574 nir_intrinsic_memory_modes(intr), 2575 nir_intrinsic_execution_scope(intr), 2576 nir_intrinsic_memory_scope(intr)); 2577} 2578 2579/* Memory barrier for UAVs (buffers/images) at cross-workgroup scope */ 2580static bool 2581emit_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2582{ 2583 return emit_barrier_impl(ctx, 2584 nir_var_mem_global, 2585 NIR_SCOPE_NONE, 2586 NIR_SCOPE_DEVICE); 2587} 2588 2589/* Memory barrier for TGSM */ 2590static bool 2591emit_memory_barrier_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2592{ 2593 return emit_barrier_impl(ctx, 2594 nir_var_mem_shared, 2595 NIR_SCOPE_NONE, 2596 NIR_SCOPE_WORKGROUP); 2597} 2598 2599/* Memory barrier for all intra-workgroup memory accesses (UAVs and TGSM) */ 2600static bool 2601emit_group_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2602{ 2603 return emit_barrier_impl(ctx, 2604 nir_var_mem_shared | nir_var_mem_global, 2605 NIR_SCOPE_NONE, 2606 NIR_SCOPE_WORKGROUP); 2607} 2608 2609static bool 2610emit_control_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2611{ 2612 return emit_barrier_impl(ctx, 2613 nir_var_mem_shared, 2614 NIR_SCOPE_WORKGROUP, 2615 NIR_SCOPE_NONE); 2616} 2617 2618static bool 2619emit_load_global_invocation_id(struct ntd_context *ctx, 2620 nir_intrinsic_instr *intr) 2621{ 2622 assert(intr->dest.is_ssa); 2623 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2624 2625 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2626 if (comps & (1 << i)) { 2627 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); 2628 if (!idx) 2629 return false; 2630 const struct dxil_value *globalid = emit_threadid_call(ctx, idx); 2631 2632 if (!globalid) 2633 return false; 2634 2635 store_dest_value(ctx, &intr->dest, i, globalid); 2636 } 2637 } 2638 return true; 2639} 2640 2641static bool 2642emit_load_local_invocation_id(struct ntd_context *ctx, 2643 nir_intrinsic_instr *intr) 2644{ 2645 assert(intr->dest.is_ssa); 2646 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2647 2648 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2649 if (comps & (1 << i)) { 2650 const struct dxil_value 2651 *idx = dxil_module_get_int32_const(&ctx->mod, i); 2652 if (!idx) 2653 return false; 2654 const struct dxil_value 2655 *threadidingroup = emit_threadidingroup_call(ctx, idx); 2656 if (!threadidingroup) 2657 return false; 2658 store_dest_value(ctx, &intr->dest, i, threadidingroup); 2659 } 2660 } 2661 return true; 2662} 2663 2664static bool 2665emit_load_local_invocation_index(struct ntd_context *ctx, 2666 nir_intrinsic_instr *intr) 2667{ 2668 assert(intr->dest.is_ssa); 2669 2670 const struct dxil_value 2671 *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx); 2672 if (!flattenedthreadidingroup) 2673 return false; 2674 store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup); 2675 2676 return true; 2677} 2678 2679static bool 2680emit_load_local_workgroup_id(struct ntd_context *ctx, 2681 nir_intrinsic_instr *intr) 2682{ 2683 assert(intr->dest.is_ssa); 2684 nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); 2685 2686 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2687 if (comps & (1 << i)) { 2688 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); 2689 if (!idx) 2690 return false; 2691 const struct dxil_value *groupid = emit_groupid_call(ctx, idx); 2692 if (!groupid) 2693 return false; 2694 store_dest_value(ctx, &intr->dest, i, groupid); 2695 } 2696 } 2697 return true; 2698} 2699 2700static const struct dxil_value * 2701call_unary_external_function(struct ntd_context *ctx, 2702 const char *name, 2703 int32_t dxil_intr) 2704{ 2705 const struct dxil_func *func = 2706 dxil_get_function(&ctx->mod, name, DXIL_I32); 2707 if (!func) 2708 return false; 2709 2710 const struct dxil_value *opcode = 2711 dxil_module_get_int32_const(&ctx->mod, dxil_intr); 2712 if (!opcode) 2713 return false; 2714 2715 const struct dxil_value *args[] = {opcode}; 2716 2717 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2718} 2719 2720static bool 2721emit_load_unary_external_function(struct ntd_context *ctx, 2722 nir_intrinsic_instr *intr, const char *name, 2723 int32_t dxil_intr) 2724{ 2725 const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr); 2726 store_dest_value(ctx, &intr->dest, 0, value); 2727 2728 return true; 2729} 2730 2731static bool 2732emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2733{ 2734 const struct dxil_value *value = call_unary_external_function(ctx, 2735 "dx.op.coverage", DXIL_INTR_COVERAGE); 2736 2737 /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */ 2738 if (ctx->mod.info.has_per_sample_input) { 2739 value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value, 2740 dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, 2741 dxil_module_get_int32_const(&ctx->mod, 1), 2742 call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX), 0), 0); 2743 } 2744 2745 store_dest_value(ctx, &intr->dest, 0, value); 2746 return true; 2747} 2748 2749static bool 2750emit_load_tess_coord(struct ntd_context *ctx, 2751 nir_intrinsic_instr *intr) 2752{ 2753 const struct dxil_func *func = 2754 dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32); 2755 if (!func) 2756 return false; 2757 2758 const struct dxil_value *opcode = 2759 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION); 2760 if (!opcode) 2761 return false; 2762 2763 unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2; 2764 for (unsigned i = 0; i < num_coords; ++i) { 2765 unsigned component_idx = i; 2766 2767 const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx); 2768 if (!component) 2769 return false; 2770 2771 const struct dxil_value *args[] = { opcode, component }; 2772 2773 const struct dxil_value *value = 2774 dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 2775 store_dest_value(ctx, &intr->dest, i, value); 2776 } 2777 2778 for (unsigned i = num_coords; i < intr->dest.ssa.num_components; ++i) { 2779 const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f); 2780 store_dest_value(ctx, &intr->dest, i, value); 2781 } 2782 2783 return true; 2784} 2785 2786static const struct dxil_value * 2787get_int32_undef(struct dxil_module *m) 2788{ 2789 const struct dxil_type *int32_type = 2790 dxil_module_get_int_type(m, 32); 2791 if (!int32_type) 2792 return NULL; 2793 2794 return dxil_module_get_undef(m, int32_type); 2795} 2796 2797static const struct dxil_value * 2798emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var, 2799 const struct dxil_value *index) 2800{ 2801 assert(var->data.mode == nir_var_shader_temp); 2802 2803 struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var); 2804 assert(he != NULL); 2805 const struct dxil_value *ptr = he->data; 2806 2807 const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0); 2808 if (!zero) 2809 return NULL; 2810 2811 const struct dxil_value *ops[] = { ptr, zero, index }; 2812 return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 2813} 2814 2815static const struct dxil_value * 2816get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, 2817 enum dxil_resource_kind kind) 2818{ 2819 /* This source might be one of: 2820 * 1. Constant resource index - just look it up in precomputed handle arrays 2821 * If it's null in that array, create a handle, and store the result 2822 * 2. A handle from load_vulkan_descriptor - just get the stored SSA value 2823 * 3. Dynamic resource index - create a handle for it here 2824 */ 2825 assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32); 2826 nir_const_value *const_block_index = nir_src_as_const_value(*src); 2827 const struct dxil_value **handle_entry = NULL; 2828 if (const_block_index) { 2829 assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN); 2830 switch (kind) { 2831 case DXIL_RESOURCE_KIND_CBUFFER: 2832 handle_entry = &ctx->cbv_handles[const_block_index->u32]; 2833 break; 2834 case DXIL_RESOURCE_KIND_RAW_BUFFER: 2835 if (class == DXIL_RESOURCE_CLASS_UAV) 2836 handle_entry = &ctx->ssbo_handles[const_block_index->u32]; 2837 else 2838 handle_entry = &ctx->srv_handles[const_block_index->u32]; 2839 break; 2840 case DXIL_RESOURCE_KIND_SAMPLER: 2841 handle_entry = &ctx->sampler_handles[const_block_index->u32]; 2842 break; 2843 default: 2844 if (class == DXIL_RESOURCE_CLASS_UAV) 2845 handle_entry = &ctx->image_handles[const_block_index->u32]; 2846 else 2847 handle_entry = &ctx->srv_handles[const_block_index->u32]; 2848 break; 2849 } 2850 } 2851 2852 if (handle_entry && *handle_entry) 2853 return *handle_entry; 2854 2855 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0); 2856 if (nir_src_as_deref(*src) || 2857 ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 2858 return value; 2859 } 2860 2861 unsigned space = 0; 2862 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL && 2863 class == DXIL_RESOURCE_CLASS_UAV) { 2864 if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER) 2865 space = 2; 2866 else 2867 space = 1; 2868 } 2869 2870 /* The base binding here will almost always be zero. The only cases where we end 2871 * up in this type of dynamic indexing are: 2872 * 1. GL UBOs 2873 * 2. GL SSBOs 2874 * 2. CL SSBOs 2875 * In all cases except GL UBOs, the resources are a single zero-based array. 2876 * In that case, the base is 1, because uniforms use 0 and cannot by dynamically 2877 * indexed. All other cases should either fall into static indexing (first early return), 2878 * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or 2879 * load_vulkan_descriptor handle creation. 2880 */ 2881 unsigned base_binding = 0; 2882 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL && 2883 class == DXIL_RESOURCE_CLASS_CBV) 2884 base_binding = 1; 2885 2886 const struct dxil_value *handle = emit_createhandle_call(ctx, class, 2887 get_resource_id(ctx, class, space, base_binding), value, !const_block_index); 2888 if (handle_entry) 2889 *handle_entry = handle; 2890 2891 return handle; 2892} 2893 2894static bool 2895emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2896{ 2897 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2898 2899 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; 2900 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 2901 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); 2902 if (var && var->data.access & ACCESS_NON_WRITEABLE) 2903 class = DXIL_RESOURCE_CLASS_SRV; 2904 } 2905 2906 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); 2907 const struct dxil_value *offset = 2908 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2909 if (!int32_undef || !handle || !offset) 2910 return false; 2911 2912 assert(nir_src_bit_size(intr->src[0]) == 32); 2913 assert(nir_intrinsic_dest_components(intr) <= 4); 2914 2915 const struct dxil_value *coord[2] = { 2916 offset, 2917 int32_undef 2918 }; 2919 2920 const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32); 2921 if (!load) 2922 return false; 2923 2924 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { 2925 const struct dxil_value *val = 2926 dxil_emit_extractval(&ctx->mod, load, i); 2927 if (!val) 2928 return false; 2929 store_dest_value(ctx, &intr->dest, i, val); 2930 } 2931 return true; 2932} 2933 2934static bool 2935emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2936{ 2937 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); 2938 const struct dxil_value *offset = 2939 get_src(ctx, &intr->src[2], 0, nir_type_uint); 2940 if (!handle || !offset) 2941 return false; 2942 2943 assert(nir_src_bit_size(intr->src[0]) == 32); 2944 unsigned num_components = nir_src_num_components(intr->src[0]); 2945 assert(num_components <= 4); 2946 const struct dxil_value *value[4]; 2947 for (unsigned i = 0; i < num_components; ++i) { 2948 value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint); 2949 if (!value[i]) 2950 return false; 2951 } 2952 2953 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2954 if (!int32_undef) 2955 return false; 2956 2957 const struct dxil_value *coord[2] = { 2958 offset, 2959 int32_undef 2960 }; 2961 2962 for (int i = num_components; i < 4; ++i) 2963 value[i] = int32_undef; 2964 2965 const struct dxil_value *write_mask = 2966 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); 2967 if (!write_mask) 2968 return false; 2969 2970 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32); 2971} 2972 2973static bool 2974emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr) 2975{ 2976 const struct dxil_value *value = 2977 get_src(ctx, &intr->src[0], 0, nir_type_uint); 2978 const struct dxil_value *mask = 2979 get_src(ctx, &intr->src[1], 0, nir_type_uint); 2980 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); 2981 const struct dxil_value *offset = 2982 get_src(ctx, &intr->src[3], 0, nir_type_uint); 2983 if (!value || !mask || !handle || !offset) 2984 return false; 2985 2986 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 2987 if (!int32_undef) 2988 return false; 2989 2990 const struct dxil_value *coord[3] = { 2991 offset, int32_undef, int32_undef 2992 }; 2993 2994 return 2995 emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL && 2996 emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL; 2997} 2998 2999static bool 3000emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3001{ 3002 const struct dxil_value *zero, *index; 3003 3004 /* All shared mem accesses should have been lowered to scalar 32bit 3005 * accesses. 3006 */ 3007 assert(nir_src_bit_size(intr->src[0]) == 32); 3008 assert(nir_src_num_components(intr->src[0]) == 1); 3009 3010 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3011 if (!zero) 3012 return false; 3013 3014 if (intr->intrinsic == nir_intrinsic_store_shared_dxil) 3015 index = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3016 else 3017 index = get_src(ctx, &intr->src[2], 0, nir_type_uint); 3018 if (!index) 3019 return false; 3020 3021 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 3022 const struct dxil_value *ptr, *value; 3023 3024 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3025 if (!ptr) 3026 return false; 3027 3028 value = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3029 if (!value) 3030 return false; 3031 3032 if (intr->intrinsic == nir_intrinsic_store_shared_dxil) 3033 return dxil_emit_store(&ctx->mod, value, ptr, 4, false); 3034 3035 const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3036 if (!mask) 3037 return false; 3038 3039 if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false, 3040 DXIL_ATOMIC_ORDERING_ACQREL, 3041 DXIL_SYNC_SCOPE_CROSSTHREAD)) 3042 return false; 3043 3044 if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false, 3045 DXIL_ATOMIC_ORDERING_ACQREL, 3046 DXIL_SYNC_SCOPE_CROSSTHREAD)) 3047 return false; 3048 3049 return true; 3050} 3051 3052static bool 3053emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3054{ 3055 const struct dxil_value *zero, *index; 3056 3057 /* All scratch mem accesses should have been lowered to scalar 32bit 3058 * accesses. 3059 */ 3060 assert(nir_src_bit_size(intr->src[0]) == 32); 3061 assert(nir_src_num_components(intr->src[0]) == 1); 3062 3063 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3064 if (!zero) 3065 return false; 3066 3067 index = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3068 if (!index) 3069 return false; 3070 3071 const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; 3072 const struct dxil_value *ptr, *value; 3073 3074 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3075 if (!ptr) 3076 return false; 3077 3078 value = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3079 if (!value) 3080 return false; 3081 3082 return dxil_emit_store(&ctx->mod, value, ptr, 4, false); 3083} 3084 3085static bool 3086emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3087{ 3088 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER); 3089 if (!handle) 3090 return false; 3091 3092 const struct dxil_value *offset; 3093 nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]); 3094 if (const_offset) { 3095 offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4); 3096 } else { 3097 const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3098 const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4); 3099 if (!offset_src || !c4) 3100 return false; 3101 3102 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0); 3103 } 3104 3105 const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32); 3106 3107 if (!agg) 3108 return false; 3109 3110 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 3111 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i); 3112 store_dest(ctx, &intr->dest, i, retval, 3113 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool); 3114 } 3115 return true; 3116} 3117 3118static bool 3119emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3120{ 3121 assert(nir_dest_num_components(intr->dest) <= 4); 3122 assert(nir_dest_bit_size(intr->dest) == 32); 3123 3124 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER); 3125 const struct dxil_value *offset = 3126 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3127 3128 if (!handle || !offset) 3129 return false; 3130 3131 const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32); 3132 if (!agg) 3133 return false; 3134 3135 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++) 3136 store_dest_value(ctx, &intr->dest, i, 3137 dxil_emit_extractval(&ctx->mod, agg, i)); 3138 3139 return true; 3140} 3141 3142/* Need to add patch-ness as a matching parameter, since driver_location is *not* unique 3143 * between control points and patch variables in HS/DS 3144 */ 3145static nir_variable * 3146find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch) 3147{ 3148 nir_foreach_variable_with_modes(var, s, mode) { 3149 if (var->data.driver_location == driver_location && 3150 var->data.patch == patch) 3151 return var; 3152 } 3153 return NULL; 3154} 3155 3156static bool 3157emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3158{ 3159 assert(intr->intrinsic == nir_intrinsic_store_output || 3160 ctx->mod.shader_kind == DXIL_HULL_SHADER); 3161 bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output && 3162 ctx->mod.shader_kind == DXIL_HULL_SHADER; 3163 nir_alu_type out_type = nir_intrinsic_src_type(intr); 3164 enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size); 3165 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ? 3166 "dx.op.storePatchConstant" : "dx.op.storeOutput", 3167 overload); 3168 3169 if (!func) 3170 return false; 3171 3172 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ? 3173 DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT); 3174 const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr)); 3175 unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2; 3176 3177 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature 3178 * generation, so muck with them here too. 3179 */ 3180 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); 3181 bool is_tess_level = is_patch_constant && 3182 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || 3183 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); 3184 3185 const struct dxil_value *row = NULL; 3186 const struct dxil_value *col = NULL; 3187 if (is_tess_level) 3188 col = dxil_module_get_int8_const(&ctx->mod, 0); 3189 else 3190 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); 3191 3192 bool success = true; 3193 uint32_t writemask = nir_intrinsic_write_mask(intr); 3194 3195 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant); 3196 unsigned var_base_component = var->data.location_frac; 3197 unsigned base_component = nir_intrinsic_component(intr) - var_base_component; 3198 3199 if (ctx->mod.minor_validator >= 5) { 3200 struct dxil_signature_record *sig_rec = is_patch_constant ? 3201 &ctx->mod.patch_consts[nir_intrinsic_base(intr)] : 3202 &ctx->mod.outputs[nir_intrinsic_base(intr)]; 3203 unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1; 3204 unsigned comp_mask = 0; 3205 if (is_tess_level) 3206 comp_mask = 1; 3207 else if (comp_size == 1) 3208 comp_mask = writemask << var_base_component; 3209 else { 3210 for (unsigned i = 0; i < intr->num_components; ++i) 3211 if ((writemask & (1 << i))) 3212 comp_mask |= 3 << ((i + var_base_component) * comp_size); 3213 } 3214 for (unsigned r = 0; r < sig_rec->num_elements; ++r) 3215 sig_rec->elements[r].never_writes_mask &= ~comp_mask; 3216 3217 if (!nir_src_is_const(intr->src[row_index])) { 3218 struct dxil_psv_signature_element *psv_rec = is_patch_constant ? 3219 &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] : 3220 &ctx->mod.psv_outputs[nir_intrinsic_base(intr)]; 3221 psv_rec->dynamic_mask_and_stream |= comp_mask; 3222 } 3223 } 3224 3225 for (unsigned i = 0; i < intr->num_components && success; ++i) { 3226 if (writemask & (1 << i)) { 3227 if (is_tess_level) 3228 row = dxil_module_get_int32_const(&ctx->mod, i + base_component); 3229 else 3230 col = dxil_module_get_int8_const(&ctx->mod, i + base_component); 3231 const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type); 3232 if (!col || !row || !value) 3233 return false; 3234 3235 const struct dxil_value *args[] = { 3236 opcode, output_id, row, col, value 3237 }; 3238 success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3239 } 3240 } 3241 3242 return success; 3243} 3244 3245static bool 3246emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3247{ 3248 bool attr_at_vertex = false; 3249 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER && 3250 ctx->opts->interpolate_at_vertex && 3251 ctx->opts->provoking_vertex != 0 && 3252 (nir_intrinsic_dest_type(intr) & nir_type_float)) { 3253 nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr)); 3254 3255 attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT; 3256 } 3257 3258 bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER && 3259 intr->intrinsic == nir_intrinsic_load_input) || 3260 (ctx->mod.shader_kind == DXIL_HULL_SHADER && 3261 intr->intrinsic == nir_intrinsic_load_output); 3262 bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output; 3263 3264 unsigned opcode_val; 3265 const char *func_name; 3266 if (attr_at_vertex) { 3267 opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX; 3268 func_name = "dx.op.attributeAtVertex"; 3269 if (ctx->mod.minor_validator >= 6) 3270 ctx->mod.feats.barycentrics = 1; 3271 } else if (is_patch_constant) { 3272 opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT; 3273 func_name = "dx.op.loadPatchConstant"; 3274 } else if (is_output_control_point) { 3275 opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT; 3276 func_name = "dx.op.loadOutputControlPoint"; 3277 } else { 3278 opcode_val = DXIL_INTR_LOAD_INPUT; 3279 func_name = "dx.op.loadInput"; 3280 } 3281 3282 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val); 3283 if (!opcode) 3284 return false; 3285 3286 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, 3287 is_patch_constant || is_output_control_point ? 3288 nir_intrinsic_base(intr) : 3289 ctx->mod.input_mappings[nir_intrinsic_base(intr)]); 3290 if (!input_id) 3291 return false; 3292 3293 bool is_per_vertex = 3294 intr->intrinsic == nir_intrinsic_load_per_vertex_input || 3295 intr->intrinsic == nir_intrinsic_load_per_vertex_output; 3296 int row_index = is_per_vertex ? 1 : 0; 3297 const struct dxil_value *vertex_id = NULL; 3298 if (!is_patch_constant) { 3299 if (is_per_vertex) { 3300 vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int); 3301 } else if (attr_at_vertex) { 3302 vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex); 3303 } else { 3304 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); 3305 if (!int32_type) 3306 return false; 3307 3308 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); 3309 } 3310 if (!vertex_id) 3311 return false; 3312 } 3313 3314 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature 3315 * generation, so muck with them here too. 3316 */ 3317 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); 3318 bool is_tess_level = is_patch_constant && 3319 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || 3320 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); 3321 3322 const struct dxil_value *row = NULL; 3323 const struct dxil_value *comp = NULL; 3324 if (is_tess_level) 3325 comp = dxil_module_get_int8_const(&ctx->mod, 0); 3326 else 3327 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); 3328 3329 nir_alu_type out_type = nir_intrinsic_dest_type(intr); 3330 enum overload_type overload = get_overload(out_type, intr->dest.ssa.bit_size); 3331 3332 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload); 3333 3334 if (!func) 3335 return false; 3336 3337 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant); 3338 unsigned var_base_component = var ? var->data.location_frac : 0; 3339 unsigned base_component = nir_intrinsic_component(intr) - var_base_component; 3340 3341 if (ctx->mod.minor_validator >= 5 && 3342 !is_output_control_point && 3343 intr->intrinsic != nir_intrinsic_load_output) { 3344 struct dxil_signature_record *sig_rec = is_patch_constant ? 3345 &ctx->mod.patch_consts[nir_intrinsic_base(intr)] : 3346 &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]]; 3347 unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1; 3348 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1; 3349 comp_mask <<= (var_base_component * comp_size); 3350 if (is_tess_level) 3351 comp_mask = 1; 3352 for (unsigned r = 0; r < sig_rec->num_elements; ++r) 3353 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask); 3354 3355 if (!nir_src_is_const(intr->src[row_index])) { 3356 struct dxil_psv_signature_element *psv_rec = is_patch_constant ? 3357 &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] : 3358 &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]]; 3359 psv_rec->dynamic_mask_and_stream |= comp_mask; 3360 } 3361 } 3362 3363 for (unsigned i = 0; i < intr->num_components; ++i) { 3364 if (is_tess_level) 3365 row = dxil_module_get_int32_const(&ctx->mod, i + base_component); 3366 else 3367 comp = dxil_module_get_int8_const(&ctx->mod, i + base_component); 3368 3369 if (!row || !comp) 3370 return false; 3371 3372 const struct dxil_value *args[] = { 3373 opcode, input_id, row, comp, vertex_id 3374 }; 3375 3376 unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0); 3377 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); 3378 if (!retval) 3379 return false; 3380 store_dest(ctx, &intr->dest, i, retval, out_type); 3381 } 3382 return true; 3383} 3384 3385static bool 3386emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3387{ 3388 nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]); 3389 3390 const struct dxil_value *args[6] = { 0 }; 3391 3392 unsigned opcode_val; 3393 const char *func_name; 3394 unsigned num_args; 3395 switch (barycentric->intrinsic) { 3396 case nir_intrinsic_load_barycentric_at_offset: 3397 opcode_val = DXIL_INTR_EVAL_SNAPPED; 3398 func_name = "dx.op.evalSnapped"; 3399 num_args = 6; 3400 for (unsigned i = 0; i < 2; ++i) { 3401 const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float); 3402 /* GLSL uses [-0.5f, 0.5f), DXIL uses (-8, 7) */ 3403 const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod, 3404 DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0); 3405 args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI, 3406 dxil_module_get_int_type(&ctx->mod, 32), offset_16); 3407 } 3408 break; 3409 case nir_intrinsic_load_barycentric_pixel: 3410 opcode_val = DXIL_INTR_EVAL_SNAPPED; 3411 func_name = "dx.op.evalSnapped"; 3412 num_args = 6; 3413 args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0); 3414 break; 3415 case nir_intrinsic_load_barycentric_at_sample: 3416 opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX; 3417 func_name = "dx.op.evalSampleIndex"; 3418 num_args = 5; 3419 args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int); 3420 break; 3421 case nir_intrinsic_load_barycentric_centroid: 3422 opcode_val = DXIL_INTR_EVAL_CENTROID; 3423 func_name = "dx.op.evalCentroid"; 3424 num_args = 4; 3425 break; 3426 default: 3427 unreachable("Unsupported interpolation barycentric intrinsic"); 3428 } 3429 args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val); 3430 args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr)); 3431 args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int); 3432 3433 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32); 3434 3435 if (!func) 3436 return false; 3437 3438 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false); 3439 unsigned var_base_component = var ? var->data.location_frac : 0; 3440 unsigned base_component = nir_intrinsic_component(intr) - var_base_component; 3441 3442 if (ctx->mod.minor_validator >= 5) { 3443 struct dxil_signature_record *sig_rec = 3444 &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]]; 3445 unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1; 3446 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1; 3447 comp_mask <<= (var_base_component * comp_size); 3448 for (unsigned r = 0; r < sig_rec->num_elements; ++r) 3449 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask); 3450 3451 if (!nir_src_is_const(intr->src[1])) { 3452 struct dxil_psv_signature_element *psv_rec = 3453 &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]]; 3454 psv_rec->dynamic_mask_and_stream |= comp_mask; 3455 } 3456 } 3457 3458 for (unsigned i = 0; i < intr->num_components; ++i) { 3459 args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component); 3460 3461 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); 3462 if (!retval) 3463 return false; 3464 store_dest(ctx, &intr->dest, i, retval, nir_type_float); 3465 } 3466 return true; 3467} 3468 3469static bool 3470emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3471{ 3472 struct nir_variable *var = 3473 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); 3474 3475 const struct dxil_value *index = 3476 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3477 if (!index) 3478 return false; 3479 3480 const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index); 3481 if (!ptr) 3482 return false; 3483 3484 const struct dxil_value *retval = 3485 dxil_emit_load(&ctx->mod, ptr, 4, false); 3486 if (!retval) 3487 return false; 3488 3489 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3490 return true; 3491} 3492 3493static bool 3494emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3495{ 3496 const struct dxil_value *zero, *index; 3497 unsigned bit_size = nir_dest_bit_size(intr->dest); 3498 unsigned align = bit_size / 8; 3499 3500 /* All shared mem accesses should have been lowered to scalar 32bit 3501 * accesses. 3502 */ 3503 assert(bit_size == 32); 3504 assert(nir_dest_num_components(intr->dest) == 1); 3505 3506 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3507 if (!zero) 3508 return false; 3509 3510 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3511 if (!index) 3512 return false; 3513 3514 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 3515 const struct dxil_value *ptr, *retval; 3516 3517 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3518 if (!ptr) 3519 return false; 3520 3521 retval = dxil_emit_load(&ctx->mod, ptr, align, false); 3522 if (!retval) 3523 return false; 3524 3525 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3526 return true; 3527} 3528 3529static bool 3530emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3531{ 3532 const struct dxil_value *zero, *index; 3533 unsigned bit_size = nir_dest_bit_size(intr->dest); 3534 unsigned align = bit_size / 8; 3535 3536 /* All scratch mem accesses should have been lowered to scalar 32bit 3537 * accesses. 3538 */ 3539 assert(bit_size == 32); 3540 assert(nir_dest_num_components(intr->dest) == 1); 3541 3542 zero = dxil_module_get_int32_const(&ctx->mod, 0); 3543 if (!zero) 3544 return false; 3545 3546 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 3547 if (!index) 3548 return false; 3549 3550 const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; 3551 const struct dxil_value *ptr, *retval; 3552 3553 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 3554 if (!ptr) 3555 return false; 3556 3557 retval = dxil_emit_load(&ctx->mod, ptr, align, false); 3558 if (!retval) 3559 return false; 3560 3561 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3562 return true; 3563} 3564 3565static bool 3566emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value) 3567{ 3568 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD); 3569 if (!opcode) 3570 return false; 3571 3572 const struct dxil_value *args[] = { 3573 opcode, 3574 value 3575 }; 3576 3577 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE); 3578 if (!func) 3579 return false; 3580 3581 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3582} 3583 3584static bool 3585emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3586{ 3587 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool); 3588 if (!value) 3589 return false; 3590 3591 return emit_discard_if_with_value(ctx, value); 3592} 3593 3594static bool 3595emit_discard(struct ntd_context *ctx) 3596{ 3597 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true); 3598 return emit_discard_if_with_value(ctx, value); 3599} 3600 3601static bool 3602emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3603{ 3604 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM); 3605 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); 3606 if (!opcode || !stream_id) 3607 return false; 3608 3609 const struct dxil_value *args[] = { 3610 opcode, 3611 stream_id 3612 }; 3613 3614 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE); 3615 if (!func) 3616 return false; 3617 3618 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3619} 3620 3621static bool 3622emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3623{ 3624 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM); 3625 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); 3626 if (!opcode || !stream_id) 3627 return false; 3628 3629 const struct dxil_value *args[] = { 3630 opcode, 3631 stream_id 3632 }; 3633 3634 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE); 3635 if (!func) 3636 return false; 3637 3638 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); 3639} 3640 3641static bool 3642emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3643{ 3644 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); 3645 if (!handle) 3646 return false; 3647 3648 bool is_array = false; 3649 if (intr->intrinsic == nir_intrinsic_image_deref_store) 3650 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); 3651 else 3652 is_array = nir_intrinsic_image_array(intr); 3653 3654 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3655 if (!int32_undef) 3656 return false; 3657 3658 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3659 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ? 3660 nir_intrinsic_image_dim(intr) : 3661 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); 3662 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3663 if (is_array) 3664 ++num_coords; 3665 3666 assert(num_coords <= nir_src_num_components(intr->src[1])); 3667 for (unsigned i = 0; i < num_coords; ++i) { 3668 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3669 if (!coord[i]) 3670 return false; 3671 } 3672 3673 nir_alu_type in_type = nir_intrinsic_src_type(intr); 3674 enum overload_type overload = get_overload(in_type, 32); 3675 3676 assert(nir_src_bit_size(intr->src[3]) == 32); 3677 unsigned num_components = nir_src_num_components(intr->src[3]); 3678 assert(num_components <= 4); 3679 const struct dxil_value *value[4]; 3680 for (unsigned i = 0; i < num_components; ++i) { 3681 value[i] = get_src(ctx, &intr->src[3], i, in_type); 3682 if (!value[i]) 3683 return false; 3684 } 3685 3686 for (int i = num_components; i < 4; ++i) 3687 value[i] = int32_undef; 3688 3689 const struct dxil_value *write_mask = 3690 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); 3691 if (!write_mask) 3692 return false; 3693 3694 if (image_dim == GLSL_SAMPLER_DIM_BUF) { 3695 coord[1] = int32_undef; 3696 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); 3697 } else 3698 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload); 3699} 3700 3701static bool 3702emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3703{ 3704 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); 3705 if (!handle) 3706 return false; 3707 3708 bool is_array = false; 3709 if (intr->intrinsic == nir_intrinsic_image_deref_load) 3710 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); 3711 else 3712 is_array = nir_intrinsic_image_array(intr); 3713 3714 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3715 if (!int32_undef) 3716 return false; 3717 3718 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3719 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ? 3720 nir_intrinsic_image_dim(intr) : 3721 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); 3722 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3723 if (is_array) 3724 ++num_coords; 3725 3726 assert(num_coords <= nir_src_num_components(intr->src[1])); 3727 for (unsigned i = 0; i < num_coords; ++i) { 3728 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3729 if (!coord[i]) 3730 return false; 3731 } 3732 3733 nir_alu_type out_type = nir_intrinsic_dest_type(intr); 3734 enum overload_type overload = get_overload(out_type, 32); 3735 3736 const struct dxil_value *load_result; 3737 if (image_dim == GLSL_SAMPLER_DIM_BUF) { 3738 coord[1] = int32_undef; 3739 load_result = emit_bufferload_call(ctx, handle, coord, overload); 3740 } else 3741 load_result = emit_textureload_call(ctx, handle, coord, overload); 3742 3743 if (!load_result) 3744 return false; 3745 3746 assert(nir_dest_bit_size(intr->dest) == 32); 3747 unsigned num_components = nir_dest_num_components(intr->dest); 3748 assert(num_components <= 4); 3749 for (unsigned i = 0; i < num_components; ++i) { 3750 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i); 3751 if (!component) 3752 return false; 3753 store_dest(ctx, &intr->dest, i, component, out_type); 3754 } 3755 3756 /* FIXME: This flag should be set to true when the RWTexture is attached 3757 * a vector, and we always declare a vec4 right now, so it should always be 3758 * true. Might be worth reworking the dxil_module_get_res_type() to use a 3759 * scalar when the image only has one component. 3760 */ 3761 ctx->mod.feats.typed_uav_load_additional_formats = true; 3762 3763 return true; 3764} 3765 3766static bool 3767emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, 3768 enum dxil_atomic_op op, nir_alu_type type) 3769{ 3770 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); 3771 if (!handle) 3772 return false; 3773 3774 bool is_array = false; 3775 nir_deref_instr *src_as_deref = nir_src_as_deref(intr->src[0]); 3776 if (src_as_deref) 3777 is_array = glsl_sampler_type_is_array(src_as_deref->type); 3778 else 3779 is_array = nir_intrinsic_image_array(intr); 3780 3781 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3782 if (!int32_undef) 3783 return false; 3784 3785 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3786 enum glsl_sampler_dim image_dim = src_as_deref ? 3787 glsl_get_sampler_dim(src_as_deref->type) : 3788 nir_intrinsic_image_dim(intr); 3789 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3790 if (is_array) 3791 ++num_coords; 3792 3793 assert(num_coords <= nir_src_num_components(intr->src[1])); 3794 for (unsigned i = 0; i < num_coords; ++i) { 3795 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3796 if (!coord[i]) 3797 return false; 3798 } 3799 3800 const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type); 3801 if (!value) 3802 return false; 3803 3804 const struct dxil_value *retval = 3805 emit_atomic_binop(ctx, handle, op, coord, value); 3806 3807 if (!retval) 3808 return false; 3809 3810 store_dest(ctx, &intr->dest, 0, retval, type); 3811 return true; 3812} 3813 3814static bool 3815emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3816{ 3817 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); 3818 if (!handle) 3819 return false; 3820 3821 bool is_array = false; 3822 if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) 3823 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); 3824 else 3825 is_array = nir_intrinsic_image_array(intr); 3826 3827 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3828 if (!int32_undef) 3829 return false; 3830 3831 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; 3832 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ? 3833 nir_intrinsic_image_dim(intr) : 3834 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); 3835 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); 3836 if (is_array) 3837 ++num_coords; 3838 3839 assert(num_coords <= nir_src_num_components(intr->src[1])); 3840 for (unsigned i = 0; i < num_coords; ++i) { 3841 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); 3842 if (!coord[i]) 3843 return false; 3844 } 3845 3846 const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint); 3847 const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint); 3848 if (!cmpval || !newval) 3849 return false; 3850 3851 const struct dxil_value *retval = 3852 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); 3853 3854 if (!retval) 3855 return false; 3856 3857 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3858 return true; 3859} 3860 3861struct texop_parameters { 3862 const struct dxil_value *tex; 3863 const struct dxil_value *sampler; 3864 const struct dxil_value *bias, *lod_or_sample, *min_lod; 3865 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; 3866 const struct dxil_value *cmp; 3867 enum overload_type overload; 3868}; 3869 3870static const struct dxil_value * 3871emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params) 3872{ 3873 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE); 3874 if (!func) 3875 return false; 3876 3877 const struct dxil_value *args[] = { 3878 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE), 3879 params->tex, 3880 params->lod_or_sample 3881 }; 3882 3883 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 3884} 3885 3886static bool 3887emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3888{ 3889 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); 3890 if (!handle) 3891 return false; 3892 3893 const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint); 3894 if (!lod) 3895 return false; 3896 3897 struct texop_parameters params = { 3898 .tex = handle, 3899 .lod_or_sample = lod 3900 }; 3901 const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); 3902 if (!dimensions) 3903 return false; 3904 3905 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { 3906 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i); 3907 store_dest(ctx, &intr->dest, i, retval, nir_type_uint); 3908 } 3909 3910 return true; 3911} 3912 3913static bool 3914emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3915{ 3916 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; 3917 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 3918 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); 3919 if (var && var->data.access & ACCESS_NON_WRITEABLE) 3920 class = DXIL_RESOURCE_CLASS_SRV; 3921 } 3922 3923 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); 3924 if (!handle) 3925 return false; 3926 3927 struct texop_parameters params = { 3928 .tex = handle, 3929 .lod_or_sample = dxil_module_get_undef( 3930 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) 3931 }; 3932 3933 const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); 3934 if (!dimensions) 3935 return false; 3936 3937 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0); 3938 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 3939 3940 return true; 3941} 3942 3943static bool 3944emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, 3945 enum dxil_atomic_op op, nir_alu_type type) 3946{ 3947 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); 3948 const struct dxil_value *offset = 3949 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3950 const struct dxil_value *value = 3951 get_src(ctx, &intr->src[2], 0, type); 3952 3953 if (!value || !handle || !offset) 3954 return false; 3955 3956 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3957 if (!int32_undef) 3958 return false; 3959 3960 const struct dxil_value *coord[3] = { 3961 offset, int32_undef, int32_undef 3962 }; 3963 3964 const struct dxil_value *retval = 3965 emit_atomic_binop(ctx, handle, op, coord, value); 3966 3967 if (!retval) 3968 return false; 3969 3970 store_dest(ctx, &intr->dest, 0, retval, type); 3971 return true; 3972} 3973 3974static bool 3975emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) 3976{ 3977 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); 3978 const struct dxil_value *offset = 3979 get_src(ctx, &intr->src[1], 0, nir_type_uint); 3980 const struct dxil_value *cmpval = 3981 get_src(ctx, &intr->src[2], 0, nir_type_int); 3982 const struct dxil_value *newval = 3983 get_src(ctx, &intr->src[3], 0, nir_type_int); 3984 3985 if (!cmpval || !newval || !handle || !offset) 3986 return false; 3987 3988 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); 3989 if (!int32_undef) 3990 return false; 3991 3992 const struct dxil_value *coord[3] = { 3993 offset, int32_undef, int32_undef 3994 }; 3995 3996 const struct dxil_value *retval = 3997 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); 3998 3999 if (!retval) 4000 return false; 4001 4002 store_dest(ctx, &intr->dest, 0, retval, nir_type_int); 4003 return true; 4004} 4005 4006static bool 4007emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, 4008 enum dxil_rmw_op op, nir_alu_type type) 4009{ 4010 const struct dxil_value *zero, *index; 4011 4012 assert(nir_src_bit_size(intr->src[1]) == 32); 4013 4014 zero = dxil_module_get_int32_const(&ctx->mod, 0); 4015 if (!zero) 4016 return false; 4017 4018 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 4019 if (!index) 4020 return false; 4021 4022 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 4023 const struct dxil_value *ptr, *value, *retval; 4024 4025 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 4026 if (!ptr) 4027 return false; 4028 4029 value = get_src(ctx, &intr->src[1], 0, type); 4030 if (!value) 4031 return false; 4032 4033 retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false, 4034 DXIL_ATOMIC_ORDERING_ACQREL, 4035 DXIL_SYNC_SCOPE_CROSSTHREAD); 4036 if (!retval) 4037 return false; 4038 4039 store_dest(ctx, &intr->dest, 0, retval, type); 4040 return true; 4041} 4042 4043static bool 4044emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4045{ 4046 const struct dxil_value *zero, *index; 4047 4048 assert(nir_src_bit_size(intr->src[1]) == 32); 4049 4050 zero = dxil_module_get_int32_const(&ctx->mod, 0); 4051 if (!zero) 4052 return false; 4053 4054 index = get_src(ctx, &intr->src[0], 0, nir_type_uint); 4055 if (!index) 4056 return false; 4057 4058 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; 4059 const struct dxil_value *ptr, *cmpval, *newval, *retval; 4060 4061 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); 4062 if (!ptr) 4063 return false; 4064 4065 cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint); 4066 newval = get_src(ctx, &intr->src[2], 0, nir_type_uint); 4067 if (!cmpval || !newval) 4068 return false; 4069 4070 retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false, 4071 DXIL_ATOMIC_ORDERING_ACQREL, 4072 DXIL_SYNC_SCOPE_CROSSTHREAD); 4073 if (!retval) 4074 return false; 4075 4076 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); 4077 return true; 4078} 4079 4080static bool 4081emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4082{ 4083 unsigned int binding = nir_intrinsic_binding(intr); 4084 4085 bool const_index = nir_src_is_const(intr->src[0]); 4086 if (const_index) { 4087 binding += nir_src_as_const_value(intr->src[0])->u32; 4088 } 4089 4090 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding); 4091 if (!index_value) 4092 return false; 4093 4094 if (!const_index) { 4095 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32); 4096 if (!offset) 4097 return false; 4098 4099 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0); 4100 if (!index_value) 4101 return false; 4102 } 4103 4104 store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32); 4105 store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32); 4106 return true; 4107} 4108 4109static bool 4110emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4111{ 4112 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]); 4113 /* We currently do not support reindex */ 4114 assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index); 4115 4116 unsigned binding = nir_intrinsic_binding(index); 4117 unsigned space = nir_intrinsic_desc_set(index); 4118 4119 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */ 4120 assert(space < 32); 4121 4122 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); 4123 4124 const struct dxil_value *handle = NULL; 4125 enum dxil_resource_class resource_class; 4126 4127 switch (nir_intrinsic_desc_type(intr)) { 4128 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 4129 resource_class = DXIL_RESOURCE_CLASS_CBV; 4130 break; 4131 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 4132 if (var->data.access & ACCESS_NON_WRITEABLE) 4133 resource_class = DXIL_RESOURCE_CLASS_SRV; 4134 else 4135 resource_class = DXIL_RESOURCE_CLASS_UAV; 4136 break; 4137 default: 4138 unreachable("unknown descriptor type"); 4139 return false; 4140 } 4141 4142 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32); 4143 if (!index_value) 4144 return false; 4145 4146 handle = emit_createhandle_call(ctx, resource_class, 4147 get_resource_id(ctx, resource_class, space, binding), 4148 index_value, false); 4149 4150 store_dest_value(ctx, &intr->dest, 0, handle); 4151 store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32); 4152 4153 return true; 4154} 4155 4156static bool 4157emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4158{ 4159 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE); 4160 if (!func) 4161 return false; 4162 4163 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION); 4164 if (!opcode) 4165 return false; 4166 4167 const struct dxil_value *args[] = { 4168 opcode, 4169 get_src(ctx, &intr->src[0], 0, nir_type_uint32), 4170 }; 4171 if (!args[1]) 4172 return false; 4173 4174 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4175 if (!v) 4176 return false; 4177 4178 for (unsigned i = 0; i < 2; ++i) { 4179 /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */ 4180 const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, 4181 dxil_emit_extractval(&ctx->mod, v, i), 4182 dxil_module_get_float_const(&ctx->mod, 0.5f), 0); 4183 store_dest(ctx, &intr->dest, i, coord, nir_type_float32); 4184 } 4185 return true; 4186} 4187 4188static bool 4189emit_load_layer_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4190{ 4191 const struct dxil_value *layer_id = dxil_module_get_int32_const(&ctx->mod, 0); 4192 /* TODO: Properly implement this once multi-view is supported */ 4193 store_dest_value(ctx, &intr->dest, 0, layer_id); 4194 return true; 4195} 4196 4197static bool 4198emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4199{ 4200 assert(ctx->mod.info.has_per_sample_input || 4201 intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample); 4202 4203 if (ctx->mod.info.has_per_sample_input) 4204 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex", 4205 DXIL_INTR_SAMPLE_INDEX); 4206 4207 store_dest_value(ctx, &intr->dest, 0, dxil_module_get_int32_const(&ctx->mod, 0)); 4208 return true; 4209} 4210 4211static bool 4212emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) 4213{ 4214 switch (intr->intrinsic) { 4215 case nir_intrinsic_load_global_invocation_id: 4216 case nir_intrinsic_load_global_invocation_id_zero_base: 4217 return emit_load_global_invocation_id(ctx, intr); 4218 case nir_intrinsic_load_local_invocation_id: 4219 return emit_load_local_invocation_id(ctx, intr); 4220 case nir_intrinsic_load_local_invocation_index: 4221 return emit_load_local_invocation_index(ctx, intr); 4222 case nir_intrinsic_load_workgroup_id: 4223 case nir_intrinsic_load_workgroup_id_zero_base: 4224 return emit_load_local_workgroup_id(ctx, intr); 4225 case nir_intrinsic_load_ssbo: 4226 return emit_load_ssbo(ctx, intr); 4227 case nir_intrinsic_store_ssbo: 4228 return emit_store_ssbo(ctx, intr); 4229 case nir_intrinsic_store_ssbo_masked_dxil: 4230 return emit_store_ssbo_masked(ctx, intr); 4231 case nir_intrinsic_store_shared_dxil: 4232 case nir_intrinsic_store_shared_masked_dxil: 4233 return emit_store_shared(ctx, intr); 4234 case nir_intrinsic_store_scratch_dxil: 4235 return emit_store_scratch(ctx, intr); 4236 case nir_intrinsic_load_ptr_dxil: 4237 return emit_load_ptr(ctx, intr); 4238 case nir_intrinsic_load_ubo: 4239 return emit_load_ubo(ctx, intr); 4240 case nir_intrinsic_load_ubo_dxil: 4241 return emit_load_ubo_dxil(ctx, intr); 4242 case nir_intrinsic_load_primitive_id: 4243 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID", 4244 DXIL_INTR_PRIMITIVE_ID); 4245 case nir_intrinsic_load_sample_id: 4246 case nir_intrinsic_load_sample_id_no_per_sample: 4247 return emit_load_sample_id(ctx, intr); 4248 case nir_intrinsic_load_invocation_id: 4249 switch (ctx->mod.shader_kind) { 4250 case DXIL_HULL_SHADER: 4251 return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID", 4252 DXIL_INTR_OUTPUT_CONTROL_POINT_ID); 4253 case DXIL_GEOMETRY_SHADER: 4254 return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID", 4255 DXIL_INTR_GS_INSTANCE_ID); 4256 default: 4257 unreachable("Unexpected shader kind for invocation ID"); 4258 } 4259 case nir_intrinsic_load_sample_mask_in: 4260 return emit_load_sample_mask_in(ctx, intr); 4261 case nir_intrinsic_load_tess_coord: 4262 return emit_load_tess_coord(ctx, intr); 4263 case nir_intrinsic_load_shared_dxil: 4264 return emit_load_shared(ctx, intr); 4265 case nir_intrinsic_load_scratch_dxil: 4266 return emit_load_scratch(ctx, intr); 4267 case nir_intrinsic_discard_if: 4268 case nir_intrinsic_demote_if: 4269 return emit_discard_if(ctx, intr); 4270 case nir_intrinsic_discard: 4271 case nir_intrinsic_demote: 4272 return emit_discard(ctx); 4273 case nir_intrinsic_emit_vertex: 4274 return emit_emit_vertex(ctx, intr); 4275 case nir_intrinsic_end_primitive: 4276 return emit_end_primitive(ctx, intr); 4277 case nir_intrinsic_scoped_barrier: 4278 return emit_barrier(ctx, intr); 4279 case nir_intrinsic_memory_barrier: 4280 case nir_intrinsic_memory_barrier_buffer: 4281 case nir_intrinsic_memory_barrier_image: 4282 case nir_intrinsic_memory_barrier_atomic_counter: 4283 return emit_memory_barrier(ctx, intr); 4284 case nir_intrinsic_memory_barrier_shared: 4285 return emit_memory_barrier_shared(ctx, intr); 4286 case nir_intrinsic_group_memory_barrier: 4287 return emit_group_memory_barrier(ctx, intr); 4288 case nir_intrinsic_control_barrier: 4289 return emit_control_barrier(ctx, intr); 4290 case nir_intrinsic_ssbo_atomic_add: 4291 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int); 4292 case nir_intrinsic_ssbo_atomic_imin: 4293 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int); 4294 case nir_intrinsic_ssbo_atomic_umin: 4295 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint); 4296 case nir_intrinsic_ssbo_atomic_imax: 4297 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int); 4298 case nir_intrinsic_ssbo_atomic_umax: 4299 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint); 4300 case nir_intrinsic_ssbo_atomic_and: 4301 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint); 4302 case nir_intrinsic_ssbo_atomic_or: 4303 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint); 4304 case nir_intrinsic_ssbo_atomic_xor: 4305 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint); 4306 case nir_intrinsic_ssbo_atomic_exchange: 4307 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int); 4308 case nir_intrinsic_ssbo_atomic_comp_swap: 4309 return emit_ssbo_atomic_comp_swap(ctx, intr); 4310 case nir_intrinsic_shared_atomic_add_dxil: 4311 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int); 4312 case nir_intrinsic_shared_atomic_imin_dxil: 4313 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int); 4314 case nir_intrinsic_shared_atomic_umin_dxil: 4315 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint); 4316 case nir_intrinsic_shared_atomic_imax_dxil: 4317 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int); 4318 case nir_intrinsic_shared_atomic_umax_dxil: 4319 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint); 4320 case nir_intrinsic_shared_atomic_and_dxil: 4321 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint); 4322 case nir_intrinsic_shared_atomic_or_dxil: 4323 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint); 4324 case nir_intrinsic_shared_atomic_xor_dxil: 4325 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint); 4326 case nir_intrinsic_shared_atomic_exchange_dxil: 4327 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int); 4328 case nir_intrinsic_shared_atomic_comp_swap_dxil: 4329 return emit_shared_atomic_comp_swap(ctx, intr); 4330 case nir_intrinsic_image_deref_atomic_add: 4331 case nir_intrinsic_image_atomic_add: 4332 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int); 4333 case nir_intrinsic_image_deref_atomic_imin: 4334 case nir_intrinsic_image_atomic_imin: 4335 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int); 4336 case nir_intrinsic_image_deref_atomic_umin: 4337 case nir_intrinsic_image_atomic_umin: 4338 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint); 4339 case nir_intrinsic_image_deref_atomic_imax: 4340 case nir_intrinsic_image_atomic_imax: 4341 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int); 4342 case nir_intrinsic_image_deref_atomic_umax: 4343 case nir_intrinsic_image_atomic_umax: 4344 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_uint); 4345 case nir_intrinsic_image_deref_atomic_and: 4346 case nir_intrinsic_image_atomic_and: 4347 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint); 4348 case nir_intrinsic_image_deref_atomic_or: 4349 case nir_intrinsic_image_atomic_or: 4350 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint); 4351 case nir_intrinsic_image_deref_atomic_xor: 4352 case nir_intrinsic_image_atomic_xor: 4353 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint); 4354 case nir_intrinsic_image_deref_atomic_exchange: 4355 case nir_intrinsic_image_atomic_exchange: 4356 return emit_image_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_uint); 4357 case nir_intrinsic_image_deref_atomic_comp_swap: 4358 case nir_intrinsic_image_atomic_comp_swap: 4359 return emit_image_atomic_comp_swap(ctx, intr); 4360 case nir_intrinsic_image_store: 4361 case nir_intrinsic_image_deref_store: 4362 return emit_image_store(ctx, intr); 4363 case nir_intrinsic_image_load: 4364 case nir_intrinsic_image_deref_load: 4365 return emit_image_load(ctx, intr); 4366 case nir_intrinsic_image_size: 4367 case nir_intrinsic_image_deref_size: 4368 return emit_image_size(ctx, intr); 4369 case nir_intrinsic_get_ssbo_size: 4370 return emit_get_ssbo_size(ctx, intr); 4371 case nir_intrinsic_load_input: 4372 case nir_intrinsic_load_per_vertex_input: 4373 case nir_intrinsic_load_output: 4374 case nir_intrinsic_load_per_vertex_output: 4375 return emit_load_input_via_intrinsic(ctx, intr); 4376 case nir_intrinsic_store_output: 4377 case nir_intrinsic_store_per_vertex_output: 4378 return emit_store_output_via_intrinsic(ctx, intr); 4379 4380 case nir_intrinsic_load_barycentric_at_offset: 4381 case nir_intrinsic_load_barycentric_at_sample: 4382 case nir_intrinsic_load_barycentric_centroid: 4383 case nir_intrinsic_load_barycentric_pixel: 4384 /* Emit nothing, we only support these as inputs to load_interpolated_input */ 4385 return true; 4386 case nir_intrinsic_load_interpolated_input: 4387 return emit_load_interpolated_input(ctx, intr); 4388 break; 4389 4390 case nir_intrinsic_vulkan_resource_index: 4391 return emit_vulkan_resource_index(ctx, intr); 4392 case nir_intrinsic_load_vulkan_descriptor: 4393 return emit_load_vulkan_descriptor(ctx, intr); 4394 case nir_intrinsic_load_layer_id: 4395 return emit_load_layer_id(ctx, intr); 4396 4397 case nir_intrinsic_load_sample_pos_from_id: 4398 return emit_load_sample_pos_from_id(ctx, intr); 4399 4400 case nir_intrinsic_load_num_workgroups: 4401 case nir_intrinsic_load_workgroup_size: 4402 default: 4403 NIR_INSTR_UNSUPPORTED(&intr->instr); 4404 unreachable("Unimplemented intrinsic instruction"); 4405 return false; 4406 } 4407} 4408 4409static bool 4410emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const) 4411{ 4412 for (int i = 0; i < load_const->def.num_components; ++i) { 4413 const struct dxil_value *value; 4414 switch (load_const->def.bit_size) { 4415 case 1: 4416 value = dxil_module_get_int1_const(&ctx->mod, 4417 load_const->value[i].b); 4418 break; 4419 case 16: 4420 ctx->mod.feats.native_low_precision = true; 4421 value = dxil_module_get_int16_const(&ctx->mod, 4422 load_const->value[i].u16); 4423 break; 4424 case 32: 4425 value = dxil_module_get_int32_const(&ctx->mod, 4426 load_const->value[i].u32); 4427 break; 4428 case 64: 4429 ctx->mod.feats.int64_ops = true; 4430 value = dxil_module_get_int64_const(&ctx->mod, 4431 load_const->value[i].u64); 4432 break; 4433 default: 4434 unreachable("unexpected bit_size"); 4435 } 4436 if (!value) 4437 return false; 4438 4439 store_ssa_def(ctx, &load_const->def, i, value); 4440 } 4441 return true; 4442} 4443 4444static bool 4445emit_deref(struct ntd_context* ctx, nir_deref_instr* instr) 4446{ 4447 assert(instr->deref_type == nir_deref_type_var || 4448 instr->deref_type == nir_deref_type_array); 4449 4450 /* In the CL environment, there's nothing to emit. Any references to 4451 * derefs will emit the necessary logic to handle scratch/shared GEP addressing 4452 */ 4453 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL) 4454 return true; 4455 4456 /* In the Vulkan environment, we don't have cached handles for textures or 4457 * samplers, so let's use the opportunity of walking through the derefs to 4458 * emit those. 4459 */ 4460 nir_variable *var = nir_deref_instr_get_variable(instr); 4461 assert(var); 4462 4463 if (!glsl_type_is_sampler(glsl_without_array(var->type)) && 4464 !glsl_type_is_image(glsl_without_array(var->type)) && 4465 !glsl_type_is_texture(glsl_without_array(var->type))) 4466 return true; 4467 4468 const struct glsl_type *type = instr->type; 4469 const struct dxil_value *binding; 4470 unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ? 4471 var->data.driver_location : var->data.binding; 4472 4473 if (instr->deref_type == nir_deref_type_var) { 4474 binding = dxil_module_get_int32_const(&ctx->mod, binding_val); 4475 } else { 4476 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32); 4477 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32); 4478 if (!base || !offset) 4479 return false; 4480 4481 if (glsl_type_is_array(instr->type)) { 4482 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset, 4483 dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0); 4484 if (!offset) 4485 return false; 4486 } 4487 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0); 4488 } 4489 4490 if (!binding) 4491 return false; 4492 4493 /* Haven't finished chasing the deref chain yet, just store the value */ 4494 if (glsl_type_is_array(type)) { 4495 store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32); 4496 return true; 4497 } 4498 4499 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type)); 4500 enum dxil_resource_class res_class; 4501 if (glsl_type_is_image(type)) { 4502 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN && 4503 (var->data.access & ACCESS_NON_WRITEABLE)) 4504 res_class = DXIL_RESOURCE_CLASS_SRV; 4505 else 4506 res_class = DXIL_RESOURCE_CLASS_UAV; 4507 } else if (glsl_type_is_sampler(type)) { 4508 res_class = DXIL_RESOURCE_CLASS_SAMPLER; 4509 } else { 4510 res_class = DXIL_RESOURCE_CLASS_SRV; 4511 } 4512 4513 unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ? 4514 var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0); 4515 const struct dxil_value *handle = emit_createhandle_call(ctx, res_class, 4516 get_resource_id(ctx, res_class, descriptor_set, binding_val), binding, false); 4517 if (!handle) 4518 return false; 4519 4520 store_dest_value(ctx, &instr->dest, 0, handle); 4521 return true; 4522} 4523 4524static bool 4525emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond, 4526 int true_block, int false_block) 4527{ 4528 assert(cond); 4529 assert(true_block >= 0); 4530 assert(false_block >= 0); 4531 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block); 4532} 4533 4534static bool 4535emit_branch(struct ntd_context *ctx, int block) 4536{ 4537 assert(block >= 0); 4538 return dxil_emit_branch(&ctx->mod, NULL, block, -1); 4539} 4540 4541static bool 4542emit_jump(struct ntd_context *ctx, nir_jump_instr *instr) 4543{ 4544 switch (instr->type) { 4545 case nir_jump_break: 4546 case nir_jump_continue: 4547 assert(instr->instr.block->successors[0]); 4548 assert(!instr->instr.block->successors[1]); 4549 return emit_branch(ctx, instr->instr.block->successors[0]->index); 4550 4551 default: 4552 unreachable("Unsupported jump type\n"); 4553 } 4554} 4555 4556struct phi_block { 4557 unsigned num_components; 4558 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS]; 4559}; 4560 4561static bool 4562emit_phi(struct ntd_context *ctx, nir_phi_instr *instr) 4563{ 4564 unsigned bit_size = nir_dest_bit_size(instr->dest); 4565 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, 4566 bit_size); 4567 4568 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block); 4569 vphi->num_components = nir_dest_num_components(instr->dest); 4570 4571 for (unsigned i = 0; i < vphi->num_components; ++i) { 4572 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type); 4573 if (!phi) 4574 return false; 4575 store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi)); 4576 } 4577 _mesa_hash_table_insert(ctx->phis, instr, vphi); 4578 return true; 4579} 4580 4581static bool 4582fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr, 4583 struct phi_block *vphi) 4584{ 4585 const struct dxil_value *values[16]; 4586 unsigned blocks[16]; 4587 for (unsigned i = 0; i < vphi->num_components; ++i) { 4588 size_t num_incoming = 0; 4589 nir_foreach_phi_src(src, instr) { 4590 assert(src->src.is_ssa); 4591 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i); 4592 values[num_incoming] = val; 4593 blocks[num_incoming] = src->pred->index; 4594 ++num_incoming; 4595 if (num_incoming == ARRAY_SIZE(values)) { 4596 if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks, 4597 num_incoming)) 4598 return false; 4599 num_incoming = 0; 4600 } 4601 } 4602 if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values, 4603 blocks, num_incoming)) 4604 return false; 4605 } 4606 return true; 4607} 4608 4609static unsigned 4610get_n_src(struct ntd_context *ctx, const struct dxil_value **values, 4611 unsigned max_components, nir_tex_src *src, nir_alu_type type) 4612{ 4613 unsigned num_components = nir_src_num_components(src->src); 4614 unsigned i = 0; 4615 4616 assert(num_components <= max_components); 4617 4618 for (i = 0; i < num_components; ++i) { 4619 values[i] = get_src(ctx, &src->src, i, type); 4620 if (!values[i]) 4621 return 0; 4622 } 4623 4624 return num_components; 4625} 4626 4627#define PAD_SRC(ctx, array, components, undef) \ 4628 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \ 4629 array[i] = undef; \ 4630 } 4631 4632static const struct dxil_value * 4633emit_sample(struct ntd_context *ctx, struct texop_parameters *params) 4634{ 4635 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload); 4636 if (!func) 4637 return NULL; 4638 4639 const struct dxil_value *args[11] = { 4640 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE), 4641 params->tex, params->sampler, 4642 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 4643 params->offset[0], params->offset[1], params->offset[2], 4644 params->min_lod 4645 }; 4646 4647 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4648} 4649 4650static const struct dxil_value * 4651emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params) 4652{ 4653 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload); 4654 if (!func) 4655 return NULL; 4656 4657 assert(params->bias != NULL); 4658 4659 const struct dxil_value *args[12] = { 4660 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS), 4661 params->tex, params->sampler, 4662 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 4663 params->offset[0], params->offset[1], params->offset[2], 4664 params->bias, params->min_lod 4665 }; 4666 4667 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4668} 4669 4670static const struct dxil_value * 4671emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params) 4672{ 4673 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload); 4674 if (!func) 4675 return NULL; 4676 4677 assert(params->lod_or_sample != NULL); 4678 4679 const struct dxil_value *args[11] = { 4680 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL), 4681 params->tex, params->sampler, 4682 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 4683 params->offset[0], params->offset[1], params->offset[2], 4684 params->lod_or_sample 4685 }; 4686 4687 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4688} 4689 4690static const struct dxil_value * 4691emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params) 4692{ 4693 const struct dxil_func *func; 4694 enum dxil_intr opcode; 4695 int numparam; 4696 4697 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { 4698 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32); 4699 opcode = DXIL_INTR_SAMPLE_CMP; 4700 numparam = 12; 4701 } else { 4702 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32); 4703 opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO; 4704 numparam = 11; 4705 } 4706 4707 if (!func) 4708 return NULL; 4709 4710 const struct dxil_value *args[12] = { 4711 dxil_module_get_int32_const(&ctx->mod, opcode), 4712 params->tex, params->sampler, 4713 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 4714 params->offset[0], params->offset[1], params->offset[2], 4715 params->cmp, params->min_lod 4716 }; 4717 4718 return dxil_emit_call(&ctx->mod, func, args, numparam); 4719} 4720 4721static const struct dxil_value * 4722emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params) 4723{ 4724 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload); 4725 if (!func) 4726 return false; 4727 4728 const struct dxil_value *args[17] = { 4729 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD), 4730 params->tex, params->sampler, 4731 params->coord[0], params->coord[1], params->coord[2], params->coord[3], 4732 params->offset[0], params->offset[1], params->offset[2], 4733 params->dx[0], params->dx[1], params->dx[2], 4734 params->dy[0], params->dy[1], params->dy[2], 4735 params->min_lod 4736 }; 4737 4738 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4739} 4740 4741static const struct dxil_value * 4742emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params) 4743{ 4744 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload); 4745 if (!func) 4746 return false; 4747 4748 if (!params->lod_or_sample) 4749 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)); 4750 4751 const struct dxil_value *args[] = { 4752 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD), 4753 params->tex, 4754 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2], 4755 params->offset[0], params->offset[1], params->offset[2] 4756 }; 4757 4758 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4759} 4760 4761static const struct dxil_value * 4762emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped) 4763{ 4764 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32); 4765 if (!func) 4766 return false; 4767 4768 const struct dxil_value *args[] = { 4769 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD), 4770 params->tex, 4771 params->sampler, 4772 params->coord[0], 4773 params->coord[1], 4774 params->coord[2], 4775 dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0) 4776 }; 4777 4778 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); 4779} 4780 4781static const struct dxil_value * 4782emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component) 4783{ 4784 const struct dxil_func *func = dxil_get_function(&ctx->mod, 4785 params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload); 4786 if (!func) 4787 return false; 4788 4789 const struct dxil_value *args[] = { 4790 dxil_module_get_int32_const(&ctx->mod, params->cmp ? 4791 DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER), 4792 params->tex, 4793 params->sampler, 4794 params->coord[0], 4795 params->coord[1], 4796 params->coord[2], 4797 params->coord[3], 4798 params->offset[0], 4799 params->offset[1], 4800 dxil_module_get_int32_const(&ctx->mod, component), 4801 params->cmp 4802 }; 4803 4804 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1)); 4805} 4806 4807static bool 4808emit_tex(struct ntd_context *ctx, nir_tex_instr *instr) 4809{ 4810 struct texop_parameters params; 4811 memset(¶ms, 0, sizeof(struct texop_parameters)); 4812 if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) { 4813 params.tex = ctx->srv_handles[instr->texture_index]; 4814 params.sampler = ctx->sampler_handles[instr->sampler_index]; 4815 } 4816 4817 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); 4818 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); 4819 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); 4820 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); 4821 4822 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0; 4823 params.overload = get_overload(instr->dest_type, 32); 4824 4825 for (unsigned i = 0; i < instr->num_srcs; i++) { 4826 nir_alu_type type = nir_tex_instr_src_type(instr, i); 4827 4828 switch (instr->src[i].src_type) { 4829 case nir_tex_src_coord: 4830 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord), 4831 &instr->src[i], type); 4832 if (!coord_components) 4833 return false; 4834 break; 4835 4836 case nir_tex_src_offset: 4837 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset), 4838 &instr->src[i], nir_type_int); 4839 if (!offset_components) 4840 return false; 4841 break; 4842 4843 case nir_tex_src_bias: 4844 assert(instr->op == nir_texop_txb); 4845 assert(nir_src_num_components(instr->src[i].src) == 1); 4846 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float); 4847 if (!params.bias) 4848 return false; 4849 break; 4850 4851 case nir_tex_src_lod: 4852 assert(nir_src_num_components(instr->src[i].src) == 1); 4853 if (instr->op == nir_texop_txf_ms) { 4854 assert(nir_src_as_int(instr->src[i].src) == 0); 4855 break; 4856 } 4857 4858 /* Buffers don't have a LOD */ 4859 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) 4860 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type); 4861 else 4862 params.lod_or_sample = int_undef; 4863 if (!params.lod_or_sample) 4864 return false; 4865 break; 4866 4867 case nir_tex_src_min_lod: 4868 assert(nir_src_num_components(instr->src[i].src) == 1); 4869 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type); 4870 if (!params.min_lod) 4871 return false; 4872 break; 4873 4874 case nir_tex_src_comparator: 4875 assert(nir_src_num_components(instr->src[i].src) == 1); 4876 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float); 4877 if (!params.cmp) 4878 return false; 4879 break; 4880 4881 case nir_tex_src_ddx: 4882 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx), 4883 &instr->src[i], nir_type_float); 4884 if (!dx_components) 4885 return false; 4886 break; 4887 4888 case nir_tex_src_ddy: 4889 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy), 4890 &instr->src[i], nir_type_float); 4891 if (!dy_components) 4892 return false; 4893 break; 4894 4895 case nir_tex_src_ms_index: 4896 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int); 4897 if (!params.lod_or_sample) 4898 return false; 4899 break; 4900 4901 case nir_tex_src_texture_deref: 4902 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); 4903 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0); 4904 break; 4905 4906 case nir_tex_src_sampler_deref: 4907 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); 4908 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0); 4909 break; 4910 4911 case nir_tex_src_texture_offset: 4912 params.tex = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SRV, 4913 get_resource_id(ctx, DXIL_RESOURCE_CLASS_SRV, 0, instr->texture_index), 4914 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, 4915 get_src_ssa(ctx, instr->src[i].src.ssa, 0), 4916 dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0), 4917 instr->texture_non_uniform); 4918 break; 4919 4920 case nir_tex_src_sampler_offset: 4921 if (nir_tex_instr_need_sampler(instr)) { 4922 params.sampler = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 4923 get_resource_id(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 0, instr->sampler_index), 4924 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, 4925 get_src_ssa(ctx, instr->src[i].src.ssa, 0), 4926 dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0), 4927 instr->sampler_non_uniform); 4928 } 4929 break; 4930 4931 case nir_tex_src_projector: 4932 unreachable("Texture projector should have been lowered"); 4933 4934 default: 4935 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type); 4936 unreachable("unknown texture source"); 4937 } 4938 } 4939 4940 assert(params.tex != NULL); 4941 assert(instr->op == nir_texop_txf || 4942 instr->op == nir_texop_txf_ms || 4943 nir_tex_instr_is_query(instr) || 4944 params.sampler != NULL); 4945 4946 PAD_SRC(ctx, params.coord, coord_components, float_undef); 4947 PAD_SRC(ctx, params.offset, offset_components, int_undef); 4948 if (!params.min_lod) params.min_lod = float_undef; 4949 4950 const struct dxil_value *sample = NULL; 4951 switch (instr->op) { 4952 case nir_texop_txb: 4953 sample = emit_sample_bias(ctx, ¶ms); 4954 break; 4955 4956 case nir_texop_tex: 4957 if (params.cmp != NULL) { 4958 sample = emit_sample_cmp(ctx, ¶ms); 4959 break; 4960 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { 4961 sample = emit_sample(ctx, ¶ms); 4962 break; 4963 } 4964 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0); 4965 FALLTHROUGH; 4966 case nir_texop_txl: 4967 sample = emit_sample_level(ctx, ¶ms); 4968 break; 4969 4970 case nir_texop_txd: 4971 PAD_SRC(ctx, params.dx, dx_components, float_undef); 4972 PAD_SRC(ctx, params.dy, dy_components,float_undef); 4973 sample = emit_sample_grad(ctx, ¶ms); 4974 break; 4975 4976 case nir_texop_txf: 4977 case nir_texop_txf_ms: 4978 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { 4979 params.coord[1] = int_undef; 4980 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload); 4981 } else { 4982 PAD_SRC(ctx, params.coord, coord_components, int_undef); 4983 sample = emit_texel_fetch(ctx, ¶ms); 4984 } 4985 break; 4986 4987 case nir_texop_txs: 4988 sample = emit_texture_size(ctx, ¶ms); 4989 break; 4990 4991 case nir_texop_tg4: 4992 sample = emit_texture_gather(ctx, ¶ms, instr->component); 4993 break; 4994 4995 case nir_texop_lod: 4996 sample = emit_texture_lod(ctx, ¶ms, true); 4997 store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type)); 4998 sample = emit_texture_lod(ctx, ¶ms, false); 4999 store_dest(ctx, &instr->dest, 1, sample, nir_alu_type_get_base_type(instr->dest_type)); 5000 return true; 5001 5002 case nir_texop_query_levels: 5003 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32); 5004 sample = emit_texture_size(ctx, ¶ms); 5005 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); 5006 store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type)); 5007 return true; 5008 5009 default: 5010 fprintf(stderr, "texture op: %d\n", instr->op); 5011 unreachable("unknown texture op"); 5012 } 5013 5014 if (!sample) 5015 return false; 5016 5017 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { 5018 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i); 5019 store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type)); 5020 } 5021 5022 return true; 5023} 5024 5025static bool 5026emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef) 5027{ 5028 for (unsigned i = 0; i < undef->def.num_components; ++i) 5029 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0)); 5030 return true; 5031} 5032 5033static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr) 5034{ 5035 switch (instr->type) { 5036 case nir_instr_type_alu: 5037 return emit_alu(ctx, nir_instr_as_alu(instr)); 5038 case nir_instr_type_intrinsic: 5039 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 5040 case nir_instr_type_load_const: 5041 return emit_load_const(ctx, nir_instr_as_load_const(instr)); 5042 case nir_instr_type_deref: 5043 return emit_deref(ctx, nir_instr_as_deref(instr)); 5044 case nir_instr_type_jump: 5045 return emit_jump(ctx, nir_instr_as_jump(instr)); 5046 case nir_instr_type_phi: 5047 return emit_phi(ctx, nir_instr_as_phi(instr)); 5048 case nir_instr_type_tex: 5049 return emit_tex(ctx, nir_instr_as_tex(instr)); 5050 case nir_instr_type_ssa_undef: 5051 return emit_undefined(ctx, nir_instr_as_ssa_undef(instr)); 5052 default: 5053 NIR_INSTR_UNSUPPORTED(instr); 5054 unreachable("Unimplemented instruction type"); 5055 return false; 5056 } 5057} 5058 5059 5060static bool 5061emit_block(struct ntd_context *ctx, struct nir_block *block) 5062{ 5063 assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids); 5064 ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block; 5065 5066 nir_foreach_instr(instr, block) { 5067 TRACE_CONVERSION(instr); 5068 5069 if (!emit_instr(ctx, instr)) { 5070 return false; 5071 } 5072 } 5073 return true; 5074} 5075 5076static bool 5077emit_cf_list(struct ntd_context *ctx, struct exec_list *list); 5078 5079static bool 5080emit_if(struct ntd_context *ctx, struct nir_if *if_stmt) 5081{ 5082 assert(nir_src_num_components(if_stmt->condition) == 1); 5083 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0, 5084 nir_type_bool); 5085 if (!cond) 5086 return false; 5087 5088 /* prepare blocks */ 5089 nir_block *then_block = nir_if_first_then_block(if_stmt); 5090 assert(nir_if_last_then_block(if_stmt)->successors[0]); 5091 assert(!nir_if_last_then_block(if_stmt)->successors[1]); 5092 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index; 5093 5094 nir_block *else_block = NULL; 5095 int else_succ = -1; 5096 if (!exec_list_is_empty(&if_stmt->else_list)) { 5097 else_block = nir_if_first_else_block(if_stmt); 5098 assert(nir_if_last_else_block(if_stmt)->successors[0]); 5099 assert(!nir_if_last_else_block(if_stmt)->successors[1]); 5100 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index; 5101 } 5102 5103 if (!emit_cond_branch(ctx, cond, then_block->index, 5104 else_block ? else_block->index : then_succ)) 5105 return false; 5106 5107 /* handle then-block */ 5108 if (!emit_cf_list(ctx, &if_stmt->then_list) || 5109 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) && 5110 !emit_branch(ctx, then_succ))) 5111 return false; 5112 5113 if (else_block) { 5114 /* handle else-block */ 5115 if (!emit_cf_list(ctx, &if_stmt->else_list) || 5116 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) && 5117 !emit_branch(ctx, else_succ))) 5118 return false; 5119 } 5120 5121 return true; 5122} 5123 5124static bool 5125emit_loop(struct ntd_context *ctx, nir_loop *loop) 5126{ 5127 nir_block *first_block = nir_loop_first_block(loop); 5128 5129 assert(nir_loop_last_block(loop)->successors[0]); 5130 assert(!nir_loop_last_block(loop)->successors[1]); 5131 5132 if (!emit_branch(ctx, first_block->index)) 5133 return false; 5134 5135 if (!emit_cf_list(ctx, &loop->body)) 5136 return false; 5137 5138 if (!emit_branch(ctx, first_block->index)) 5139 return false; 5140 5141 return true; 5142} 5143 5144static bool 5145emit_cf_list(struct ntd_context *ctx, struct exec_list *list) 5146{ 5147 foreach_list_typed(nir_cf_node, node, node, list) { 5148 switch (node->type) { 5149 case nir_cf_node_block: 5150 if (!emit_block(ctx, nir_cf_node_as_block(node))) 5151 return false; 5152 break; 5153 5154 case nir_cf_node_if: 5155 if (!emit_if(ctx, nir_cf_node_as_if(node))) 5156 return false; 5157 break; 5158 5159 case nir_cf_node_loop: 5160 if (!emit_loop(ctx, nir_cf_node_as_loop(node))) 5161 return false; 5162 break; 5163 5164 default: 5165 unreachable("unsupported cf-list node"); 5166 break; 5167 } 5168 } 5169 return true; 5170} 5171 5172static void 5173insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var) 5174{ 5175 nir_foreach_variable_in_list(var, var_list) { 5176 if (var->data.binding > new_var->data.binding) { 5177 exec_node_insert_node_before(&var->node, &new_var->node); 5178 return; 5179 } 5180 } 5181 exec_list_push_tail(var_list, &new_var->node); 5182} 5183 5184 5185static void 5186sort_uniforms_by_binding_and_remove_structs(nir_shader *s) 5187{ 5188 struct exec_list new_list; 5189 exec_list_make_empty(&new_list); 5190 5191 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { 5192 exec_node_remove(&var->node); 5193 const struct glsl_type *type = glsl_without_array(var->type); 5194 if (!glsl_type_is_struct(type)) 5195 insert_sorted_by_binding(&new_list, var); 5196 } 5197 exec_list_append(&s->variables, &new_list); 5198} 5199 5200static void 5201prepare_phi_values(struct ntd_context *ctx, nir_function_impl *impl) 5202{ 5203 /* PHI nodes are difficult to get right when tracking the types: 5204 * Since the incoming sources are linked to blocks, we can't bitcast 5205 * on the fly while loading. So scan the shader and insert a typed dummy 5206 * value for each phi source, and when storing we convert if the incoming 5207 * value has a different type then the one expected by the phi node. 5208 * We choose int as default, because it supports more bit sizes. 5209 */ 5210 nir_foreach_block(block, impl) { 5211 nir_foreach_instr(instr, block) { 5212 if (instr->type == nir_instr_type_phi) { 5213 nir_phi_instr *ir = nir_instr_as_phi(instr); 5214 unsigned bitsize = nir_dest_bit_size(ir->dest); 5215 const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize); 5216 nir_foreach_phi_src(src, ir) { 5217 for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i) 5218 store_ssa_def(ctx, src->src.ssa, i, dummy); 5219 } 5220 } 5221 } 5222 } 5223} 5224 5225static bool 5226emit_cbvs(struct ntd_context *ctx) 5227{ 5228 if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) { 5229 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) { 5230 if (!emit_ubo_var(ctx, var)) 5231 return false; 5232 } 5233 } else { 5234 if (ctx->shader->info.num_ubos) { 5235 const unsigned ubo_size = 16384 /*4096 vec4's*/; 5236 bool has_ubo0 = !ctx->opts->no_ubo0; 5237 bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed; 5238 unsigned ubo1_array_size = ctx->shader->info.num_ubos - 5239 (has_state_vars ? 2 : 1); 5240 5241 if (has_ubo0 && 5242 !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms")) 5243 return false; 5244 if (ubo1_array_size && 5245 !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos")) 5246 return false; 5247 if (has_state_vars && 5248 !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars")) 5249 return false; 5250 } 5251 } 5252 5253 return true; 5254} 5255 5256static bool 5257emit_scratch(struct ntd_context *ctx) 5258{ 5259 if (ctx->shader->scratch_size) { 5260 /* 5261 * We always allocate an u32 array, no matter the actual variable types. 5262 * According to the DXIL spec, the minimum load/store granularity is 5263 * 32-bit, anything smaller requires using a read-extract/read-write-modify 5264 * approach. 5265 */ 5266 unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t)); 5267 const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32); 5268 const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t)); 5269 if (!int32 || !array_length) 5270 return false; 5271 5272 const struct dxil_type *type = dxil_module_get_array_type( 5273 &ctx->mod, int32, size / sizeof(uint32_t)); 5274 if (!type) 5275 return false; 5276 5277 ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4); 5278 if (!ctx->scratchvars) 5279 return false; 5280 } 5281 5282 return true; 5283} 5284 5285/* The validator complains if we don't have ops that reference a global variable. */ 5286static bool 5287shader_has_shared_ops(struct nir_shader *s) 5288{ 5289 nir_foreach_function(func, s) { 5290 if (!func->impl) 5291 continue; 5292 nir_foreach_block(block, func->impl) { 5293 nir_foreach_instr(instr, block) { 5294 if (instr->type != nir_instr_type_intrinsic) 5295 continue; 5296 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 5297 switch (intrin->intrinsic) { 5298 case nir_intrinsic_load_shared_dxil: 5299 case nir_intrinsic_store_shared_dxil: 5300 case nir_intrinsic_shared_atomic_add_dxil: 5301 case nir_intrinsic_shared_atomic_and_dxil: 5302 case nir_intrinsic_shared_atomic_comp_swap_dxil: 5303 case nir_intrinsic_shared_atomic_exchange_dxil: 5304 case nir_intrinsic_shared_atomic_imax_dxil: 5305 case nir_intrinsic_shared_atomic_imin_dxil: 5306 case nir_intrinsic_shared_atomic_or_dxil: 5307 case nir_intrinsic_shared_atomic_umax_dxil: 5308 case nir_intrinsic_shared_atomic_umin_dxil: 5309 case nir_intrinsic_shared_atomic_xor_dxil: 5310 return true; 5311 default: break; 5312 } 5313 } 5314 } 5315 } 5316 return false; 5317} 5318 5319static bool 5320emit_function(struct ntd_context *ctx, nir_function *func) 5321{ 5322 assert(func->num_params == 0); 5323 nir_function_impl *impl = func->impl; 5324 if (!impl) 5325 return true; 5326 5327 nir_metadata_require(impl, nir_metadata_block_index); 5328 5329 const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod); 5330 const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0); 5331 struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks); 5332 if (!func_def) 5333 return false; 5334 5335 if (func->is_entrypoint) 5336 ctx->main_func_def = func_def; 5337 else if (func == ctx->tess_ctrl_patch_constant_func) 5338 ctx->tess_ctrl_patch_constant_func_def = func_def; 5339 5340 ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc); 5341 if (!ctx->defs) 5342 return false; 5343 ctx->num_defs = impl->ssa_alloc; 5344 5345 ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); 5346 if (!ctx->phis) 5347 return false; 5348 5349 prepare_phi_values(ctx, impl); 5350 5351 if (!emit_scratch(ctx)) 5352 return false; 5353 5354 if (!emit_static_indexing_handles(ctx)) 5355 return false; 5356 5357 if (!emit_cf_list(ctx, &impl->body)) 5358 return false; 5359 5360 hash_table_foreach(ctx->phis, entry) { 5361 if (!fixup_phi(ctx, (nir_phi_instr *)entry->key, 5362 (struct phi_block *)entry->data)) 5363 return false; 5364 } 5365 5366 if (!dxil_emit_ret_void(&ctx->mod)) 5367 return false; 5368 5369 ralloc_free(ctx->defs); 5370 ctx->defs = NULL; 5371 _mesa_hash_table_destroy(ctx->phis, NULL); 5372 return true; 5373} 5374 5375static bool 5376emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts) 5377{ 5378 /* The validator forces us to emit resources in a specific order: 5379 * CBVs, Samplers, SRVs, UAVs. While we are at it also remove 5380 * stale struct uniforms, they are lowered but might not have been removed */ 5381 sort_uniforms_by_binding_and_remove_structs(ctx->shader); 5382 5383 /* CBVs */ 5384 if (!emit_cbvs(ctx)) 5385 return false; 5386 5387 /* Samplers */ 5388 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { 5389 unsigned count = glsl_type_get_sampler_count(var->type); 5390 assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type))); 5391 if (count > 0 && !emit_sampler(ctx, var, count)) 5392 return false; 5393 } 5394 5395 /* SRVs */ 5396 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { 5397 if (glsl_type_is_texture(glsl_without_array(var->type)) && 5398 !emit_srv(ctx, var, glsl_type_get_texture_count(var->type))) 5399 return false; 5400 } 5401 5402 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 5403 nir_foreach_image_variable(var, ctx->shader) { 5404 if ((var->data.access & ACCESS_NON_WRITEABLE) && 5405 !emit_srv(ctx, var, glsl_type_get_image_count(var->type))) 5406 return false; 5407 } 5408 } 5409 5410 /* Handle read-only SSBOs as SRVs */ 5411 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 5412 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { 5413 if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) { 5414 unsigned count = 1; 5415 if (glsl_type_is_array(var->type)) 5416 count = glsl_get_length(var->type); 5417 if (!emit_srv(ctx, var, count)) 5418 return false; 5419 } 5420 } 5421 } 5422 5423 if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) { 5424 const struct dxil_type *type; 5425 unsigned size; 5426 5427 /* 5428 * We always allocate an u32 array, no matter the actual variable types. 5429 * According to the DXIL spec, the minimum load/store granularity is 5430 * 32-bit, anything smaller requires using a read-extract/read-write-modify 5431 * approach. Non-atomic 64-bit accesses are allowed, but the 5432 * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *)) 5433 * sequences don't seem to be accepted by the DXIL validator when the 5434 * pointer is in the groupshared address space, making the 32-bit -> 64-bit 5435 * pointer cast impossible. 5436 */ 5437 size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t)); 5438 type = dxil_module_get_array_type(&ctx->mod, 5439 dxil_module_get_int_type(&ctx->mod, 32), 5440 size / sizeof(uint32_t)); 5441 ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type, 5442 DXIL_AS_GROUPSHARED, 5443 ffs(sizeof(uint64_t)), 5444 NULL); 5445 } 5446 5447 /* UAVs */ 5448 if (ctx->shader->info.stage == MESA_SHADER_KERNEL) { 5449 if (!emit_globals(ctx, opts->num_kernel_globals)) 5450 return false; 5451 5452 ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); 5453 if (!ctx->consts) 5454 return false; 5455 if (!emit_global_consts(ctx)) 5456 return false; 5457 } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { 5458 /* Handle read/write SSBOs as UAVs */ 5459 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { 5460 if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) { 5461 unsigned count = 1; 5462 if (glsl_type_is_array(var->type)) 5463 count = glsl_get_length(var->type); 5464 if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set, 5465 count, DXIL_COMP_TYPE_INVALID, 5466 DXIL_RESOURCE_KIND_RAW_BUFFER, var->name)) 5467 return false; 5468 5469 } 5470 } 5471 } else { 5472 for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) { 5473 char name[64]; 5474 snprintf(name, sizeof(name), "__ssbo%d", i); 5475 if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 5476 DXIL_RESOURCE_KIND_RAW_BUFFER, name)) 5477 return false; 5478 } 5479 /* To work around a WARP bug, bind these descriptors a second time in descriptor 5480 * space 2. Space 0 will be used for static indexing, while space 2 will be used 5481 * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while 5482 * space 2 will be a single array. 5483 */ 5484 if (ctx->shader->info.num_ssbos && 5485 !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 5486 DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic")) 5487 return false; 5488 } 5489 5490 nir_foreach_image_variable(var, ctx->shader) { 5491 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN && 5492 var && (var->data.access & ACCESS_NON_WRITEABLE)) 5493 continue; // already handled in SRV 5494 5495 if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type))) 5496 return false; 5497 } 5498 5499 ctx->mod.info.has_per_sample_input = 5500 BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); 5501 if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { 5502 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) { 5503 if (var->data.sample) { 5504 ctx->mod.info.has_per_sample_input = true; 5505 break; 5506 } 5507 } 5508 } 5509 5510 unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ? 5511 ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size; 5512 preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size); 5513 5514 nir_foreach_function(func, ctx->shader) { 5515 if (!emit_function(ctx, func)) 5516 return false; 5517 } 5518 5519 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { 5520 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) { 5521 if (var->data.location == FRAG_RESULT_STENCIL) { 5522 ctx->mod.feats.stencil_ref = true; 5523 } 5524 } 5525 } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX || 5526 ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) { 5527 if (ctx->shader->info.outputs_written & 5528 (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER)) 5529 ctx->mod.feats.array_layer_from_vs_or_ds = true; 5530 } 5531 5532 if (ctx->mod.feats.native_low_precision) 5533 ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2); 5534 5535 return emit_metadata(ctx) && 5536 dxil_emit_module(&ctx->mod); 5537} 5538 5539static unsigned int 5540get_dxil_shader_kind(struct nir_shader *s) 5541{ 5542 switch (s->info.stage) { 5543 case MESA_SHADER_VERTEX: 5544 return DXIL_VERTEX_SHADER; 5545 case MESA_SHADER_TESS_CTRL: 5546 return DXIL_HULL_SHADER; 5547 case MESA_SHADER_TESS_EVAL: 5548 return DXIL_DOMAIN_SHADER; 5549 case MESA_SHADER_GEOMETRY: 5550 return DXIL_GEOMETRY_SHADER; 5551 case MESA_SHADER_FRAGMENT: 5552 return DXIL_PIXEL_SHADER; 5553 case MESA_SHADER_KERNEL: 5554 case MESA_SHADER_COMPUTE: 5555 return DXIL_COMPUTE_SHADER; 5556 default: 5557 unreachable("unknown shader stage in nir_to_dxil"); 5558 return DXIL_COMPUTE_SHADER; 5559 } 5560} 5561 5562static unsigned 5563lower_bit_size_callback(const nir_instr* instr, void *data) 5564{ 5565 if (instr->type != nir_instr_type_alu) 5566 return 0; 5567 const nir_alu_instr *alu = nir_instr_as_alu(instr); 5568 5569 if (nir_op_infos[alu->op].is_conversion) 5570 return 0; 5571 5572 unsigned num_inputs = nir_op_infos[alu->op].num_inputs; 5573 const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data; 5574 unsigned min_bit_size = opts->lower_int16 ? 32 : 16; 5575 5576 unsigned ret = 0; 5577 for (unsigned i = 0; i < num_inputs; i++) { 5578 unsigned bit_size = nir_src_bit_size(alu->src[i].src); 5579 if (bit_size != 1 && bit_size < min_bit_size) 5580 ret = min_bit_size; 5581 } 5582 5583 return ret; 5584} 5585 5586static void 5587optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts) 5588{ 5589 bool progress; 5590 do { 5591 progress = false; 5592 NIR_PASS_V(s, nir_lower_vars_to_ssa); 5593 NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX); 5594 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); 5595 NIR_PASS(progress, s, nir_copy_prop); 5596 NIR_PASS(progress, s, nir_opt_copy_prop_vars); 5597 NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts); 5598 NIR_PASS(progress, s, dxil_nir_lower_8bit_conv); 5599 if (opts->lower_int16) 5600 NIR_PASS(progress, s, dxil_nir_lower_16bit_conv); 5601 NIR_PASS(progress, s, nir_opt_remove_phis); 5602 NIR_PASS(progress, s, nir_opt_dce); 5603 NIR_PASS(progress, s, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false); 5604 NIR_PASS(progress, s, nir_opt_dead_cf); 5605 NIR_PASS(progress, s, nir_opt_cse); 5606 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); 5607 NIR_PASS(progress, s, nir_opt_algebraic); 5608 NIR_PASS(progress, s, dxil_nir_lower_x2b); 5609 if (s->options->lower_int64_options) 5610 NIR_PASS(progress, s, nir_lower_int64); 5611 NIR_PASS(progress, s, nir_lower_alu); 5612 NIR_PASS(progress, s, nir_opt_constant_folding); 5613 NIR_PASS(progress, s, nir_opt_undef); 5614 NIR_PASS(progress, s, nir_lower_undef_to_zero); 5615 NIR_PASS(progress, s, nir_opt_deref); 5616 NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16); 5617 NIR_PASS(progress, s, nir_lower_64bit_phis); 5618 NIR_PASS_V(s, nir_lower_system_values); 5619 } while (progress); 5620 5621 do { 5622 progress = false; 5623 NIR_PASS(progress, s, nir_opt_algebraic_late); 5624 } while (progress); 5625} 5626 5627static 5628void dxil_fill_validation_state(struct ntd_context *ctx, 5629 struct dxil_validation_state *state) 5630{ 5631 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ? 5632 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); 5633 state->num_resources = ctx->resources.size / resource_element_size; 5634 state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data; 5635 state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX; 5636 state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind; 5637 state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs; 5638 state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs; 5639 state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts; 5640 5641 switch (ctx->mod.shader_kind) { 5642 case DXIL_VERTEX_SHADER: 5643 state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position; 5644 break; 5645 case DXIL_PIXEL_SHADER: 5646 /* TODO: handle depth outputs */ 5647 state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth; 5648 state->state.psv1.psv0.ps.sample_frequency = 5649 ctx->mod.info.has_per_sample_input; 5650 break; 5651 case DXIL_COMPUTE_SHADER: 5652 state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1); 5653 state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1); 5654 state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1); 5655 break; 5656 case DXIL_GEOMETRY_SHADER: 5657 state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out; 5658 state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive); 5659 state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive); 5660 state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1); 5661 state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position; 5662 break; 5663 case DXIL_HULL_SHADER: 5664 state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count; 5665 state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out; 5666 state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); 5667 state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info); 5668 state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; 5669 break; 5670 case DXIL_DOMAIN_SHADER: 5671 state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out; 5672 state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); 5673 state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position; 5674 state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; 5675 break; 5676 default: 5677 assert(0 && "Shader type not (yet) supported"); 5678 } 5679} 5680 5681static nir_variable * 5682add_sysvalue(struct ntd_context *ctx, 5683 uint8_t value, char *name, 5684 int driver_location) 5685{ 5686 5687 nir_variable *var = rzalloc(ctx->shader, nir_variable); 5688 if (!var) 5689 return NULL; 5690 var->data.driver_location = driver_location; 5691 var->data.location = value; 5692 var->type = glsl_uint_type(); 5693 var->name = name; 5694 var->data.mode = nir_var_system_value; 5695 var->data.interpolation = INTERP_MODE_FLAT; 5696 return var; 5697} 5698 5699static bool 5700append_input_or_sysvalue(struct ntd_context *ctx, 5701 int input_loc, int sv_slot, 5702 char *name, int driver_location) 5703{ 5704 if (input_loc >= 0) { 5705 /* Check inputs whether a variable is available the corresponds 5706 * to the sysvalue */ 5707 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { 5708 if (var->data.location == input_loc) { 5709 ctx->system_value[sv_slot] = var; 5710 return true; 5711 } 5712 } 5713 } 5714 5715 ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location); 5716 if (!ctx->system_value[sv_slot]) 5717 return false; 5718 5719 nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]); 5720 return true; 5721} 5722 5723struct sysvalue_name { 5724 gl_system_value value; 5725 int slot; 5726 char *name; 5727 gl_shader_stage only_in_shader; 5728} possible_sysvalues[] = { 5729 {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE}, 5730 {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE}, 5731 {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE}, 5732 {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY}, 5733 {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE}, 5734}; 5735 5736static bool 5737allocate_sysvalues(struct ntd_context *ctx) 5738{ 5739 unsigned driver_location = 0; 5740 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) 5741 driver_location++; 5742 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value) 5743 driver_location++; 5744 5745 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT && 5746 ctx->shader->info.inputs_read && 5747 !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) { 5748 bool need_sample_id = true; 5749 5750 /* "var->data.sample = true" sometimes just mean, "I want per-sample 5751 * shading", which explains why we can end up with vars having flat 5752 * interpolation with the per-sample bit set. If there's only such 5753 * type of variables, we need to tell DXIL that we read SV_SampleIndex 5754 * to make DXIL validation happy. 5755 */ 5756 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { 5757 if (!var->data.sample || var->data.interpolation != INTERP_MODE_FLAT) { 5758 need_sample_id = false; 5759 break; 5760 } 5761 } 5762 5763 if (need_sample_id) 5764 BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); 5765 } 5766 5767 for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) { 5768 struct sysvalue_name *info = &possible_sysvalues[i]; 5769 if (info->only_in_shader != MESA_SHADER_NONE && 5770 info->only_in_shader != ctx->shader->info.stage) 5771 continue; 5772 if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) { 5773 if (!append_input_or_sysvalue(ctx, info->slot, 5774 info->value, info->name, 5775 driver_location++)) 5776 return false; 5777 } 5778 } 5779 return true; 5780} 5781 5782static int 5783type_size_vec4(const struct glsl_type *type, bool bindless) 5784{ 5785 return glsl_count_attribute_slots(type, false); 5786} 5787 5788static bool 5789dxil_validator_can_validate_shader_model(unsigned sm_minor, unsigned val_minor) 5790{ 5791 /* Currently the validators are versioned such that val 1.x is needed for SM6.x */ 5792 return sm_minor <= val_minor; 5793} 5794 5795static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4; 5796static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_7; 5797 5798bool 5799nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, 5800 struct blob *blob) 5801{ 5802 assert(opts); 5803 bool retval = true; 5804 debug_dxil = (int)debug_get_option_debug_dxil(); 5805 blob_init(blob); 5806 5807 if (opts->shader_model_max < SHADER_MODEL_6_1) { 5808 debug_printf("D3D12: cannot support emitting shader model 6.0 or lower\n"); 5809 return false; 5810 } 5811 5812 if (opts->validator_version_max != NO_DXIL_VALIDATION && 5813 opts->validator_version_max < dxil_validator_min_capable_version) { 5814 debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n", 5815 opts->validator_version_max >> 16, 5816 opts->validator_version_max & 0xffff); 5817 return false; 5818 } 5819 5820 /* If no validation, write a blob as if it was going to be validated by the newest understood validator. 5821 * Same if the validator is newer than we know how to write for. 5822 */ 5823 uint32_t validator_version = 5824 opts->validator_version_max == NO_DXIL_VALIDATION || 5825 opts->validator_version_max > dxil_validator_max_capable_version ? 5826 dxil_validator_max_capable_version : opts->validator_version_max; 5827 5828 struct ntd_context *ctx = calloc(1, sizeof(*ctx)); 5829 if (!ctx) 5830 return false; 5831 5832 ctx->opts = opts; 5833 ctx->shader = s; 5834 5835 ctx->ralloc_ctx = ralloc_context(NULL); 5836 if (!ctx->ralloc_ctx) { 5837 retval = false; 5838 goto out; 5839 } 5840 5841 util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx); 5842 util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx); 5843 util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx); 5844 util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx); 5845 util_dynarray_init(&ctx->resources, ctx->ralloc_ctx); 5846 dxil_module_init(&ctx->mod, ctx->ralloc_ctx); 5847 ctx->mod.shader_kind = get_dxil_shader_kind(s); 5848 ctx->mod.major_version = 6; 5849 ctx->mod.minor_version = 1; 5850 ctx->mod.major_validator = validator_version >> 16; 5851 ctx->mod.minor_validator = validator_version & 0xffff; 5852 5853 if (s->info.stage <= MESA_SHADER_FRAGMENT) { 5854 uint64_t in_mask = 5855 s->info.stage == MESA_SHADER_VERTEX ? 5856 0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); 5857 uint64_t out_mask = 5858 s->info.stage == MESA_SHADER_FRAGMENT ? 5859 ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) : 5860 (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); 5861 5862 NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask); 5863 } 5864 5865 NIR_PASS_V(s, dxil_nir_lower_fquantize2f16); 5866 NIR_PASS_V(s, nir_lower_frexp); 5867 NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true); 5868 NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); 5869 NIR_PASS_V(s, dxil_nir_ensure_position_writes); 5870 NIR_PASS_V(s, nir_lower_pack); 5871 NIR_PASS_V(s, dxil_nir_lower_system_values); 5872 NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out); 5873 5874 if (ctx->mod.shader_kind == DXIL_HULL_SHADER) 5875 NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func); 5876 5877 if (ctx->mod.shader_kind == DXIL_HULL_SHADER || 5878 ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) { 5879 /* Make sure any derefs are gone after lower_io before updating tess level vars */ 5880 NIR_PASS_V(s, nir_opt_dce); 5881 NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain); 5882 } 5883 5884 optimize_nir(s, opts); 5885 5886 NIR_PASS_V(s, nir_remove_dead_variables, 5887 nir_var_function_temp | nir_var_shader_temp, NULL); 5888 5889 if (!allocate_sysvalues(ctx)) 5890 return false; 5891 5892 NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value); 5893 NIR_PASS_V(s, nir_opt_dce); 5894 5895 if (debug_dxil & DXIL_DEBUG_VERBOSE) 5896 nir_print_shader(s, stderr); 5897 5898 if (!emit_module(ctx, opts)) { 5899 debug_printf("D3D12: dxil_container_add_module failed\n"); 5900 retval = false; 5901 goto out; 5902 } 5903 5904 assert(ctx->mod.major_version == 6 && ctx->mod.minor_version >= 1); 5905 if ((ctx->mod.major_version << 16 | ctx->mod.minor_version) > opts->shader_model_max) { 5906 debug_printf("D3D12: max shader model exceeded\n"); 5907 retval = false; 5908 goto out; 5909 } 5910 5911 assert(ctx->mod.major_validator == 1); 5912 if (!dxil_validator_can_validate_shader_model(ctx->mod.minor_version, ctx->mod.minor_validator)) { 5913 debug_printf("D3D12: shader model exceeds max that can be validated\n"); 5914 retval = false; 5915 goto out; 5916 } 5917 5918 if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) { 5919 struct dxil_dumper *dumper = dxil_dump_create(); 5920 dxil_dump_module(dumper, &ctx->mod); 5921 fprintf(stderr, "\n"); 5922 dxil_dump_buf_to_file(dumper, stderr); 5923 fprintf(stderr, "\n\n"); 5924 dxil_dump_free(dumper); 5925 } 5926 5927 struct dxil_container container; 5928 dxil_container_init(&container); 5929 if (!dxil_container_add_features(&container, &ctx->mod.feats)) { 5930 debug_printf("D3D12: dxil_container_add_features failed\n"); 5931 retval = false; 5932 goto out; 5933 } 5934 5935 if (!dxil_container_add_io_signature(&container, 5936 DXIL_ISG1, 5937 ctx->mod.num_sig_inputs, 5938 ctx->mod.inputs, 5939 ctx->mod.minor_validator >= 7)) { 5940 debug_printf("D3D12: failed to write input signature\n"); 5941 retval = false; 5942 goto out; 5943 } 5944 5945 if (!dxil_container_add_io_signature(&container, 5946 DXIL_OSG1, 5947 ctx->mod.num_sig_outputs, 5948 ctx->mod.outputs, 5949 ctx->mod.minor_validator >= 7)) { 5950 debug_printf("D3D12: failed to write output signature\n"); 5951 retval = false; 5952 goto out; 5953 } 5954 5955 if ((ctx->mod.shader_kind == DXIL_HULL_SHADER || 5956 ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) && 5957 !dxil_container_add_io_signature(&container, 5958 DXIL_PSG1, 5959 ctx->mod.num_sig_patch_consts, 5960 ctx->mod.patch_consts, 5961 ctx->mod.minor_validator >= 7)) { 5962 debug_printf("D3D12: failed to write patch constant signature\n"); 5963 retval = false; 5964 goto out; 5965 } 5966 5967 struct dxil_validation_state validation_state; 5968 memset(&validation_state, 0, sizeof(validation_state)); 5969 dxil_fill_validation_state(ctx, &validation_state); 5970 5971 if (!dxil_container_add_state_validation(&container,&ctx->mod, 5972 &validation_state)) { 5973 debug_printf("D3D12: failed to write state-validation\n"); 5974 retval = false; 5975 goto out; 5976 } 5977 5978 if (!dxil_container_add_module(&container, &ctx->mod)) { 5979 debug_printf("D3D12: failed to write module\n"); 5980 retval = false; 5981 goto out; 5982 } 5983 5984 if (!dxil_container_write(&container, blob)) { 5985 debug_printf("D3D12: dxil_container_write failed\n"); 5986 retval = false; 5987 goto out; 5988 } 5989 dxil_container_finish(&container); 5990 5991 if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) { 5992 static int shader_id = 0; 5993 char buffer[64]; 5994 snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob", 5995 get_shader_kind_str(ctx->mod.shader_kind), shader_id++); 5996 debug_printf("Try to write blob to %s\n", buffer); 5997 FILE *f = fopen(buffer, "wb"); 5998 if (f) { 5999 fwrite(blob->data, 1, blob->size, f); 6000 fclose(f); 6001 } 6002 } 6003 6004out: 6005 dxil_module_release(&ctx->mod); 6006 ralloc_free(ctx->ralloc_ctx); 6007 free(ctx); 6008 return retval; 6009} 6010 6011enum dxil_sysvalue_type 6012nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask) 6013{ 6014 switch (var->data.location) { 6015 case VARYING_SLOT_FACE: 6016 return DXIL_GENERATED_SYSVALUE; 6017 case VARYING_SLOT_POS: 6018 case VARYING_SLOT_PRIMITIVE_ID: 6019 case VARYING_SLOT_CLIP_DIST0: 6020 case VARYING_SLOT_CLIP_DIST1: 6021 case VARYING_SLOT_PSIZ: 6022 case VARYING_SLOT_TESS_LEVEL_INNER: 6023 case VARYING_SLOT_TESS_LEVEL_OUTER: 6024 case VARYING_SLOT_VIEWPORT: 6025 case VARYING_SLOT_LAYER: 6026 if (!((1ull << var->data.location) & other_stage_mask)) 6027 return DXIL_SYSVALUE; 6028 FALLTHROUGH; 6029 default: 6030 return DXIL_NO_SYSVALUE; 6031 } 6032} 6033