/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "vtn_private.h"
#include "nir/nir_vla.h"
#include "nir/nir_control_flow.h"
#include "nir/nir_constant_expressions.h"
#include "nir/nir_deref.h"
#include "spirv_info.h"

#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_string.h"

#include <stdio.h>

#ifndef NDEBUG
static enum nir_spirv_debug_level
vtn_default_log_level(void)
{
   enum nir_spirv_debug_level level = NIR_SPIRV_DEBUG_LEVEL_WARNING;
   const char *vtn_log_level_strings[] = {
      [NIR_SPIRV_DEBUG_LEVEL_WARNING] = "warning",
      [NIR_SPIRV_DEBUG_LEVEL_INFO] = "info",
      [NIR_SPIRV_DEBUG_LEVEL_ERROR] = "error",
   };
   const char *str = getenv("MESA_SPIRV_LOG_LEVEL");

   if (str == NULL)
      return NIR_SPIRV_DEBUG_LEVEL_WARNING;

   for (int i = 0; i < ARRAY_SIZE(vtn_log_level_strings); i++) {
      if (strcasecmp(str, vtn_log_level_strings[i]) == 0) {
         level = i;
         break;
      }
   }

   return level;
}
#endif

void
vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level,
        size_t spirv_offset, const char *message)
{
   if (b->options->debug.func) {
      b->options->debug.func(b->options->debug.private_data,
                             level, spirv_offset, message);
   }

#ifndef NDEBUG
   static enum nir_spirv_debug_level default_level =
      NIR_SPIRV_DEBUG_LEVEL_INVALID;

   if (default_level == NIR_SPIRV_DEBUG_LEVEL_INVALID)
      default_level = vtn_default_log_level();

   if (level >= default_level)
      fprintf(stderr, "%s\n", message);
#endif
}

void
vtn_logf(struct vtn_builder *b, enum nir_spirv_debug_level level,
         size_t spirv_offset, const char *fmt, ...)
{
   va_list args;
   char *msg;

   va_start(args, fmt);
   msg = ralloc_vasprintf(NULL, fmt, args);
   va_end(args);

   vtn_log(b, level, spirv_offset, msg);

   ralloc_free(msg);
}

static void
vtn_log_err(struct vtn_builder *b,
            enum nir_spirv_debug_level level, const char *prefix,
            const char *file, unsigned line,
            const char *fmt, va_list args)
{
   char *msg;

   msg = ralloc_strdup(NULL, prefix);

#ifndef NDEBUG
   ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
#endif

   ralloc_asprintf_append(&msg, " ");

   ralloc_vasprintf_append(&msg, fmt, args);

   ralloc_asprintf_append(&msg, "\n %zu bytes into the SPIR-V binary",
                          b->spirv_offset);

   if (b->file) {
      ralloc_asprintf_append(&msg,
                             "\n in SPIR-V source file %s, line %d, col %d",
                             b->file, b->line, b->col);
   }

   vtn_log(b, level, b->spirv_offset, msg);

   ralloc_free(msg);
}

static void
vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
{
   static int idx = 0;

   char filename[1024];
   int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv",
                      path, prefix, idx++);
   if (len < 0 || len >= sizeof(filename))
      return;

   FILE *f = fopen(filename, "w");
   if (f == NULL)
      return;

   fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f);
   fclose(f);

   vtn_info("SPIR-V shader dumped to %s", filename);
}

void
_vtn_warn(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_WARNING, "SPIR-V WARNING:\n",
               file, line, fmt, args);
   va_end(args);
}

void
_vtn_err(struct vtn_builder *b, const char *file, unsigned line,
         const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V ERROR:\n",
               file, line, fmt, args);
   va_end(args);
}

void
_vtn_fail(struct vtn_builder *b, const char *file, unsigned line,
          const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   vtn_log_err(b, NIR_SPIRV_DEBUG_LEVEL_ERROR, "SPIR-V parsing FAILED:\n",
               file, line, fmt, args);
   va_end(args);

   const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH");
   if (dump_path)
      vtn_dump_shader(b, dump_path, "fail");

   vtn_longjmp(b->fail_jump, 1);
}

static struct vtn_ssa_value *
vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_vector_or_scalar(type)) {
      unsigned num_components = glsl_get_vector_elements(val->type);
      unsigned bit_size = glsl_get_bit_size(val->type);
      val->def = nir_ssa_undef(&b->nb, num_components, bit_size);
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++)
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_undef_ssa_value(b, elem_type);
         }
      }
   }

   return val;
}

struct vtn_ssa_value *
vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
                    const struct glsl_type *type)
{
   struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant);

   if (entry)
      return entry->data;

   struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value);
   val->type = glsl_get_bare_type(type);

   if (glsl_type_is_vector_or_scalar(type)) {
      unsigned num_components = glsl_get_vector_elements(val->type);
      unsigned bit_size = glsl_get_bit_size(type);
      nir_load_const_instr *load =
         nir_load_const_instr_create(b->shader, num_components, bit_size);

      memcpy(load->value, constant->values,
             sizeof(nir_const_value) * num_components);

      nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
      val->def = &load->def;
   } else {
      unsigned elems = glsl_get_length(val->type);
      val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
      if (glsl_type_is_array_or_matrix(type)) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);
         for (unsigned i = 0; i < elems; i++) {
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      } else {
         vtn_assert(glsl_type_is_struct_or_ifc(type));
         for (unsigned i = 0; i < elems; i++) {
            const struct glsl_type *elem_type = glsl_get_struct_field(type, i);
            val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
                                                elem_type);
         }
      }
   }

   return val;
}

struct vtn_ssa_value *
vtn_ssa_value(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   switch (val->value_type) {
   case vtn_value_type_undef:
      return vtn_undef_ssa_value(b, val->type->type);

   case vtn_value_type_constant:
      return vtn_const_ssa_value(b, val->constant, val->type->type);

   case vtn_value_type_ssa:
      return val->ssa;

   case vtn_value_type_pointer:
      vtn_assert(val->pointer->ptr_type && val->pointer->ptr_type->type);
      struct vtn_ssa_value *ssa =
         vtn_create_ssa_value(b, val->pointer->ptr_type->type);
      ssa->def = vtn_pointer_to_ssa(b, val->pointer);
      return ssa;

   default:
      vtn_fail("Invalid type for an SSA value");
   }
}

struct vtn_value *
vtn_push_ssa_value(struct vtn_builder *b, uint32_t value_id,
                   struct vtn_ssa_value *ssa)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);

   /* See vtn_create_ssa_value */
   vtn_fail_if(ssa->type != glsl_get_bare_type(type->type),
               "Type mismatch for SPIR-V SSA value");

   struct vtn_value *val;
   if (type->base_type == vtn_base_type_pointer) {
      val = vtn_push_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type));
   } else {
      /* Don't trip the value_type_ssa check in vtn_push_value */
      val = vtn_push_value(b, value_id, vtn_value_type_invalid);
      val->value_type = vtn_value_type_ssa;
      val->ssa = ssa;
   }

   return val;
}

nir_ssa_def *
vtn_get_nir_ssa(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_ssa_value *ssa = vtn_ssa_value(b, value_id);
   vtn_fail_if(!glsl_type_is_vector_or_scalar(ssa->type),
               "Expected a vector or scalar type");
   return ssa->def;
}

struct vtn_value *
vtn_push_nir_ssa(struct vtn_builder *b, uint32_t value_id, nir_ssa_def *def)
{
   /* Types for all SPIR-V SSA values are set as part of a pre-pass so the
    * type will be valid by the time we get here.
    */
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_fail_if(def->num_components != glsl_get_vector_elements(type->type) ||
               def->bit_size != glsl_get_bit_size(type->type),
               "Mismatch between NIR and SPIR-V type.");
   struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type);
   ssa->def = def;
   return vtn_push_ssa_value(b, value_id, ssa);
}

static enum gl_access_qualifier
spirv_to_gl_access_qualifier(struct vtn_builder *b,
                             SpvAccessQualifier access_qualifier)
{
   switch (access_qualifier) {
   case SpvAccessQualifierReadOnly:
      return ACCESS_NON_WRITEABLE;
   case SpvAccessQualifierWriteOnly:
      return ACCESS_NON_READABLE;
   case SpvAccessQualifierReadWrite:
      return 0;
   default:
      vtn_fail("Invalid image access qualifier");
   }
}

static nir_deref_instr *
vtn_get_image(struct vtn_builder *b, uint32_t value_id,
              enum gl_access_qualifier *access)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   if (access)
      *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier);
   nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ?
                            nir_var_image : nir_var_uniform;
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               mode, type->glsl_image, 0);
}

static void
vtn_push_image(struct vtn_builder *b, uint32_t value_id,
               nir_deref_instr *deref, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id, &deref->dest.ssa);
   value->propagated_non_uniform = propagate_non_uniform;
}

static nir_deref_instr *
vtn_get_sampler(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampler);
   return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id),
                               nir_var_uniform, glsl_bare_sampler_type(), 0);
}

nir_ssa_def *
vtn_sampled_image_to_nir_ssa(struct vtn_builder *b,
                             struct vtn_sampled_image si)
{
   return nir_vec2(&b->nb, &si.image->dest.ssa, &si.sampler->dest.ssa);
}

static void
vtn_push_sampled_image(struct vtn_builder *b, uint32_t value_id,
                       struct vtn_sampled_image si, bool propagate_non_uniform)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   struct vtn_value *value = vtn_push_nir_ssa(b, value_id,
                                              vtn_sampled_image_to_nir_ssa(b, si));
   value->propagated_non_uniform = propagate_non_uniform;
}

static struct vtn_sampled_image
vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id)
{
   struct vtn_type *type = vtn_get_value_type(b, value_id);
   vtn_assert(type->base_type == vtn_base_type_sampled_image);
   nir_ssa_def *si_vec2 = vtn_get_nir_ssa(b, value_id);

   /* Even though this is a sampled image, we can end up here with a storage
    * image because OpenCL doesn't distinguish between the two.
    */
   const struct glsl_type *image_type = type->image->glsl_image;
   nir_variable_mode image_mode = glsl_type_is_image(image_type) ?
                                  nir_var_image : nir_var_uniform;

   struct vtn_sampled_image si = { NULL, };
   si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0),
                                   image_mode, image_type, 0);
   si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1),
                                     nir_var_uniform,
                                     glsl_bare_sampler_type(), 0);
   return si;
}

const char *
vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
                   unsigned word_count, unsigned *words_used)
{
   /* From the SPIR-V spec:
    *
    *    "A string is interpreted as a nul-terminated stream of characters.
    *    The character set is Unicode in the UTF-8 encoding scheme. The UTF-8
    *    octets (8-bit bytes) are packed four per word, following the
    *    little-endian convention (i.e., the first octet is in the
    *    lowest-order 8 bits of the word). The final word contains the
    *    string’s nul-termination character (0), and all contents past the
    *    end of the string in the final word are padded with 0."
    *
    * On big-endian, we need to byte-swap.
    */
#if UTIL_ARCH_BIG_ENDIAN
   {
      uint32_t *copy = ralloc_array(b, uint32_t, word_count);
      for (unsigned i = 0; i < word_count; i++)
         copy[i] = util_bswap32(words[i]);
      words = copy;
   }
#endif

   const char *str = (char *)words;
   const char *end = memchr(str, 0, word_count * 4);
   vtn_fail_if(end == NULL, "String is not null-terminated");

   if (words_used)
      *words_used = DIV_ROUND_UP(end - str + 1, sizeof(*words));

   return str;
}

const uint32_t *
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
                        const uint32_t *end, vtn_instruction_handler handler)
{
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   const uint32_t *w = start;
   while (w < end) {
      SpvOp opcode = w[0] & SpvOpCodeMask;
      unsigned count = w[0] >> SpvWordCountShift;
      vtn_assert(count >= 1 && w + count <= end);

      b->spirv_offset = (uint8_t *)w - (uint8_t *)b->spirv;

      switch (opcode) {
      case SpvOpNop:
         break; /* Do nothing */

      case SpvOpLine:
         b->file = vtn_value(b, w[1], vtn_value_type_string)->str;
         b->line = w[2];
         b->col = w[3];
         break;

      case SpvOpNoLine:
         b->file = NULL;
         b->line = -1;
         b->col = -1;
         break;

      default:
         if (!handler(b, opcode, w, count))
            return w;
         break;
      }

      w += count;
   }

   b->spirv_offset = 0;
   b->file = NULL;
   b->line = -1;
   b->col = -1;

   assert(w == end);
   return w;
}

static bool
vtn_handle_non_semantic_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                    const uint32_t *w, unsigned count)
{
   /* Do nothing. */
   return true;
}

static void
vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
                     const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpExtInstImport: {
      struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension);
      const char *ext = vtn_string_literal(b, &w[2], count - 2, NULL);
      if (strcmp(ext, "GLSL.std.450") == 0) {
         val->ext_handler = vtn_handle_glsl450_instruction;
      } else if ((strcmp(ext, "SPV_AMD_gcn_shader") == 0)
                 && (b->options && b->options->caps.amd_gcn_shader)) {
         val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_ballot") == 0)
                 && (b->options && b->options->caps.amd_shader_ballot)) {
         val->ext_handler = vtn_handle_amd_shader_ballot_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_trinary_minmax") == 0)
                 && (b->options && b->options->caps.amd_trinary_minmax)) {
         val->ext_handler = vtn_handle_amd_shader_trinary_minmax_instruction;
      } else if ((strcmp(ext, "SPV_AMD_shader_explicit_vertex_parameter") == 0)
                 && (b->options && b->options->caps.amd_shader_explicit_vertex_parameter)) {
         val->ext_handler = vtn_handle_amd_shader_explicit_vertex_parameter_instruction;
      } else if (strcmp(ext, "OpenCL.std") == 0) {
         val->ext_handler = vtn_handle_opencl_instruction;
      } else if (strstr(ext, "NonSemantic.") == ext) {
         val->ext_handler = vtn_handle_non_semantic_instruction;
      } else {
         vtn_fail("Unsupported extension: %s", ext);
      }
      break;
   }

   case SpvOpExtInst: {
      struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension);
      bool handled = val->ext_handler(b, w[4], w, count);
      vtn_assert(handled);
      break;
   }

   default:
      vtn_fail_with_opcode("Unhandled opcode", opcode);
   }
}

static void
_foreach_decoration_helper(struct vtn_builder *b,
                           struct vtn_value *base_value,
                           int parent_member,
                           struct vtn_value *value,
                           vtn_decoration_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      int member;
      if (dec->scope == VTN_DEC_DECORATION) {
         member = parent_member;
      } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) {
         vtn_fail_if(value->value_type != vtn_value_type_type ||
                     value->type->base_type != vtn_base_type_struct,
                     "OpMemberDecorate and OpGroupMemberDecorate are only "
                     "allowed on OpTypeStruct");
         /* This means we haven't recursed yet */
         assert(value == base_value);

         member = dec->scope - VTN_DEC_STRUCT_MEMBER0;

         vtn_fail_if(member >= base_value->type->length,
                     "OpMemberDecorate specifies member %d but the "
                     "OpTypeStruct has only %u members",
                     member, base_value->type->length);
      } else {
         /* Not a decoration */
         assert(dec->scope == VTN_DEC_EXECUTION_MODE ||
                dec->scope <= VTN_DEC_STRUCT_MEMBER_NAME0);
         continue;
      }

      if (dec->group) {
         assert(dec->group->value_type == vtn_value_type_decoration_group);
         _foreach_decoration_helper(b, base_value, member, dec->group,
                                    cb, data);
      } else {
         cb(b, base_value, member, dec, data);
      }
   }
}

/** Iterates (recursively if needed) over all of the decorations on a value
 *
 * This function iterates over all of the decorations applied to a given
 * value. If it encounters a decoration group, it recurses into the group
 * and iterates over all of those decorations as well.
 */
void
vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
                       vtn_decoration_foreach_cb cb, void *data)
{
   _foreach_decoration_helper(b, value, -1, value, cb, data);
}

void
vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
                           vtn_execution_mode_foreach_cb cb, void *data)
{
   for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) {
      if (dec->scope != VTN_DEC_EXECUTION_MODE)
         continue;

      assert(dec->group == NULL);
      cb(b, value, dec, data);
   }
}

void
vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode,
                      const uint32_t *w, unsigned count)
{
   const uint32_t *w_end = w + count;
   const uint32_t target = w[1];
   w += 2;

   switch (opcode) {
   case SpvOpDecorationGroup:
      vtn_push_value(b, target, vtn_value_type_decoration_group);
      break;

   case SpvOpDecorate:
   case SpvOpDecorateId:
   case SpvOpMemberDecorate:
   case SpvOpDecorateString:
   case SpvOpMemberDecorateString:
   case SpvOpExecutionMode:
   case SpvOpExecutionModeId: {
      struct vtn_value *val = vtn_untyped_value(b, target);

      struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);
      switch (opcode) {
      case SpvOpDecorate:
      case SpvOpDecorateId:
      case SpvOpDecorateString:
         dec->scope = VTN_DEC_DECORATION;
         break;
      case SpvOpMemberDecorate:
      case SpvOpMemberDecorateString:
         dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++);
         vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */
                     "Member argument of OpMemberDecorate too large");
         break;
      case SpvOpExecutionMode:
      case SpvOpExecutionModeId:
         dec->scope = VTN_DEC_EXECUTION_MODE;
         break;
      default:
         unreachable("Invalid decoration opcode");
      }
      dec->decoration = *(w++);
      dec->num_operands = w_end - w;
      dec->operands = w;

      /* Link into the list */
      dec->next = val->decoration;
      val->decoration = dec;
      break;
   }

   case SpvOpMemberName: {
      struct vtn_value *val = vtn_untyped_value(b, target);
      struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);

      dec->scope = VTN_DEC_STRUCT_MEMBER_NAME0 - *(w++);

      dec->member_name = vtn_string_literal(b, w, w_end - w, NULL);

      dec->next = val->decoration;
      val->decoration = dec;
      break;
   }

   case SpvOpGroupMemberDecorate:
   case SpvOpGroupDecorate: {
      struct vtn_value *group =
         vtn_value(b, target, vtn_value_type_decoration_group);

      for (; w < w_end; w++) {
         struct vtn_value *val = vtn_untyped_value(b, *w);
         struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration);

         dec->group = group;
         if (opcode == SpvOpGroupDecorate) {
            dec->scope = VTN_DEC_DECORATION;
         } else {
            dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(++w);
            vtn_fail_if(dec->scope < 0, /* Check for overflow */
                        "Member argument of OpGroupMemberDecorate too large");
         }

         /* Link into the list */
         dec->next = val->decoration;
         val->decoration = dec;
      }
      break;
   }

   default:
      unreachable("Unhandled opcode");
   }
}

struct member_decoration_ctx {
   unsigned num_fields;
   struct glsl_struct_field *fields;
   struct vtn_type *type;
};

/**
 * Returns true if the given type contains a struct decorated Block or
 * BufferBlock
 */
bool
vtn_type_contains_block(struct vtn_builder *b, struct vtn_type *type)
{
   switch (type->base_type) {
   case vtn_base_type_array:
      return vtn_type_contains_block(b, type->array_element);
   case vtn_base_type_struct:
      if (type->block || type->buffer_block)
         return true;
      for (unsigned i = 0; i < type->length; i++) {
         if (vtn_type_contains_block(b, type->members[i]))
            return true;
      }
      return false;
   default:
      return false;
   }
}

/** Returns true if two types are "compatible", i.e. you can do an OpLoad,
 * OpStore, or OpCopyMemory between them without breaking anything.
 * Technically, the SPIR-V rules require the exact same type ID but this lets
 * us internally be a bit looser.
 */
bool
vtn_types_compatible(struct vtn_builder *b,
                     struct vtn_type *t1, struct vtn_type *t2)
{
   if (t1->id == t2->id)
      return true;

   if (t1->base_type != t2->base_type)
      return false;

   switch (t1->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
      return t1->type == t2->type;

   case vtn_base_type_array:
      return t1->length == t2->length &&
             vtn_types_compatible(b, t1->array_element, t2->array_element);

   case vtn_base_type_pointer:
      return vtn_types_compatible(b, t1->deref, t2->deref);

   case vtn_base_type_struct:
      if (t1->length != t2->length)
         return false;

      for (unsigned i = 0; i < t1->length; i++) {
         if (!vtn_types_compatible(b, t1->members[i], t2->members[i]))
            return false;
      }
      return true;

   case vtn_base_type_accel_struct:
   case vtn_base_type_ray_query:
      return true;

   case vtn_base_type_function:
      /* This case shouldn't get hit since you can't copy around function
       * types. Just require them to be identical.
       */
      return false;
   }

   vtn_fail("Invalid base type");
}

struct vtn_type *
vtn_type_without_array(struct vtn_type *type)
{
   while (type->base_type == vtn_base_type_array)
      type = type->array_element;
   return type;
}

/* does a shallow copy of a vtn_type */

static struct vtn_type *
vtn_type_copy(struct vtn_builder *b, struct vtn_type *src)
{
   struct vtn_type *dest = ralloc(b, struct vtn_type);
   *dest = *src;

   switch (src->base_type) {
   case vtn_base_type_void:
   case vtn_base_type_scalar:
   case vtn_base_type_vector:
   case vtn_base_type_matrix:
   case vtn_base_type_array:
   case vtn_base_type_pointer:
   case vtn_base_type_image:
   case vtn_base_type_sampler:
   case vtn_base_type_sampled_image:
   case vtn_base_type_event:
   case vtn_base_type_accel_struct:
   case vtn_base_type_ray_query:
      /* Nothing more to do */
      break;

   case vtn_base_type_struct:
      dest->members = ralloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->members, src->members,
             src->length * sizeof(src->members[0]));

      dest->offsets = ralloc_array(b, unsigned, src->length);
      memcpy(dest->offsets, src->offsets,
             src->length * sizeof(src->offsets[0]));
      break;

   case vtn_base_type_function:
      dest->params = ralloc_array(b, struct vtn_type *, src->length);
      memcpy(dest->params, src->params, src->length * sizeof(src->params[0]));
      break;
   }

   return dest;
}

static bool
vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type,
                               enum vtn_variable_mode mode)
{
   /* For OpenCL we never want to strip the info from the types, and it makes
    * type comparisons easier in later stages.
    */
   if (b->options->environment == NIR_SPIRV_OPENCL)
      return true;

   switch (mode) {
   case vtn_variable_mode_input:
   case vtn_variable_mode_output:
      /* Layout decorations kept because we need offsets for XFB arrays of
       * blocks.
       */
      return b->shader->info.has_transform_feedback_varyings;

   case vtn_variable_mode_ssbo:
   case vtn_variable_mode_phys_ssbo:
   case vtn_variable_mode_ubo:
   case vtn_variable_mode_push_constant:
   case vtn_variable_mode_shader_record:
      return true;

   case vtn_variable_mode_workgroup:
      return b->options->caps.workgroup_memory_explicit_layout;

   default:
      return false;
   }
}

const struct glsl_type *
vtn_type_get_nir_type(struct vtn_builder *b, struct vtn_type *type,
                      enum vtn_variable_mode mode)
{
   if (mode == vtn_variable_mode_atomic_counter) {
      vtn_fail_if(glsl_without_array(type->type) != glsl_uint_type(),
                  "Variables in the AtomicCounter storage class should be "
                  "(possibly arrays of arrays of) uint.");
      return glsl_type_wrap_in_arrays(glsl_atomic_uint_type(), type->type);
   }

   if (mode == vtn_variable_mode_uniform) {
      switch (type->base_type) {
      case vtn_base_type_array: {
         const struct glsl_type *elem_type =
            vtn_type_get_nir_type(b, type->array_element, mode);

         return glsl_array_type(elem_type, type->length,
                                glsl_get_explicit_stride(type->type));
      }

      case vtn_base_type_struct: {
         bool need_new_struct = false;
         const uint32_t num_fields = type->length;
         NIR_VLA(struct glsl_struct_field, fields, num_fields);
         for (unsigned i = 0; i < num_fields; i++) {
            fields[i] = *glsl_get_struct_field_data(type->type, i);
            const struct glsl_type *field_nir_type =
               vtn_type_get_nir_type(b, type->members[i], mode);
            if (fields[i].type != field_nir_type) {
               fields[i].type = field_nir_type;
               need_new_struct = true;
            }
         }
         if (need_new_struct) {
            if (glsl_type_is_interface(type->type)) {
               return glsl_interface_type(fields, num_fields,
                                          /* packing */ 0, false,
                                          glsl_get_type_name(type->type));
            } else {
               return glsl_struct_type(fields, num_fields,
                                       glsl_get_type_name(type->type),
                                       glsl_struct_type_is_packed(type->type));
            }
         } else {
            /* No changes, just pass it on */
            return type->type;
         }
      }

      case vtn_base_type_image:
         vtn_assert(glsl_type_is_texture(type->glsl_image));
         return type->glsl_image;

      case vtn_base_type_sampler:
         return glsl_bare_sampler_type();

      case vtn_base_type_sampled_image:
         return glsl_texture_type_to_sampler(type->image->glsl_image,
                                             false /* is_shadow */);

      default:
         return type->type;
      }
   }

   if (mode == vtn_variable_mode_image) {
      struct vtn_type *image_type = vtn_type_without_array(type);
      vtn_assert(image_type->base_type == vtn_base_type_image);
      return glsl_type_wrap_in_arrays(image_type->glsl_image, type->type);
   }

   /* Layout decorations are allowed but ignored in certain conditions,
    * to allow SPIR-V generators to perform type deduplication. Discard
    * unnecessary ones when passing to NIR.
    */
   if (!vtn_type_needs_explicit_layout(b, type, mode))
      return glsl_get_bare_type(type->type);

   return type->type;
}

static struct vtn_type *
mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   /* We may have an array of matrices.... Oh, joy! */
   while (glsl_type_is_array(type->type)) {
      type->array_element = vtn_type_copy(b, type->array_element);
      type = type->array_element;
   }

   vtn_assert(glsl_type_is_matrix(type->type));

   return type;
}

static void
vtn_handle_access_qualifier(struct vtn_builder *b, struct vtn_type *type,
                            int member, enum gl_access_qualifier access)
{
   type->members[member] = vtn_type_copy(b, type->members[member]);
   type = type->members[member];

   type->access |= access;
}

static void
array_stride_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *void_ctx)
{
   struct vtn_type *type = val->type;

   if (dec->decoration == SpvDecorationArrayStride) {
      if (vtn_type_contains_block(b, type)) {
         vtn_warn("The ArrayStride decoration cannot be applied to an array "
                  "type which contains a structure type decorated Block "
                  "or BufferBlock");
         /* Ignore the decoration */
      } else {
         vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero");
         type->stride = dec->operands[0];
      }
   }
}

static void
struct_member_decoration_cb(struct vtn_builder *b,
                            UNUSED struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   struct member_decoration_ctx *ctx = void_ctx;

   if (member < 0)
      return;

   assert(member < ctx->num_fields);

   switch (dec->decoration) {
   case SpvDecorationRelaxedPrecision:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
      break; /* FIXME: Do nothing with this for now. */
   case SpvDecorationNonWritable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_WRITEABLE);
      break;
   case SpvDecorationNonReadable:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_NON_READABLE);
      break;
   case SpvDecorationVolatile:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_VOLATILE);
      break;
   case SpvDecorationCoherent:
      vtn_handle_access_qualifier(b, ctx->type, member, ACCESS_COHERENT);
      break;
   case SpvDecorationNoPerspective:
      ctx->fields[member].interpolation = INTERP_MODE_NOPERSPECTIVE;
      break;
   case SpvDecorationFlat:
      ctx->fields[member].interpolation = INTERP_MODE_FLAT;
      break;
   case SpvDecorationExplicitInterpAMD:
      ctx->fields[member].interpolation = INTERP_MODE_EXPLICIT;
      break;
   case SpvDecorationCentroid:
      ctx->fields[member].centroid = true;
      break;
   case SpvDecorationSample:
      ctx->fields[member].sample = true;
      break;
   case SpvDecorationStream:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;
   case SpvDecorationLocation:
      ctx->fields[member].location = dec->operands[0];
      break;
   case SpvDecorationComponent:
      break; /* FIXME: What should we do with these? */
   case SpvDecorationBuiltIn:
      ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]);
      ctx->type->members[member]->is_builtin = true;
      ctx->type->members[member]->builtin = dec->operands[0];
      ctx->type->builtin_block = true;
      break;
   case SpvDecorationOffset:
      ctx->type->offsets[member] = dec->operands[0];
      ctx->fields[member].offset = dec->operands[0];
      break;
   case SpvDecorationMatrixStride:
      /* Handled as a second pass */
      break;
   case SpvDecorationColMajor:
      break; /* Nothing to do here. Column-major is the default. */
   case SpvDecorationRowMajor:
      mutable_matrix_member(b, ctx->type, member)->row_major = true;
      break;

   case SpvDecorationPatch:
   case SpvDecorationPerPrimitiveNV:
   case SpvDecorationPerTaskNV:
   case SpvDecorationPerViewNV:
      break;

   case SpvDecorationSpecId:
   case SpvDecorationBlock:
   case SpvDecorationBufferBlock:
   case SpvDecorationArrayStride:
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
   case SpvDecorationInvariant:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
   case SpvDecorationCPacked:
      vtn_warn("Decoration not allowed on struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationRestrict:
      /* While "Restrict" is invalid for struct members, glslang incorrectly
       * generates it and it ends up hiding actual driver issues in a wall of
       * spam from deqp-vk. Return it to the above block once the issue is
       * resolved. https://github.com/KhronosGroup/glslang/issues/703
       */
      break;

   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
      /* This is handled later by var_decoration_cb in vtn_variables.c */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      break;

   case SpvDecorationUserSemantic:
   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

/** Chases the array type all the way down to the tail and rewrites the
 * glsl_types to be based off the tail's glsl_type.
 */
static void
vtn_array_type_rewrite_glsl_type(struct vtn_type *type)
{
   if (type->base_type != vtn_base_type_array)
      return;

   vtn_array_type_rewrite_glsl_type(type->array_element);

   type->type = glsl_array_type(type->array_element->type,
                                type->length, type->stride);
}

/* Matrix strides are handled as a separate pass because we need to know
 * whether the matrix is row-major or not first.
 */
static void
struct_member_matrix_stride_cb(struct vtn_builder *b,
                               UNUSED struct vtn_value *val, int member,
                               const struct vtn_decoration *dec,
                               void *void_ctx)
{
   if (dec->decoration != SpvDecorationMatrixStride)
      return;

   vtn_fail_if(member < 0,
               "The MatrixStride decoration is only allowed on members "
               "of OpTypeStruct");
   vtn_fail_if(dec->operands[0] == 0, "MatrixStride must be non-zero");

   struct member_decoration_ctx *ctx = void_ctx;

   struct vtn_type *mat_type = mutable_matrix_member(b, ctx->type, member);
   if (mat_type->row_major) {
      mat_type->array_element = vtn_type_copy(b, mat_type->array_element);
      mat_type->stride = mat_type->array_element->stride;
      mat_type->array_element->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], true);
      mat_type->array_element->type = glsl_get_column_type(mat_type->type);
   } else {
      vtn_assert(mat_type->array_element->stride > 0);
      mat_type->stride = dec->operands[0];

      mat_type->type = glsl_explicit_matrix_type(mat_type->type,
                                                 dec->operands[0], false);
   }

   /* Now that we've replaced the glsl_type with a properly strided matrix
    * type, rewrite the member type so that it's an array of the proper kind
    * of glsl_type.
    */
   vtn_array_type_rewrite_glsl_type(ctx->type->members[member]);
   ctx->fields[member].type = ctx->type->members[member]->type;
}

static void
struct_packed_decoration_cb(struct vtn_builder *b,
                            struct vtn_value *val, int member,
                            const struct vtn_decoration *dec, void *void_ctx)
{
   vtn_assert(val->type->base_type == vtn_base_type_struct);
   if (dec->decoration == SpvDecorationCPacked) {
      if (b->shader->info.stage != MESA_SHADER_KERNEL) {
         vtn_warn("Decoration only allowed for CL-style kernels: %s",
                  spirv_decoration_to_string(dec->decoration));
      }
      val->type->packed = true;
   }
}

static void
struct_block_decoration_cb(struct vtn_builder *b,
                           struct vtn_value *val, int member,
                           const struct vtn_decoration *dec, void *ctx)
{
   if (member != -1)
      return;

   struct vtn_type *type = val->type;
   if (dec->decoration == SpvDecorationBlock)
      type->block = true;
   else if (dec->decoration == SpvDecorationBufferBlock)
      type->buffer_block = true;
}

static void
type_decoration_cb(struct vtn_builder *b,
                   struct vtn_value *val, int member,
                   const struct vtn_decoration *dec, UNUSED void *ctx)
{
   struct vtn_type *type = val->type;

   if (member != -1) {
      /* This should have been handled by OpTypeStruct */
      assert(val->type->base_type == vtn_base_type_struct);
      assert(member >= 0 && member < val->type->length);
      return;
   }

   switch (dec->decoration) {
   case SpvDecorationArrayStride:
      vtn_assert(type->base_type == vtn_base_type_array ||
                 type->base_type == vtn_base_type_pointer);
      break;
   case SpvDecorationBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->block);
      break;
   case SpvDecorationBufferBlock:
      vtn_assert(type->base_type == vtn_base_type_struct);
      vtn_assert(type->buffer_block);
      break;
   case SpvDecorationGLSLShared:
   case SpvDecorationGLSLPacked:
      /* Ignore these, since we get explicit offsets anyways */
      break;

   case SpvDecorationRowMajor:
   case SpvDecorationColMajor:
   case SpvDecorationMatrixStride:
   case SpvDecorationBuiltIn:
   case SpvDecorationNoPerspective:
   case SpvDecorationFlat:
   case SpvDecorationPatch:
   case SpvDecorationCentroid:
   case SpvDecorationSample:
   case SpvDecorationExplicitInterpAMD:
   case SpvDecorationVolatile:
   case SpvDecorationCoherent:
   case SpvDecorationNonWritable:
   case SpvDecorationNonReadable:
   case SpvDecorationUniform:
   case SpvDecorationUniformId:
   case SpvDecorationLocation:
   case SpvDecorationComponent:
   case SpvDecorationOffset:
   case SpvDecorationXfbBuffer:
   case SpvDecorationXfbStride:
   case SpvDecorationUserSemantic:
      vtn_warn("Decoration only allowed for struct members: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationStream:
      /* We don't need to do anything here, as stream is filled up when
       * applying the decoration to a variable, just check that if it is not a
       * struct member, it should be a struct.
       */
      vtn_assert(type->base_type == vtn_base_type_struct);
      break;

   case SpvDecorationRelaxedPrecision:
   case SpvDecorationSpecId:
   case SpvDecorationInvariant:
   case SpvDecorationRestrict:
   case SpvDecorationAliased:
   case SpvDecorationConstant:
   case SpvDecorationIndex:
   case SpvDecorationBinding:
   case SpvDecorationDescriptorSet:
   case SpvDecorationLinkageAttributes:
   case SpvDecorationNoContraction:
   case SpvDecorationInputAttachmentIndex:
      vtn_warn("Decoration not allowed on types: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationCPacked:
      /* Handled when parsing a struct type, nothing to do here. */
      break;

   case SpvDecorationSaturatedConversion:
   case SpvDecorationFuncParamAttr:
   case SpvDecorationFPRoundingMode:
   case SpvDecorationFPFastMathMode:
   case SpvDecorationAlignment:
      vtn_warn("Decoration only allowed for CL-style kernels: %s",
               spirv_decoration_to_string(dec->decoration));
      break;

   case SpvDecorationUserTypeGOOGLE:
      /* User semantic decorations can safely be ignored by the driver. */
      break;

   default:
      vtn_fail_with_decoration("Unhandled decoration", dec->decoration);
   }
}

static unsigned
translate_image_format(struct vtn_builder *b, SpvImageFormat format)
{
   switch (format) {
   case SpvImageFormatUnknown: return PIPE_FORMAT_NONE;
   case SpvImageFormatRgba32f: return PIPE_FORMAT_R32G32B32A32_FLOAT;
   case SpvImageFormatRgba16f: return PIPE_FORMAT_R16G16B16A16_FLOAT;
   case SpvImageFormatR32f: return PIPE_FORMAT_R32_FLOAT;
   case SpvImageFormatRgba8: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case SpvImageFormatRgba8Snorm: return PIPE_FORMAT_R8G8B8A8_SNORM;
   case SpvImageFormatRg32f: return PIPE_FORMAT_R32G32_FLOAT;
   case SpvImageFormatRg16f: return PIPE_FORMAT_R16G16_FLOAT;
   case SpvImageFormatR11fG11fB10f: return PIPE_FORMAT_R11G11B10_FLOAT;
   case SpvImageFormatR16f: return PIPE_FORMAT_R16_FLOAT;
   case SpvImageFormatRgba16: return PIPE_FORMAT_R16G16B16A16_UNORM;
   case SpvImageFormatRgb10A2: return PIPE_FORMAT_R10G10B10A2_UNORM;
   case SpvImageFormatRg16: return PIPE_FORMAT_R16G16_UNORM;
   case SpvImageFormatRg8: return PIPE_FORMAT_R8G8_UNORM;
   case SpvImageFormatR16: return PIPE_FORMAT_R16_UNORM;
   case SpvImageFormatR8: return PIPE_FORMAT_R8_UNORM;
   case SpvImageFormatRgba16Snorm: return PIPE_FORMAT_R16G16B16A16_SNORM;
   case SpvImageFormatRg16Snorm: return PIPE_FORMAT_R16G16_SNORM;
   case SpvImageFormatRg8Snorm: return PIPE_FORMAT_R8G8_SNORM;
   case SpvImageFormatR16Snorm: return PIPE_FORMAT_R16_SNORM;
   case SpvImageFormatR8Snorm: return PIPE_FORMAT_R8_SNORM;
   case SpvImageFormatRgba32i: return PIPE_FORMAT_R32G32B32A32_SINT;
   case SpvImageFormatRgba16i: return PIPE_FORMAT_R16G16B16A16_SINT;
   case SpvImageFormatRgba8i: return PIPE_FORMAT_R8G8B8A8_SINT;
   case SpvImageFormatR32i: return PIPE_FORMAT_R32_SINT;
   case SpvImageFormatRg32i: return PIPE_FORMAT_R32G32_SINT;
   case SpvImageFormatRg16i: return PIPE_FORMAT_R16G16_SINT;
   case SpvImageFormatRg8i: return PIPE_FORMAT_R8G8_SINT;
   case SpvImageFormatR16i: return PIPE_FORMAT_R16_SINT;
   case SpvImageFormatR8i: return PIPE_FORMAT_R8_SINT;
   case SpvImageFormatRgba32ui: return PIPE_FORMAT_R32G32B32A32_UINT;
   case SpvImageFormatRgba16ui: return PIPE_FORMAT_R16G16B16A16_UINT;
   case SpvImageFormatRgba8ui: return PIPE_FORMAT_R8G8B8A8_UINT;
   case SpvImageFormatR32ui: return PIPE_FORMAT_R32_UINT;
   case SpvImageFormatRgb10a2ui: return PIPE_FORMAT_R10G10B10A2_UINT;
   case SpvImageFormatRg32ui: return PIPE_FORMAT_R32G32_UINT;
   case SpvImageFormatRg16ui: return PIPE_FORMAT_R16G16_UINT;
   case SpvImageFormatRg8ui: return PIPE_FORMAT_R8G8_UINT;
   case SpvImageFormatR16ui: return PIPE_FORMAT_R16_UINT;
   case SpvImageFormatR8ui: return PIPE_FORMAT_R8_UINT;
   case SpvImageFormatR64ui: return PIPE_FORMAT_R64_UINT;
   case SpvImageFormatR64i: return PIPE_FORMAT_R64_SINT;
   default:
      vtn_fail("Invalid image format: %s (%u)",
               spirv_imageformat_to_string(format), format);
   }
}

static void
vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                const uint32_t *w, unsigned count)
{
   struct vtn_value *val = NULL;

   /* In order to properly handle forward declarations, we have to defer
    * allocation for pointer types.
    */
   if (opcode != SpvOpTypePointer && opcode != SpvOpTypeForwardPointer) {
      val = vtn_push_value(b, w[1], vtn_value_type_type);
      vtn_fail_if(val->type != NULL,
                  "Only pointers can have forward declarations");
      val->type = rzalloc(b, struct vtn_type);
      val->type->id = w[1];
   }

   switch (opcode) {
   case SpvOpTypeVoid:
      val->type->base_type = vtn_base_type_void;
      val->type->type = glsl_void_type();
      break;
   case SpvOpTypeBool:
      val->type->base_type = vtn_base_type_scalar;
      val->type->type = glsl_bool_type();
      val->type->length = 1;
      break;
   case SpvOpTypeInt: {
      int bit_size = w[2];
      const bool signedness = w[3];
      vtn_fail_if(bit_size != 8 && bit_size != 16 &&
                  bit_size != 32 && bit_size != 64,
                  "Invalid int bit size: %u", bit_size);
      val->type->base_type = vtn_base_type_scalar;
      val->type->type = signedness ? glsl_intN_t_type(bit_size) :
                                     glsl_uintN_t_type(bit_size);
      val->type->length = 1;
      break;
   }

   case SpvOpTypeFloat: {
      int bit_size = w[2];
      val->type->base_type = vtn_base_type_scalar;
      vtn_fail_if(bit_size != 16 && bit_size != 32 && bit_size != 64,
                  "Invalid float bit size: %u", bit_size);
      val->type->type = glsl_floatN_t_type(bit_size);
      val->type->length = 1;
      break;
   }

   case SpvOpTypeVector: {
      struct vtn_type *base = vtn_get_type(b, w[2]);
      unsigned elems = w[3];

      vtn_fail_if(base->base_type != vtn_base_type_scalar,
                  "Base type for OpTypeVector must be a scalar");
      vtn_fail_if((elems < 2 || elems > 4) && (elems != 8) && (elems != 16),
                  "Invalid component count for OpTypeVector");

      val->type->base_type = vtn_base_type_vector;
      val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems);
      val->type->length = elems;
      val->type->stride = glsl_type_is_boolean(val->type->type)
                          ? 4 : glsl_get_bit_size(base->type) / 8;
      val->type->array_element = base;
      break;
   }

   case SpvOpTypeMatrix: {
      struct vtn_type *base = vtn_get_type(b, w[2]);
      unsigned columns = w[3];

      vtn_fail_if(base->base_type != vtn_base_type_vector,
                  "Base type for OpTypeMatrix must be a vector");
      vtn_fail_if(columns < 2 || columns > 4,
                  "Invalid column count for OpTypeMatrix");

      val->type->base_type = vtn_base_type_matrix;
      val->type->type = glsl_matrix_type(glsl_get_base_type(base->type),
                                         glsl_get_vector_elements(base->type),
                                         columns);
      vtn_fail_if(glsl_type_is_error(val->type->type),
                  "Unsupported base type for OpTypeMatrix");
      assert(!glsl_type_is_error(val->type->type));
      val->type->length = columns;
      val->type->array_element = base;
      val->type->row_major = false;
      val->type->stride = 0;
      break;
   }

   case SpvOpTypeRuntimeArray:
   case SpvOpTypeArray: {
      struct vtn_type *array_element = vtn_get_type(b, w[2]);

      if (opcode == SpvOpTypeRuntimeArray) {
         /* A length of 0 is used to denote unsized arrays */
         val->type->length = 0;
      } else {
         val->type->length = vtn_constant_uint(b, w[3]);
      }

      val->type->base_type = vtn_base_type_array;
      val->type->array_element = array_element;

      vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL);
      val->type->type = glsl_array_type(array_element->type, val->type->length,
                                        val->type->stride);
      break;
   }

   case SpvOpTypeStruct: {
      unsigned num_fields = count - 2;
      val->type->base_type = vtn_base_type_struct;
      val->type->length = num_fields;
      val->type->members = ralloc_array(b, struct vtn_type *, num_fields);
      val->type->offsets = ralloc_array(b, unsigned, num_fields);
      val->type->packed = false;

      NIR_VLA(struct glsl_struct_field, fields, count);
      for (unsigned i = 0; i < num_fields; i++) {
         val->type->members[i] = vtn_get_type(b, w[i + 2]);
         const char *name = NULL;
         for (struct vtn_decoration *dec = val->decoration; dec; dec = dec->next) {
            if (dec->scope == VTN_DEC_STRUCT_MEMBER_NAME0 - i) {
               name = dec->member_name;
               break;
            }
         }
         if (!name)
            name = ralloc_asprintf(b, "field%d", i);

         fields[i] = (struct glsl_struct_field) {
            .type = val->type->members[i]->type,
            .name = name,
            .location = -1,
            .offset = -1,
         };
      }

      vtn_foreach_decoration(b, val, struct_packed_decoration_cb, NULL);

      struct member_decoration_ctx ctx = {
         .num_fields = num_fields,
         .fields = fields,
         .type = val->type
      };

      vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx);

      /* Propagate access specifiers that are present on all members to the overall type */
      enum gl_access_qualifier overall_access = ACCESS_COHERENT | ACCESS_VOLATILE |
                                                ACCESS_NON_READABLE | ACCESS_NON_WRITEABLE;
      for (unsigned i = 0; i < num_fields; ++i)
         overall_access &= val->type->members[i]->access;
      val->type->access = overall_access;

      vtn_foreach_decoration(b, val, struct_member_matrix_stride_cb, &ctx);

      vtn_foreach_decoration(b, val, struct_block_decoration_cb, NULL);

      const char *name = val->name;

      if (val->type->block || val->type->buffer_block) {
         /* Packing will be ignored since types coming from SPIR-V are
          * explicitly laid out.
          */
         val->type->type = glsl_interface_type(fields, num_fields,
                                               /* packing */ 0, false,
                                               name ? name : "block");
      } else {
         val->type->type = glsl_struct_type(fields, num_fields,
                                            name ? name : "struct",
                                            val->type->packed);
      }
      break;
   }

   case SpvOpTypeFunction: {
      val->type->base_type = vtn_base_type_function;
      val->type->type = NULL;

      val->type->return_type = vtn_get_type(b, w[2]);

      const unsigned num_params = count - 3;
      val->type->length = num_params;
      val->type->params = ralloc_array(b, struct vtn_type *, num_params);
      for (unsigned i = 0; i < count - 3; i++) {
         val->type->params[i] = vtn_get_type(b, w[i + 3]);
      }
      break;
   }

   case SpvOpTypePointer:
   case SpvOpTypeForwardPointer: {
      /* We can't blindly push the value because it might be a forward
       * declaration.
       */
      val = vtn_untyped_value(b, w[1]);

      SpvStorageClass storage_class = w[2];

      vtn_fail_if(opcode == SpvOpTypeForwardPointer &&
                  b->shader->info.stage != MESA_SHADER_KERNEL &&
                  storage_class != SpvStorageClassPhysicalStorageBuffer,
                  "OpTypeForwardPointer is only allowed in Vulkan with "
                  "the PhysicalStorageBuffer storage class");

      struct vtn_type *deref_type = NULL;
      if (opcode == SpvOpTypePointer)
         deref_type = vtn_get_type(b, w[3]);

      bool has_forward_pointer = false;
      if (val->value_type == vtn_value_type_invalid) {
         val->value_type = vtn_value_type_type;
         val->type = rzalloc(b, struct vtn_type);
         val->type->id = w[1];
         val->type->base_type = vtn_base_type_pointer;
         val->type->storage_class = storage_class;

         /* These can actually be stored to nir_variables and used as SSA
          * values so they need a real glsl_type.
          */
         enum vtn_variable_mode mode = vtn_storage_class_to_mode(
            b, storage_class, deref_type, NULL);

         /* The deref type should only matter for the UniformConstant storage
          * class. In particular, it should never matter for any storage
          * classes that are allowed in combination with OpTypeForwardPointer.
          */
         if (storage_class != SpvStorageClassUniform &&
             storage_class != SpvStorageClassUniformConstant) {
            assert(mode == vtn_storage_class_to_mode(b, storage_class,
                                                     NULL, NULL));
         }

         val->type->type = nir_address_format_to_glsl_type(
            vtn_mode_to_address_format(b, mode));
      } else {
         vtn_fail_if(val->type->storage_class != storage_class,
                     "The storage classes of an OpTypePointer and any "
                     "OpTypeForwardPointers that provide forward "
                     "declarations of it must match.");
         has_forward_pointer = true;
      }

      if (opcode == SpvOpTypePointer) {
         vtn_fail_if(val->type->deref != NULL,
                     "While OpTypeForwardPointer can be used to provide a "
                     "forward declaration of a pointer, OpTypePointer can "
                     "only be used once for a given id.");

         vtn_fail_if(has_forward_pointer &&
                     deref_type->base_type != vtn_base_type_struct,
                     "An OpTypePointer instruction must declare "
                     "Pointer Type to be a pointer to an OpTypeStruct.");

         val->type->deref = deref_type;

         /* Only certain storage classes use ArrayStride. */
*/ 1671 switch (storage_class) { 1672 case SpvStorageClassWorkgroup: 1673 if (!b->options->caps.workgroup_memory_explicit_layout) 1674 break; 1675 FALLTHROUGH; 1676 1677 case SpvStorageClassUniform: 1678 case SpvStorageClassPushConstant: 1679 case SpvStorageClassStorageBuffer: 1680 case SpvStorageClassPhysicalStorageBuffer: 1681 vtn_foreach_decoration(b, val, array_stride_decoration_cb, NULL); 1682 break; 1683 1684 default: 1685 /* Nothing to do. */ 1686 break; 1687 } 1688 } 1689 break; 1690 } 1691 1692 case SpvOpTypeImage: { 1693 val->type->base_type = vtn_base_type_image; 1694 1695 /* Images are represented in NIR as a scalar SSA value that is the 1696 * result of a deref instruction. An OpLoad on an OpTypeImage pointer 1697 * from UniformConstant memory just takes the NIR deref from the pointer 1698 * and turns it into an SSA value. 1699 */ 1700 val->type->type = nir_address_format_to_glsl_type( 1701 vtn_mode_to_address_format(b, vtn_variable_mode_function)); 1702 1703 const struct vtn_type *sampled_type = vtn_get_type(b, w[2]); 1704 if (b->shader->info.stage == MESA_SHADER_KERNEL) { 1705 vtn_fail_if(sampled_type->base_type != vtn_base_type_void, 1706 "Sampled type of OpTypeImage must be void for kernels"); 1707 } else { 1708 vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar, 1709 "Sampled type of OpTypeImage must be a scalar"); 1710 if (b->options->caps.image_atomic_int64) { 1711 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32 && 1712 glsl_get_bit_size(sampled_type->type) != 64, 1713 "Sampled type of OpTypeImage must be a 32 or 64-bit " 1714 "scalar"); 1715 } else { 1716 vtn_fail_if(glsl_get_bit_size(sampled_type->type) != 32, 1717 "Sampled type of OpTypeImage must be a 32-bit scalar"); 1718 } 1719 } 1720 1721 enum glsl_sampler_dim dim; 1722 switch ((SpvDim)w[3]) { 1723 case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; 1724 case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; 1725 case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; 1726 case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; 1727 case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; 1728 case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; 1729 case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break; 1730 default: 1731 vtn_fail("Invalid SPIR-V image dimensionality: %s (%u)", 1732 spirv_dim_to_string((SpvDim)w[3]), w[3]); 1733 } 1734 1735 /* w[4]: as per Vulkan spec "Validation Rules within a Module", 1736 * The “Depth” operand of OpTypeImage is ignored. 1737 */ 1738 bool is_array = w[5]; 1739 bool multisampled = w[6]; 1740 unsigned sampled = w[7]; 1741 SpvImageFormat format = w[8]; 1742 1743 if (count > 9) 1744 val->type->access_qualifier = w[9]; 1745 else if (b->shader->info.stage == MESA_SHADER_KERNEL) 1746 /* Per the CL C spec: If no qualifier is provided, read_only is assumed. 
*/ 1747 val->type->access_qualifier = SpvAccessQualifierReadOnly; 1748 else 1749 val->type->access_qualifier = SpvAccessQualifierReadWrite; 1750 1751 if (multisampled) { 1752 if (dim == GLSL_SAMPLER_DIM_2D) 1753 dim = GLSL_SAMPLER_DIM_MS; 1754 else if (dim == GLSL_SAMPLER_DIM_SUBPASS) 1755 dim = GLSL_SAMPLER_DIM_SUBPASS_MS; 1756 else 1757 vtn_fail("Unsupported multisampled image type"); 1758 } 1759 1760 val->type->image_format = translate_image_format(b, format); 1761 1762 enum glsl_base_type sampled_base_type = 1763 glsl_get_base_type(sampled_type->type); 1764 if (sampled == 1) { 1765 val->type->glsl_image = glsl_texture_type(dim, is_array, 1766 sampled_base_type); 1767 } else if (sampled == 2) { 1768 val->type->glsl_image = glsl_image_type(dim, is_array, 1769 sampled_base_type); 1770 } else if (b->shader->info.stage == MESA_SHADER_KERNEL) { 1771 val->type->glsl_image = glsl_image_type(dim, is_array, 1772 GLSL_TYPE_VOID); 1773 } else { 1774 vtn_fail("We need to know if the image will be sampled"); 1775 } 1776 break; 1777 } 1778 1779 case SpvOpTypeSampledImage: { 1780 val->type->base_type = vtn_base_type_sampled_image; 1781 val->type->image = vtn_get_type(b, w[2]); 1782 1783 /* Sampled images are represented NIR as a vec2 SSA value where each 1784 * component is the result of a deref instruction. The first component 1785 * is the image and the second is the sampler. An OpLoad on an 1786 * OpTypeSampledImage pointer from UniformConstant memory just takes 1787 * the NIR deref from the pointer and duplicates it to both vector 1788 * components. 1789 */ 1790 nir_address_format addr_format = 1791 vtn_mode_to_address_format(b, vtn_variable_mode_function); 1792 assert(nir_address_format_num_components(addr_format) == 1); 1793 unsigned bit_size = nir_address_format_bit_size(addr_format); 1794 assert(bit_size == 32 || bit_size == 64); 1795 1796 enum glsl_base_type base_type = 1797 bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64; 1798 val->type->type = glsl_vector_type(base_type, 2); 1799 break; 1800 } 1801 1802 case SpvOpTypeSampler: 1803 val->type->base_type = vtn_base_type_sampler; 1804 1805 /* Samplers are represented in NIR as a scalar SSA value that is the 1806 * result of a deref instruction. An OpLoad on an OpTypeSampler pointer 1807 * from UniformConstant memory just takes the NIR deref from the pointer 1808 * and turns it into an SSA value. 1809 */ 1810 val->type->type = nir_address_format_to_glsl_type( 1811 vtn_mode_to_address_format(b, vtn_variable_mode_function)); 1812 break; 1813 1814 case SpvOpTypeAccelerationStructureKHR: 1815 val->type->base_type = vtn_base_type_accel_struct; 1816 val->type->type = glsl_uint64_t_type(); 1817 break; 1818 1819 1820 case SpvOpTypeOpaque: { 1821 val->type->base_type = vtn_base_type_struct; 1822 const char *name = vtn_string_literal(b, &w[2], count - 2, NULL); 1823 val->type->type = glsl_struct_type(NULL, 0, name, false); 1824 break; 1825 } 1826 1827 case SpvOpTypeRayQueryKHR: { 1828 val->type->base_type = vtn_base_type_ray_query; 1829 val->type->type = glsl_uint64_t_type(); 1830 /* We may need to run queries on helper invocations. Here the parser 1831 * doesn't go through a deeper analysis on whether the result of a query 1832 * will be used in derivative instructions. 1833 * 1834 * An implementation willing to optimize this would look through the IR 1835 * and check if any derivative instruction uses the result of a query 1836 * and drop this flag if not. 
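    *
    * A concrete case (illustrative): a fragment shader that uses a ray
    * query result to select the LOD of a later implicit-derivative
    * texture sample needs the query to produce defined values in helper
    * invocations as well, otherwise the derivatives would be garbage.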
1837 */ 1838 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) 1839 val->type->access = ACCESS_INCLUDE_HELPERS; 1840 break; 1841 } 1842 1843 case SpvOpTypeEvent: 1844 val->type->base_type = vtn_base_type_event; 1845 val->type->type = glsl_int_type(); 1846 break; 1847 1848 case SpvOpTypeDeviceEvent: 1849 case SpvOpTypeReserveId: 1850 case SpvOpTypeQueue: 1851 case SpvOpTypePipe: 1852 default: 1853 vtn_fail_with_opcode("Unhandled opcode", opcode); 1854 } 1855 1856 vtn_foreach_decoration(b, val, type_decoration_cb, NULL); 1857 1858 if (val->type->base_type == vtn_base_type_struct && 1859 (val->type->block || val->type->buffer_block)) { 1860 for (unsigned i = 0; i < val->type->length; i++) { 1861 vtn_fail_if(vtn_type_contains_block(b, val->type->members[i]), 1862 "Block and BufferBlock decorations cannot decorate a " 1863 "structure type that is nested at any level inside " 1864 "another structure type decorated with Block or " 1865 "BufferBlock."); 1866 } 1867 } 1868} 1869 1870static nir_constant * 1871vtn_null_constant(struct vtn_builder *b, struct vtn_type *type) 1872{ 1873 nir_constant *c = rzalloc(b, nir_constant); 1874 1875 switch (type->base_type) { 1876 case vtn_base_type_scalar: 1877 case vtn_base_type_vector: 1878 /* Nothing to do here. It's already initialized to zero */ 1879 break; 1880 1881 case vtn_base_type_pointer: { 1882 enum vtn_variable_mode mode = vtn_storage_class_to_mode( 1883 b, type->storage_class, type->deref, NULL); 1884 nir_address_format addr_format = vtn_mode_to_address_format(b, mode); 1885 1886 const nir_const_value *null_value = nir_address_format_null_value(addr_format); 1887 memcpy(c->values, null_value, 1888 sizeof(nir_const_value) * nir_address_format_num_components(addr_format)); 1889 break; 1890 } 1891 1892 case vtn_base_type_void: 1893 case vtn_base_type_image: 1894 case vtn_base_type_sampler: 1895 case vtn_base_type_sampled_image: 1896 case vtn_base_type_function: 1897 case vtn_base_type_event: 1898 /* For those we have to return something but it doesn't matter what. 
*/ 1899 break; 1900 1901 case vtn_base_type_matrix: 1902 case vtn_base_type_array: 1903 vtn_assert(type->length > 0); 1904 c->num_elements = type->length; 1905 c->elements = ralloc_array(b, nir_constant *, c->num_elements); 1906 1907 c->elements[0] = vtn_null_constant(b, type->array_element); 1908 for (unsigned i = 1; i < c->num_elements; i++) 1909 c->elements[i] = c->elements[0]; 1910 break; 1911 1912 case vtn_base_type_struct: 1913 c->num_elements = type->length; 1914 c->elements = ralloc_array(b, nir_constant *, c->num_elements); 1915 for (unsigned i = 0; i < c->num_elements; i++) 1916 c->elements[i] = vtn_null_constant(b, type->members[i]); 1917 break; 1918 1919 default: 1920 vtn_fail("Invalid type for null constant"); 1921 } 1922 1923 return c; 1924} 1925 1926static void 1927spec_constant_decoration_cb(struct vtn_builder *b, UNUSED struct vtn_value *val, 1928 ASSERTED int member, 1929 const struct vtn_decoration *dec, void *data) 1930{ 1931 vtn_assert(member == -1); 1932 if (dec->decoration != SpvDecorationSpecId) 1933 return; 1934 1935 nir_const_value *value = data; 1936 for (unsigned i = 0; i < b->num_specializations; i++) { 1937 if (b->specializations[i].id == dec->operands[0]) { 1938 *value = b->specializations[i].value; 1939 return; 1940 } 1941 } 1942} 1943 1944static void 1945handle_workgroup_size_decoration_cb(struct vtn_builder *b, 1946 struct vtn_value *val, 1947 ASSERTED int member, 1948 const struct vtn_decoration *dec, 1949 UNUSED void *data) 1950{ 1951 vtn_assert(member == -1); 1952 if (dec->decoration != SpvDecorationBuiltIn || 1953 dec->operands[0] != SpvBuiltInWorkgroupSize) 1954 return; 1955 1956 vtn_assert(val->type->type == glsl_vector_type(GLSL_TYPE_UINT, 3)); 1957 b->workgroup_size_builtin = val; 1958} 1959 1960static void 1961vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, 1962 const uint32_t *w, unsigned count) 1963{ 1964 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); 1965 val->constant = rzalloc(b, nir_constant); 1966 switch (opcode) { 1967 case SpvOpConstantTrue: 1968 case SpvOpConstantFalse: 1969 case SpvOpSpecConstantTrue: 1970 case SpvOpSpecConstantFalse: { 1971 vtn_fail_if(val->type->type != glsl_bool_type(), 1972 "Result type of %s must be OpTypeBool", 1973 spirv_op_to_string(opcode)); 1974 1975 bool bval = (opcode == SpvOpConstantTrue || 1976 opcode == SpvOpSpecConstantTrue); 1977 1978 nir_const_value u32val = nir_const_value_for_uint(bval, 32); 1979 1980 if (opcode == SpvOpSpecConstantTrue || 1981 opcode == SpvOpSpecConstantFalse) 1982 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32val); 1983 1984 val->constant->values[0].b = u32val.u32 != 0; 1985 break; 1986 } 1987 1988 case SpvOpConstant: 1989 case SpvOpSpecConstant: { 1990 vtn_fail_if(val->type->base_type != vtn_base_type_scalar, 1991 "Result type of %s must be a scalar", 1992 spirv_op_to_string(opcode)); 1993 int bit_size = glsl_get_bit_size(val->type->type); 1994 switch (bit_size) { 1995 case 64: 1996 val->constant->values[0].u64 = vtn_u64_literal(&w[3]); 1997 break; 1998 case 32: 1999 val->constant->values[0].u32 = w[3]; 2000 break; 2001 case 16: 2002 val->constant->values[0].u16 = w[3]; 2003 break; 2004 case 8: 2005 val->constant->values[0].u8 = w[3]; 2006 break; 2007 default: 2008 vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size); 2009 } 2010 2011 if (opcode == SpvOpSpecConstant) 2012 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, 2013 &val->constant->values[0]); 2014 break; 2015 } 2016 2017 case 
SpvOpSpecConstantComposite: 2018 case SpvOpConstantComposite: { 2019 unsigned elem_count = count - 3; 2020 vtn_fail_if(elem_count != val->type->length, 2021 "%s has %u constituents, expected %u", 2022 spirv_op_to_string(opcode), elem_count, val->type->length); 2023 2024 nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); 2025 val->is_undef_constant = true; 2026 for (unsigned i = 0; i < elem_count; i++) { 2027 struct vtn_value *elem_val = vtn_untyped_value(b, w[i + 3]); 2028 2029 if (elem_val->value_type == vtn_value_type_constant) { 2030 elems[i] = elem_val->constant; 2031 val->is_undef_constant = val->is_undef_constant && 2032 elem_val->is_undef_constant; 2033 } else { 2034 vtn_fail_if(elem_val->value_type != vtn_value_type_undef, 2035 "only constants or undefs allowed for " 2036 "SpvOpConstantComposite"); 2037 /* to make it easier, just insert a NULL constant for now */ 2038 elems[i] = vtn_null_constant(b, elem_val->type); 2039 } 2040 } 2041 2042 switch (val->type->base_type) { 2043 case vtn_base_type_vector: { 2044 assert(glsl_type_is_vector(val->type->type)); 2045 for (unsigned i = 0; i < elem_count; i++) 2046 val->constant->values[i] = elems[i]->values[0]; 2047 break; 2048 } 2049 2050 case vtn_base_type_matrix: 2051 case vtn_base_type_struct: 2052 case vtn_base_type_array: 2053 ralloc_steal(val->constant, elems); 2054 val->constant->num_elements = elem_count; 2055 val->constant->elements = elems; 2056 break; 2057 2058 default: 2059 vtn_fail("Result type of %s must be a composite type", 2060 spirv_op_to_string(opcode)); 2061 } 2062 break; 2063 } 2064 2065 case SpvOpSpecConstantOp: { 2066 nir_const_value u32op = nir_const_value_for_uint(w[3], 32); 2067 vtn_foreach_decoration(b, val, spec_constant_decoration_cb, &u32op); 2068 SpvOp opcode = u32op.u32; 2069 switch (opcode) { 2070 case SpvOpVectorShuffle: { 2071 struct vtn_value *v0 = &b->values[w[4]]; 2072 struct vtn_value *v1 = &b->values[w[5]]; 2073 2074 vtn_assert(v0->value_type == vtn_value_type_constant || 2075 v0->value_type == vtn_value_type_undef); 2076 vtn_assert(v1->value_type == vtn_value_type_constant || 2077 v1->value_type == vtn_value_type_undef); 2078 2079 unsigned len0 = glsl_get_vector_elements(v0->type->type); 2080 unsigned len1 = glsl_get_vector_elements(v1->type->type); 2081 2082 vtn_assert(len0 + len1 < 16); 2083 2084 unsigned bit_size = glsl_get_bit_size(val->type->type); 2085 unsigned bit_size0 = glsl_get_bit_size(v0->type->type); 2086 unsigned bit_size1 = glsl_get_bit_size(v1->type->type); 2087 2088 vtn_assert(bit_size == bit_size0 && bit_size == bit_size1); 2089 (void)bit_size0; (void)bit_size1; 2090 2091 nir_const_value undef = { .u64 = 0xdeadbeefdeadbeef }; 2092 nir_const_value combined[NIR_MAX_VEC_COMPONENTS * 2]; 2093 2094 if (v0->value_type == vtn_value_type_constant) { 2095 for (unsigned i = 0; i < len0; i++) 2096 combined[i] = v0->constant->values[i]; 2097 } 2098 if (v1->value_type == vtn_value_type_constant) { 2099 for (unsigned i = 0; i < len1; i++) 2100 combined[len0 + i] = v1->constant->values[i]; 2101 } 2102 2103 for (unsigned i = 0, j = 0; i < count - 6; i++, j++) { 2104 uint32_t comp = w[i + 6]; 2105 if (comp == (uint32_t)-1) { 2106 /* If component is not used, set the value to a known constant 2107 * to detect if it is wrongly used. 
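             * Per the SPIR-V spec a component literal of 0xFFFFFFFF means
             * the result component has no source and is undefined, so a
             * consumer that reads it anyway will see the recognizable
             * 0xdeadbeef pattern below.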
2108 */ 2109 val->constant->values[j] = undef; 2110 } else { 2111 vtn_fail_if(comp >= len0 + len1, 2112 "All Component literals must either be FFFFFFFF " 2113 "or in [0, N - 1] (inclusive)."); 2114 val->constant->values[j] = combined[comp]; 2115 } 2116 } 2117 break; 2118 } 2119 2120 case SpvOpCompositeExtract: 2121 case SpvOpCompositeInsert: { 2122 struct vtn_value *comp; 2123 unsigned deref_start; 2124 struct nir_constant **c; 2125 if (opcode == SpvOpCompositeExtract) { 2126 comp = vtn_value(b, w[4], vtn_value_type_constant); 2127 deref_start = 5; 2128 c = &comp->constant; 2129 } else { 2130 comp = vtn_value(b, w[5], vtn_value_type_constant); 2131 deref_start = 6; 2132 val->constant = nir_constant_clone(comp->constant, 2133 (nir_variable *)b); 2134 c = &val->constant; 2135 } 2136 2137 int elem = -1; 2138 const struct vtn_type *type = comp->type; 2139 for (unsigned i = deref_start; i < count; i++) { 2140 vtn_fail_if(w[i] > type->length, 2141 "%uth index of %s is %u but the type has only " 2142 "%u elements", i - deref_start, 2143 spirv_op_to_string(opcode), w[i], type->length); 2144 2145 switch (type->base_type) { 2146 case vtn_base_type_vector: 2147 elem = w[i]; 2148 type = type->array_element; 2149 break; 2150 2151 case vtn_base_type_matrix: 2152 case vtn_base_type_array: 2153 c = &(*c)->elements[w[i]]; 2154 type = type->array_element; 2155 break; 2156 2157 case vtn_base_type_struct: 2158 c = &(*c)->elements[w[i]]; 2159 type = type->members[w[i]]; 2160 break; 2161 2162 default: 2163 vtn_fail("%s must only index into composite types", 2164 spirv_op_to_string(opcode)); 2165 } 2166 } 2167 2168 if (opcode == SpvOpCompositeExtract) { 2169 if (elem == -1) { 2170 val->constant = *c; 2171 } else { 2172 unsigned num_components = type->length; 2173 for (unsigned i = 0; i < num_components; i++) 2174 val->constant->values[i] = (*c)->values[elem + i]; 2175 } 2176 } else { 2177 struct vtn_value *insert = 2178 vtn_value(b, w[4], vtn_value_type_constant); 2179 vtn_assert(insert->type == type); 2180 if (elem == -1) { 2181 *c = insert->constant; 2182 } else { 2183 unsigned num_components = type->length; 2184 for (unsigned i = 0; i < num_components; i++) 2185 (*c)->values[elem + i] = insert->constant->values[i]; 2186 } 2187 } 2188 break; 2189 } 2190 2191 default: { 2192 bool swap; 2193 nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(val->type->type); 2194 nir_alu_type src_alu_type = dst_alu_type; 2195 unsigned num_components = glsl_get_vector_elements(val->type->type); 2196 unsigned bit_size; 2197 2198 vtn_assert(count <= 7); 2199 2200 switch (opcode) { 2201 case SpvOpSConvert: 2202 case SpvOpFConvert: 2203 case SpvOpUConvert: 2204 /* We have a source in a conversion */ 2205 src_alu_type = 2206 nir_get_nir_type_for_glsl_type(vtn_get_value_type(b, w[4])->type); 2207 /* We use the bitsize of the conversion source to evaluate the opcode later */ 2208 bit_size = glsl_get_bit_size(vtn_get_value_type(b, w[4])->type); 2209 break; 2210 default: 2211 bit_size = glsl_get_bit_size(val->type->type); 2212 }; 2213 2214 bool exact; 2215 nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact, 2216 nir_alu_type_get_type_size(src_alu_type), 2217 nir_alu_type_get_type_size(dst_alu_type)); 2218 2219 /* No SPIR-V opcodes handled through this path should set exact. 2220 * Since it is ignored, assert on it. 
2221 */ 2222 assert(!exact); 2223 2224 nir_const_value src[3][NIR_MAX_VEC_COMPONENTS]; 2225 2226 for (unsigned i = 0; i < count - 4; i++) { 2227 struct vtn_value *src_val = 2228 vtn_value(b, w[4 + i], vtn_value_type_constant); 2229 2230 /* If this is an unsized source, pull the bit size from the 2231 * source; otherwise, we'll use the bit size from the destination. 2232 */ 2233 if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i])) 2234 bit_size = glsl_get_bit_size(src_val->type->type); 2235 2236 unsigned src_comps = nir_op_infos[op].input_sizes[i] ? 2237 nir_op_infos[op].input_sizes[i] : 2238 num_components; 2239 2240 unsigned j = swap ? 1 - i : i; 2241 for (unsigned c = 0; c < src_comps; c++) 2242 src[j][c] = src_val->constant->values[c]; 2243 } 2244 2245 /* fix up fixed size sources */ 2246 switch (op) { 2247 case nir_op_ishl: 2248 case nir_op_ishr: 2249 case nir_op_ushr: { 2250 if (bit_size == 32) 2251 break; 2252 for (unsigned i = 0; i < num_components; ++i) { 2253 switch (bit_size) { 2254 case 64: src[1][i].u32 = src[1][i].u64; break; 2255 case 16: src[1][i].u32 = src[1][i].u16; break; 2256 case 8: src[1][i].u32 = src[1][i].u8; break; 2257 } 2258 } 2259 break; 2260 } 2261 default: 2262 break; 2263 } 2264 2265 nir_const_value *srcs[3] = { 2266 src[0], src[1], src[2], 2267 }; 2268 nir_eval_const_opcode(op, val->constant->values, 2269 num_components, bit_size, srcs, 2270 b->shader->info.float_controls_execution_mode); 2271 break; 2272 } /* default */ 2273 } 2274 break; 2275 } 2276 2277 case SpvOpConstantNull: 2278 val->constant = vtn_null_constant(b, val->type); 2279 val->is_null_constant = true; 2280 break; 2281 2282 default: 2283 vtn_fail_with_opcode("Unhandled opcode", opcode); 2284 } 2285 2286 /* Now that we have the value, update the workgroup size if needed */ 2287 if (gl_shader_stage_uses_workgroup(b->entry_point_stage)) 2288 vtn_foreach_decoration(b, val, handle_workgroup_size_decoration_cb, 2289 NULL); 2290} 2291 2292static void 2293vtn_split_barrier_semantics(struct vtn_builder *b, 2294 SpvMemorySemanticsMask semantics, 2295 SpvMemorySemanticsMask *before, 2296 SpvMemorySemanticsMask *after) 2297{ 2298 /* For memory semantics embedded in operations, we split them into up to 2299 * two barriers, to be added before and after the operation. This is less 2300 * strict than if we propagated until the final backend stage, but still 2301 * result in correct execution. 2302 * 2303 * A further improvement could be pipe this information (and use!) into the 2304 * next compiler layers, at the expense of making the handling of barriers 2305 * more complicated. 2306 */ 2307 2308 *before = SpvMemorySemanticsMaskNone; 2309 *after = SpvMemorySemanticsMaskNone; 2310 2311 SpvMemorySemanticsMask order_semantics = 2312 semantics & (SpvMemorySemanticsAcquireMask | 2313 SpvMemorySemanticsReleaseMask | 2314 SpvMemorySemanticsAcquireReleaseMask | 2315 SpvMemorySemanticsSequentiallyConsistentMask); 2316 2317 if (util_bitcount(order_semantics) > 1) { 2318 /* Old GLSLang versions incorrectly set all the ordering bits. This was 2319 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo, 2320 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016). 
2321 */ 2322 vtn_warn("Multiple memory ordering semantics specified, " 2323 "assuming AcquireRelease."); 2324 order_semantics = SpvMemorySemanticsAcquireReleaseMask; 2325 } 2326 2327 const SpvMemorySemanticsMask av_vis_semantics = 2328 semantics & (SpvMemorySemanticsMakeAvailableMask | 2329 SpvMemorySemanticsMakeVisibleMask); 2330 2331 const SpvMemorySemanticsMask storage_semantics = 2332 semantics & (SpvMemorySemanticsUniformMemoryMask | 2333 SpvMemorySemanticsSubgroupMemoryMask | 2334 SpvMemorySemanticsWorkgroupMemoryMask | 2335 SpvMemorySemanticsCrossWorkgroupMemoryMask | 2336 SpvMemorySemanticsAtomicCounterMemoryMask | 2337 SpvMemorySemanticsImageMemoryMask | 2338 SpvMemorySemanticsOutputMemoryMask); 2339 2340 const SpvMemorySemanticsMask other_semantics = 2341 semantics & ~(order_semantics | av_vis_semantics | storage_semantics | 2342 SpvMemorySemanticsVolatileMask); 2343 2344 if (other_semantics) 2345 vtn_warn("Ignoring unhandled memory semantics: %u\n", other_semantics); 2346 2347 /* SequentiallyConsistent is treated as AcquireRelease. */ 2348 2349 /* The RELEASE barrier happens BEFORE the operation, and it is usually 2350 * associated with a Store. All the write operations with a matching 2351 * semantics will not be reordered after the Store. 2352 */ 2353 if (order_semantics & (SpvMemorySemanticsReleaseMask | 2354 SpvMemorySemanticsAcquireReleaseMask | 2355 SpvMemorySemanticsSequentiallyConsistentMask)) { 2356 *before |= SpvMemorySemanticsReleaseMask | storage_semantics; 2357 } 2358 2359 /* The ACQUIRE barrier happens AFTER the operation, and it is usually 2360 * associated with a Load. All the operations with a matching semantics 2361 * will not be reordered before the Load. 2362 */ 2363 if (order_semantics & (SpvMemorySemanticsAcquireMask | 2364 SpvMemorySemanticsAcquireReleaseMask | 2365 SpvMemorySemanticsSequentiallyConsistentMask)) { 2366 *after |= SpvMemorySemanticsAcquireMask | storage_semantics; 2367 } 2368 2369 if (av_vis_semantics & SpvMemorySemanticsMakeVisibleMask) 2370 *before |= SpvMemorySemanticsMakeVisibleMask | storage_semantics; 2371 2372 if (av_vis_semantics & SpvMemorySemanticsMakeAvailableMask) 2373 *after |= SpvMemorySemanticsMakeAvailableMask | storage_semantics; 2374} 2375 2376static nir_memory_semantics 2377vtn_mem_semantics_to_nir_mem_semantics(struct vtn_builder *b, 2378 SpvMemorySemanticsMask semantics) 2379{ 2380 nir_memory_semantics nir_semantics = 0; 2381 2382 SpvMemorySemanticsMask order_semantics = 2383 semantics & (SpvMemorySemanticsAcquireMask | 2384 SpvMemorySemanticsReleaseMask | 2385 SpvMemorySemanticsAcquireReleaseMask | 2386 SpvMemorySemanticsSequentiallyConsistentMask); 2387 2388 if (util_bitcount(order_semantics) > 1) { 2389 /* Old GLSLang versions incorrectly set all the ordering bits. This was 2390 * fixed in c51287d744fb6e7e9ccc09f6f8451e6c64b1dad6 of glslang repo, 2391 * and it is in GLSLang since revision "SPIRV99.1321" (from Jul-2016). 2392 */ 2393 vtn_warn("Multiple memory ordering semantics bits specified, " 2394 "assuming AcquireRelease."); 2395 order_semantics = SpvMemorySemanticsAcquireReleaseMask; 2396 } 2397 2398 switch (order_semantics) { 2399 case 0: 2400 /* Not an ordering barrier. */ 2401 break; 2402 2403 case SpvMemorySemanticsAcquireMask: 2404 nir_semantics = NIR_MEMORY_ACQUIRE; 2405 break; 2406 2407 case SpvMemorySemanticsReleaseMask: 2408 nir_semantics = NIR_MEMORY_RELEASE; 2409 break; 2410 2411 case SpvMemorySemanticsSequentiallyConsistentMask: 2412 FALLTHROUGH; /* Treated as AcquireRelease in Vulkan. 
*/ 2413 case SpvMemorySemanticsAcquireReleaseMask: 2414 nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE; 2415 break; 2416 2417 default: 2418 unreachable("Invalid memory order semantics"); 2419 } 2420 2421 if (semantics & SpvMemorySemanticsMakeAvailableMask) { 2422 vtn_fail_if(!b->options->caps.vk_memory_model, 2423 "To use MakeAvailable memory semantics the VulkanMemoryModel " 2424 "capability must be declared."); 2425 nir_semantics |= NIR_MEMORY_MAKE_AVAILABLE; 2426 } 2427 2428 if (semantics & SpvMemorySemanticsMakeVisibleMask) { 2429 vtn_fail_if(!b->options->caps.vk_memory_model, 2430 "To use MakeVisible memory semantics the VulkanMemoryModel " 2431 "capability must be declared."); 2432 nir_semantics |= NIR_MEMORY_MAKE_VISIBLE; 2433 } 2434 2435 return nir_semantics; 2436} 2437 2438static nir_variable_mode 2439vtn_mem_semantics_to_nir_var_modes(struct vtn_builder *b, 2440 SpvMemorySemanticsMask semantics) 2441{ 2442 /* Vulkan Environment for SPIR-V says "SubgroupMemory, CrossWorkgroupMemory, 2443 * and AtomicCounterMemory are ignored". 2444 */ 2445 if (b->options->environment == NIR_SPIRV_VULKAN) { 2446 semantics &= ~(SpvMemorySemanticsSubgroupMemoryMask | 2447 SpvMemorySemanticsCrossWorkgroupMemoryMask | 2448 SpvMemorySemanticsAtomicCounterMemoryMask); 2449 } 2450 2451 nir_variable_mode modes = 0; 2452 if (semantics & SpvMemorySemanticsUniformMemoryMask) { 2453 modes |= nir_var_uniform | 2454 nir_var_mem_ubo | 2455 nir_var_mem_ssbo | 2456 nir_var_mem_global; 2457 } 2458 if (semantics & SpvMemorySemanticsImageMemoryMask) 2459 modes |= nir_var_image; 2460 if (semantics & SpvMemorySemanticsWorkgroupMemoryMask) 2461 modes |= nir_var_mem_shared; 2462 if (semantics & SpvMemorySemanticsCrossWorkgroupMemoryMask) 2463 modes |= nir_var_mem_global; 2464 if (semantics & SpvMemorySemanticsOutputMemoryMask) { 2465 modes |= nir_var_shader_out; 2466 2467 if (b->shader->info.stage == MESA_SHADER_TASK) 2468 modes |= nir_var_mem_task_payload; 2469 } 2470 2471 return modes; 2472} 2473 2474static nir_scope 2475vtn_scope_to_nir_scope(struct vtn_builder *b, SpvScope scope) 2476{ 2477 nir_scope nir_scope; 2478 switch (scope) { 2479 case SpvScopeDevice: 2480 vtn_fail_if(b->options->caps.vk_memory_model && 2481 !b->options->caps.vk_memory_model_device_scope, 2482 "If the Vulkan memory model is declared and any instruction " 2483 "uses Device scope, the VulkanMemoryModelDeviceScope " 2484 "capability must be declared."); 2485 nir_scope = NIR_SCOPE_DEVICE; 2486 break; 2487 2488 case SpvScopeQueueFamily: 2489 vtn_fail_if(!b->options->caps.vk_memory_model, 2490 "To use Queue Family scope, the VulkanMemoryModel capability " 2491 "must be declared."); 2492 nir_scope = NIR_SCOPE_QUEUE_FAMILY; 2493 break; 2494 2495 case SpvScopeWorkgroup: 2496 nir_scope = NIR_SCOPE_WORKGROUP; 2497 break; 2498 2499 case SpvScopeSubgroup: 2500 nir_scope = NIR_SCOPE_SUBGROUP; 2501 break; 2502 2503 case SpvScopeInvocation: 2504 nir_scope = NIR_SCOPE_INVOCATION; 2505 break; 2506 2507 case SpvScopeShaderCallKHR: 2508 nir_scope = NIR_SCOPE_SHADER_CALL; 2509 break; 2510 2511 default: 2512 vtn_fail("Invalid memory scope"); 2513 } 2514 2515 return nir_scope; 2516} 2517 2518static void 2519vtn_emit_scoped_control_barrier(struct vtn_builder *b, SpvScope exec_scope, 2520 SpvScope mem_scope, 2521 SpvMemorySemanticsMask semantics) 2522{ 2523 nir_memory_semantics nir_semantics = 2524 vtn_mem_semantics_to_nir_mem_semantics(b, semantics); 2525 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics); 2526 nir_scope nir_exec_scope 
= vtn_scope_to_nir_scope(b, exec_scope); 2527 2528 /* Memory semantics is optional for OpControlBarrier. */ 2529 nir_scope nir_mem_scope; 2530 if (nir_semantics == 0 || modes == 0) 2531 nir_mem_scope = NIR_SCOPE_NONE; 2532 else 2533 nir_mem_scope = vtn_scope_to_nir_scope(b, mem_scope); 2534 2535 nir_scoped_barrier(&b->nb, .execution_scope=nir_exec_scope, .memory_scope=nir_mem_scope, 2536 .memory_semantics=nir_semantics, .memory_modes=modes); 2537} 2538 2539static void 2540vtn_emit_scoped_memory_barrier(struct vtn_builder *b, SpvScope scope, 2541 SpvMemorySemanticsMask semantics) 2542{ 2543 nir_variable_mode modes = vtn_mem_semantics_to_nir_var_modes(b, semantics); 2544 nir_memory_semantics nir_semantics = 2545 vtn_mem_semantics_to_nir_mem_semantics(b, semantics); 2546 2547 /* No barrier to add. */ 2548 if (nir_semantics == 0 || modes == 0) 2549 return; 2550 2551 nir_scoped_barrier(&b->nb, .memory_scope=vtn_scope_to_nir_scope(b, scope), 2552 .memory_semantics=nir_semantics, 2553 .memory_modes=modes); 2554} 2555 2556struct vtn_ssa_value * 2557vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) 2558{ 2559 /* Always use bare types for SSA values for a couple of reasons: 2560 * 2561 * 1. Code which emits deref chains should never listen to the explicit 2562 * layout information on the SSA value if any exists. If we've 2563 * accidentally been relying on this, we want to find those bugs. 2564 * 2565 * 2. We want to be able to quickly check that an SSA value being assigned 2566 * to a SPIR-V value has the right type. Using bare types everywhere 2567 * ensures that we can pointer-compare. 2568 */ 2569 struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); 2570 val->type = glsl_get_bare_type(type); 2571 2572 2573 if (!glsl_type_is_vector_or_scalar(type)) { 2574 unsigned elems = glsl_get_length(val->type); 2575 val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); 2576 if (glsl_type_is_array_or_matrix(type)) { 2577 const struct glsl_type *elem_type = glsl_get_array_element(type); 2578 for (unsigned i = 0; i < elems; i++) 2579 val->elems[i] = vtn_create_ssa_value(b, elem_type); 2580 } else { 2581 vtn_assert(glsl_type_is_struct_or_ifc(type)); 2582 for (unsigned i = 0; i < elems; i++) { 2583 const struct glsl_type *elem_type = glsl_get_struct_field(type, i); 2584 val->elems[i] = vtn_create_ssa_value(b, elem_type); 2585 } 2586 } 2587 } 2588 2589 return val; 2590} 2591 2592static nir_tex_src 2593vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) 2594{ 2595 nir_tex_src src; 2596 src.src = nir_src_for_ssa(vtn_get_nir_ssa(b, index)); 2597 src.src_type = type; 2598 return src; 2599} 2600 2601static uint32_t 2602image_operand_arg(struct vtn_builder *b, const uint32_t *w, uint32_t count, 2603 uint32_t mask_idx, SpvImageOperandsMask op) 2604{ 2605 static const SpvImageOperandsMask ops_with_arg = 2606 SpvImageOperandsBiasMask | 2607 SpvImageOperandsLodMask | 2608 SpvImageOperandsGradMask | 2609 SpvImageOperandsConstOffsetMask | 2610 SpvImageOperandsOffsetMask | 2611 SpvImageOperandsConstOffsetsMask | 2612 SpvImageOperandsSampleMask | 2613 SpvImageOperandsMinLodMask | 2614 SpvImageOperandsMakeTexelAvailableMask | 2615 SpvImageOperandsMakeTexelVisibleMask; 2616 2617 assert(util_bitcount(op) == 1); 2618 assert(w[mask_idx] & op); 2619 assert(op & ops_with_arg); 2620 2621 uint32_t idx = util_bitcount(w[mask_idx] & (op - 1) & ops_with_arg) + 1; 2622 2623 /* Adjust indices for operands with two arguments. 
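    *
    * Worked example (illustrative operands, not from the spec text): for
    *
    *    %r = OpImageSampleExplicitLod %v4f %img %coord Lod|ConstOffset %lod %off
    *
    * the mask word is w[mask_idx].  Lod has no lower-numbered operand with an
    * argument set, so its argument sits at w[mask_idx + 1]; ConstOffset has
    * one (Lod), so its argument sits at w[mask_idx + 2].  A Grad operand
    * would contribute two following words (ddx then ddy), which is what the
    * adjustment below accounts for.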
    */
   static const SpvImageOperandsMask ops_with_two_args =
      SpvImageOperandsGradMask;
   idx += util_bitcount(w[mask_idx] & (op - 1) & ops_with_two_args);

   idx += mask_idx;

   vtn_fail_if(idx + (op & ops_with_two_args ? 1 : 0) >= count,
               "Image op claims to have %s but does not have enough "
               "following operands", spirv_imageoperands_to_string(op));

   return idx;
}

static void
non_uniform_decoration_cb(struct vtn_builder *b,
                          struct vtn_value *val, int member,
                          const struct vtn_decoration *dec, void *void_ctx)
{
   enum gl_access_qualifier *access = void_ctx;
   switch (dec->decoration) {
   case SpvDecorationNonUniformEXT:
      *access |= ACCESS_NON_UNIFORM;
      break;

   default:
      break;
   }
}

/* Apply SignExtend/ZeroExtend operands to get the actual result type for
 * image read/sample operations and source type for write operations.
 */
static nir_alu_type
get_image_type(struct vtn_builder *b, nir_alu_type type, unsigned operands)
{
   unsigned extend_operands =
      operands & (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask);
   vtn_fail_if(nir_alu_type_get_base_type(type) == nir_type_float && extend_operands,
               "SignExtend/ZeroExtend used on floating-point texel type");
   vtn_fail_if(extend_operands ==
               (SpvImageOperandsSignExtendMask | SpvImageOperandsZeroExtendMask),
               "SignExtend and ZeroExtend both specified");

   if (operands & SpvImageOperandsSignExtendMask)
      return nir_type_int | nir_alu_type_get_type_size(type);
   if (operands & SpvImageOperandsZeroExtendMask)
      return nir_type_uint | nir_alu_type_get_type_size(type);

   return type;
}

static void
vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
                   const uint32_t *w, unsigned count)
{
   if (opcode == SpvOpSampledImage) {
      struct vtn_sampled_image si = {
         .image = vtn_get_image(b, w[3], NULL),
         .sampler = vtn_get_sampler(b, w[4]),
      };

      enum gl_access_qualifier access = 0;
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
                             non_uniform_decoration_cb, &access);
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[4]),
                             non_uniform_decoration_cb, &access);

      vtn_push_sampled_image(b, w[2], si, access & ACCESS_NON_UNIFORM);
      return;
   } else if (opcode == SpvOpImage) {
      struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);

      enum gl_access_qualifier access = 0;
      vtn_foreach_decoration(b, vtn_untyped_value(b, w[3]),
                             non_uniform_decoration_cb, &access);

      vtn_push_image(b, w[2], si.image, access & ACCESS_NON_UNIFORM);
      return;
   } else if (opcode == SpvOpImageSparseTexelsResident) {
      nir_ssa_def *code = vtn_get_nir_ssa(b, w[3]);
      vtn_push_nir_ssa(b, w[2], nir_is_sparse_texels_resident(&b->nb, 1, code));
      return;
   }

   nir_deref_instr *image = NULL, *sampler = NULL;
   struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]);
   if (sampled_val->type->base_type == vtn_base_type_sampled_image) {
      struct vtn_sampled_image si = vtn_get_sampled_image(b, w[3]);
      image = si.image;
      sampler = si.sampler;
   } else {
      image = vtn_get_image(b, w[3], NULL);
   }

   const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image->type);
   const bool is_array = glsl_sampler_type_is_array(image->type);
   nir_alu_type dest_type = nir_type_invalid;

   /* Figure out the
base texture operation */ 2723 nir_texop texop; 2724 switch (opcode) { 2725 case SpvOpImageSampleImplicitLod: 2726 case SpvOpImageSparseSampleImplicitLod: 2727 case SpvOpImageSampleDrefImplicitLod: 2728 case SpvOpImageSparseSampleDrefImplicitLod: 2729 case SpvOpImageSampleProjImplicitLod: 2730 case SpvOpImageSampleProjDrefImplicitLod: 2731 texop = nir_texop_tex; 2732 break; 2733 2734 case SpvOpImageSampleExplicitLod: 2735 case SpvOpImageSparseSampleExplicitLod: 2736 case SpvOpImageSampleDrefExplicitLod: 2737 case SpvOpImageSparseSampleDrefExplicitLod: 2738 case SpvOpImageSampleProjExplicitLod: 2739 case SpvOpImageSampleProjDrefExplicitLod: 2740 texop = nir_texop_txl; 2741 break; 2742 2743 case SpvOpImageFetch: 2744 case SpvOpImageSparseFetch: 2745 if (sampler_dim == GLSL_SAMPLER_DIM_MS) { 2746 texop = nir_texop_txf_ms; 2747 } else { 2748 texop = nir_texop_txf; 2749 } 2750 break; 2751 2752 case SpvOpImageGather: 2753 case SpvOpImageSparseGather: 2754 case SpvOpImageDrefGather: 2755 case SpvOpImageSparseDrefGather: 2756 texop = nir_texop_tg4; 2757 break; 2758 2759 case SpvOpImageQuerySizeLod: 2760 case SpvOpImageQuerySize: 2761 texop = nir_texop_txs; 2762 dest_type = nir_type_int32; 2763 break; 2764 2765 case SpvOpImageQueryLod: 2766 texop = nir_texop_lod; 2767 dest_type = nir_type_float32; 2768 break; 2769 2770 case SpvOpImageQueryLevels: 2771 texop = nir_texop_query_levels; 2772 dest_type = nir_type_int32; 2773 break; 2774 2775 case SpvOpImageQuerySamples: 2776 texop = nir_texop_texture_samples; 2777 dest_type = nir_type_int32; 2778 break; 2779 2780 case SpvOpFragmentFetchAMD: 2781 texop = nir_texop_fragment_fetch_amd; 2782 break; 2783 2784 case SpvOpFragmentMaskFetchAMD: 2785 texop = nir_texop_fragment_mask_fetch_amd; 2786 dest_type = nir_type_uint32; 2787 break; 2788 2789 default: 2790 vtn_fail_with_opcode("Unhandled opcode", opcode); 2791 } 2792 2793 nir_tex_src srcs[10]; /* 10 should be enough */ 2794 nir_tex_src *p = srcs; 2795 2796 p->src = nir_src_for_ssa(&image->dest.ssa); 2797 p->src_type = nir_tex_src_texture_deref; 2798 p++; 2799 2800 switch (texop) { 2801 case nir_texop_tex: 2802 case nir_texop_txb: 2803 case nir_texop_txl: 2804 case nir_texop_txd: 2805 case nir_texop_tg4: 2806 case nir_texop_lod: 2807 vtn_fail_if(sampler == NULL, 2808 "%s requires an image of type OpTypeSampledImage", 2809 spirv_op_to_string(opcode)); 2810 p->src = nir_src_for_ssa(&sampler->dest.ssa); 2811 p->src_type = nir_tex_src_sampler_deref; 2812 p++; 2813 break; 2814 case nir_texop_txf: 2815 case nir_texop_txf_ms: 2816 case nir_texop_txs: 2817 case nir_texop_query_levels: 2818 case nir_texop_texture_samples: 2819 case nir_texop_samples_identical: 2820 case nir_texop_fragment_fetch_amd: 2821 case nir_texop_fragment_mask_fetch_amd: 2822 /* These don't */ 2823 break; 2824 case nir_texop_txf_ms_fb: 2825 vtn_fail("unexpected nir_texop_txf_ms_fb"); 2826 break; 2827 case nir_texop_txf_ms_mcs_intel: 2828 vtn_fail("unexpected nir_texop_txf_ms_mcs"); 2829 case nir_texop_tex_prefetch: 2830 vtn_fail("unexpected nir_texop_tex_prefetch"); 2831 } 2832 2833 unsigned idx = 4; 2834 2835 struct nir_ssa_def *coord; 2836 unsigned coord_components; 2837 switch (opcode) { 2838 case SpvOpImageSampleImplicitLod: 2839 case SpvOpImageSparseSampleImplicitLod: 2840 case SpvOpImageSampleExplicitLod: 2841 case SpvOpImageSparseSampleExplicitLod: 2842 case SpvOpImageSampleDrefImplicitLod: 2843 case SpvOpImageSparseSampleDrefImplicitLod: 2844 case SpvOpImageSampleDrefExplicitLod: 2845 case SpvOpImageSparseSampleDrefExplicitLod: 2846 case 
SpvOpImageSampleProjImplicitLod:
   case SpvOpImageSampleProjExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageFetch:
   case SpvOpImageSparseFetch:
   case SpvOpImageGather:
   case SpvOpImageSparseGather:
   case SpvOpImageDrefGather:
   case SpvOpImageSparseDrefGather:
   case SpvOpImageQueryLod:
   case SpvOpFragmentFetchAMD:
   case SpvOpFragmentMaskFetchAMD: {
      /* All these types have the coordinate as their first real argument */
      coord_components = glsl_get_sampler_dim_coordinate_components(sampler_dim);

      if (is_array && texop != nir_texop_lod)
         coord_components++;

      struct vtn_ssa_value *coord_val = vtn_ssa_value(b, w[idx++]);
      coord = coord_val->def;
      /* From the SPIR-V spec version 1.5, rev. 5:
       *
       *    "Coordinate must be a scalar or vector of floating-point type. It
       *    contains (u[, v] ... [, array layer]) as needed by the definition
       *    of Sampled Image. It may be a vector larger than needed, but all
       *    unused components appear after all used components."
       */
      vtn_fail_if(coord->num_components < coord_components,
                  "Coordinate value passed has fewer components than sampler dimensionality.");
      p->src = nir_src_for_ssa(nir_trim_vector(&b->nb, coord, coord_components));

      /* OpenCL allows integer sampling coordinates */
      if (glsl_type_is_integer(coord_val->type) &&
          opcode == SpvOpImageSampleExplicitLod) {
         vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL,
                     "Unless the Kernel capability is being used, the coordinate parameter "
                     "of OpImageSampleExplicitLod must be floating point.");

         nir_ssa_def *coords[4];
         nir_ssa_def *f0_5 = nir_imm_float(&b->nb, 0.5);
         for (unsigned i = 0; i < coord_components; i++) {
            coords[i] = nir_i2f32(&b->nb, nir_channel(&b->nb, p->src.ssa, i));

            if (!is_array || i != coord_components - 1)
               coords[i] = nir_fadd(&b->nb, coords[i], f0_5);
         }

         p->src = nir_src_for_ssa(nir_vec(&b->nb, coords, coord_components));
      }

      p->src_type = nir_tex_src_coord;
      p++;
      break;
   }

   default:
      coord = NULL;
      coord_components = 0;
      break;
   }

   switch (opcode) {
   case SpvOpImageSampleProjImplicitLod:
   case SpvOpImageSampleProjExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
      /* These have the projector as the last coordinate component */
      p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
      p->src_type = nir_tex_src_projector;
      p++;
      break;

   default:
      break;
   }

   bool is_shadow = false;
   unsigned gather_component = 0;
   switch (opcode) {
   case SpvOpImageSampleDrefImplicitLod:
   case SpvOpImageSparseSampleDrefImplicitLod:
   case SpvOpImageSampleDrefExplicitLod:
   case SpvOpImageSparseSampleDrefExplicitLod:
   case SpvOpImageSampleProjDrefImplicitLod:
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageDrefGather:
   case SpvOpImageSparseDrefGather:
      /* These all have an explicit depth value as their next source */
      is_shadow = true;
      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
      break;

   case SpvOpImageGather:
   case SpvOpImageSparseGather:
      /* This has a component as its next source */
      gather_component = vtn_constant_uint(b, w[idx++]);
      break;

default: 2946 break; 2947 } 2948 2949 bool is_sparse = false; 2950 switch (opcode) { 2951 case SpvOpImageSparseSampleImplicitLod: 2952 case SpvOpImageSparseSampleExplicitLod: 2953 case SpvOpImageSparseSampleDrefImplicitLod: 2954 case SpvOpImageSparseSampleDrefExplicitLod: 2955 case SpvOpImageSparseFetch: 2956 case SpvOpImageSparseGather: 2957 case SpvOpImageSparseDrefGather: 2958 is_sparse = true; 2959 break; 2960 default: 2961 break; 2962 } 2963 2964 /* For OpImageQuerySizeLod, we always have an LOD */ 2965 if (opcode == SpvOpImageQuerySizeLod) 2966 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); 2967 2968 /* For OpFragmentFetchAMD, we always have a multisample index */ 2969 if (opcode == SpvOpFragmentFetchAMD) 2970 (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); 2971 2972 /* Now we need to handle some number of optional arguments */ 2973 struct vtn_value *gather_offsets = NULL; 2974 uint32_t operands = SpvImageOperandsMaskNone; 2975 if (idx < count) { 2976 operands = w[idx]; 2977 2978 if (operands & SpvImageOperandsBiasMask) { 2979 vtn_assert(texop == nir_texop_tex || 2980 texop == nir_texop_tg4); 2981 if (texop == nir_texop_tex) 2982 texop = nir_texop_txb; 2983 uint32_t arg = image_operand_arg(b, w, count, idx, 2984 SpvImageOperandsBiasMask); 2985 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_bias); 2986 } 2987 2988 if (operands & SpvImageOperandsLodMask) { 2989 vtn_assert(texop == nir_texop_txl || texop == nir_texop_txf || 2990 texop == nir_texop_txs || texop == nir_texop_tg4); 2991 uint32_t arg = image_operand_arg(b, w, count, idx, 2992 SpvImageOperandsLodMask); 2993 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_lod); 2994 } 2995 2996 if (operands & SpvImageOperandsGradMask) { 2997 vtn_assert(texop == nir_texop_txl); 2998 texop = nir_texop_txd; 2999 uint32_t arg = image_operand_arg(b, w, count, idx, 3000 SpvImageOperandsGradMask); 3001 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ddx); 3002 (*p++) = vtn_tex_src(b, w[arg + 1], nir_tex_src_ddy); 3003 } 3004 3005 vtn_fail_if(util_bitcount(operands & (SpvImageOperandsConstOffsetsMask | 3006 SpvImageOperandsOffsetMask | 3007 SpvImageOperandsConstOffsetMask)) > 1, 3008 "At most one of the ConstOffset, Offset, and ConstOffsets " 3009 "image operands can be used on a given instruction."); 3010 3011 if (operands & SpvImageOperandsOffsetMask) { 3012 uint32_t arg = image_operand_arg(b, w, count, idx, 3013 SpvImageOperandsOffsetMask); 3014 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset); 3015 } 3016 3017 if (operands & SpvImageOperandsConstOffsetMask) { 3018 uint32_t arg = image_operand_arg(b, w, count, idx, 3019 SpvImageOperandsConstOffsetMask); 3020 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_offset); 3021 } 3022 3023 if (operands & SpvImageOperandsConstOffsetsMask) { 3024 vtn_assert(texop == nir_texop_tg4); 3025 uint32_t arg = image_operand_arg(b, w, count, idx, 3026 SpvImageOperandsConstOffsetsMask); 3027 gather_offsets = vtn_value(b, w[arg], vtn_value_type_constant); 3028 } 3029 3030 if (operands & SpvImageOperandsSampleMask) { 3031 vtn_assert(texop == nir_texop_txf_ms); 3032 uint32_t arg = image_operand_arg(b, w, count, idx, 3033 SpvImageOperandsSampleMask); 3034 texop = nir_texop_txf_ms; 3035 (*p++) = vtn_tex_src(b, w[arg], nir_tex_src_ms_index); 3036 } 3037 3038 if (operands & SpvImageOperandsMinLodMask) { 3039 vtn_assert(texop == nir_texop_tex || 3040 texop == nir_texop_txb || 3041 texop == nir_texop_txd); 3042 uint32_t arg = image_operand_arg(b, w, count, idx, 3043 SpvImageOperandsMinLodMask); 3044 (*p++) = vtn_tex_src(b, 
w[arg], nir_tex_src_min_lod); 3045 } 3046 } 3047 3048 struct vtn_type *ret_type = vtn_get_type(b, w[1]); 3049 struct vtn_type *struct_type = NULL; 3050 if (is_sparse) { 3051 vtn_assert(glsl_type_is_struct_or_ifc(ret_type->type)); 3052 struct_type = ret_type; 3053 ret_type = struct_type->members[1]; 3054 } 3055 3056 nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); 3057 instr->op = texop; 3058 3059 memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); 3060 3061 instr->coord_components = coord_components; 3062 instr->sampler_dim = sampler_dim; 3063 instr->is_array = is_array; 3064 instr->is_shadow = is_shadow; 3065 instr->is_sparse = is_sparse; 3066 instr->is_new_style_shadow = 3067 is_shadow && glsl_get_components(ret_type->type) == 1; 3068 instr->component = gather_component; 3069 3070 /* The Vulkan spec says: 3071 * 3072 * "If an instruction loads from or stores to a resource (including 3073 * atomics and image instructions) and the resource descriptor being 3074 * accessed is not dynamically uniform, then the operand corresponding 3075 * to that resource (e.g. the pointer or sampled image operand) must be 3076 * decorated with NonUniform." 3077 * 3078 * It's very careful to specify that the exact operand must be decorated 3079 * NonUniform. The SPIR-V parser is not expected to chase through long 3080 * chains to find the NonUniform decoration. It's either right there or we 3081 * can assume it doesn't exist. 3082 */ 3083 enum gl_access_qualifier access = 0; 3084 vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access); 3085 3086 if (operands & SpvImageOperandsNontemporalMask) 3087 access |= ACCESS_STREAM_CACHE_POLICY; 3088 3089 if (sampled_val->propagated_non_uniform) 3090 access |= ACCESS_NON_UNIFORM; 3091 3092 if (image && (access & ACCESS_NON_UNIFORM)) 3093 instr->texture_non_uniform = true; 3094 3095 if (sampler && (access & ACCESS_NON_UNIFORM)) 3096 instr->sampler_non_uniform = true; 3097 3098 /* for non-query ops, get dest_type from SPIR-V return type */ 3099 if (dest_type == nir_type_invalid) { 3100 /* the return type should match the image type, unless the image type is 3101 * VOID (CL image), in which case the return type dictates the sampler 3102 */ 3103 enum glsl_base_type sampler_base = 3104 glsl_get_sampler_result_type(image->type); 3105 enum glsl_base_type ret_base = glsl_get_base_type(ret_type->type); 3106 vtn_fail_if(sampler_base != ret_base && sampler_base != GLSL_TYPE_VOID, 3107 "SPIR-V return type mismatches image type. 
This is only valid " 3108 "for untyped images (OpenCL)."); 3109 dest_type = nir_get_nir_type_for_glsl_base_type(ret_base); 3110 dest_type = get_image_type(b, dest_type, operands); 3111 } 3112 3113 instr->dest_type = dest_type; 3114 3115 nir_ssa_dest_init(&instr->instr, &instr->dest, 3116 nir_tex_instr_dest_size(instr), 32, NULL); 3117 3118 vtn_assert(glsl_get_vector_elements(ret_type->type) == 3119 nir_tex_instr_result_size(instr)); 3120 3121 if (gather_offsets) { 3122 vtn_fail_if(gather_offsets->type->base_type != vtn_base_type_array || 3123 gather_offsets->type->length != 4, 3124 "ConstOffsets must be an array of size four of vectors " 3125 "of two integer components"); 3126 3127 struct vtn_type *vec_type = gather_offsets->type->array_element; 3128 vtn_fail_if(vec_type->base_type != vtn_base_type_vector || 3129 vec_type->length != 2 || 3130 !glsl_type_is_integer(vec_type->type), 3131 "ConstOffsets must be an array of size four of vectors " 3132 "of two integer components"); 3133 3134 unsigned bit_size = glsl_get_bit_size(vec_type->type); 3135 for (uint32_t i = 0; i < 4; i++) { 3136 const nir_const_value *cvec = 3137 gather_offsets->constant->elements[i]->values; 3138 for (uint32_t j = 0; j < 2; j++) { 3139 switch (bit_size) { 3140 case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break; 3141 case 16: instr->tg4_offsets[i][j] = cvec[j].i16; break; 3142 case 32: instr->tg4_offsets[i][j] = cvec[j].i32; break; 3143 case 64: instr->tg4_offsets[i][j] = cvec[j].i64; break; 3144 default: 3145 vtn_fail("Unsupported bit size: %u", bit_size); 3146 } 3147 } 3148 } 3149 } 3150 3151 nir_builder_instr_insert(&b->nb, &instr->instr); 3152 3153 if (is_sparse) { 3154 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type); 3155 unsigned result_size = glsl_get_vector_elements(ret_type->type); 3156 dest->elems[0]->def = nir_channel(&b->nb, &instr->dest.ssa, result_size); 3157 dest->elems[1]->def = nir_trim_vector(&b->nb, &instr->dest.ssa, 3158 result_size); 3159 vtn_push_ssa_value(b, w[2], dest); 3160 } else { 3161 vtn_push_nir_ssa(b, w[2], &instr->dest.ssa); 3162 } 3163} 3164 3165static void 3166fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, 3167 const uint32_t *w, nir_src *src) 3168{ 3169 const struct glsl_type *type = vtn_get_type(b, w[1])->type; 3170 unsigned bit_size = glsl_get_bit_size(type); 3171 3172 switch (opcode) { 3173 case SpvOpAtomicIIncrement: 3174 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 1, bit_size)); 3175 break; 3176 3177 case SpvOpAtomicIDecrement: 3178 src[0] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, bit_size)); 3179 break; 3180 3181 case SpvOpAtomicISub: 3182 src[0] = 3183 nir_src_for_ssa(nir_ineg(&b->nb, vtn_get_nir_ssa(b, w[6]))); 3184 break; 3185 3186 case SpvOpAtomicCompareExchange: 3187 case SpvOpAtomicCompareExchangeWeak: 3188 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[8])); 3189 src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[7])); 3190 break; 3191 3192 case SpvOpAtomicExchange: 3193 case SpvOpAtomicIAdd: 3194 case SpvOpAtomicSMin: 3195 case SpvOpAtomicUMin: 3196 case SpvOpAtomicSMax: 3197 case SpvOpAtomicUMax: 3198 case SpvOpAtomicAnd: 3199 case SpvOpAtomicOr: 3200 case SpvOpAtomicXor: 3201 case SpvOpAtomicFAddEXT: 3202 case SpvOpAtomicFMinEXT: 3203 case SpvOpAtomicFMaxEXT: 3204 src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6])); 3205 break; 3206 3207 default: 3208 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3209 } 3210} 3211 3212static nir_ssa_def * 3213get_image_coord(struct vtn_builder *b, uint32_t value) 3214{ 3215 
nir_ssa_def *coord = vtn_get_nir_ssa(b, value); 3216 /* The image_load_store intrinsics assume a 4-dim coordinate */ 3217 return nir_pad_vec4(&b->nb, coord); 3218} 3219 3220static void 3221vtn_handle_image(struct vtn_builder *b, SpvOp opcode, 3222 const uint32_t *w, unsigned count) 3223{ 3224 /* Just get this one out of the way */ 3225 if (opcode == SpvOpImageTexelPointer) { 3226 struct vtn_value *val = 3227 vtn_push_value(b, w[2], vtn_value_type_image_pointer); 3228 val->image = ralloc(b, struct vtn_image_pointer); 3229 3230 val->image->image = vtn_nir_deref(b, w[3]); 3231 val->image->coord = get_image_coord(b, w[4]); 3232 val->image->sample = vtn_get_nir_ssa(b, w[5]); 3233 val->image->lod = nir_imm_int(&b->nb, 0); 3234 return; 3235 } 3236 3237 struct vtn_image_pointer image; 3238 SpvScope scope = SpvScopeInvocation; 3239 SpvMemorySemanticsMask semantics = 0; 3240 SpvImageOperandsMask operands = SpvImageOperandsMaskNone; 3241 3242 enum gl_access_qualifier access = 0; 3243 3244 struct vtn_value *res_val; 3245 switch (opcode) { 3246 case SpvOpAtomicExchange: 3247 case SpvOpAtomicCompareExchange: 3248 case SpvOpAtomicCompareExchangeWeak: 3249 case SpvOpAtomicIIncrement: 3250 case SpvOpAtomicIDecrement: 3251 case SpvOpAtomicIAdd: 3252 case SpvOpAtomicISub: 3253 case SpvOpAtomicLoad: 3254 case SpvOpAtomicSMin: 3255 case SpvOpAtomicUMin: 3256 case SpvOpAtomicSMax: 3257 case SpvOpAtomicUMax: 3258 case SpvOpAtomicAnd: 3259 case SpvOpAtomicOr: 3260 case SpvOpAtomicXor: 3261 case SpvOpAtomicFAddEXT: 3262 case SpvOpAtomicFMinEXT: 3263 case SpvOpAtomicFMaxEXT: 3264 res_val = vtn_value(b, w[3], vtn_value_type_image_pointer); 3265 image = *res_val->image; 3266 scope = vtn_constant_uint(b, w[4]); 3267 semantics = vtn_constant_uint(b, w[5]); 3268 access |= ACCESS_COHERENT; 3269 break; 3270 3271 case SpvOpAtomicStore: 3272 res_val = vtn_value(b, w[1], vtn_value_type_image_pointer); 3273 image = *res_val->image; 3274 scope = vtn_constant_uint(b, w[2]); 3275 semantics = vtn_constant_uint(b, w[3]); 3276 access |= ACCESS_COHERENT; 3277 break; 3278 3279 case SpvOpImageQuerySizeLod: 3280 res_val = vtn_untyped_value(b, w[3]); 3281 image.image = vtn_get_image(b, w[3], &access); 3282 image.coord = NULL; 3283 image.sample = NULL; 3284 image.lod = vtn_ssa_value(b, w[4])->def; 3285 break; 3286 3287 case SpvOpImageQuerySize: 3288 case SpvOpImageQuerySamples: 3289 res_val = vtn_untyped_value(b, w[3]); 3290 image.image = vtn_get_image(b, w[3], &access); 3291 image.coord = NULL; 3292 image.sample = NULL; 3293 image.lod = NULL; 3294 break; 3295 3296 case SpvOpImageQueryFormat: 3297 case SpvOpImageQueryOrder: 3298 res_val = vtn_untyped_value(b, w[3]); 3299 image.image = vtn_get_image(b, w[3], &access); 3300 image.coord = NULL; 3301 image.sample = NULL; 3302 image.lod = NULL; 3303 break; 3304 3305 case SpvOpImageRead: 3306 case SpvOpImageSparseRead: { 3307 res_val = vtn_untyped_value(b, w[3]); 3308 image.image = vtn_get_image(b, w[3], &access); 3309 image.coord = get_image_coord(b, w[4]); 3310 3311 operands = count > 5 ? 
w[5] : SpvImageOperandsMaskNone; 3312 3313 if (operands & SpvImageOperandsSampleMask) { 3314 uint32_t arg = image_operand_arg(b, w, count, 5, 3315 SpvImageOperandsSampleMask); 3316 image.sample = vtn_get_nir_ssa(b, w[arg]); 3317 } else { 3318 image.sample = nir_ssa_undef(&b->nb, 1, 32); 3319 } 3320 3321 if (operands & SpvImageOperandsMakeTexelVisibleMask) { 3322 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0, 3323 "MakeTexelVisible requires NonPrivateTexel to also be set."); 3324 uint32_t arg = image_operand_arg(b, w, count, 5, 3325 SpvImageOperandsMakeTexelVisibleMask); 3326 semantics = SpvMemorySemanticsMakeVisibleMask; 3327 scope = vtn_constant_uint(b, w[arg]); 3328 } 3329 3330 if (operands & SpvImageOperandsLodMask) { 3331 uint32_t arg = image_operand_arg(b, w, count, 5, 3332 SpvImageOperandsLodMask); 3333 image.lod = vtn_get_nir_ssa(b, w[arg]); 3334 } else { 3335 image.lod = nir_imm_int(&b->nb, 0); 3336 } 3337 3338 if (operands & SpvImageOperandsVolatileTexelMask) 3339 access |= ACCESS_VOLATILE; 3340 if (operands & SpvImageOperandsNontemporalMask) 3341 access |= ACCESS_STREAM_CACHE_POLICY; 3342 3343 break; 3344 } 3345 3346 case SpvOpImageWrite: { 3347 res_val = vtn_untyped_value(b, w[1]); 3348 image.image = vtn_get_image(b, w[1], &access); 3349 image.coord = get_image_coord(b, w[2]); 3350 3351 /* texel = w[3] */ 3352 3353 operands = count > 4 ? w[4] : SpvImageOperandsMaskNone; 3354 3355 if (operands & SpvImageOperandsSampleMask) { 3356 uint32_t arg = image_operand_arg(b, w, count, 4, 3357 SpvImageOperandsSampleMask); 3358 image.sample = vtn_get_nir_ssa(b, w[arg]); 3359 } else { 3360 image.sample = nir_ssa_undef(&b->nb, 1, 32); 3361 } 3362 3363 if (operands & SpvImageOperandsMakeTexelAvailableMask) { 3364 vtn_fail_if((operands & SpvImageOperandsNonPrivateTexelMask) == 0, 3365 "MakeTexelAvailable requires NonPrivateTexel to also be set."); 3366 uint32_t arg = image_operand_arg(b, w, count, 4, 3367 SpvImageOperandsMakeTexelAvailableMask); 3368 semantics = SpvMemorySemanticsMakeAvailableMask; 3369 scope = vtn_constant_uint(b, w[arg]); 3370 } 3371 3372 if (operands & SpvImageOperandsLodMask) { 3373 uint32_t arg = image_operand_arg(b, w, count, 4, 3374 SpvImageOperandsLodMask); 3375 image.lod = vtn_get_nir_ssa(b, w[arg]); 3376 } else { 3377 image.lod = nir_imm_int(&b->nb, 0); 3378 } 3379 3380 if (operands & SpvImageOperandsVolatileTexelMask) 3381 access |= ACCESS_VOLATILE; 3382 if (operands & SpvImageOperandsNontemporalMask) 3383 access |= ACCESS_STREAM_CACHE_POLICY; 3384 3385 break; 3386 } 3387 3388 default: 3389 vtn_fail_with_opcode("Invalid image opcode", opcode); 3390 } 3391 3392 if (semantics & SpvMemorySemanticsVolatileMask) 3393 access |= ACCESS_VOLATILE; 3394 3395 nir_intrinsic_op op; 3396 switch (opcode) { 3397#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_deref_##N; break; 3398 OP(ImageQuerySize, size) 3399 OP(ImageQuerySizeLod, size) 3400 OP(ImageRead, load) 3401 OP(ImageSparseRead, sparse_load) 3402 OP(ImageWrite, store) 3403 OP(AtomicLoad, load) 3404 OP(AtomicStore, store) 3405 OP(AtomicExchange, atomic_exchange) 3406 OP(AtomicCompareExchange, atomic_comp_swap) 3407 OP(AtomicCompareExchangeWeak, atomic_comp_swap) 3408 OP(AtomicIIncrement, atomic_add) 3409 OP(AtomicIDecrement, atomic_add) 3410 OP(AtomicIAdd, atomic_add) 3411 OP(AtomicISub, atomic_add) 3412 OP(AtomicSMin, atomic_imin) 3413 OP(AtomicUMin, atomic_umin) 3414 OP(AtomicSMax, atomic_imax) 3415 OP(AtomicUMax, atomic_umax) 3416 OP(AtomicAnd, atomic_and) 3417 OP(AtomicOr, atomic_or) 3418 
OP(AtomicXor, atomic_xor) 3419 OP(AtomicFAddEXT, atomic_fadd) 3420 OP(AtomicFMinEXT, atomic_fmin) 3421 OP(AtomicFMaxEXT, atomic_fmax) 3422 OP(ImageQueryFormat, format) 3423 OP(ImageQueryOrder, order) 3424 OP(ImageQuerySamples, samples) 3425#undef OP 3426 default: 3427 vtn_fail_with_opcode("Invalid image opcode", opcode); 3428 } 3429 3430 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); 3431 3432 intrin->src[0] = nir_src_for_ssa(&image.image->dest.ssa); 3433 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image.image->type)); 3434 nir_intrinsic_set_image_array(intrin, 3435 glsl_sampler_type_is_array(image.image->type)); 3436 3437 switch (opcode) { 3438 case SpvOpImageQuerySamples: 3439 case SpvOpImageQuerySize: 3440 case SpvOpImageQuerySizeLod: 3441 case SpvOpImageQueryFormat: 3442 case SpvOpImageQueryOrder: 3443 break; 3444 default: 3445 /* The image coordinate is always 4 components but we may not have that 3446 * many. Swizzle to compensate. 3447 */ 3448 intrin->src[1] = nir_src_for_ssa(nir_pad_vec4(&b->nb, image.coord)); 3449 intrin->src[2] = nir_src_for_ssa(image.sample); 3450 break; 3451 } 3452 3453 /* The Vulkan spec says: 3454 * 3455 * "If an instruction loads from or stores to a resource (including 3456 * atomics and image instructions) and the resource descriptor being 3457 * accessed is not dynamically uniform, then the operand corresponding 3458 * to that resource (e.g. the pointer or sampled image operand) must be 3459 * decorated with NonUniform." 3460 * 3461 * It's very careful to specify that the exact operand must be decorated 3462 * NonUniform. The SPIR-V parser is not expected to chase through long 3463 * chains to find the NonUniform decoration. It's either right there or we 3464 * can assume it doesn't exist. 3465 */ 3466 vtn_foreach_decoration(b, res_val, non_uniform_decoration_cb, &access); 3467 nir_intrinsic_set_access(intrin, access); 3468 3469 switch (opcode) { 3470 case SpvOpImageQuerySamples: 3471 case SpvOpImageQueryFormat: 3472 case SpvOpImageQueryOrder: 3473 /* No additional sources */ 3474 break; 3475 case SpvOpImageQuerySize: 3476 intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0)); 3477 break; 3478 case SpvOpImageQuerySizeLod: 3479 intrin->src[1] = nir_src_for_ssa(image.lod); 3480 break; 3481 case SpvOpAtomicLoad: 3482 case SpvOpImageRead: 3483 case SpvOpImageSparseRead: 3484 /* Only OpImageRead can support a lod parameter if 3485 * SPV_AMD_shader_image_load_store_lod is used but the current NIR 3486 * intrinsics definition for atomics requires us to set it for 3487 * OpAtomicLoad. 3488 */ 3489 intrin->src[3] = nir_src_for_ssa(image.lod); 3490 break; 3491 case SpvOpAtomicStore: 3492 case SpvOpImageWrite: { 3493 const uint32_t value_id = opcode == SpvOpAtomicStore ? w[4] : w[3]; 3494 struct vtn_ssa_value *value = vtn_ssa_value(b, value_id); 3495 /* nir_intrinsic_image_deref_store always takes a vec4 value */ 3496 assert(op == nir_intrinsic_image_deref_store); 3497 intrin->num_components = 4; 3498 intrin->src[3] = nir_src_for_ssa(nir_pad_vec4(&b->nb, value->def)); 3499 /* Only OpImageWrite can support a lod parameter if 3500 * SPV_AMD_shader_image_load_store_lod is used but the current NIR 3501 * intrinsics definition for atomics requires us to set it for 3502 * OpAtomicStore. 
3503 */ 3504 intrin->src[4] = nir_src_for_ssa(image.lod); 3505 3506 nir_alu_type src_type = 3507 get_image_type(b, nir_get_nir_type_for_glsl_type(value->type), operands); 3508 nir_intrinsic_set_src_type(intrin, src_type); 3509 break; 3510 } 3511 3512 case SpvOpAtomicCompareExchange: 3513 case SpvOpAtomicCompareExchangeWeak: 3514 case SpvOpAtomicIIncrement: 3515 case SpvOpAtomicIDecrement: 3516 case SpvOpAtomicExchange: 3517 case SpvOpAtomicIAdd: 3518 case SpvOpAtomicISub: 3519 case SpvOpAtomicSMin: 3520 case SpvOpAtomicUMin: 3521 case SpvOpAtomicSMax: 3522 case SpvOpAtomicUMax: 3523 case SpvOpAtomicAnd: 3524 case SpvOpAtomicOr: 3525 case SpvOpAtomicXor: 3526 case SpvOpAtomicFAddEXT: 3527 case SpvOpAtomicFMinEXT: 3528 case SpvOpAtomicFMaxEXT: 3529 fill_common_atomic_sources(b, opcode, w, &intrin->src[3]); 3530 break; 3531 3532 default: 3533 vtn_fail_with_opcode("Invalid image opcode", opcode); 3534 } 3535 3536 /* Image operations implicitly have the Image storage memory semantics. */ 3537 semantics |= SpvMemorySemanticsImageMemoryMask; 3538 3539 SpvMemorySemanticsMask before_semantics; 3540 SpvMemorySemanticsMask after_semantics; 3541 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics); 3542 3543 if (before_semantics) 3544 vtn_emit_memory_barrier(b, scope, before_semantics); 3545 3546 if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { 3547 struct vtn_type *type = vtn_get_type(b, w[1]); 3548 struct vtn_type *struct_type = NULL; 3549 if (opcode == SpvOpImageSparseRead) { 3550 vtn_assert(glsl_type_is_struct_or_ifc(type->type)); 3551 struct_type = type; 3552 type = struct_type->members[1]; 3553 } 3554 3555 unsigned dest_components = glsl_get_vector_elements(type->type); 3556 if (opcode == SpvOpImageSparseRead) 3557 dest_components++; 3558 3559 if (nir_intrinsic_infos[op].dest_components == 0) 3560 intrin->num_components = dest_components; 3561 3562 unsigned bit_size = glsl_get_bit_size(type->type); 3563 if (opcode == SpvOpImageQuerySize || 3564 opcode == SpvOpImageQuerySizeLod) 3565 bit_size = MIN2(bit_size, 32); 3566 3567 nir_ssa_dest_init(&intrin->instr, &intrin->dest, 3568 nir_intrinsic_dest_components(intrin), 3569 bit_size, NULL); 3570 3571 nir_builder_instr_insert(&b->nb, &intrin->instr); 3572 3573 nir_ssa_def *result = nir_trim_vector(&b->nb, &intrin->dest.ssa, 3574 dest_components); 3575 3576 if (opcode == SpvOpImageQuerySize || 3577 opcode == SpvOpImageQuerySizeLod) 3578 result = nir_u2u(&b->nb, result, glsl_get_bit_size(type->type)); 3579 3580 if (opcode == SpvOpImageSparseRead) { 3581 struct vtn_ssa_value *dest = vtn_create_ssa_value(b, struct_type->type); 3582 unsigned res_type_size = glsl_get_vector_elements(type->type); 3583 dest->elems[0]->def = nir_channel(&b->nb, result, res_type_size); 3584 if (intrin->dest.ssa.bit_size != 32) 3585 dest->elems[0]->def = nir_u2u32(&b->nb, dest->elems[0]->def); 3586 dest->elems[1]->def = nir_trim_vector(&b->nb, result, res_type_size); 3587 vtn_push_ssa_value(b, w[2], dest); 3588 } else { 3589 vtn_push_nir_ssa(b, w[2], result); 3590 } 3591 3592 if (opcode == SpvOpImageRead || opcode == SpvOpImageSparseRead || 3593 opcode == SpvOpAtomicLoad) { 3594 nir_alu_type dest_type = 3595 get_image_type(b, nir_get_nir_type_for_glsl_type(type->type), operands); 3596 nir_intrinsic_set_dest_type(intrin, dest_type); 3597 } 3598 } else { 3599 nir_builder_instr_insert(&b->nb, &intrin->instr); 3600 } 3601 3602 if (after_semantics) 3603 vtn_emit_memory_barrier(b, scope, after_semantics); 3604} 3605 3606static nir_intrinsic_op 
get_uniform_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
{
   switch (opcode) {
#define OP(S, N) case SpvOp##S: return nir_intrinsic_atomic_counter_ ##N;
   OP(AtomicLoad, read_deref)
   OP(AtomicExchange, exchange)
   OP(AtomicCompareExchange, comp_swap)
   OP(AtomicCompareExchangeWeak, comp_swap)
   OP(AtomicIIncrement, inc_deref)
   OP(AtomicIDecrement, post_dec_deref)
   OP(AtomicIAdd, add_deref)
   OP(AtomicISub, add_deref)
   OP(AtomicUMin, min_deref)
   OP(AtomicUMax, max_deref)
   OP(AtomicAnd, and_deref)
   OP(AtomicOr, or_deref)
   OP(AtomicXor, xor_deref)
#undef OP
   default:
      /* We left the following out: AtomicStore, AtomicSMin and AtomicSMax.
       * Right now there are no NIR intrinsics for them.  At this moment,
       * atomic counter support is only needed for ARB_spirv, so we only need
       * to support GLSL atomic counters, which are uints and don't allow
       * direct storage.
       */
      vtn_fail("Invalid uniform atomic");
   }
}

static nir_intrinsic_op
get_deref_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
{
   switch (opcode) {
   case SpvOpAtomicLoad: return nir_intrinsic_load_deref;
   case SpvOpAtomicFlagClear:
   case SpvOpAtomicStore: return nir_intrinsic_store_deref;
#define OP(S, N) case SpvOp##S: return nir_intrinsic_deref_##N;
   OP(AtomicExchange, atomic_exchange)
   OP(AtomicCompareExchange, atomic_comp_swap)
   OP(AtomicCompareExchangeWeak, atomic_comp_swap)
   OP(AtomicIIncrement, atomic_add)
   OP(AtomicIDecrement, atomic_add)
   OP(AtomicIAdd, atomic_add)
   OP(AtomicISub, atomic_add)
   OP(AtomicSMin, atomic_imin)
   OP(AtomicUMin, atomic_umin)
   OP(AtomicSMax, atomic_imax)
   OP(AtomicUMax, atomic_umax)
   OP(AtomicAnd, atomic_and)
   OP(AtomicOr, atomic_or)
   OP(AtomicXor, atomic_xor)
   OP(AtomicFAddEXT, atomic_fadd)
   OP(AtomicFMinEXT, atomic_fmin)
   OP(AtomicFMaxEXT, atomic_fmax)
   OP(AtomicFlagTestAndSet, atomic_comp_swap)
#undef OP
   default:
      vtn_fail_with_opcode("Invalid shared atomic", opcode);
   }
}

/*
 * Handles shared atomics, ssbo atomics and atomic counters.
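 *
 * Atomic counter variables (vtn_variable_mode_atomic_counter) are lowered to
 * the dedicated nir_intrinsic_atomic_counter_* intrinsics chosen by
 * get_uniform_nir_atomic_op() above; everything else goes through the generic
 * deref-based intrinsics from get_deref_nir_atomic_op().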
3670 */ 3671static void 3672vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, 3673 const uint32_t *w, UNUSED unsigned count) 3674{ 3675 struct vtn_pointer *ptr; 3676 nir_intrinsic_instr *atomic; 3677 3678 SpvScope scope = SpvScopeInvocation; 3679 SpvMemorySemanticsMask semantics = 0; 3680 enum gl_access_qualifier access = 0; 3681 3682 switch (opcode) { 3683 case SpvOpAtomicLoad: 3684 case SpvOpAtomicExchange: 3685 case SpvOpAtomicCompareExchange: 3686 case SpvOpAtomicCompareExchangeWeak: 3687 case SpvOpAtomicIIncrement: 3688 case SpvOpAtomicIDecrement: 3689 case SpvOpAtomicIAdd: 3690 case SpvOpAtomicISub: 3691 case SpvOpAtomicSMin: 3692 case SpvOpAtomicUMin: 3693 case SpvOpAtomicSMax: 3694 case SpvOpAtomicUMax: 3695 case SpvOpAtomicAnd: 3696 case SpvOpAtomicOr: 3697 case SpvOpAtomicXor: 3698 case SpvOpAtomicFAddEXT: 3699 case SpvOpAtomicFMinEXT: 3700 case SpvOpAtomicFMaxEXT: 3701 case SpvOpAtomicFlagTestAndSet: 3702 ptr = vtn_pointer(b, w[3]); 3703 scope = vtn_constant_uint(b, w[4]); 3704 semantics = vtn_constant_uint(b, w[5]); 3705 break; 3706 case SpvOpAtomicFlagClear: 3707 case SpvOpAtomicStore: 3708 ptr = vtn_pointer(b, w[1]); 3709 scope = vtn_constant_uint(b, w[2]); 3710 semantics = vtn_constant_uint(b, w[3]); 3711 break; 3712 3713 default: 3714 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3715 } 3716 3717 if (semantics & SpvMemorySemanticsVolatileMask) 3718 access |= ACCESS_VOLATILE; 3719 3720 /* uniform as "atomic counter uniform" */ 3721 if (ptr->mode == vtn_variable_mode_atomic_counter) { 3722 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr); 3723 nir_intrinsic_op op = get_uniform_nir_atomic_op(b, opcode); 3724 atomic = nir_intrinsic_instr_create(b->nb.shader, op); 3725 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa); 3726 3727 /* SSBO needs to initialize index/offset. 
In this case we don't need to, 3728 * as that info is already stored on the ptr->var->var nir_variable (see 3729 * vtn_create_variable) 3730 */ 3731 3732 switch (opcode) { 3733 case SpvOpAtomicLoad: 3734 case SpvOpAtomicExchange: 3735 case SpvOpAtomicCompareExchange: 3736 case SpvOpAtomicCompareExchangeWeak: 3737 case SpvOpAtomicIIncrement: 3738 case SpvOpAtomicIDecrement: 3739 case SpvOpAtomicIAdd: 3740 case SpvOpAtomicISub: 3741 case SpvOpAtomicSMin: 3742 case SpvOpAtomicUMin: 3743 case SpvOpAtomicSMax: 3744 case SpvOpAtomicUMax: 3745 case SpvOpAtomicAnd: 3746 case SpvOpAtomicOr: 3747 case SpvOpAtomicXor: 3748 /* Nothing: we don't need to call fill_common_atomic_sources here, as 3749 * atomic counter uniforms doesn't have sources 3750 */ 3751 break; 3752 3753 default: 3754 unreachable("Invalid SPIR-V atomic"); 3755 3756 } 3757 } else { 3758 nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr); 3759 const struct glsl_type *deref_type = deref->type; 3760 nir_intrinsic_op op = get_deref_nir_atomic_op(b, opcode); 3761 atomic = nir_intrinsic_instr_create(b->nb.shader, op); 3762 atomic->src[0] = nir_src_for_ssa(&deref->dest.ssa); 3763 3764 if (ptr->mode != vtn_variable_mode_workgroup) 3765 access |= ACCESS_COHERENT; 3766 3767 nir_intrinsic_set_access(atomic, access); 3768 3769 switch (opcode) { 3770 case SpvOpAtomicLoad: 3771 atomic->num_components = glsl_get_vector_elements(deref_type); 3772 break; 3773 3774 case SpvOpAtomicStore: 3775 atomic->num_components = glsl_get_vector_elements(deref_type); 3776 nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1); 3777 atomic->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4])); 3778 break; 3779 3780 case SpvOpAtomicFlagClear: 3781 atomic->num_components = 1; 3782 nir_intrinsic_set_write_mask(atomic, 1); 3783 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32)); 3784 break; 3785 case SpvOpAtomicFlagTestAndSet: 3786 atomic->src[1] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, 0, 32)); 3787 atomic->src[2] = nir_src_for_ssa(nir_imm_intN_t(&b->nb, -1, 32)); 3788 break; 3789 case SpvOpAtomicExchange: 3790 case SpvOpAtomicCompareExchange: 3791 case SpvOpAtomicCompareExchangeWeak: 3792 case SpvOpAtomicIIncrement: 3793 case SpvOpAtomicIDecrement: 3794 case SpvOpAtomicIAdd: 3795 case SpvOpAtomicISub: 3796 case SpvOpAtomicSMin: 3797 case SpvOpAtomicUMin: 3798 case SpvOpAtomicSMax: 3799 case SpvOpAtomicUMax: 3800 case SpvOpAtomicAnd: 3801 case SpvOpAtomicOr: 3802 case SpvOpAtomicXor: 3803 case SpvOpAtomicFAddEXT: 3804 case SpvOpAtomicFMinEXT: 3805 case SpvOpAtomicFMaxEXT: 3806 fill_common_atomic_sources(b, opcode, w, &atomic->src[1]); 3807 break; 3808 3809 default: 3810 vtn_fail_with_opcode("Invalid SPIR-V atomic", opcode); 3811 } 3812 } 3813 3814 /* Atomic ordering operations will implicitly apply to the atomic operation 3815 * storage class, so include that too. 3816 */ 3817 semantics |= vtn_mode_to_memory_semantics(ptr->mode); 3818 3819 SpvMemorySemanticsMask before_semantics; 3820 SpvMemorySemanticsMask after_semantics; 3821 vtn_split_barrier_semantics(b, semantics, &before_semantics, &after_semantics); 3822 3823 if (before_semantics) 3824 vtn_emit_memory_barrier(b, scope, before_semantics); 3825 3826 if (opcode != SpvOpAtomicStore && opcode != SpvOpAtomicFlagClear) { 3827 struct vtn_type *type = vtn_get_type(b, w[1]); 3828 3829 if (opcode == SpvOpAtomicFlagTestAndSet) { 3830 /* map atomic flag to a 32-bit atomic integer. 
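          * OpAtomicFlagTestAndSet is implemented as an atomic_comp_swap of
          * 0 -> -1 (sources set up above); the old value is turned back into
          * a boolean with nir_i2b1 below.  OpAtomicFlagClear is just a store
          * of 0.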
          */
         nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                           1, 32, NULL);
      } else {
         nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                           glsl_get_vector_elements(type->type),
                           glsl_get_bit_size(type->type), NULL);

         vtn_push_nir_ssa(b, w[2], &atomic->dest.ssa);
      }
   }

   nir_builder_instr_insert(&b->nb, &atomic->instr);

   if (opcode == SpvOpAtomicFlagTestAndSet) {
      vtn_push_nir_ssa(b, w[2], nir_i2b1(&b->nb, &atomic->dest.ssa));
   }
   if (after_semantics)
      vtn_emit_memory_barrier(b, scope, after_semantics);
}

static nir_alu_instr *
create_vec(struct vtn_builder *b, unsigned num_components, unsigned bit_size)
{
   nir_op op = nir_op_vec(num_components);
   nir_alu_instr *vec = nir_alu_instr_create(b->shader, op);
   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
                     bit_size, NULL);
   vec->dest.write_mask = (1 << num_components) - 1;

   return vec;
}

struct vtn_ssa_value *
vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
{
   if (src->transposed)
      return src->transposed;

   struct vtn_ssa_value *dest =
      vtn_create_ssa_value(b, glsl_transposed_type(src->type));

   for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
      if (glsl_type_is_vector_or_scalar(src->type)) {
         dest->elems[i]->def = nir_channel(&b->nb, src->def, i);
      } else {
         unsigned cols = glsl_get_matrix_columns(src->type);
         nir_ssa_scalar srcs[NIR_MAX_MATRIX_COLUMNS];
         for (unsigned j = 0; j < cols; j++) {
            srcs[j] = nir_get_ssa_scalar(src->elems[j]->def, i);
         }
         dest->elems[i]->def = nir_vec_scalars(&b->nb, srcs, cols);
      }
   }

   dest->transposed = src;

   return dest;
}

static nir_ssa_def *
vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
                   nir_ssa_def *src0, nir_ssa_def *src1,
                   const uint32_t *indices)
{
   nir_alu_instr *vec = create_vec(b, num_components, src0->bit_size);

   for (unsigned i = 0; i < num_components; i++) {
      uint32_t index = indices[i];
      unsigned total_components = src0->num_components + src1->num_components;
      vtn_fail_if(index != 0xffffffff && index >= total_components,
                  "OpVectorShuffle: All Component literals must either be "
                  "FFFFFFFF or in [0, N - 1] (inclusive)");

      if (index == 0xffffffff) {
         vec->src[i].src =
            nir_src_for_ssa(nir_ssa_undef(&b->nb, 1, src0->bit_size));
      } else if (index < src0->num_components) {
         vec->src[i].src = nir_src_for_ssa(src0);
         vec->src[i].swizzle[0] = index;
      } else {
         vec->src[i].src = nir_src_for_ssa(src1);
         vec->src[i].swizzle[0] = index - src0->num_components;
      }
   }

   nir_builder_instr_insert(&b->nb, &vec->instr);

   return &vec->dest.dest.ssa;
}

/*
 * Concatenates a number of vectors/scalars together to produce a vector.
 */
static nir_ssa_def *
vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
                     unsigned num_srcs, nir_ssa_def **srcs)
{
   nir_alu_instr *vec = create_vec(b, num_components, srcs[0]->bit_size);

   /* From the SPIR-V 1.1 spec for OpCompositeConstruct:
    *
    *    "When constructing a vector, there must be at least two Constituent
    *    operands."
3934 */ 3935 vtn_assert(num_srcs >= 2); 3936 3937 unsigned dest_idx = 0; 3938 for (unsigned i = 0; i < num_srcs; i++) { 3939 nir_ssa_def *src = srcs[i]; 3940 vtn_assert(dest_idx + src->num_components <= num_components); 3941 for (unsigned j = 0; j < src->num_components; j++) { 3942 vec->src[dest_idx].src = nir_src_for_ssa(src); 3943 vec->src[dest_idx].swizzle[0] = j; 3944 dest_idx++; 3945 } 3946 } 3947 3948 /* From the SPIR-V 1.1 spec for OpCompositeConstruct: 3949 * 3950 * "When constructing a vector, the total number of components in all 3951 * the operands must equal the number of components in Result Type." 3952 */ 3953 vtn_assert(dest_idx == num_components); 3954 3955 nir_builder_instr_insert(&b->nb, &vec->instr); 3956 3957 return &vec->dest.dest.ssa; 3958} 3959 3960static struct vtn_ssa_value * 3961vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) 3962{ 3963 struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); 3964 dest->type = src->type; 3965 3966 if (glsl_type_is_vector_or_scalar(src->type)) { 3967 dest->def = src->def; 3968 } else { 3969 unsigned elems = glsl_get_length(src->type); 3970 3971 dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); 3972 for (unsigned i = 0; i < elems; i++) 3973 dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); 3974 } 3975 3976 return dest; 3977} 3978 3979static struct vtn_ssa_value * 3980vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, 3981 struct vtn_ssa_value *insert, const uint32_t *indices, 3982 unsigned num_indices) 3983{ 3984 struct vtn_ssa_value *dest = vtn_composite_copy(b, src); 3985 3986 struct vtn_ssa_value *cur = dest; 3987 unsigned i; 3988 for (i = 0; i < num_indices - 1; i++) { 3989 /* If we got a vector here, that means the next index will be trying to 3990 * dereference a scalar. 3991 */ 3992 vtn_fail_if(glsl_type_is_vector_or_scalar(cur->type), 3993 "OpCompositeInsert has too many indices."); 3994 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 3995 "All indices in an OpCompositeInsert must be in-bounds"); 3996 cur = cur->elems[indices[i]]; 3997 } 3998 3999 if (glsl_type_is_vector_or_scalar(cur->type)) { 4000 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type), 4001 "All indices in an OpCompositeInsert must be in-bounds"); 4002 4003 /* According to the SPIR-V spec, OpCompositeInsert may work down to 4004 * the component granularity. In that case, the last index will be 4005 * the index to insert the scalar into the vector. 4006 */ 4007 4008 cur->def = nir_vector_insert_imm(&b->nb, cur->def, insert->def, indices[i]); 4009 } else { 4010 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 4011 "All indices in an OpCompositeInsert must be in-bounds"); 4012 cur->elems[indices[i]] = insert; 4013 } 4014 4015 return dest; 4016} 4017 4018static struct vtn_ssa_value * 4019vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, 4020 const uint32_t *indices, unsigned num_indices) 4021{ 4022 struct vtn_ssa_value *cur = src; 4023 for (unsigned i = 0; i < num_indices; i++) { 4024 if (glsl_type_is_vector_or_scalar(cur->type)) { 4025 vtn_assert(i == num_indices - 1); 4026 vtn_fail_if(indices[i] >= glsl_get_vector_elements(cur->type), 4027 "All indices in an OpCompositeExtract must be in-bounds"); 4028 4029 /* According to the SPIR-V spec, OpCompositeExtract may work down to 4030 * the component granularity. The last index will be the index of the 4031 * vector to extract. 
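          * (That last index selects a single component via nir_channel()
          * below.)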
4032 */ 4033 4034 const struct glsl_type *scalar_type = 4035 glsl_scalar_type(glsl_get_base_type(cur->type)); 4036 struct vtn_ssa_value *ret = vtn_create_ssa_value(b, scalar_type); 4037 ret->def = nir_channel(&b->nb, cur->def, indices[i]); 4038 return ret; 4039 } else { 4040 vtn_fail_if(indices[i] >= glsl_get_length(cur->type), 4041 "All indices in an OpCompositeExtract must be in-bounds"); 4042 cur = cur->elems[indices[i]]; 4043 } 4044 } 4045 4046 return cur; 4047} 4048 4049static void 4050vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, 4051 const uint32_t *w, unsigned count) 4052{ 4053 struct vtn_type *type = vtn_get_type(b, w[1]); 4054 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, type->type); 4055 4056 switch (opcode) { 4057 case SpvOpVectorExtractDynamic: 4058 ssa->def = nir_vector_extract(&b->nb, vtn_get_nir_ssa(b, w[3]), 4059 vtn_get_nir_ssa(b, w[4])); 4060 break; 4061 4062 case SpvOpVectorInsertDynamic: 4063 ssa->def = nir_vector_insert(&b->nb, vtn_get_nir_ssa(b, w[3]), 4064 vtn_get_nir_ssa(b, w[4]), 4065 vtn_get_nir_ssa(b, w[5])); 4066 break; 4067 4068 case SpvOpVectorShuffle: 4069 ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type), 4070 vtn_get_nir_ssa(b, w[3]), 4071 vtn_get_nir_ssa(b, w[4]), 4072 w + 5); 4073 break; 4074 4075 case SpvOpCompositeConstruct: { 4076 unsigned elems = count - 3; 4077 assume(elems >= 1); 4078 if (glsl_type_is_vector_or_scalar(type->type)) { 4079 nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS]; 4080 for (unsigned i = 0; i < elems; i++) { 4081 srcs[i] = vtn_get_nir_ssa(b, w[3 + i]); 4082 vtn_assert(glsl_get_bit_size(type->type) == srcs[i]->bit_size); 4083 } 4084 ssa->def = 4085 vtn_vector_construct(b, glsl_get_vector_elements(type->type), 4086 elems, srcs); 4087 } else { 4088 ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); 4089 for (unsigned i = 0; i < elems; i++) 4090 ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); 4091 } 4092 break; 4093 } 4094 case SpvOpCompositeExtract: 4095 ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), 4096 w + 4, count - 4); 4097 break; 4098 4099 case SpvOpCompositeInsert: 4100 ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), 4101 vtn_ssa_value(b, w[3]), 4102 w + 5, count - 5); 4103 break; 4104 4105 case SpvOpCopyLogical: 4106 ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); 4107 break; 4108 case SpvOpCopyObject: 4109 vtn_copy_value(b, w[3], w[2]); 4110 return; 4111 4112 default: 4113 vtn_fail_with_opcode("unknown composite operation", opcode); 4114 } 4115 4116 vtn_push_ssa_value(b, w[2], ssa); 4117} 4118 4119void 4120vtn_emit_memory_barrier(struct vtn_builder *b, SpvScope scope, 4121 SpvMemorySemanticsMask semantics) 4122{ 4123 if (b->shader->options->use_scoped_barrier) { 4124 vtn_emit_scoped_memory_barrier(b, scope, semantics); 4125 return; 4126 } 4127 4128 static const SpvMemorySemanticsMask all_memory_semantics = 4129 SpvMemorySemanticsUniformMemoryMask | 4130 SpvMemorySemanticsWorkgroupMemoryMask | 4131 SpvMemorySemanticsAtomicCounterMemoryMask | 4132 SpvMemorySemanticsImageMemoryMask | 4133 SpvMemorySemanticsOutputMemoryMask; 4134 4135 /* If we're not actually doing a memory barrier, bail */ 4136 if (!(semantics & all_memory_semantics)) 4137 return; 4138 4139 /* GL and Vulkan don't have these */ 4140 vtn_assert(scope != SpvScopeCrossDevice); 4141 4142 if (scope == SpvScopeSubgroup) 4143 return; /* Nothing to do here */ 4144 4145 if (scope == SpvScopeWorkgroup) { 4146 nir_group_memory_barrier(&b->nb); 4147 return; 4148 } 4149 4150 /* There's only three scopes left */ 
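    * (Invocation, Device and QueueFamily), all of which are handled the same
    * way below.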
    */
   vtn_assert(scope == SpvScopeInvocation || scope == SpvScopeDevice ||
              scope == SpvScopeQueueFamily);

   /* Map the GLSL memoryBarrier() construct and any barriers with more than one
    * semantic to the corresponding NIR one.
    */
   if (util_bitcount(semantics & all_memory_semantics) > 1) {
      nir_memory_barrier(&b->nb);
      if (semantics & SpvMemorySemanticsOutputMemoryMask) {
         /* GLSL memoryBarrier() (and the corresponding NIR one) doesn't include
          * TCS outputs, so we have to emit its own intrinsic for that.  We
          * then need to emit another memory_barrier to prevent moving
          * non-output operations to before the tcs_patch barrier.
          */
         nir_memory_barrier_tcs_patch(&b->nb);
         nir_memory_barrier(&b->nb);
      }
      return;
   }

   /* Issue a more specific barrier */
   switch (semantics & all_memory_semantics) {
   case SpvMemorySemanticsUniformMemoryMask:
      nir_memory_barrier_buffer(&b->nb);
      break;
   case SpvMemorySemanticsWorkgroupMemoryMask:
      nir_memory_barrier_shared(&b->nb);
      break;
   case SpvMemorySemanticsAtomicCounterMemoryMask:
      nir_memory_barrier_atomic_counter(&b->nb);
      break;
   case SpvMemorySemanticsImageMemoryMask:
      nir_memory_barrier_image(&b->nb);
      break;
   case SpvMemorySemanticsOutputMemoryMask:
      if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL)
         nir_memory_barrier_tcs_patch(&b->nb);
      break;
   default:
      break;
   }
}

static void
vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
                   const uint32_t *w, UNUSED unsigned count)
{
   switch (opcode) {
   case SpvOpEmitVertex:
   case SpvOpEmitStreamVertex:
   case SpvOpEndPrimitive:
   case SpvOpEndStreamPrimitive: {
      unsigned stream = 0;
      if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
         stream = vtn_constant_uint(b, w[1]);

      switch (opcode) {
      case SpvOpEmitStreamVertex:
      case SpvOpEmitVertex:
         nir_emit_vertex(&b->nb, stream);
         break;
      case SpvOpEndPrimitive:
      case SpvOpEndStreamPrimitive:
         nir_end_primitive(&b->nb, stream);
         break;
      default:
         unreachable("Invalid opcode");
      }
      break;
   }

   case SpvOpMemoryBarrier: {
      SpvScope scope = vtn_constant_uint(b, w[1]);
      SpvMemorySemanticsMask semantics = vtn_constant_uint(b, w[2]);
      vtn_emit_memory_barrier(b, scope, semantics);
      return;
   }

   case SpvOpControlBarrier: {
      SpvScope execution_scope = vtn_constant_uint(b, w[1]);
      SpvScope memory_scope = vtn_constant_uint(b, w[2]);
      SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]);

      /* GLSLang, prior to commit 8297936dd6eb3, emitted OpControlBarrier with
       * memory semantics of None for GLSL barrier().
       * And before that, prior to c3f1cdfa, it emitted OpControlBarrier with
       * Device instead of Workgroup for the execution scope.
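       * The workaround below rewrites such a barrier into a Workgroup-scope
       * control barrier with acquire/release semantics on workgroup memory.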
4237 */ 4238 if (b->wa_glslang_cs_barrier && 4239 b->nb.shader->info.stage == MESA_SHADER_COMPUTE && 4240 (execution_scope == SpvScopeWorkgroup || 4241 execution_scope == SpvScopeDevice) && 4242 memory_semantics == SpvMemorySemanticsMaskNone) { 4243 execution_scope = SpvScopeWorkgroup; 4244 memory_scope = SpvScopeWorkgroup; 4245 memory_semantics = SpvMemorySemanticsAcquireReleaseMask | 4246 SpvMemorySemanticsWorkgroupMemoryMask; 4247 } 4248 4249 /* From the SPIR-V spec: 4250 * 4251 * "When used with the TessellationControl execution model, it also 4252 * implicitly synchronizes the Output Storage Class: Writes to Output 4253 * variables performed by any invocation executed prior to a 4254 * OpControlBarrier will be visible to any other invocation after 4255 * return from that OpControlBarrier." 4256 * 4257 * The same applies to VK_NV_mesh_shader. 4258 */ 4259 if (b->nb.shader->info.stage == MESA_SHADER_TESS_CTRL || 4260 b->nb.shader->info.stage == MESA_SHADER_TASK || 4261 b->nb.shader->info.stage == MESA_SHADER_MESH) { 4262 memory_semantics &= ~(SpvMemorySemanticsAcquireMask | 4263 SpvMemorySemanticsReleaseMask | 4264 SpvMemorySemanticsAcquireReleaseMask | 4265 SpvMemorySemanticsSequentiallyConsistentMask); 4266 memory_semantics |= SpvMemorySemanticsAcquireReleaseMask | 4267 SpvMemorySemanticsOutputMemoryMask; 4268 } 4269 4270 if (b->shader->options->use_scoped_barrier) { 4271 vtn_emit_scoped_control_barrier(b, execution_scope, memory_scope, 4272 memory_semantics); 4273 } else { 4274 vtn_emit_memory_barrier(b, memory_scope, memory_semantics); 4275 4276 if (execution_scope == SpvScopeWorkgroup) 4277 nir_control_barrier(&b->nb); 4278 } 4279 break; 4280 } 4281 4282 default: 4283 unreachable("unknown barrier instruction"); 4284 } 4285} 4286 4287static enum tess_primitive_mode 4288tess_primitive_mode_from_spv_execution_mode(struct vtn_builder *b, 4289 SpvExecutionMode mode) 4290{ 4291 switch (mode) { 4292 case SpvExecutionModeTriangles: 4293 return TESS_PRIMITIVE_TRIANGLES; 4294 case SpvExecutionModeQuads: 4295 return TESS_PRIMITIVE_QUADS; 4296 case SpvExecutionModeIsolines: 4297 return TESS_PRIMITIVE_ISOLINES; 4298 default: 4299 vtn_fail("Invalid tess primitive type: %s (%u)", 4300 spirv_executionmode_to_string(mode), mode); 4301 } 4302} 4303 4304static enum shader_prim 4305primitive_from_spv_execution_mode(struct vtn_builder *b, 4306 SpvExecutionMode mode) 4307{ 4308 switch (mode) { 4309 case SpvExecutionModeInputPoints: 4310 case SpvExecutionModeOutputPoints: 4311 return SHADER_PRIM_POINTS; 4312 case SpvExecutionModeInputLines: 4313 case SpvExecutionModeOutputLinesNV: 4314 return SHADER_PRIM_LINES; 4315 case SpvExecutionModeInputLinesAdjacency: 4316 return SHADER_PRIM_LINES_ADJACENCY; 4317 case SpvExecutionModeTriangles: 4318 case SpvExecutionModeOutputTrianglesNV: 4319 return SHADER_PRIM_TRIANGLES; 4320 case SpvExecutionModeInputTrianglesAdjacency: 4321 return SHADER_PRIM_TRIANGLES_ADJACENCY; 4322 case SpvExecutionModeQuads: 4323 return SHADER_PRIM_QUADS; 4324 case SpvExecutionModeOutputLineStrip: 4325 return SHADER_PRIM_LINE_STRIP; 4326 case SpvExecutionModeOutputTriangleStrip: 4327 return SHADER_PRIM_TRIANGLE_STRIP; 4328 default: 4329 vtn_fail("Invalid primitive type: %s (%u)", 4330 spirv_executionmode_to_string(mode), mode); 4331 } 4332} 4333 4334static unsigned 4335vertices_in_from_spv_execution_mode(struct vtn_builder *b, 4336 SpvExecutionMode mode) 4337{ 4338 switch (mode) { 4339 case SpvExecutionModeInputPoints: 4340 return 1; 4341 case SpvExecutionModeInputLines: 4342 return 2; 4343 
case SpvExecutionModeInputLinesAdjacency: 4344 return 4; 4345 case SpvExecutionModeTriangles: 4346 return 3; 4347 case SpvExecutionModeInputTrianglesAdjacency: 4348 return 6; 4349 default: 4350 vtn_fail("Invalid GS input mode: %s (%u)", 4351 spirv_executionmode_to_string(mode), mode); 4352 } 4353} 4354 4355static gl_shader_stage 4356stage_for_execution_model(struct vtn_builder *b, SpvExecutionModel model) 4357{ 4358 switch (model) { 4359 case SpvExecutionModelVertex: 4360 return MESA_SHADER_VERTEX; 4361 case SpvExecutionModelTessellationControl: 4362 return MESA_SHADER_TESS_CTRL; 4363 case SpvExecutionModelTessellationEvaluation: 4364 return MESA_SHADER_TESS_EVAL; 4365 case SpvExecutionModelGeometry: 4366 return MESA_SHADER_GEOMETRY; 4367 case SpvExecutionModelFragment: 4368 return MESA_SHADER_FRAGMENT; 4369 case SpvExecutionModelGLCompute: 4370 return MESA_SHADER_COMPUTE; 4371 case SpvExecutionModelKernel: 4372 return MESA_SHADER_KERNEL; 4373 case SpvExecutionModelRayGenerationKHR: 4374 return MESA_SHADER_RAYGEN; 4375 case SpvExecutionModelAnyHitKHR: 4376 return MESA_SHADER_ANY_HIT; 4377 case SpvExecutionModelClosestHitKHR: 4378 return MESA_SHADER_CLOSEST_HIT; 4379 case SpvExecutionModelMissKHR: 4380 return MESA_SHADER_MISS; 4381 case SpvExecutionModelIntersectionKHR: 4382 return MESA_SHADER_INTERSECTION; 4383 case SpvExecutionModelCallableKHR: 4384 return MESA_SHADER_CALLABLE; 4385 case SpvExecutionModelTaskNV: 4386 return MESA_SHADER_TASK; 4387 case SpvExecutionModelMeshNV: 4388 return MESA_SHADER_MESH; 4389 default: 4390 vtn_fail("Unsupported execution model: %s (%u)", 4391 spirv_executionmodel_to_string(model), model); 4392 } 4393} 4394 4395#define spv_check_supported(name, cap) do { \ 4396 if (!(b->options && b->options->caps.name)) \ 4397 vtn_warn("Unsupported SPIR-V capability: %s (%u)", \ 4398 spirv_capability_to_string(cap), cap); \ 4399 } while(0) 4400 4401 4402void 4403vtn_handle_entry_point(struct vtn_builder *b, const uint32_t *w, 4404 unsigned count) 4405{ 4406 struct vtn_value *entry_point = &b->values[w[2]]; 4407 /* Let this be a name label regardless */ 4408 unsigned name_words; 4409 entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); 4410 4411 if (strcmp(entry_point->name, b->entry_point_name) != 0 || 4412 stage_for_execution_model(b, w[1]) != b->entry_point_stage) 4413 return; 4414 4415 vtn_assert(b->entry_point == NULL); 4416 b->entry_point = entry_point; 4417 4418 /* Entry points enumerate which global variables are used. */ 4419 size_t start = 3 + name_words; 4420 b->interface_ids_count = count - start; 4421 b->interface_ids = ralloc_array(b, uint32_t, b->interface_ids_count); 4422 memcpy(b->interface_ids, &w[start], b->interface_ids_count * 4); 4423 qsort(b->interface_ids, b->interface_ids_count, 4, cmp_uint32_t); 4424} 4425 4426static bool 4427vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, 4428 const uint32_t *w, unsigned count) 4429{ 4430 switch (opcode) { 4431 case SpvOpSource: { 4432 const char *lang; 4433 switch (w[1]) { 4434 default: 4435 case SpvSourceLanguageUnknown: lang = "unknown"; break; 4436 case SpvSourceLanguageESSL: lang = "ESSL"; break; 4437 case SpvSourceLanguageGLSL: lang = "GLSL"; break; 4438 case SpvSourceLanguageOpenCL_C: lang = "OpenCL C"; break; 4439 case SpvSourceLanguageOpenCL_CPP: lang = "OpenCL C++"; break; 4440 case SpvSourceLanguageHLSL: lang = "HLSL"; break; 4441 } 4442 4443 uint32_t version = w[2]; 4444 4445 const char *file = 4446 (count > 3) ? 
vtn_value(b, w[3], vtn_value_type_string)->str : ""; 4447 4448 vtn_info("Parsing SPIR-V from %s %u source file %s", lang, version, file); 4449 4450 b->source_lang = w[1]; 4451 break; 4452 } 4453 4454 case SpvOpSourceExtension: 4455 case SpvOpSourceContinued: 4456 case SpvOpExtension: 4457 case SpvOpModuleProcessed: 4458 /* Unhandled, but these are for debug so that's ok. */ 4459 break; 4460 4461 case SpvOpCapability: { 4462 SpvCapability cap = w[1]; 4463 switch (cap) { 4464 case SpvCapabilityMatrix: 4465 case SpvCapabilityShader: 4466 case SpvCapabilityGeometry: 4467 case SpvCapabilityGeometryPointSize: 4468 case SpvCapabilityUniformBufferArrayDynamicIndexing: 4469 case SpvCapabilitySampledImageArrayDynamicIndexing: 4470 case SpvCapabilityStorageBufferArrayDynamicIndexing: 4471 case SpvCapabilityStorageImageArrayDynamicIndexing: 4472 case SpvCapabilityImageRect: 4473 case SpvCapabilitySampledRect: 4474 case SpvCapabilitySampled1D: 4475 case SpvCapabilityImage1D: 4476 case SpvCapabilitySampledCubeArray: 4477 case SpvCapabilityImageCubeArray: 4478 case SpvCapabilitySampledBuffer: 4479 case SpvCapabilityImageBuffer: 4480 case SpvCapabilityImageQuery: 4481 case SpvCapabilityDerivativeControl: 4482 case SpvCapabilityInterpolationFunction: 4483 case SpvCapabilityMultiViewport: 4484 case SpvCapabilitySampleRateShading: 4485 case SpvCapabilityClipDistance: 4486 case SpvCapabilityCullDistance: 4487 case SpvCapabilityInputAttachment: 4488 case SpvCapabilityImageGatherExtended: 4489 case SpvCapabilityStorageImageExtendedFormats: 4490 case SpvCapabilityVector16: 4491 case SpvCapabilityDotProduct: 4492 case SpvCapabilityDotProductInputAll: 4493 case SpvCapabilityDotProductInput4x8Bit: 4494 case SpvCapabilityDotProductInput4x8BitPacked: 4495 break; 4496 4497 case SpvCapabilityLinkage: 4498 if (!b->options->create_library) 4499 vtn_warn("Unsupported SPIR-V capability: %s", 4500 spirv_capability_to_string(cap)); 4501 spv_check_supported(linkage, cap); 4502 vtn_warn("The SPIR-V Linkage capability is not fully supported"); 4503 break; 4504 4505 case SpvCapabilitySparseResidency: 4506 spv_check_supported(sparse_residency, cap); 4507 break; 4508 4509 case SpvCapabilityMinLod: 4510 spv_check_supported(min_lod, cap); 4511 break; 4512 4513 case SpvCapabilityAtomicStorage: 4514 spv_check_supported(atomic_storage, cap); 4515 break; 4516 4517 case SpvCapabilityFloat64: 4518 spv_check_supported(float64, cap); 4519 break; 4520 case SpvCapabilityInt64: 4521 spv_check_supported(int64, cap); 4522 break; 4523 case SpvCapabilityInt16: 4524 spv_check_supported(int16, cap); 4525 break; 4526 case SpvCapabilityInt8: 4527 spv_check_supported(int8, cap); 4528 break; 4529 4530 case SpvCapabilityTransformFeedback: 4531 spv_check_supported(transform_feedback, cap); 4532 break; 4533 4534 case SpvCapabilityGeometryStreams: 4535 spv_check_supported(geometry_streams, cap); 4536 break; 4537 4538 case SpvCapabilityInt64Atomics: 4539 spv_check_supported(int64_atomics, cap); 4540 break; 4541 4542 case SpvCapabilityStorageImageMultisample: 4543 spv_check_supported(storage_image_ms, cap); 4544 break; 4545 4546 case SpvCapabilityAddresses: 4547 spv_check_supported(address, cap); 4548 break; 4549 4550 case SpvCapabilityKernel: 4551 case SpvCapabilityFloat16Buffer: 4552 spv_check_supported(kernel, cap); 4553 break; 4554 4555 case SpvCapabilityGenericPointer: 4556 spv_check_supported(generic_pointers, cap); 4557 break; 4558 4559 case SpvCapabilityImageBasic: 4560 spv_check_supported(kernel_image, cap); 4561 break; 4562 4563 case 
SpvCapabilityImageReadWrite: 4564 spv_check_supported(kernel_image_read_write, cap); 4565 break; 4566 4567 case SpvCapabilityLiteralSampler: 4568 spv_check_supported(literal_sampler, cap); 4569 break; 4570 4571 case SpvCapabilityImageMipmap: 4572 case SpvCapabilityPipes: 4573 case SpvCapabilityDeviceEnqueue: 4574 vtn_warn("Unsupported OpenCL-style SPIR-V capability: %s", 4575 spirv_capability_to_string(cap)); 4576 break; 4577 4578 case SpvCapabilityImageMSArray: 4579 spv_check_supported(image_ms_array, cap); 4580 break; 4581 4582 case SpvCapabilityTessellation: 4583 case SpvCapabilityTessellationPointSize: 4584 spv_check_supported(tessellation, cap); 4585 break; 4586 4587 case SpvCapabilityDrawParameters: 4588 spv_check_supported(draw_parameters, cap); 4589 break; 4590 4591 case SpvCapabilityStorageImageReadWithoutFormat: 4592 spv_check_supported(image_read_without_format, cap); 4593 break; 4594 4595 case SpvCapabilityStorageImageWriteWithoutFormat: 4596 spv_check_supported(image_write_without_format, cap); 4597 break; 4598 4599 case SpvCapabilityDeviceGroup: 4600 spv_check_supported(device_group, cap); 4601 break; 4602 4603 case SpvCapabilityMultiView: 4604 spv_check_supported(multiview, cap); 4605 break; 4606 4607 case SpvCapabilityGroupNonUniform: 4608 spv_check_supported(subgroup_basic, cap); 4609 break; 4610 4611 case SpvCapabilitySubgroupVoteKHR: 4612 case SpvCapabilityGroupNonUniformVote: 4613 spv_check_supported(subgroup_vote, cap); 4614 break; 4615 4616 case SpvCapabilitySubgroupBallotKHR: 4617 case SpvCapabilityGroupNonUniformBallot: 4618 spv_check_supported(subgroup_ballot, cap); 4619 break; 4620 4621 case SpvCapabilityGroupNonUniformShuffle: 4622 case SpvCapabilityGroupNonUniformShuffleRelative: 4623 spv_check_supported(subgroup_shuffle, cap); 4624 break; 4625 4626 case SpvCapabilityGroupNonUniformQuad: 4627 spv_check_supported(subgroup_quad, cap); 4628 break; 4629 4630 case SpvCapabilityGroupNonUniformArithmetic: 4631 case SpvCapabilityGroupNonUniformClustered: 4632 spv_check_supported(subgroup_arithmetic, cap); 4633 break; 4634 4635 case SpvCapabilityGroups: 4636 spv_check_supported(groups, cap); 4637 break; 4638 4639 case SpvCapabilitySubgroupDispatch: 4640 spv_check_supported(subgroup_dispatch, cap); 4641 /* Missing : 4642 * - SpvOpGetKernelLocalSizeForSubgroupCount 4643 * - SpvOpGetKernelMaxNumSubgroups 4644 * - SpvExecutionModeSubgroupsPerWorkgroup 4645 * - SpvExecutionModeSubgroupsPerWorkgroupId 4646 */ 4647 vtn_warn("Not fully supported capability: %s", 4648 spirv_capability_to_string(cap)); 4649 break; 4650 4651 case SpvCapabilityVariablePointersStorageBuffer: 4652 case SpvCapabilityVariablePointers: 4653 spv_check_supported(variable_pointers, cap); 4654 b->variable_pointers = true; 4655 break; 4656 4657 case SpvCapabilityStorageUniformBufferBlock16: 4658 case SpvCapabilityStorageUniform16: 4659 case SpvCapabilityStoragePushConstant16: 4660 case SpvCapabilityStorageInputOutput16: 4661 spv_check_supported(storage_16bit, cap); 4662 break; 4663 4664 case SpvCapabilityShaderLayer: 4665 case SpvCapabilityShaderViewportIndex: 4666 case SpvCapabilityShaderViewportIndexLayerEXT: 4667 spv_check_supported(shader_viewport_index_layer, cap); 4668 break; 4669 4670 case SpvCapabilityStorageBuffer8BitAccess: 4671 case SpvCapabilityUniformAndStorageBuffer8BitAccess: 4672 case SpvCapabilityStoragePushConstant8: 4673 spv_check_supported(storage_8bit, cap); 4674 break; 4675 4676 case SpvCapabilityShaderNonUniformEXT: 4677 spv_check_supported(descriptor_indexing, cap); 4678 break; 4679 
4680 case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT: 4681 case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT: 4682 case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT: 4683 spv_check_supported(descriptor_array_dynamic_indexing, cap); 4684 break; 4685 4686 case SpvCapabilityUniformBufferArrayNonUniformIndexingEXT: 4687 case SpvCapabilitySampledImageArrayNonUniformIndexingEXT: 4688 case SpvCapabilityStorageBufferArrayNonUniformIndexingEXT: 4689 case SpvCapabilityStorageImageArrayNonUniformIndexingEXT: 4690 case SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT: 4691 case SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT: 4692 case SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT: 4693 spv_check_supported(descriptor_array_non_uniform_indexing, cap); 4694 break; 4695 4696 case SpvCapabilityRuntimeDescriptorArrayEXT: 4697 spv_check_supported(runtime_descriptor_array, cap); 4698 break; 4699 4700 case SpvCapabilityStencilExportEXT: 4701 spv_check_supported(stencil_export, cap); 4702 break; 4703 4704 case SpvCapabilitySampleMaskPostDepthCoverage: 4705 spv_check_supported(post_depth_coverage, cap); 4706 break; 4707 4708 case SpvCapabilityDenormFlushToZero: 4709 case SpvCapabilityDenormPreserve: 4710 case SpvCapabilitySignedZeroInfNanPreserve: 4711 case SpvCapabilityRoundingModeRTE: 4712 case SpvCapabilityRoundingModeRTZ: 4713 spv_check_supported(float_controls, cap); 4714 break; 4715 4716 case SpvCapabilityPhysicalStorageBufferAddresses: 4717 spv_check_supported(physical_storage_buffer_address, cap); 4718 break; 4719 4720 case SpvCapabilityComputeDerivativeGroupQuadsNV: 4721 case SpvCapabilityComputeDerivativeGroupLinearNV: 4722 spv_check_supported(derivative_group, cap); 4723 break; 4724 4725 case SpvCapabilityFloat16: 4726 spv_check_supported(float16, cap); 4727 break; 4728 4729 case SpvCapabilityFragmentShaderSampleInterlockEXT: 4730 spv_check_supported(fragment_shader_sample_interlock, cap); 4731 break; 4732 4733 case SpvCapabilityFragmentShaderPixelInterlockEXT: 4734 spv_check_supported(fragment_shader_pixel_interlock, cap); 4735 break; 4736 4737 case SpvCapabilityDemoteToHelperInvocation: 4738 spv_check_supported(demote_to_helper_invocation, cap); 4739 b->uses_demote_to_helper_invocation = true; 4740 break; 4741 4742 case SpvCapabilityShaderClockKHR: 4743 spv_check_supported(shader_clock, cap); 4744 break; 4745 4746 case SpvCapabilityVulkanMemoryModel: 4747 spv_check_supported(vk_memory_model, cap); 4748 break; 4749 4750 case SpvCapabilityVulkanMemoryModelDeviceScope: 4751 spv_check_supported(vk_memory_model_device_scope, cap); 4752 break; 4753 4754 case SpvCapabilityImageReadWriteLodAMD: 4755 spv_check_supported(amd_image_read_write_lod, cap); 4756 break; 4757 4758 case SpvCapabilityIntegerFunctions2INTEL: 4759 spv_check_supported(integer_functions2, cap); 4760 break; 4761 4762 case SpvCapabilityFragmentMaskAMD: 4763 spv_check_supported(amd_fragment_mask, cap); 4764 break; 4765 4766 case SpvCapabilityImageGatherBiasLodAMD: 4767 spv_check_supported(amd_image_gather_bias_lod, cap); 4768 break; 4769 4770 case SpvCapabilityAtomicFloat16AddEXT: 4771 spv_check_supported(float16_atomic_add, cap); 4772 break; 4773 4774 case SpvCapabilityAtomicFloat32AddEXT: 4775 spv_check_supported(float32_atomic_add, cap); 4776 break; 4777 4778 case SpvCapabilityAtomicFloat64AddEXT: 4779 spv_check_supported(float64_atomic_add, cap); 4780 break; 4781 4782 case SpvCapabilitySubgroupShuffleINTEL: 4783 spv_check_supported(intel_subgroup_shuffle, cap); 4784 break; 4785 
4786 case SpvCapabilitySubgroupBufferBlockIOINTEL: 4787 spv_check_supported(intel_subgroup_buffer_block_io, cap); 4788 break; 4789 4790 case SpvCapabilityRayCullMaskKHR: 4791 spv_check_supported(ray_cull_mask, cap); 4792 break; 4793 4794 case SpvCapabilityRayTracingKHR: 4795 spv_check_supported(ray_tracing, cap); 4796 break; 4797 4798 case SpvCapabilityRayQueryKHR: 4799 spv_check_supported(ray_query, cap); 4800 break; 4801 4802 case SpvCapabilityRayTraversalPrimitiveCullingKHR: 4803 spv_check_supported(ray_traversal_primitive_culling, cap); 4804 break; 4805 4806 case SpvCapabilityInt64ImageEXT: 4807 spv_check_supported(image_atomic_int64, cap); 4808 break; 4809 4810 case SpvCapabilityFragmentShadingRateKHR: 4811 spv_check_supported(fragment_shading_rate, cap); 4812 break; 4813 4814 case SpvCapabilityWorkgroupMemoryExplicitLayoutKHR: 4815 spv_check_supported(workgroup_memory_explicit_layout, cap); 4816 break; 4817 4818 case SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: 4819 spv_check_supported(workgroup_memory_explicit_layout, cap); 4820 spv_check_supported(storage_8bit, cap); 4821 break; 4822 4823 case SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR: 4824 spv_check_supported(workgroup_memory_explicit_layout, cap); 4825 spv_check_supported(storage_16bit, cap); 4826 break; 4827 4828 case SpvCapabilityAtomicFloat16MinMaxEXT: 4829 spv_check_supported(float16_atomic_min_max, cap); 4830 break; 4831 4832 case SpvCapabilityAtomicFloat32MinMaxEXT: 4833 spv_check_supported(float32_atomic_min_max, cap); 4834 break; 4835 4836 case SpvCapabilityAtomicFloat64MinMaxEXT: 4837 spv_check_supported(float64_atomic_min_max, cap); 4838 break; 4839 4840 case SpvCapabilityMeshShadingNV: 4841 spv_check_supported(mesh_shading_nv, cap); 4842 break; 4843 4844 case SpvCapabilityPerViewAttributesNV: 4845 spv_check_supported(per_view_attributes_nv, cap); 4846 break; 4847 4848 case SpvCapabilityShaderViewportMaskNV: 4849 spv_check_supported(shader_viewport_mask_nv, cap); 4850 break; 4851 4852 default: 4853 vtn_fail("Unhandled capability: %s (%u)", 4854 spirv_capability_to_string(cap), cap); 4855 } 4856 break; 4857 } 4858 4859 case SpvOpExtInstImport: 4860 vtn_handle_extension(b, opcode, w, count); 4861 break; 4862 4863 case SpvOpMemoryModel: 4864 switch (w[1]) { 4865 case SpvAddressingModelPhysical32: 4866 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4867 "AddressingModelPhysical32 only supported for kernels"); 4868 b->shader->info.cs.ptr_size = 32; 4869 b->physical_ptrs = true; 4870 assert(nir_address_format_bit_size(b->options->global_addr_format) == 32); 4871 assert(nir_address_format_num_components(b->options->global_addr_format) == 1); 4872 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 32); 4873 assert(nir_address_format_num_components(b->options->shared_addr_format) == 1); 4874 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 32); 4875 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1); 4876 break; 4877 case SpvAddressingModelPhysical64: 4878 vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 4879 "AddressingModelPhysical64 only supported for kernels"); 4880 b->shader->info.cs.ptr_size = 64; 4881 b->physical_ptrs = true; 4882 assert(nir_address_format_bit_size(b->options->global_addr_format) == 64); 4883 assert(nir_address_format_num_components(b->options->global_addr_format) == 1); 4884 assert(nir_address_format_bit_size(b->options->shared_addr_format) == 64); 4885 
assert(nir_address_format_num_components(b->options->shared_addr_format) == 1); 4886 assert(nir_address_format_bit_size(b->options->constant_addr_format) == 64); 4887 assert(nir_address_format_num_components(b->options->constant_addr_format) == 1); 4888 break; 4889 case SpvAddressingModelLogical: 4890 vtn_fail_if(b->shader->info.stage == MESA_SHADER_KERNEL, 4891 "AddressingModelLogical only supported for shaders"); 4892 b->physical_ptrs = false; 4893 break; 4894 case SpvAddressingModelPhysicalStorageBuffer64: 4895 vtn_fail_if(!b->options || 4896 !b->options->caps.physical_storage_buffer_address, 4897 "AddressingModelPhysicalStorageBuffer64 not supported"); 4898 break; 4899 default: 4900 vtn_fail("Unknown addressing model: %s (%u)", 4901 spirv_addressingmodel_to_string(w[1]), w[1]); 4902 break; 4903 } 4904 4905 b->mem_model = w[2]; 4906 switch (w[2]) { 4907 case SpvMemoryModelSimple: 4908 case SpvMemoryModelGLSL450: 4909 case SpvMemoryModelOpenCL: 4910 break; 4911 case SpvMemoryModelVulkan: 4912 vtn_fail_if(!b->options->caps.vk_memory_model, 4913 "Vulkan memory model is unsupported by this driver"); 4914 break; 4915 default: 4916 vtn_fail("Unsupported memory model: %s", 4917 spirv_memorymodel_to_string(w[2])); 4918 break; 4919 } 4920 break; 4921 4922 case SpvOpEntryPoint: 4923 vtn_handle_entry_point(b, w, count); 4924 break; 4925 4926 case SpvOpString: 4927 vtn_push_value(b, w[1], vtn_value_type_string)->str = 4928 vtn_string_literal(b, &w[2], count - 2, NULL); 4929 break; 4930 4931 case SpvOpName: 4932 b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); 4933 break; 4934 4935 case SpvOpMemberName: 4936 case SpvOpExecutionMode: 4937 case SpvOpExecutionModeId: 4938 case SpvOpDecorationGroup: 4939 case SpvOpDecorate: 4940 case SpvOpDecorateId: 4941 case SpvOpMemberDecorate: 4942 case SpvOpGroupDecorate: 4943 case SpvOpGroupMemberDecorate: 4944 case SpvOpDecorateString: 4945 case SpvOpMemberDecorateString: 4946 vtn_handle_decoration(b, opcode, w, count); 4947 break; 4948 4949 case SpvOpExtInst: { 4950 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); 4951 if (val->ext_handler == vtn_handle_non_semantic_instruction) { 4952 /* NonSemantic extended instructions are acceptable in preamble. */ 4953 vtn_handle_non_semantic_instruction(b, w[4], w, count); 4954 return true; 4955 } else { 4956 return false; /* End of preamble. 
*/ 4957 } 4958 } 4959 4960 default: 4961 return false; /* End of preamble */ 4962 } 4963 4964 return true; 4965} 4966 4967static void 4968vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, 4969 const struct vtn_decoration *mode, UNUSED void *data) 4970{ 4971 vtn_assert(b->entry_point == entry_point); 4972 4973 switch(mode->exec_mode) { 4974 case SpvExecutionModeOriginUpperLeft: 4975 case SpvExecutionModeOriginLowerLeft: 4976 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4977 b->shader->info.fs.origin_upper_left = 4978 (mode->exec_mode == SpvExecutionModeOriginUpperLeft); 4979 break; 4980 4981 case SpvExecutionModeEarlyFragmentTests: 4982 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4983 b->shader->info.fs.early_fragment_tests = true; 4984 break; 4985 4986 case SpvExecutionModePostDepthCoverage: 4987 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4988 b->shader->info.fs.post_depth_coverage = true; 4989 break; 4990 4991 case SpvExecutionModeInvocations: 4992 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 4993 b->shader->info.gs.invocations = MAX2(1, mode->operands[0]); 4994 break; 4995 4996 case SpvExecutionModeDepthReplacing: 4997 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 4998 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; 4999 break; 5000 case SpvExecutionModeDepthGreater: 5001 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5002 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; 5003 break; 5004 case SpvExecutionModeDepthLess: 5005 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5006 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; 5007 break; 5008 case SpvExecutionModeDepthUnchanged: 5009 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5010 b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; 5011 break; 5012 5013 case SpvExecutionModeLocalSizeHint: 5014 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 5015 b->shader->info.cs.workgroup_size_hint[0] = mode->operands[0]; 5016 b->shader->info.cs.workgroup_size_hint[1] = mode->operands[1]; 5017 b->shader->info.cs.workgroup_size_hint[2] = mode->operands[2]; 5018 break; 5019 5020 case SpvExecutionModeLocalSize: 5021 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) { 5022 b->shader->info.workgroup_size[0] = mode->operands[0]; 5023 b->shader->info.workgroup_size[1] = mode->operands[1]; 5024 b->shader->info.workgroup_size[2] = mode->operands[2]; 5025 } else { 5026 vtn_fail("Execution mode LocalSize not supported in stage %s", 5027 _mesa_shader_stage_to_string(b->shader->info.stage)); 5028 } 5029 break; 5030 5031 case SpvExecutionModeOutputVertices: 5032 switch (b->shader->info.stage) { 5033 case MESA_SHADER_TESS_CTRL: 5034 case MESA_SHADER_TESS_EVAL: 5035 b->shader->info.tess.tcs_vertices_out = mode->operands[0]; 5036 break; 5037 case MESA_SHADER_GEOMETRY: 5038 b->shader->info.gs.vertices_out = mode->operands[0]; 5039 break; 5040 case MESA_SHADER_MESH: 5041 b->shader->info.mesh.max_vertices_out = mode->operands[0]; 5042 break; 5043 default: 5044 vtn_fail("Execution mode OutputVertices not supported in stage %s", 5045 _mesa_shader_stage_to_string(b->shader->info.stage)); 5046 break; 5047 } 5048 break; 5049 5050 case SpvExecutionModeInputPoints: 5051 case SpvExecutionModeInputLines: 5052 case SpvExecutionModeInputLinesAdjacency: 5053 case SpvExecutionModeTriangles: 5054 case SpvExecutionModeInputTrianglesAdjacency: 5055 case SpvExecutionModeQuads: 5056 case 
SpvExecutionModeIsolines: 5057 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5058 b->shader->info.stage == MESA_SHADER_TESS_EVAL) { 5059 b->shader->info.tess._primitive_mode = 5060 tess_primitive_mode_from_spv_execution_mode(b, mode->exec_mode); 5061 } else { 5062 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 5063 b->shader->info.gs.vertices_in = 5064 vertices_in_from_spv_execution_mode(b, mode->exec_mode); 5065 b->shader->info.gs.input_primitive = 5066 primitive_from_spv_execution_mode(b, mode->exec_mode); 5067 } 5068 break; 5069 5070 case SpvExecutionModeOutputPrimitivesNV: 5071 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH); 5072 b->shader->info.mesh.max_primitives_out = mode->operands[0]; 5073 break; 5074 5075 case SpvExecutionModeOutputLinesNV: 5076 case SpvExecutionModeOutputTrianglesNV: 5077 vtn_assert(b->shader->info.stage == MESA_SHADER_MESH); 5078 b->shader->info.mesh.primitive_type = 5079 primitive_from_spv_execution_mode(b, mode->exec_mode); 5080 break; 5081 5082 case SpvExecutionModeOutputPoints: { 5083 const unsigned primitive = 5084 primitive_from_spv_execution_mode(b, mode->exec_mode); 5085 5086 switch (b->shader->info.stage) { 5087 case MESA_SHADER_GEOMETRY: 5088 b->shader->info.gs.output_primitive = primitive; 5089 break; 5090 case MESA_SHADER_MESH: 5091 b->shader->info.mesh.primitive_type = primitive; 5092 break; 5093 default: 5094 vtn_fail("Execution mode OutputPoints not supported in stage %s", 5095 _mesa_shader_stage_to_string(b->shader->info.stage)); 5096 break; 5097 } 5098 break; 5099 } 5100 5101 case SpvExecutionModeOutputLineStrip: 5102 case SpvExecutionModeOutputTriangleStrip: 5103 vtn_assert(b->shader->info.stage == MESA_SHADER_GEOMETRY); 5104 b->shader->info.gs.output_primitive = 5105 primitive_from_spv_execution_mode(b, mode->exec_mode); 5106 break; 5107 5108 case SpvExecutionModeSpacingEqual: 5109 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5110 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5111 b->shader->info.tess.spacing = TESS_SPACING_EQUAL; 5112 break; 5113 case SpvExecutionModeSpacingFractionalEven: 5114 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5115 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5116 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN; 5117 break; 5118 case SpvExecutionModeSpacingFractionalOdd: 5119 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5120 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5121 b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD; 5122 break; 5123 case SpvExecutionModeVertexOrderCw: 5124 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5125 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5126 b->shader->info.tess.ccw = false; 5127 break; 5128 case SpvExecutionModeVertexOrderCcw: 5129 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5130 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5131 b->shader->info.tess.ccw = true; 5132 break; 5133 case SpvExecutionModePointMode: 5134 vtn_assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL || 5135 b->shader->info.stage == MESA_SHADER_TESS_EVAL); 5136 b->shader->info.tess.point_mode = true; 5137 break; 5138 5139 case SpvExecutionModePixelCenterInteger: 5140 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5141 b->shader->info.fs.pixel_center_integer = true; 5142 break; 5143 5144 case SpvExecutionModeXfb: 5145 b->shader->info.has_transform_feedback_varyings = true; 5146 break; 5147 5148 case SpvExecutionModeVecTypeHint: 5149 break; /* 
OpenCL */ 5150 5151 case SpvExecutionModeContractionOff: 5152 if (b->shader->info.stage != MESA_SHADER_KERNEL) 5153 vtn_warn("ExectionMode only allowed for CL-style kernels: %s", 5154 spirv_executionmode_to_string(mode->exec_mode)); 5155 else 5156 b->exact = true; 5157 break; 5158 5159 case SpvExecutionModeStencilRefReplacingEXT: 5160 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5161 break; 5162 5163 case SpvExecutionModeDerivativeGroupQuadsNV: 5164 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE); 5165 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_QUADS; 5166 break; 5167 5168 case SpvExecutionModeDerivativeGroupLinearNV: 5169 vtn_assert(b->shader->info.stage == MESA_SHADER_COMPUTE); 5170 b->shader->info.cs.derivative_group = DERIVATIVE_GROUP_LINEAR; 5171 break; 5172 5173 case SpvExecutionModePixelInterlockOrderedEXT: 5174 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5175 b->shader->info.fs.pixel_interlock_ordered = true; 5176 break; 5177 5178 case SpvExecutionModePixelInterlockUnorderedEXT: 5179 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5180 b->shader->info.fs.pixel_interlock_unordered = true; 5181 break; 5182 5183 case SpvExecutionModeSampleInterlockOrderedEXT: 5184 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5185 b->shader->info.fs.sample_interlock_ordered = true; 5186 break; 5187 5188 case SpvExecutionModeSampleInterlockUnorderedEXT: 5189 vtn_assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); 5190 b->shader->info.fs.sample_interlock_unordered = true; 5191 break; 5192 5193 case SpvExecutionModeDenormPreserve: 5194 case SpvExecutionModeDenormFlushToZero: 5195 case SpvExecutionModeSignedZeroInfNanPreserve: 5196 case SpvExecutionModeRoundingModeRTE: 5197 case SpvExecutionModeRoundingModeRTZ: { 5198 unsigned execution_mode = 0; 5199 switch (mode->exec_mode) { 5200 case SpvExecutionModeDenormPreserve: 5201 switch (mode->operands[0]) { 5202 case 16: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP16; break; 5203 case 32: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP32; break; 5204 case 64: execution_mode = FLOAT_CONTROLS_DENORM_PRESERVE_FP64; break; 5205 default: vtn_fail("Floating point type not supported"); 5206 } 5207 break; 5208 case SpvExecutionModeDenormFlushToZero: 5209 switch (mode->operands[0]) { 5210 case 16: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16; break; 5211 case 32: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32; break; 5212 case 64: execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64; break; 5213 default: vtn_fail("Floating point type not supported"); 5214 } 5215 break; 5216 case SpvExecutionModeSignedZeroInfNanPreserve: 5217 switch (mode->operands[0]) { 5218 case 16: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16; break; 5219 case 32: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32; break; 5220 case 64: execution_mode = FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64; break; 5221 default: vtn_fail("Floating point type not supported"); 5222 } 5223 break; 5224 case SpvExecutionModeRoundingModeRTE: 5225 switch (mode->operands[0]) { 5226 case 16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16; break; 5227 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32; break; 5228 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64; break; 5229 default: vtn_fail("Floating point type not supported"); 5230 } 5231 break; 5232 case SpvExecutionModeRoundingModeRTZ: 5233 switch (mode->operands[0]) { 5234 case 
16: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16; break; 5235 case 32: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32; break; 5236 case 64: execution_mode = FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; break; 5237 default: vtn_fail("Floating point type not supported"); 5238 } 5239 break; 5240 default: 5241 break; 5242 } 5243 5244 b->shader->info.float_controls_execution_mode |= execution_mode; 5245 5246 for (unsigned bit_size = 16; bit_size <= 64; bit_size *= 2) { 5247 vtn_fail_if(nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, bit_size) && 5248 nir_is_denorm_preserve(b->shader->info.float_controls_execution_mode, bit_size), 5249 "Cannot flush to zero and preserve denorms for the same bit size."); 5250 vtn_fail_if(nir_is_rounding_mode_rtne(b->shader->info.float_controls_execution_mode, bit_size) && 5251 nir_is_rounding_mode_rtz(b->shader->info.float_controls_execution_mode, bit_size), 5252 "Cannot set rounding mode to RTNE and RTZ for the same bit size."); 5253 } 5254 break; 5255 } 5256 5257 case SpvExecutionModeLocalSizeId: 5258 case SpvExecutionModeLocalSizeHintId: 5259 /* Handled later by vtn_handle_execution_mode_id(). */ 5260 break; 5261 5262 case SpvExecutionModeSubgroupSize: 5263 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 5264 vtn_assert(b->shader->info.subgroup_size == SUBGROUP_SIZE_VARYING); 5265 b->shader->info.subgroup_size = mode->operands[0]; 5266 break; 5267 5268 case SpvExecutionModeSubgroupUniformControlFlowKHR: 5269 /* There's no corresponding SPIR-V capability, so check here. */ 5270 vtn_fail_if(!b->options->caps.subgroup_uniform_control_flow, 5271 "SpvExecutionModeSubgroupUniformControlFlowKHR not supported."); 5272 break; 5273 5274 default: 5275 vtn_fail("Unhandled execution mode: %s (%u)", 5276 spirv_executionmode_to_string(mode->exec_mode), 5277 mode->exec_mode); 5278 } 5279} 5280 5281static void 5282vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_point, 5283 const struct vtn_decoration *mode, UNUSED void *data) 5284{ 5285 5286 vtn_assert(b->entry_point == entry_point); 5287 5288 switch (mode->exec_mode) { 5289 case SpvExecutionModeLocalSizeId: 5290 if (gl_shader_stage_uses_workgroup(b->shader->info.stage)) { 5291 b->shader->info.workgroup_size[0] = vtn_constant_uint(b, mode->operands[0]); 5292 b->shader->info.workgroup_size[1] = vtn_constant_uint(b, mode->operands[1]); 5293 b->shader->info.workgroup_size[2] = vtn_constant_uint(b, mode->operands[2]); 5294 } else { 5295 vtn_fail("Execution mode LocalSizeId not supported in stage %s", 5296 _mesa_shader_stage_to_string(b->shader->info.stage)); 5297 } 5298 break; 5299 5300 case SpvExecutionModeLocalSizeHintId: 5301 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 5302 b->shader->info.cs.workgroup_size_hint[0] = vtn_constant_uint(b, mode->operands[0]); 5303 b->shader->info.cs.workgroup_size_hint[1] = vtn_constant_uint(b, mode->operands[1]); 5304 b->shader->info.cs.workgroup_size_hint[2] = vtn_constant_uint(b, mode->operands[2]); 5305 break; 5306 5307 default: 5308 /* Nothing to do. Literal execution modes already handled by 5309 * vtn_handle_execution_mode(). 
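 *
 * (The <id>-operand modes handled explicitly above, LocalSizeId and
 * LocalSizeHintId, are the reason this second pass exists: their operands
 * name constants that are only available once the preamble has been
 * parsed, so they cannot be resolved in the literal-mode pass.)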
*/ 5310 break; 5311 } 5312} 5313 5314static bool 5315vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, 5316 const uint32_t *w, unsigned count) 5317{ 5318 vtn_set_instruction_result_type(b, opcode, w, count); 5319 5320 switch (opcode) { 5321 case SpvOpSource: 5322 case SpvOpSourceContinued: 5323 case SpvOpSourceExtension: 5324 case SpvOpExtension: 5325 case SpvOpCapability: 5326 case SpvOpExtInstImport: 5327 case SpvOpMemoryModel: 5328 case SpvOpEntryPoint: 5329 case SpvOpExecutionMode: 5330 case SpvOpString: 5331 case SpvOpName: 5332 case SpvOpMemberName: 5333 case SpvOpDecorationGroup: 5334 case SpvOpDecorate: 5335 case SpvOpDecorateId: 5336 case SpvOpMemberDecorate: 5337 case SpvOpGroupDecorate: 5338 case SpvOpGroupMemberDecorate: 5339 case SpvOpDecorateString: 5340 case SpvOpMemberDecorateString: 5341 vtn_fail("Invalid opcode types and variables section"); 5342 break; 5343 5344 case SpvOpTypeVoid: 5345 case SpvOpTypeBool: 5346 case SpvOpTypeInt: 5347 case SpvOpTypeFloat: 5348 case SpvOpTypeVector: 5349 case SpvOpTypeMatrix: 5350 case SpvOpTypeImage: 5351 case SpvOpTypeSampler: 5352 case SpvOpTypeSampledImage: 5353 case SpvOpTypeArray: 5354 case SpvOpTypeRuntimeArray: 5355 case SpvOpTypeStruct: 5356 case SpvOpTypeOpaque: 5357 case SpvOpTypePointer: 5358 case SpvOpTypeForwardPointer: 5359 case SpvOpTypeFunction: 5360 case SpvOpTypeEvent: 5361 case SpvOpTypeDeviceEvent: 5362 case SpvOpTypeReserveId: 5363 case SpvOpTypeQueue: 5364 case SpvOpTypePipe: 5365 case SpvOpTypeAccelerationStructureKHR: 5366 case SpvOpTypeRayQueryKHR: 5367 vtn_handle_type(b, opcode, w, count); 5368 break; 5369 5370 case SpvOpConstantTrue: 5371 case SpvOpConstantFalse: 5372 case SpvOpConstant: 5373 case SpvOpConstantComposite: 5374 case SpvOpConstantNull: 5375 case SpvOpSpecConstantTrue: 5376 case SpvOpSpecConstantFalse: 5377 case SpvOpSpecConstant: 5378 case SpvOpSpecConstantComposite: 5379 case SpvOpSpecConstantOp: 5380 vtn_handle_constant(b, opcode, w, count); 5381 break; 5382 5383 case SpvOpUndef: 5384 case SpvOpVariable: 5385 case SpvOpConstantSampler: 5386 vtn_handle_variables(b, opcode, w, count); 5387 break; 5388 5389 case SpvOpExtInst: { 5390 struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); 5391 /* NonSemantic extended instructions are acceptable in preamble, others 5392 * will indicate the end of preamble. 5393 */ 5394 return val->ext_handler == vtn_handle_non_semantic_instruction; 5395 } 5396 5397 default: 5398 return false; /* End of preamble */ 5399 } 5400 5401 return true; 5402} 5403 5404static struct vtn_ssa_value * 5405vtn_nir_select(struct vtn_builder *b, struct vtn_ssa_value *src0, 5406 struct vtn_ssa_value *src1, struct vtn_ssa_value *src2) 5407{ 5408 struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); 5409 dest->type = src1->type; 5410 5411 if (glsl_type_is_vector_or_scalar(src1->type)) { 5412 dest->def = nir_bcsel(&b->nb, src0->def, src1->def, src2->def); 5413 } else { 5414 unsigned elems = glsl_get_length(src1->type); 5415 5416 dest->elems = ralloc_array(b, struct vtn_ssa_value *, elems); 5417 for (unsigned i = 0; i < elems; i++) { 5418 dest->elems[i] = vtn_nir_select(b, src0, 5419 src1->elems[i], src2->elems[i]); 5420 } 5421 } 5422 5423 return dest; 5424} 5425 5426static void 5427vtn_handle_select(struct vtn_builder *b, SpvOp opcode, 5428 const uint32_t *w, unsigned count) 5429{ 5430 /* Handle OpSelect up-front here because it needs to be able to handle 5431 * pointers and not just regular vectors and scalars. 
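 *
 * For composite result types, the recursion in vtn_nir_select() above
 * lowers the select to one nir_bcsel per leaf vector or scalar, all keyed
 * off the same condition.  For pointer result types the select operates on
 * the pointer's NIR-level SSA value, which is why the result type must
 * have actual storage (checked below).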
5432 */ 5433 struct vtn_value *res_val = vtn_untyped_value(b, w[2]); 5434 struct vtn_value *cond_val = vtn_untyped_value(b, w[3]); 5435 struct vtn_value *obj1_val = vtn_untyped_value(b, w[4]); 5436 struct vtn_value *obj2_val = vtn_untyped_value(b, w[5]); 5437 5438 vtn_fail_if(obj1_val->type != res_val->type || 5439 obj2_val->type != res_val->type, 5440 "Object types must match the result type in OpSelect"); 5441 5442 vtn_fail_if((cond_val->type->base_type != vtn_base_type_scalar && 5443 cond_val->type->base_type != vtn_base_type_vector) || 5444 !glsl_type_is_boolean(cond_val->type->type), 5445 "OpSelect must have either a vector of booleans or " 5446 "a boolean as Condition type"); 5447 5448 vtn_fail_if(cond_val->type->base_type == vtn_base_type_vector && 5449 (res_val->type->base_type != vtn_base_type_vector || 5450 res_val->type->length != cond_val->type->length), 5451 "When Condition type in OpSelect is a vector, the Result " 5452 "type must be a vector of the same length"); 5453 5454 switch (res_val->type->base_type) { 5455 case vtn_base_type_scalar: 5456 case vtn_base_type_vector: 5457 case vtn_base_type_matrix: 5458 case vtn_base_type_array: 5459 case vtn_base_type_struct: 5460 /* OK. */ 5461 break; 5462 case vtn_base_type_pointer: 5463 /* We need to have actual storage for pointer types. */ 5464 vtn_fail_if(res_val->type->type == NULL, 5465 "Invalid pointer result type for OpSelect"); 5466 break; 5467 default: 5468 vtn_fail("Result type of OpSelect must be a scalar, composite, or pointer"); 5469 } 5470 5471 vtn_push_ssa_value(b, w[2], 5472 vtn_nir_select(b, vtn_ssa_value(b, w[3]), 5473 vtn_ssa_value(b, w[4]), 5474 vtn_ssa_value(b, w[5]))); 5475} 5476 5477static void 5478vtn_handle_ptr(struct vtn_builder *b, SpvOp opcode, 5479 const uint32_t *w, unsigned count) 5480{ 5481 struct vtn_type *type1 = vtn_get_value_type(b, w[3]); 5482 struct vtn_type *type2 = vtn_get_value_type(b, w[4]); 5483 vtn_fail_if(type1->base_type != vtn_base_type_pointer || 5484 type2->base_type != vtn_base_type_pointer, 5485 "%s operands must have pointer types", 5486 spirv_op_to_string(opcode)); 5487 vtn_fail_if(type1->storage_class != type2->storage_class, 5488 "%s operands must have the same storage class", 5489 spirv_op_to_string(opcode)); 5490 5491 struct vtn_type *vtn_type = vtn_get_type(b, w[1]); 5492 const struct glsl_type *type = vtn_type->type; 5493 5494 nir_address_format addr_format = vtn_mode_to_address_format( 5495 b, vtn_storage_class_to_mode(b, type1->storage_class, NULL, NULL)); 5496 5497 nir_ssa_def *def; 5498 5499 switch (opcode) { 5500 case SpvOpPtrDiff: { 5501 /* OpPtrDiff returns the difference in number of elements (not byte offset). 
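 *
 * For example (illustrative numbers): two pointers into a uint array that
 * are 32 bytes apart produce a byte difference of 32, which is divided by
 * the 4-byte element size to give 8 and then converted to the bit size of
 * the result type.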
*/ 5502 unsigned elem_size, elem_align; 5503 glsl_get_natural_size_align_bytes(type1->deref->type, 5504 &elem_size, &elem_align); 5505 5506 def = nir_build_addr_isub(&b->nb, 5507 vtn_get_nir_ssa(b, w[3]), 5508 vtn_get_nir_ssa(b, w[4]), 5509 addr_format); 5510 def = nir_idiv(&b->nb, def, nir_imm_intN_t(&b->nb, elem_size, def->bit_size)); 5511 def = nir_i2i(&b->nb, def, glsl_get_bit_size(type)); 5512 break; 5513 } 5514 5515 case SpvOpPtrEqual: 5516 case SpvOpPtrNotEqual: { 5517 def = nir_build_addr_ieq(&b->nb, 5518 vtn_get_nir_ssa(b, w[3]), 5519 vtn_get_nir_ssa(b, w[4]), 5520 addr_format); 5521 if (opcode == SpvOpPtrNotEqual) 5522 def = nir_inot(&b->nb, def); 5523 break; 5524 } 5525 5526 default: 5527 unreachable("Invalid ptr operation"); 5528 } 5529 5530 vtn_push_nir_ssa(b, w[2], def); 5531} 5532 5533static void 5534vtn_handle_ray_intrinsic(struct vtn_builder *b, SpvOp opcode, 5535 const uint32_t *w, unsigned count) 5536{ 5537 nir_intrinsic_instr *intrin; 5538 5539 switch (opcode) { 5540 case SpvOpTraceNV: 5541 case SpvOpTraceRayKHR: { 5542 intrin = nir_intrinsic_instr_create(b->nb.shader, 5543 nir_intrinsic_trace_ray); 5544 5545 /* The sources are in the same order in the NIR intrinsic */ 5546 for (unsigned i = 0; i < 10; i++) 5547 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def); 5548 5549 nir_deref_instr *payload; 5550 if (opcode == SpvOpTraceNV) 5551 payload = vtn_get_call_payload_for_location(b, w[11]); 5552 else 5553 payload = vtn_nir_deref(b, w[11]); 5554 intrin->src[10] = nir_src_for_ssa(&payload->dest.ssa); 5555 nir_builder_instr_insert(&b->nb, &intrin->instr); 5556 break; 5557 } 5558 5559 case SpvOpReportIntersectionKHR: { 5560 intrin = nir_intrinsic_instr_create(b->nb.shader, 5561 nir_intrinsic_report_ray_intersection); 5562 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); 5563 intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); 5564 nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 1, NULL); 5565 nir_builder_instr_insert(&b->nb, &intrin->instr); 5566 vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa); 5567 break; 5568 } 5569 5570 case SpvOpIgnoreIntersectionNV: 5571 intrin = nir_intrinsic_instr_create(b->nb.shader, 5572 nir_intrinsic_ignore_ray_intersection); 5573 nir_builder_instr_insert(&b->nb, &intrin->instr); 5574 break; 5575 5576 case SpvOpTerminateRayNV: 5577 intrin = nir_intrinsic_instr_create(b->nb.shader, 5578 nir_intrinsic_terminate_ray); 5579 nir_builder_instr_insert(&b->nb, &intrin->instr); 5580 break; 5581 5582 case SpvOpExecuteCallableNV: 5583 case SpvOpExecuteCallableKHR: { 5584 intrin = nir_intrinsic_instr_create(b->nb.shader, 5585 nir_intrinsic_execute_callable); 5586 intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); 5587 nir_deref_instr *payload; 5588 if (opcode == SpvOpExecuteCallableNV) 5589 payload = vtn_get_call_payload_for_location(b, w[2]); 5590 else 5591 payload = vtn_nir_deref(b, w[2]); 5592 intrin->src[1] = nir_src_for_ssa(&payload->dest.ssa); 5593 nir_builder_instr_insert(&b->nb, &intrin->instr); 5594 break; 5595 } 5596 5597 default: 5598 vtn_fail_with_opcode("Unhandled opcode", opcode); 5599 } 5600} 5601 5602static void 5603vtn_handle_write_packed_primitive_indices(struct vtn_builder *b, SpvOp opcode, 5604 const uint32_t *w, unsigned count) 5605{ 5606 vtn_assert(opcode == SpvOpWritePackedPrimitiveIndices4x8NV); 5607 5608 /* TODO(mesh): Use or create a primitive that allow the unpacking to 5609 * happen in the backend. What we have here is functional but too 5610 * blunt. 
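 *
 * For example (illustrative values): an Index Offset of 4 and a Packed
 * Indices word of 0x03020100 become four 8-bit extracts and four stores
 * below, writing 0, 1, 2 and 3 into gl_PrimitiveIndicesNV[4..7].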
5611 */ 5612 5613 struct vtn_type *offset_type = vtn_get_value_type(b, w[1]); 5614 vtn_fail_if(offset_type->base_type != vtn_base_type_scalar || 5615 offset_type->type != glsl_uint_type(), 5616 "Index Offset type of OpWritePackedPrimitiveIndices4x8NV " 5617 "must be an OpTypeInt with 32-bit Width and 0 Signedness."); 5618 5619 struct vtn_type *packed_type = vtn_get_value_type(b, w[2]); 5620 vtn_fail_if(packed_type->base_type != vtn_base_type_scalar || 5621 packed_type->type != glsl_uint_type(), 5622 "Packed Indices type of OpWritePackedPrimitiveIndices4x8NV " 5623 "must be an OpTypeInt with 32-bit Width and 0 Signedness."); 5624 5625 nir_deref_instr *indices = NULL; 5626 nir_foreach_variable_with_modes(var, b->nb.shader, nir_var_shader_out) { 5627 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) { 5628 indices = nir_build_deref_var(&b->nb, var); 5629 break; 5630 } 5631 } 5632 5633 /* It may be the case that the variable is not present in the 5634 * entry point interface list. 5635 * 5636 * See https://github.com/KhronosGroup/SPIRV-Registry/issues/104. 5637 */ 5638 5639 if (!indices) { 5640 unsigned vertices_per_prim = 5641 num_mesh_vertices_per_primitive(b->shader->info.mesh.primitive_type); 5642 unsigned max_prim_indices = 5643 vertices_per_prim * b->shader->info.mesh.max_primitives_out; 5644 const struct glsl_type *t = 5645 glsl_array_type(glsl_uint_type(), max_prim_indices, 0); 5646 nir_variable *var = 5647 nir_variable_create(b->shader, nir_var_shader_out, t, 5648 "gl_PrimitiveIndicesNV"); 5649 5650 var->data.location = VARYING_SLOT_PRIMITIVE_INDICES; 5651 var->data.interpolation = INTERP_MODE_NONE; 5652 indices = nir_build_deref_var(&b->nb, var); 5653 } 5654 5655 nir_ssa_def *offset = vtn_get_nir_ssa(b, w[1]); 5656 nir_ssa_def *packed = vtn_get_nir_ssa(b, w[2]); 5657 nir_ssa_def *unpacked = nir_unpack_bits(&b->nb, packed, 8); 5658 for (int i = 0; i < 4; i++) { 5659 nir_deref_instr *offset_deref = 5660 nir_build_deref_array(&b->nb, indices, 5661 nir_iadd_imm(&b->nb, offset, i)); 5662 nir_ssa_def *val = nir_u2u32(&b->nb, nir_channel(&b->nb, unpacked, i)); 5663 5664 nir_store_deref(&b->nb, offset_deref, val, 0x1); 5665 } 5666} 5667 5668struct ray_query_value { 5669 nir_ray_query_value nir_value; 5670 const struct glsl_type *glsl_type; 5671}; 5672 5673static struct ray_query_value 5674spirv_to_nir_type_ray_query_intrinsic(struct vtn_builder *b, 5675 SpvOp opcode) 5676{ 5677 switch (opcode) { 5678#define CASE(_spv, _nir, _type) case SpvOpRayQueryGet##_spv: \ 5679 return (struct ray_query_value) { .nir_value = nir_ray_query_value_##_nir, .glsl_type = _type } 5680 CASE(RayTMinKHR, tmin, glsl_floatN_t_type(32)); 5681 CASE(RayFlagsKHR, flags, glsl_uint_type()); 5682 CASE(WorldRayDirectionKHR, world_ray_direction, glsl_vec_type(3)); 5683 CASE(WorldRayOriginKHR, world_ray_origin, glsl_vec_type(3)); 5684 CASE(IntersectionTypeKHR, intersection_type, glsl_uint_type()); 5685 CASE(IntersectionTKHR, intersection_t, glsl_floatN_t_type(32)); 5686 CASE(IntersectionInstanceCustomIndexKHR, intersection_instance_custom_index, glsl_int_type()); 5687 CASE(IntersectionInstanceIdKHR, intersection_instance_id, glsl_int_type()); 5688 CASE(IntersectionInstanceShaderBindingTableRecordOffsetKHR, intersection_instance_sbt_index, glsl_uint_type()); 5689 CASE(IntersectionGeometryIndexKHR, intersection_geometry_index, glsl_int_type()); 5690 CASE(IntersectionPrimitiveIndexKHR, intersection_primitive_index, glsl_int_type()); 5691 CASE(IntersectionBarycentricsKHR, intersection_barycentrics, glsl_vec_type(2)); 5692 
CASE(IntersectionFrontFaceKHR, intersection_front_face, glsl_bool_type()); 5693 CASE(IntersectionCandidateAABBOpaqueKHR, intersection_candidate_aabb_opaque, glsl_bool_type()); 5694 CASE(IntersectionObjectToWorldKHR, intersection_object_to_world, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4)); 5695 CASE(IntersectionWorldToObjectKHR, intersection_world_to_object, glsl_matrix_type(glsl_get_base_type(glsl_float_type()), 3, 4)); 5696 CASE(IntersectionObjectRayOriginKHR, intersection_object_ray_origin, glsl_vec_type(3)); 5697 CASE(IntersectionObjectRayDirectionKHR, intersection_object_ray_direction, glsl_vec_type(3)); 5698#undef CASE 5699 default: 5700 vtn_fail_with_opcode("Unhandled opcode", opcode); 5701 } 5702} 5703 5704static void 5705ray_query_load_intrinsic_create(struct vtn_builder *b, SpvOp opcode, 5706 const uint32_t *w, nir_ssa_def *src0, 5707 nir_ssa_def *src1) 5708{ 5709 struct ray_query_value value = 5710 spirv_to_nir_type_ray_query_intrinsic(b, opcode); 5711 5712 if (glsl_type_is_matrix(value.glsl_type)) { 5713 const struct glsl_type *elem_type = glsl_get_array_element(value.glsl_type); 5714 const unsigned elems = glsl_get_length(value.glsl_type); 5715 5716 struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, value.glsl_type); 5717 for (unsigned i = 0; i < elems; i++) { 5718 ssa->elems[i]->def = 5719 nir_build_rq_load(&b->nb, 5720 glsl_get_vector_elements(elem_type), 5721 glsl_get_bit_size(elem_type), 5722 src0, src1, 5723 .base = value.nir_value, 5724 .column = i); 5725 } 5726 5727 vtn_push_ssa_value(b, w[2], ssa); 5728 } else { 5729 assert(glsl_type_is_vector_or_scalar(value.glsl_type)); 5730 5731 vtn_push_nir_ssa(b, w[2], 5732 nir_rq_load(&b->nb, 5733 glsl_get_vector_elements(value.glsl_type), 5734 glsl_get_bit_size(value.glsl_type), 5735 src0, src1, 5736 .base = value.nir_value)); 5737 } 5738} 5739 5740static void 5741vtn_handle_ray_query_intrinsic(struct vtn_builder *b, SpvOp opcode, 5742 const uint32_t *w, unsigned count) 5743{ 5744 switch (opcode) { 5745 case SpvOpRayQueryInitializeKHR: { 5746 nir_intrinsic_instr *intrin = 5747 nir_intrinsic_instr_create(b->nb.shader, 5748 nir_intrinsic_rq_initialize); 5749 /* The sources are in the same order in the NIR intrinsic */ 5750 for (unsigned i = 0; i < 8; i++) 5751 intrin->src[i] = nir_src_for_ssa(vtn_ssa_value(b, w[i + 1])->def); 5752 nir_builder_instr_insert(&b->nb, &intrin->instr); 5753 break; 5754 } 5755 5756 case SpvOpRayQueryTerminateKHR: 5757 nir_rq_terminate(&b->nb, vtn_ssa_value(b, w[1])->def); 5758 break; 5759 5760 case SpvOpRayQueryProceedKHR: 5761 vtn_push_nir_ssa(b, w[2], 5762 nir_rq_proceed(&b->nb, 1, vtn_ssa_value(b, w[3])->def)); 5763 break; 5764 5765 case SpvOpRayQueryGenerateIntersectionKHR: 5766 nir_rq_generate_intersection(&b->nb, 5767 vtn_ssa_value(b, w[1])->def, 5768 vtn_ssa_value(b, w[2])->def); 5769 break; 5770 5771 case SpvOpRayQueryConfirmIntersectionKHR: 5772 nir_rq_confirm_intersection(&b->nb, vtn_ssa_value(b, w[1])->def); 5773 break; 5774 5775 case SpvOpRayQueryGetIntersectionTKHR: 5776 case SpvOpRayQueryGetIntersectionTypeKHR: 5777 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: 5778 case SpvOpRayQueryGetIntersectionInstanceIdKHR: 5779 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: 5780 case SpvOpRayQueryGetIntersectionGeometryIndexKHR: 5781 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: 5782 case SpvOpRayQueryGetIntersectionBarycentricsKHR: 5783 case SpvOpRayQueryGetIntersectionFrontFaceKHR: 5784 case 
SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: 5785 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: 5786 case SpvOpRayQueryGetIntersectionObjectToWorldKHR: 5787 case SpvOpRayQueryGetIntersectionWorldToObjectKHR: 5788 ray_query_load_intrinsic_create(b, opcode, w, 5789 vtn_ssa_value(b, w[3])->def, 5790 nir_i2b1(&b->nb, vtn_ssa_value(b, w[4])->def)); 5791 break; 5792 5793 case SpvOpRayQueryGetRayTMinKHR: 5794 case SpvOpRayQueryGetRayFlagsKHR: 5795 case SpvOpRayQueryGetWorldRayDirectionKHR: 5796 case SpvOpRayQueryGetWorldRayOriginKHR: 5797 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: 5798 ray_query_load_intrinsic_create(b, opcode, w, 5799 vtn_ssa_value(b, w[3])->def, 5800 /* Committed value is ignored for these */ 5801 nir_imm_bool(&b->nb, false)); 5802 break; 5803 5804 default: 5805 vtn_fail_with_opcode("Unhandled opcode", opcode); 5806 } 5807} 5808 5809static bool 5810vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, 5811 const uint32_t *w, unsigned count) 5812{ 5813 switch (opcode) { 5814 case SpvOpLabel: 5815 break; 5816 5817 case SpvOpLoopMerge: 5818 case SpvOpSelectionMerge: 5819 /* This is handled by cfg pre-pass and walk_blocks */ 5820 break; 5821 5822 case SpvOpUndef: { 5823 struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); 5824 val->type = vtn_get_type(b, w[1]); 5825 break; 5826 } 5827 5828 case SpvOpExtInst: 5829 vtn_handle_extension(b, opcode, w, count); 5830 break; 5831 5832 case SpvOpVariable: 5833 case SpvOpLoad: 5834 case SpvOpStore: 5835 case SpvOpCopyMemory: 5836 case SpvOpCopyMemorySized: 5837 case SpvOpAccessChain: 5838 case SpvOpPtrAccessChain: 5839 case SpvOpInBoundsAccessChain: 5840 case SpvOpInBoundsPtrAccessChain: 5841 case SpvOpArrayLength: 5842 case SpvOpConvertPtrToU: 5843 case SpvOpConvertUToPtr: 5844 case SpvOpGenericCastToPtrExplicit: 5845 case SpvOpGenericPtrMemSemantics: 5846 case SpvOpSubgroupBlockReadINTEL: 5847 case SpvOpSubgroupBlockWriteINTEL: 5848 case SpvOpConvertUToAccelerationStructureKHR: 5849 vtn_handle_variables(b, opcode, w, count); 5850 break; 5851 5852 case SpvOpFunctionCall: 5853 vtn_handle_function_call(b, opcode, w, count); 5854 break; 5855 5856 case SpvOpSampledImage: 5857 case SpvOpImage: 5858 case SpvOpImageSparseTexelsResident: 5859 case SpvOpImageSampleImplicitLod: 5860 case SpvOpImageSparseSampleImplicitLod: 5861 case SpvOpImageSampleExplicitLod: 5862 case SpvOpImageSparseSampleExplicitLod: 5863 case SpvOpImageSampleDrefImplicitLod: 5864 case SpvOpImageSparseSampleDrefImplicitLod: 5865 case SpvOpImageSampleDrefExplicitLod: 5866 case SpvOpImageSparseSampleDrefExplicitLod: 5867 case SpvOpImageSampleProjImplicitLod: 5868 case SpvOpImageSampleProjExplicitLod: 5869 case SpvOpImageSampleProjDrefImplicitLod: 5870 case SpvOpImageSampleProjDrefExplicitLod: 5871 case SpvOpImageFetch: 5872 case SpvOpImageSparseFetch: 5873 case SpvOpImageGather: 5874 case SpvOpImageSparseGather: 5875 case SpvOpImageDrefGather: 5876 case SpvOpImageSparseDrefGather: 5877 case SpvOpImageQueryLod: 5878 case SpvOpImageQueryLevels: 5879 vtn_handle_texture(b, opcode, w, count); 5880 break; 5881 5882 case SpvOpImageRead: 5883 case SpvOpImageSparseRead: 5884 case SpvOpImageWrite: 5885 case SpvOpImageTexelPointer: 5886 case SpvOpImageQueryFormat: 5887 case SpvOpImageQueryOrder: 5888 vtn_handle_image(b, opcode, w, count); 5889 break; 5890 5891 case SpvOpImageQuerySamples: 5892 case SpvOpImageQuerySizeLod: 5893 case SpvOpImageQuerySize: { 5894 struct vtn_type *image_type = vtn_get_value_type(b, w[3]); 5895 
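      /* These queries are accepted on both storage images and sampled
       * images.  The GLSL-level type recorded for the image operand tells
       * the two apart, so route to the image or the texture path below
       * accordingly.
       */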
vtn_assert(image_type->base_type == vtn_base_type_image); 5896 if (glsl_type_is_image(image_type->glsl_image)) { 5897 vtn_handle_image(b, opcode, w, count); 5898 } else { 5899 vtn_assert(glsl_type_is_texture(image_type->glsl_image)); 5900 vtn_handle_texture(b, opcode, w, count); 5901 } 5902 break; 5903 } 5904 5905 case SpvOpFragmentMaskFetchAMD: 5906 case SpvOpFragmentFetchAMD: 5907 vtn_handle_texture(b, opcode, w, count); 5908 break; 5909 5910 case SpvOpAtomicLoad: 5911 case SpvOpAtomicExchange: 5912 case SpvOpAtomicCompareExchange: 5913 case SpvOpAtomicCompareExchangeWeak: 5914 case SpvOpAtomicIIncrement: 5915 case SpvOpAtomicIDecrement: 5916 case SpvOpAtomicIAdd: 5917 case SpvOpAtomicISub: 5918 case SpvOpAtomicSMin: 5919 case SpvOpAtomicUMin: 5920 case SpvOpAtomicSMax: 5921 case SpvOpAtomicUMax: 5922 case SpvOpAtomicAnd: 5923 case SpvOpAtomicOr: 5924 case SpvOpAtomicXor: 5925 case SpvOpAtomicFAddEXT: 5926 case SpvOpAtomicFMinEXT: 5927 case SpvOpAtomicFMaxEXT: 5928 case SpvOpAtomicFlagTestAndSet: { 5929 struct vtn_value *pointer = vtn_untyped_value(b, w[3]); 5930 if (pointer->value_type == vtn_value_type_image_pointer) { 5931 vtn_handle_image(b, opcode, w, count); 5932 } else { 5933 vtn_assert(pointer->value_type == vtn_value_type_pointer); 5934 vtn_handle_atomics(b, opcode, w, count); 5935 } 5936 break; 5937 } 5938 5939 case SpvOpAtomicStore: 5940 case SpvOpAtomicFlagClear: { 5941 struct vtn_value *pointer = vtn_untyped_value(b, w[1]); 5942 if (pointer->value_type == vtn_value_type_image_pointer) { 5943 vtn_handle_image(b, opcode, w, count); 5944 } else { 5945 vtn_assert(pointer->value_type == vtn_value_type_pointer); 5946 vtn_handle_atomics(b, opcode, w, count); 5947 } 5948 break; 5949 } 5950 5951 case SpvOpSelect: 5952 vtn_handle_select(b, opcode, w, count); 5953 break; 5954 5955 case SpvOpSNegate: 5956 case SpvOpFNegate: 5957 case SpvOpNot: 5958 case SpvOpAny: 5959 case SpvOpAll: 5960 case SpvOpConvertFToU: 5961 case SpvOpConvertFToS: 5962 case SpvOpConvertSToF: 5963 case SpvOpConvertUToF: 5964 case SpvOpUConvert: 5965 case SpvOpSConvert: 5966 case SpvOpFConvert: 5967 case SpvOpQuantizeToF16: 5968 case SpvOpSatConvertSToU: 5969 case SpvOpSatConvertUToS: 5970 case SpvOpPtrCastToGeneric: 5971 case SpvOpGenericCastToPtr: 5972 case SpvOpIsNan: 5973 case SpvOpIsInf: 5974 case SpvOpIsFinite: 5975 case SpvOpIsNormal: 5976 case SpvOpSignBitSet: 5977 case SpvOpLessOrGreater: 5978 case SpvOpOrdered: 5979 case SpvOpUnordered: 5980 case SpvOpIAdd: 5981 case SpvOpFAdd: 5982 case SpvOpISub: 5983 case SpvOpFSub: 5984 case SpvOpIMul: 5985 case SpvOpFMul: 5986 case SpvOpUDiv: 5987 case SpvOpSDiv: 5988 case SpvOpFDiv: 5989 case SpvOpUMod: 5990 case SpvOpSRem: 5991 case SpvOpSMod: 5992 case SpvOpFRem: 5993 case SpvOpFMod: 5994 case SpvOpVectorTimesScalar: 5995 case SpvOpDot: 5996 case SpvOpIAddCarry: 5997 case SpvOpISubBorrow: 5998 case SpvOpUMulExtended: 5999 case SpvOpSMulExtended: 6000 case SpvOpShiftRightLogical: 6001 case SpvOpShiftRightArithmetic: 6002 case SpvOpShiftLeftLogical: 6003 case SpvOpLogicalEqual: 6004 case SpvOpLogicalNotEqual: 6005 case SpvOpLogicalOr: 6006 case SpvOpLogicalAnd: 6007 case SpvOpLogicalNot: 6008 case SpvOpBitwiseOr: 6009 case SpvOpBitwiseXor: 6010 case SpvOpBitwiseAnd: 6011 case SpvOpIEqual: 6012 case SpvOpFOrdEqual: 6013 case SpvOpFUnordEqual: 6014 case SpvOpINotEqual: 6015 case SpvOpFOrdNotEqual: 6016 case SpvOpFUnordNotEqual: 6017 case SpvOpULessThan: 6018 case SpvOpSLessThan: 6019 case SpvOpFOrdLessThan: 6020 case SpvOpFUnordLessThan: 6021 case SpvOpUGreaterThan: 
6022 case SpvOpSGreaterThan: 6023 case SpvOpFOrdGreaterThan: 6024 case SpvOpFUnordGreaterThan: 6025 case SpvOpULessThanEqual: 6026 case SpvOpSLessThanEqual: 6027 case SpvOpFOrdLessThanEqual: 6028 case SpvOpFUnordLessThanEqual: 6029 case SpvOpUGreaterThanEqual: 6030 case SpvOpSGreaterThanEqual: 6031 case SpvOpFOrdGreaterThanEqual: 6032 case SpvOpFUnordGreaterThanEqual: 6033 case SpvOpDPdx: 6034 case SpvOpDPdy: 6035 case SpvOpFwidth: 6036 case SpvOpDPdxFine: 6037 case SpvOpDPdyFine: 6038 case SpvOpFwidthFine: 6039 case SpvOpDPdxCoarse: 6040 case SpvOpDPdyCoarse: 6041 case SpvOpFwidthCoarse: 6042 case SpvOpBitFieldInsert: 6043 case SpvOpBitFieldSExtract: 6044 case SpvOpBitFieldUExtract: 6045 case SpvOpBitReverse: 6046 case SpvOpBitCount: 6047 case SpvOpTranspose: 6048 case SpvOpOuterProduct: 6049 case SpvOpMatrixTimesScalar: 6050 case SpvOpVectorTimesMatrix: 6051 case SpvOpMatrixTimesVector: 6052 case SpvOpMatrixTimesMatrix: 6053 case SpvOpUCountLeadingZerosINTEL: 6054 case SpvOpUCountTrailingZerosINTEL: 6055 case SpvOpAbsISubINTEL: 6056 case SpvOpAbsUSubINTEL: 6057 case SpvOpIAddSatINTEL: 6058 case SpvOpUAddSatINTEL: 6059 case SpvOpIAverageINTEL: 6060 case SpvOpUAverageINTEL: 6061 case SpvOpIAverageRoundedINTEL: 6062 case SpvOpUAverageRoundedINTEL: 6063 case SpvOpISubSatINTEL: 6064 case SpvOpUSubSatINTEL: 6065 case SpvOpIMul32x16INTEL: 6066 case SpvOpUMul32x16INTEL: 6067 vtn_handle_alu(b, opcode, w, count); 6068 break; 6069 6070 case SpvOpSDotKHR: 6071 case SpvOpUDotKHR: 6072 case SpvOpSUDotKHR: 6073 case SpvOpSDotAccSatKHR: 6074 case SpvOpUDotAccSatKHR: 6075 case SpvOpSUDotAccSatKHR: 6076 vtn_handle_integer_dot(b, opcode, w, count); 6077 break; 6078 6079 case SpvOpBitcast: 6080 vtn_handle_bitcast(b, w, count); 6081 break; 6082 6083 case SpvOpVectorExtractDynamic: 6084 case SpvOpVectorInsertDynamic: 6085 case SpvOpVectorShuffle: 6086 case SpvOpCompositeConstruct: 6087 case SpvOpCompositeExtract: 6088 case SpvOpCompositeInsert: 6089 case SpvOpCopyLogical: 6090 case SpvOpCopyObject: 6091 vtn_handle_composite(b, opcode, w, count); 6092 break; 6093 6094 case SpvOpEmitVertex: 6095 case SpvOpEndPrimitive: 6096 case SpvOpEmitStreamVertex: 6097 case SpvOpEndStreamPrimitive: 6098 case SpvOpControlBarrier: 6099 case SpvOpMemoryBarrier: 6100 vtn_handle_barrier(b, opcode, w, count); 6101 break; 6102 6103 case SpvOpGroupNonUniformElect: 6104 case SpvOpGroupNonUniformAll: 6105 case SpvOpGroupNonUniformAny: 6106 case SpvOpGroupNonUniformAllEqual: 6107 case SpvOpGroupNonUniformBroadcast: 6108 case SpvOpGroupNonUniformBroadcastFirst: 6109 case SpvOpGroupNonUniformBallot: 6110 case SpvOpGroupNonUniformInverseBallot: 6111 case SpvOpGroupNonUniformBallotBitExtract: 6112 case SpvOpGroupNonUniformBallotBitCount: 6113 case SpvOpGroupNonUniformBallotFindLSB: 6114 case SpvOpGroupNonUniformBallotFindMSB: 6115 case SpvOpGroupNonUniformShuffle: 6116 case SpvOpGroupNonUniformShuffleXor: 6117 case SpvOpGroupNonUniformShuffleUp: 6118 case SpvOpGroupNonUniformShuffleDown: 6119 case SpvOpGroupNonUniformIAdd: 6120 case SpvOpGroupNonUniformFAdd: 6121 case SpvOpGroupNonUniformIMul: 6122 case SpvOpGroupNonUniformFMul: 6123 case SpvOpGroupNonUniformSMin: 6124 case SpvOpGroupNonUniformUMin: 6125 case SpvOpGroupNonUniformFMin: 6126 case SpvOpGroupNonUniformSMax: 6127 case SpvOpGroupNonUniformUMax: 6128 case SpvOpGroupNonUniformFMax: 6129 case SpvOpGroupNonUniformBitwiseAnd: 6130 case SpvOpGroupNonUniformBitwiseOr: 6131 case SpvOpGroupNonUniformBitwiseXor: 6132 case SpvOpGroupNonUniformLogicalAnd: 6133 case 
SpvOpGroupNonUniformLogicalOr: 6134 case SpvOpGroupNonUniformLogicalXor: 6135 case SpvOpGroupNonUniformQuadBroadcast: 6136 case SpvOpGroupNonUniformQuadSwap: 6137 case SpvOpGroupAll: 6138 case SpvOpGroupAny: 6139 case SpvOpGroupBroadcast: 6140 case SpvOpGroupIAdd: 6141 case SpvOpGroupFAdd: 6142 case SpvOpGroupFMin: 6143 case SpvOpGroupUMin: 6144 case SpvOpGroupSMin: 6145 case SpvOpGroupFMax: 6146 case SpvOpGroupUMax: 6147 case SpvOpGroupSMax: 6148 case SpvOpSubgroupBallotKHR: 6149 case SpvOpSubgroupFirstInvocationKHR: 6150 case SpvOpSubgroupReadInvocationKHR: 6151 case SpvOpSubgroupAllKHR: 6152 case SpvOpSubgroupAnyKHR: 6153 case SpvOpSubgroupAllEqualKHR: 6154 case SpvOpGroupIAddNonUniformAMD: 6155 case SpvOpGroupFAddNonUniformAMD: 6156 case SpvOpGroupFMinNonUniformAMD: 6157 case SpvOpGroupUMinNonUniformAMD: 6158 case SpvOpGroupSMinNonUniformAMD: 6159 case SpvOpGroupFMaxNonUniformAMD: 6160 case SpvOpGroupUMaxNonUniformAMD: 6161 case SpvOpGroupSMaxNonUniformAMD: 6162 case SpvOpSubgroupShuffleINTEL: 6163 case SpvOpSubgroupShuffleDownINTEL: 6164 case SpvOpSubgroupShuffleUpINTEL: 6165 case SpvOpSubgroupShuffleXorINTEL: 6166 vtn_handle_subgroup(b, opcode, w, count); 6167 break; 6168 6169 case SpvOpPtrDiff: 6170 case SpvOpPtrEqual: 6171 case SpvOpPtrNotEqual: 6172 vtn_handle_ptr(b, opcode, w, count); 6173 break; 6174 6175 case SpvOpBeginInvocationInterlockEXT: 6176 nir_begin_invocation_interlock(&b->nb); 6177 break; 6178 6179 case SpvOpEndInvocationInterlockEXT: 6180 nir_end_invocation_interlock(&b->nb); 6181 break; 6182 6183 case SpvOpDemoteToHelperInvocation: { 6184 nir_demote(&b->nb); 6185 break; 6186 } 6187 6188 case SpvOpIsHelperInvocationEXT: { 6189 vtn_push_nir_ssa(b, w[2], nir_is_helper_invocation(&b->nb, 1)); 6190 break; 6191 } 6192 6193 case SpvOpReadClockKHR: { 6194 SpvScope scope = vtn_constant_uint(b, w[3]); 6195 nir_scope nir_scope; 6196 6197 switch (scope) { 6198 case SpvScopeDevice: 6199 nir_scope = NIR_SCOPE_DEVICE; 6200 break; 6201 case SpvScopeSubgroup: 6202 nir_scope = NIR_SCOPE_SUBGROUP; 6203 break; 6204 default: 6205 vtn_fail("invalid read clock scope"); 6206 } 6207 6208 /* Operation supports two result types: uvec2 and uint64_t. The NIR 6209 * intrinsic gives uvec2, so pack the result for the other case. 
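 *
 * For example (illustrative): with an OpTypeInt 64 result, the uvec2
 * (lo, hi) returned by the intrinsic is packed into the 64-bit value
 * (hi << 32) | lo by nir_pack_64_2x32 below.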
6210 */ 6211 nir_ssa_def *result = nir_shader_clock(&b->nb, nir_scope); 6212 6213 struct vtn_type *type = vtn_get_type(b, w[1]); 6214 const struct glsl_type *dest_type = type->type; 6215 6216 if (glsl_type_is_vector(dest_type)) { 6217 assert(dest_type == glsl_vector_type(GLSL_TYPE_UINT, 2)); 6218 } else { 6219 assert(glsl_type_is_scalar(dest_type)); 6220 assert(glsl_get_base_type(dest_type) == GLSL_TYPE_UINT64); 6221 result = nir_pack_64_2x32(&b->nb, result); 6222 } 6223 6224 vtn_push_nir_ssa(b, w[2], result); 6225 break; 6226 } 6227 6228 case SpvOpTraceNV: 6229 case SpvOpTraceRayKHR: 6230 case SpvOpReportIntersectionKHR: 6231 case SpvOpIgnoreIntersectionNV: 6232 case SpvOpTerminateRayNV: 6233 case SpvOpExecuteCallableNV: 6234 case SpvOpExecuteCallableKHR: 6235 vtn_handle_ray_intrinsic(b, opcode, w, count); 6236 break; 6237 6238 case SpvOpRayQueryInitializeKHR: 6239 case SpvOpRayQueryTerminateKHR: 6240 case SpvOpRayQueryGenerateIntersectionKHR: 6241 case SpvOpRayQueryConfirmIntersectionKHR: 6242 case SpvOpRayQueryProceedKHR: 6243 case SpvOpRayQueryGetIntersectionTypeKHR: 6244 case SpvOpRayQueryGetRayTMinKHR: 6245 case SpvOpRayQueryGetRayFlagsKHR: 6246 case SpvOpRayQueryGetIntersectionTKHR: 6247 case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: 6248 case SpvOpRayQueryGetIntersectionInstanceIdKHR: 6249 case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: 6250 case SpvOpRayQueryGetIntersectionGeometryIndexKHR: 6251 case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: 6252 case SpvOpRayQueryGetIntersectionBarycentricsKHR: 6253 case SpvOpRayQueryGetIntersectionFrontFaceKHR: 6254 case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: 6255 case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: 6256 case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: 6257 case SpvOpRayQueryGetWorldRayDirectionKHR: 6258 case SpvOpRayQueryGetWorldRayOriginKHR: 6259 case SpvOpRayQueryGetIntersectionObjectToWorldKHR: 6260 case SpvOpRayQueryGetIntersectionWorldToObjectKHR: 6261 vtn_handle_ray_query_intrinsic(b, opcode, w, count); 6262 break; 6263 6264 case SpvOpLifetimeStart: 6265 case SpvOpLifetimeStop: 6266 break; 6267 6268 case SpvOpGroupAsyncCopy: 6269 case SpvOpGroupWaitEvents: 6270 vtn_handle_opencl_core_instruction(b, opcode, w, count); 6271 break; 6272 6273 case SpvOpWritePackedPrimitiveIndices4x8NV: 6274 vtn_handle_write_packed_primitive_indices(b, opcode, w, count); 6275 break; 6276 6277 default: 6278 vtn_fail_with_opcode("Unhandled opcode", opcode); 6279 } 6280 6281 return true; 6282} 6283 6284struct vtn_builder* 6285vtn_create_builder(const uint32_t *words, size_t word_count, 6286 gl_shader_stage stage, const char *entry_point_name, 6287 const struct spirv_to_nir_options *options) 6288{ 6289 /* Initialize the vtn_builder object */ 6290 struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); 6291 struct spirv_to_nir_options *dup_options = 6292 ralloc(b, struct spirv_to_nir_options); 6293 *dup_options = *options; 6294 6295 b->spirv = words; 6296 b->spirv_word_count = word_count; 6297 b->file = NULL; 6298 b->line = -1; 6299 b->col = -1; 6300 list_inithead(&b->functions); 6301 b->entry_point_stage = stage; 6302 b->entry_point_name = entry_point_name; 6303 b->options = dup_options; 6304 6305 /* 6306 * Handle the SPIR-V header (first 5 dwords). 6307 * Can't use vtn_assert() as the setjmp(3) target isn't initialized yet.
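 *
 * Word layout, as consumed below: word 0 is the magic number, word 1 the
 * SPIR-V version, word 2 the generator magic (tool ID in the high 16 bits,
 * tool version in the low 16 bits), word 3 the value <id> bound, and
 * word 4 is reserved and must be zero.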
6308 */ 6309 if (word_count <= 5) 6310 goto fail; 6311 6312 if (words[0] != SpvMagicNumber) { 6313 vtn_err("words[0] was 0x%x, want 0x%x", words[0], SpvMagicNumber); 6314 goto fail; 6315 } 6316 6317 b->version = words[1]; 6318 if (b->version < 0x10000) { 6319 vtn_err("version was 0x%x, want >= 0x10000", b->version); 6320 goto fail; 6321 } 6322 6323 b->generator_id = words[2] >> 16; 6324 uint16_t generator_version = words[2]; 6325 6326 /* In GLSLang commit 8297936dd6eb3, their handling of barrier() was fixed 6327 * to provide correct memory semantics on compute shader barrier() 6328 * commands. Prior to that, we need to fix them up ourselves. This 6329 * GLSLang fix caused them to bump to generator version 3. 6330 */ 6331 b->wa_glslang_cs_barrier = 6332 (b->generator_id == vtn_generator_glslang_reference_front_end && 6333 generator_version < 3); 6334 6335 /* Identifying the LLVM-SPIRV translator: 6336 * 6337 * The LLVM-SPIRV translator currently doesn't store any generator ID [1]. 6338 * Our use case involving the SPIRV-Tools linker also mean we want to check 6339 * for that tool instead. Finally the SPIRV-Tools linker also stores its 6340 * generator ID in the wrong location [2]. 6341 * 6342 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/1223 6343 * [2] : https://github.com/KhronosGroup/SPIRV-Tools/pull/4549 6344 */ 6345 const bool is_llvm_spirv_translator = 6346 (b->generator_id == 0 && 6347 generator_version == vtn_generator_spirv_tools_linker) || 6348 b->generator_id == vtn_generator_spirv_tools_linker; 6349 6350 /* The LLVM-SPIRV translator generates Undef initializers for _local 6351 * variables [1]. 6352 * 6353 * [1] : https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1224 6354 */ 6355 b->wa_llvm_spirv_ignore_workgroup_initializer = 6356 b->options->environment == NIR_SPIRV_OPENCL && is_llvm_spirv_translator; 6357 6358 /* words[2] == generator magic */ 6359 unsigned value_id_bound = words[3]; 6360 if (words[4] != 0) { 6361 vtn_err("words[4] was %u, want 0", words[4]); 6362 goto fail; 6363 } 6364 6365 b->value_id_bound = value_id_bound; 6366 b->values = rzalloc_array(b, struct vtn_value, value_id_bound); 6367 6368 if (b->options->environment == NIR_SPIRV_VULKAN && b->version < 0x10400) 6369 b->vars_used_indirectly = _mesa_pointer_set_create(b); 6370 6371 return b; 6372 fail: 6373 ralloc_free(b); 6374 return NULL; 6375} 6376 6377static nir_function * 6378vtn_emit_kernel_entry_point_wrapper(struct vtn_builder *b, 6379 nir_function *entry_point) 6380{ 6381 vtn_assert(entry_point == b->entry_point->func->nir_func); 6382 vtn_fail_if(!entry_point->name, "entry points are required to have a name"); 6383 const char *func_name = 6384 ralloc_asprintf(b->shader, "__wrapped_%s", entry_point->name); 6385 6386 vtn_assert(b->shader->info.stage == MESA_SHADER_KERNEL); 6387 6388 nir_function *main_entry_point = nir_function_create(b->shader, func_name); 6389 main_entry_point->impl = nir_function_impl_create(main_entry_point); 6390 nir_builder_init(&b->nb, main_entry_point->impl); 6391 b->nb.cursor = nir_after_cf_list(&main_entry_point->impl->body); 6392 b->func_param_idx = 0; 6393 6394 nir_call_instr *call = nir_call_instr_create(b->nb.shader, entry_point); 6395 6396 for (unsigned i = 0; i < entry_point->num_params; ++i) { 6397 struct vtn_type *param_type = b->entry_point->func->type->params[i]; 6398 6399 /* consider all pointers to function memory to be parameters passed 6400 * by value 6401 */ 6402 bool is_by_val = param_type->base_type == vtn_base_type_pointer && 6403 
param_type->storage_class == SpvStorageClassFunction; 6404 6405 /* input variable */ 6406 nir_variable *in_var = rzalloc(b->nb.shader, nir_variable); 6407 6408 if (is_by_val) { 6409 in_var->data.mode = nir_var_uniform; 6410 in_var->type = param_type->deref->type; 6411 } else if (param_type->base_type == vtn_base_type_image) { 6412 in_var->data.mode = nir_var_image; 6413 in_var->type = param_type->glsl_image; 6414 in_var->data.access = 6415 spirv_to_gl_access_qualifier(b, param_type->access_qualifier); 6416 } else if (param_type->base_type == vtn_base_type_sampler) { 6417 in_var->data.mode = nir_var_uniform; 6418 in_var->type = glsl_bare_sampler_type(); 6419 } else { 6420 in_var->data.mode = nir_var_uniform; 6421 in_var->type = param_type->type; 6422 } 6423 6424 in_var->data.read_only = true; 6425 in_var->data.location = i; 6426 6427 nir_shader_add_variable(b->nb.shader, in_var); 6428 6429 /* we have to copy the entire variable into function memory */ 6430 if (is_by_val) { 6431 nir_variable *copy_var = 6432 nir_local_variable_create(main_entry_point->impl, in_var->type, 6433 "copy_in"); 6434 nir_copy_var(&b->nb, copy_var, in_var); 6435 call->params[i] = 6436 nir_src_for_ssa(&nir_build_deref_var(&b->nb, copy_var)->dest.ssa); 6437 } else if (param_type->base_type == vtn_base_type_image || 6438 param_type->base_type == vtn_base_type_sampler) { 6439 /* Don't load the var, just pass a deref of it */ 6440 call->params[i] = nir_src_for_ssa(&nir_build_deref_var(&b->nb, in_var)->dest.ssa); 6441 } else { 6442 call->params[i] = nir_src_for_ssa(nir_load_var(&b->nb, in_var)); 6443 } 6444 } 6445 6446 nir_builder_instr_insert(&b->nb, &call->instr); 6447 6448 return main_entry_point; 6449} 6450 6451static bool 6452can_remove(nir_variable *var, void *data) 6453{ 6454 const struct set *vars_used_indirectly = data; 6455 return !_mesa_set_search(vars_used_indirectly, var); 6456} 6457 6458nir_shader * 6459spirv_to_nir(const uint32_t *words, size_t word_count, 6460 struct nir_spirv_specialization *spec, unsigned num_spec, 6461 gl_shader_stage stage, const char *entry_point_name, 6462 const struct spirv_to_nir_options *options, 6463 const nir_shader_compiler_options *nir_options) 6464 6465{ 6466 const uint32_t *word_end = words + word_count; 6467 6468 struct vtn_builder *b = vtn_create_builder(words, word_count, 6469 stage, entry_point_name, 6470 options); 6471 6472 if (b == NULL) 6473 return NULL; 6474 6475 /* See also _vtn_fail() */ 6476 if (vtn_setjmp(b->fail_jump)) { 6477 ralloc_free(b); 6478 return NULL; 6479 } 6480 6481 /* Skip the SPIR-V header, handled at vtn_create_builder */ 6482 words+= 5; 6483 6484 b->shader = nir_shader_create(b, stage, nir_options, NULL); 6485 b->shader->info.subgroup_size = options->subgroup_size; 6486 b->shader->info.float_controls_execution_mode = options->float_controls_execution_mode; 6487 6488 /* Handle all the preamble instructions */ 6489 words = vtn_foreach_instruction(b, words, word_end, 6490 vtn_handle_preamble_instruction); 6491 6492 /* DirectXShaderCompiler and glslang/shaderc both create OpKill from HLSL's 6493 * discard/clip, which uses demote semantics. DirectXShaderCompiler will use 6494 * demote if the extension is enabled, so we disable this workaround in that 6495 * case. 
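 *
 * (OpKill terminates the invocation outright, while demote keeps it
 * running as a helper so that derivatives and implicit-LOD sampling in the
 * rest of the quad stay defined, which is the behaviour HLSL's discard
 * expects.)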
6496 * 6497 * Related glslang issue: https://github.com/KhronosGroup/glslang/issues/2416 6498 */ 6499 bool glslang = b->generator_id == vtn_generator_glslang_reference_front_end || 6500 b->generator_id == vtn_generator_shaderc_over_glslang; 6501 bool dxsc = b->generator_id == vtn_generator_spiregg; 6502 b->convert_discard_to_demote = ((dxsc && !b->uses_demote_to_helper_invocation) || 6503 (glslang && b->source_lang == SpvSourceLanguageHLSL)) && 6504 options->caps.demote_to_helper_invocation; 6505 6506 if (!options->create_library && b->entry_point == NULL) { 6507 vtn_fail("Entry point not found for %s shader \"%s\"", 6508 _mesa_shader_stage_to_string(stage), entry_point_name); 6509 ralloc_free(b); 6510 return NULL; 6511 } 6512 6513 /* Ensure a sane address mode is being used for function temps */ 6514 assert(nir_address_format_bit_size(b->options->temp_addr_format) == nir_get_ptr_bitsize(b->shader)); 6515 assert(nir_address_format_num_components(b->options->temp_addr_format) == 1); 6516 6517 /* Set shader info defaults */ 6518 if (stage == MESA_SHADER_GEOMETRY) 6519 b->shader->info.gs.invocations = 1; 6520 6521 /* Parse execution modes. */ 6522 if (!options->create_library) 6523 vtn_foreach_execution_mode(b, b->entry_point, 6524 vtn_handle_execution_mode, NULL); 6525 6526 b->specializations = spec; 6527 b->num_specializations = num_spec; 6528 6529 /* Handle all variable, type, and constant instructions */ 6530 words = vtn_foreach_instruction(b, words, word_end, 6531 vtn_handle_variable_or_type_instruction); 6532 6533 /* Parse execution modes that depend on IDs. Must happen after we have 6534 * constants parsed. 6535 */ 6536 if (!options->create_library) 6537 vtn_foreach_execution_mode(b, b->entry_point, 6538 vtn_handle_execution_mode_id, NULL); 6539 6540 if (b->workgroup_size_builtin) { 6541 vtn_assert(gl_shader_stage_uses_workgroup(stage)); 6542 vtn_assert(b->workgroup_size_builtin->type->type == 6543 glsl_vector_type(GLSL_TYPE_UINT, 3)); 6544 6545 nir_const_value *const_size = 6546 b->workgroup_size_builtin->constant->values; 6547 6548 b->shader->info.workgroup_size[0] = const_size[0].u32; 6549 b->shader->info.workgroup_size[1] = const_size[1].u32; 6550 b->shader->info.workgroup_size[2] = const_size[2].u32; 6551 } 6552 6553 /* Set types on all vtn_values */ 6554 vtn_foreach_instruction(b, words, word_end, vtn_set_instruction_result_type); 6555 6556 vtn_build_cfg(b, words, word_end); 6557 6558 if (!options->create_library) { 6559 assert(b->entry_point->value_type == vtn_value_type_function); 6560 b->entry_point->func->referenced = true; 6561 } 6562 6563 bool progress; 6564 do { 6565 progress = false; 6566 vtn_foreach_cf_node(node, &b->functions) { 6567 struct vtn_function *func = vtn_cf_node_as_function(node); 6568 if ((options->create_library || func->referenced) && !func->emitted) { 6569 b->const_table = _mesa_pointer_hash_table_create(b); 6570 6571 vtn_function_emit(b, func, vtn_handle_body_instruction); 6572 progress = true; 6573 } 6574 } 6575 } while (progress); 6576 6577 if (!options->create_library) { 6578 vtn_assert(b->entry_point->value_type == vtn_value_type_function); 6579 nir_function *entry_point = b->entry_point->func->nir_func; 6580 vtn_assert(entry_point); 6581 6582 /* post process entry_points with input params */ 6583 if (entry_point->num_params && b->shader->info.stage == MESA_SHADER_KERNEL) 6584 entry_point = vtn_emit_kernel_entry_point_wrapper(b, entry_point); 6585 6586 entry_point->is_entrypoint = true; 6587 } 6588 6589 /* structurize the CFG */ 6590 
nir_lower_goto_ifs(b->shader); 6591 6592 /* A SPIR-V module can have multiple shaders stages and also multiple 6593 * shaders of the same stage. Global variables are declared per-module. 6594 * 6595 * Starting in SPIR-V 1.4 the list of global variables is part of 6596 * OpEntryPoint, so only valid ones will be created. Previous versions 6597 * only have Input and Output variables listed, so remove dead variables to 6598 * clean up the remaining ones. 6599 */ 6600 if (!options->create_library && b->version < 0x10400) { 6601 const nir_remove_dead_variables_options dead_opts = { 6602 .can_remove_var = can_remove, 6603 .can_remove_var_data = b->vars_used_indirectly, 6604 }; 6605 nir_remove_dead_variables(b->shader, ~(nir_var_function_temp | 6606 nir_var_shader_out | 6607 nir_var_shader_in | 6608 nir_var_system_value), 6609 b->vars_used_indirectly ? &dead_opts : NULL); 6610 } 6611 6612 nir_foreach_variable_in_shader(var, b->shader) { 6613 switch (var->data.mode) { 6614 case nir_var_mem_ubo: 6615 b->shader->info.num_ubos++; 6616 break; 6617 case nir_var_mem_ssbo: 6618 b->shader->info.num_ssbos++; 6619 break; 6620 case nir_var_mem_push_const: 6621 vtn_assert(b->shader->num_uniforms == 0); 6622 b->shader->num_uniforms = 6623 glsl_get_explicit_size(glsl_without_array(var->type), false); 6624 break; 6625 } 6626 } 6627 6628 /* We sometimes generate bogus derefs that, while never used, give the 6629 * validator a bit of heartburn. Run dead code to get rid of them. 6630 */ 6631 nir_opt_dce(b->shader); 6632 6633 /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is 6634 * a Block, all of them will be and Blocks are explicitly laid out. 6635 */ 6636 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) { 6637 if (glsl_type_is_interface(var->type)) { 6638 assert(b->options->caps.workgroup_memory_explicit_layout); 6639 b->shader->info.shared_memory_explicit_layout = true; 6640 break; 6641 } 6642 } 6643 if (b->shader->info.shared_memory_explicit_layout) { 6644 unsigned size = 0; 6645 nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) { 6646 assert(glsl_type_is_interface(var->type)); 6647 const bool align_to_stride = false; 6648 size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride)); 6649 } 6650 b->shader->info.shared_size = size; 6651 } 6652 6653 if (stage == MESA_SHADER_FRAGMENT) { 6654 /* From the Vulkan 1.2.199 spec: 6655 * 6656 * "If a fragment shader entry point’s interface includes an input 6657 * variable decorated with SamplePosition, Sample Shading is 6658 * considered enabled with a minSampleShading value of 1.0." 6659 * 6660 * Similar text exists for SampleId. Regarding the Sample decoration, 6661 * the Vulkan 1.2.199 spec says: 6662 * 6663 * "If a fragment shader input is decorated with Sample, a separate 6664 * value must be assigned to that variable for each covered sample in 6665 * the fragment, and that value must be sampled at the location of 6666 * the individual sample. When rasterizationSamples is 6667 * VK_SAMPLE_COUNT_1_BIT, the fragment center must be used for 6668 * Centroid, Sample, and undecorated attribute interpolation." 6669 * 6670 * Unfortunately, this isn't quite as clear about static use and the 6671 * interface but the static use check should be valid. 6672 * 6673 * For OpenGL, similar language exists but it's all more wishy-washy. 6674 * We'll assume the same behavior across APIs. 
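 *
 * Concretely, the loop below sets uses_sample_shading whenever a SampleId
 * or SamplePosition system value, or a Sample-decorated input (including
 * one declared as a struct member), appears among the shader's variables.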
6675 */ 6676 nir_foreach_variable_with_modes(var, b->shader, 6677 nir_var_shader_in | 6678 nir_var_system_value) { 6679 struct nir_variable_data *members = 6680 var->members ? var->members : &var->data; 6681 uint16_t num_members = var->members ? var->num_members : 1; 6682 for (uint16_t i = 0; i < num_members; i++) { 6683 if (members[i].mode == nir_var_system_value && 6684 (members[i].location == SYSTEM_VALUE_SAMPLE_ID || 6685 members[i].location == SYSTEM_VALUE_SAMPLE_POS)) 6686 b->shader->info.fs.uses_sample_shading = true; 6687 6688 if (members[i].mode == nir_var_shader_in && members[i].sample) 6689 b->shader->info.fs.uses_sample_shading = true; 6690 } 6691 } 6692 } 6693 6694 /* Unparent the shader from the vtn_builder before we delete the builder */ 6695 ralloc_steal(NULL, b->shader); 6696 6697 nir_shader *shader = b->shader; 6698 ralloc_free(b); 6699 6700 return shader; 6701} 6702