1/* 2 * Copyright © 2017 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_serialize.h" 25#include "nir_control_flow.h" 26#include "nir_xfb_info.h" 27#include "util/u_dynarray.h" 28#include "util/u_math.h" 29 30#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) 31#define MAX_OBJECT_IDS (1 << 20) 32 33typedef struct { 34 size_t blob_offset; 35 nir_ssa_def *src; 36 nir_block *block; 37} write_phi_fixup; 38 39typedef struct { 40 const nir_shader *nir; 41 42 struct blob *blob; 43 44 /* maps pointer to index */ 45 struct hash_table *remap_table; 46 47 /* the next index to assign to a NIR in-memory object */ 48 uint32_t next_idx; 49 50 /* Array of write_phi_fixup structs representing phi sources that need to 51 * be resolved in the second pass. 52 */ 53 struct util_dynarray phi_fixups; 54 55 /* The last serialized type. */ 56 const struct glsl_type *last_type; 57 const struct glsl_type *last_interface_type; 58 struct nir_variable_data last_var_data; 59 60 /* For skipping equal ALU headers (typical after scalarization). */ 61 nir_instr_type last_instr_type; 62 uintptr_t last_alu_header_offset; 63 uint32_t last_alu_header; 64 65 /* Don't write optional data such as variable names. */ 66 bool strip; 67} write_ctx; 68 69typedef struct { 70 nir_shader *nir; 71 72 struct blob_reader *blob; 73 74 /* the next index to assign to a NIR in-memory object */ 75 uint32_t next_idx; 76 77 /* The length of the index -> object table */ 78 uint32_t idx_table_len; 79 80 /* map from index to deserialized pointer */ 81 void **idx_table; 82 83 /* List of phi sources. */ 84 struct list_head phi_srcs; 85 86 /* The last deserialized type. 
*/ 87 const struct glsl_type *last_type; 88 const struct glsl_type *last_interface_type; 89 struct nir_variable_data last_var_data; 90} read_ctx; 91 92static void 93write_add_object(write_ctx *ctx, const void *obj) 94{ 95 uint32_t index = ctx->next_idx++; 96 assert(index != MAX_OBJECT_IDS); 97 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); 98} 99 100static uint32_t 101write_lookup_object(write_ctx *ctx, const void *obj) 102{ 103 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 104 assert(entry); 105 return (uint32_t)(uintptr_t) entry->data; 106} 107 108static void 109read_add_object(read_ctx *ctx, void *obj) 110{ 111 assert(ctx->next_idx < ctx->idx_table_len); 112 ctx->idx_table[ctx->next_idx++] = obj; 113} 114 115static void * 116read_lookup_object(read_ctx *ctx, uint32_t idx) 117{ 118 assert(idx < ctx->idx_table_len); 119 return ctx->idx_table[idx]; 120} 121 122static void * 123read_object(read_ctx *ctx) 124{ 125 return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); 126} 127 128static uint32_t 129encode_bit_size_3bits(uint8_t bit_size) 130{ 131 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */ 132 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); 133 if (bit_size) 134 return util_logbase2(bit_size) + 1; 135 return 0; 136} 137 138static uint8_t 139decode_bit_size_3bits(uint8_t bit_size) 140{ 141 if (bit_size) 142 return 1 << (bit_size - 1); 143 return 0; 144} 145 146#define NUM_COMPONENTS_IS_SEPARATE_7 7 147 148static uint8_t 149encode_num_components_in_3bits(uint8_t num_components) 150{ 151 if (num_components <= 4) 152 return num_components; 153 if (num_components == 8) 154 return 5; 155 if (num_components == 16) 156 return 6; 157 158 /* special value indicating that num_components is in the next uint32 */ 159 return NUM_COMPONENTS_IS_SEPARATE_7; 160} 161 162static uint8_t 163decode_num_components_in_3bits(uint8_t value) 164{ 165 if (value <= 4) 166 return value; 167 if (value == 5) 168 return 8; 169 if (value == 6) 170 return 16; 171 172 unreachable("invalid num_components encoding"); 173 return 0; 174} 175 176static void 177write_constant(write_ctx *ctx, const nir_constant *c) 178{ 179 blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 180 blob_write_uint32(ctx->blob, c->num_elements); 181 for (unsigned i = 0; i < c->num_elements; i++) 182 write_constant(ctx, c->elements[i]); 183} 184 185static nir_constant * 186read_constant(read_ctx *ctx, nir_variable *nvar) 187{ 188 nir_constant *c = ralloc(nvar, nir_constant); 189 190 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 191 c->num_elements = blob_read_uint32(ctx->blob); 192 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 193 for (unsigned i = 0; i < c->num_elements; i++) 194 c->elements[i] = read_constant(ctx, nvar); 195 196 return c; 197} 198 199enum var_data_encoding { 200 var_encode_full, 201 var_encode_shader_temp, 202 var_encode_function_temp, 203 var_encode_location_diff, 204}; 205 206union packed_var { 207 uint32_t u32; 208 struct { 209 unsigned has_name:1; 210 unsigned has_constant_initializer:1; 211 unsigned has_pointer_initializer:1; 212 unsigned has_interface_type:1; 213 unsigned num_state_slots:7; 214 unsigned data_encoding:2; 215 unsigned type_same_as_last:1; 216 unsigned interface_type_same_as_last:1; 217 unsigned ray_query:1; 218 unsigned num_members:16; 219 } u; 220}; 221 222union packed_var_data_diff { 223 uint32_t u32; 224 struct { 225 int location:13; 226 int location_frac:3; 227 int 
driver_location:16; 228 } u; 229}; 230 231static void 232write_variable(write_ctx *ctx, const nir_variable *var) 233{ 234 write_add_object(ctx, var); 235 236 assert(var->num_state_slots < (1 << 7)); 237 238 STATIC_ASSERT(sizeof(union packed_var) == 4); 239 union packed_var flags; 240 flags.u32 = 0; 241 242 flags.u.has_name = !ctx->strip && var->name; 243 flags.u.has_constant_initializer = !!(var->constant_initializer); 244 flags.u.has_pointer_initializer = !!(var->pointer_initializer); 245 flags.u.has_interface_type = !!(var->interface_type); 246 flags.u.type_same_as_last = var->type == ctx->last_type; 247 flags.u.interface_type_same_as_last = 248 var->interface_type && var->interface_type == ctx->last_interface_type; 249 flags.u.num_state_slots = var->num_state_slots; 250 flags.u.num_members = var->num_members; 251 252 struct nir_variable_data data = var->data; 253 254 /* When stripping, we expect that the location is no longer needed, 255 * which is typically after shaders are linked. 256 */ 257 if (ctx->strip && 258 data.mode != nir_var_system_value && 259 data.mode != nir_var_shader_in && 260 data.mode != nir_var_shader_out) 261 data.location = 0; 262 263 /* Temporary variables don't serialize var->data. */ 264 if (data.mode == nir_var_shader_temp) 265 flags.u.data_encoding = var_encode_shader_temp; 266 else if (data.mode == nir_var_function_temp) 267 flags.u.data_encoding = var_encode_function_temp; 268 else { 269 struct nir_variable_data tmp = data; 270 271 tmp.location = ctx->last_var_data.location; 272 tmp.location_frac = ctx->last_var_data.location_frac; 273 tmp.driver_location = ctx->last_var_data.driver_location; 274 275 /* See if we can encode only the difference in locations from the last 276 * variable. 277 */ 278 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && 279 abs((int)data.location - 280 (int)ctx->last_var_data.location) < (1 << 12) && 281 abs((int)data.driver_location - 282 (int)ctx->last_var_data.driver_location) < (1 << 15)) 283 flags.u.data_encoding = var_encode_location_diff; 284 else 285 flags.u.data_encoding = var_encode_full; 286 } 287 288 flags.u.ray_query = var->data.ray_query; 289 290 blob_write_uint32(ctx->blob, flags.u32); 291 292 if (!flags.u.type_same_as_last) { 293 encode_type_to_blob(ctx->blob, var->type); 294 ctx->last_type = var->type; 295 } 296 297 if (var->interface_type && !flags.u.interface_type_same_as_last) { 298 encode_type_to_blob(ctx->blob, var->interface_type); 299 ctx->last_interface_type = var->interface_type; 300 } 301 302 if (flags.u.has_name) 303 blob_write_string(ctx->blob, var->name); 304 305 if (flags.u.data_encoding == var_encode_full || 306 flags.u.data_encoding == var_encode_location_diff) { 307 if (flags.u.data_encoding == var_encode_full) { 308 blob_write_bytes(ctx->blob, &data, sizeof(data)); 309 } else { 310 /* Serialize only the difference in locations from the last variable. 
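          * For example (illustrative values): if the previous output variable
          * had location 1, location_frac 0 and driver_location 4 and this one
          * has location 2, location_frac 0 and driver_location 8, the deltas
          * 1, 0 and 4 fit the 13/3/16-bit signed fields of
          * packed_var_data_diff, so (everything else being equal) the whole
          * nir_variable_data collapses to a single dword.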
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer) {
      /* read_variable() consumes this as a uint32 index via read_object(),
       * so the looked-up index must actually be written to the blob.
       */
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   }
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->data.ray_query = flags.u.ray_query;

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob,
exec_list_length(src)); 427 foreach_list_typed(nir_variable, var, node, src) { 428 write_variable(ctx, var); 429 } 430} 431 432static void 433read_var_list(read_ctx *ctx, struct exec_list *dst) 434{ 435 exec_list_make_empty(dst); 436 unsigned num_vars = blob_read_uint32(ctx->blob); 437 for (unsigned i = 0; i < num_vars; i++) { 438 nir_variable *var = read_variable(ctx); 439 exec_list_push_tail(dst, &var->node); 440 } 441} 442 443static void 444write_register(write_ctx *ctx, const nir_register *reg) 445{ 446 write_add_object(ctx, reg); 447 blob_write_uint32(ctx->blob, reg->num_components); 448 blob_write_uint32(ctx->blob, reg->bit_size); 449 blob_write_uint32(ctx->blob, reg->num_array_elems); 450 blob_write_uint32(ctx->blob, reg->index); 451 blob_write_uint8(ctx->blob, reg->divergent); 452} 453 454static nir_register * 455read_register(read_ctx *ctx) 456{ 457 nir_register *reg = ralloc(ctx->nir, nir_register); 458 read_add_object(ctx, reg); 459 reg->num_components = blob_read_uint32(ctx->blob); 460 reg->bit_size = blob_read_uint32(ctx->blob); 461 reg->num_array_elems = blob_read_uint32(ctx->blob); 462 reg->index = blob_read_uint32(ctx->blob); 463 reg->divergent = blob_read_uint8(ctx->blob); 464 465 list_inithead(®->uses); 466 list_inithead(®->defs); 467 list_inithead(®->if_uses); 468 469 return reg; 470} 471 472static void 473write_reg_list(write_ctx *ctx, const struct exec_list *src) 474{ 475 blob_write_uint32(ctx->blob, exec_list_length(src)); 476 foreach_list_typed(nir_register, reg, node, src) 477 write_register(ctx, reg); 478} 479 480static void 481read_reg_list(read_ctx *ctx, struct exec_list *dst) 482{ 483 exec_list_make_empty(dst); 484 unsigned num_regs = blob_read_uint32(ctx->blob); 485 for (unsigned i = 0; i < num_regs; i++) { 486 nir_register *reg = read_register(ctx); 487 exec_list_push_tail(dst, ®->node); 488 } 489} 490 491union packed_src { 492 uint32_t u32; 493 struct { 494 unsigned is_ssa:1; /* <-- Header */ 495 unsigned is_indirect:1; 496 unsigned object_idx:20; 497 unsigned _footer:10; /* <-- Footer */ 498 } any; 499 struct { 500 unsigned _header:22; /* <-- Header */ 501 unsigned negate:1; /* <-- Footer */ 502 unsigned abs:1; 503 unsigned swizzle_x:2; 504 unsigned swizzle_y:2; 505 unsigned swizzle_z:2; 506 unsigned swizzle_w:2; 507 } alu; 508 struct { 509 unsigned _header:22; /* <-- Header */ 510 unsigned src_type:5; /* <-- Footer */ 511 unsigned _pad:5; 512 } tex; 513}; 514 515static void 516write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) 517{ 518 /* Since sources are very frequent, we try to save some space when storing 519 * them. In particular, we store whether the source is a register and 520 * whether the register has an indirect index in the low two bits. We can 521 * assume that the high two bits of the index are zero, since otherwise our 522 * address space would've been exhausted allocating the remap table! 
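    *
    * The remaining footer bits belong to the caller: write_alu() and
    * write_tex() pass in a header with negate/abs/swizzle_* or src_type
    * already filled in, so e.g. an SSA ALU source reading object #12 with a
    * .y swizzle on its first channel still costs exactly one dword.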
523 */ 524 header.any.is_ssa = src->is_ssa; 525 if (src->is_ssa) { 526 header.any.object_idx = write_lookup_object(ctx, src->ssa); 527 blob_write_uint32(ctx->blob, header.u32); 528 } else { 529 header.any.object_idx = write_lookup_object(ctx, src->reg.reg); 530 header.any.is_indirect = !!src->reg.indirect; 531 blob_write_uint32(ctx->blob, header.u32); 532 blob_write_uint32(ctx->blob, src->reg.base_offset); 533 if (src->reg.indirect) { 534 union packed_src header = {0}; 535 write_src_full(ctx, src->reg.indirect, header); 536 } 537 } 538} 539 540static void 541write_src(write_ctx *ctx, const nir_src *src) 542{ 543 union packed_src header = {0}; 544 write_src_full(ctx, src, header); 545} 546 547static union packed_src 548read_src(read_ctx *ctx, nir_src *src, void *mem_ctx) 549{ 550 STATIC_ASSERT(sizeof(union packed_src) == 4); 551 union packed_src header; 552 header.u32 = blob_read_uint32(ctx->blob); 553 554 src->is_ssa = header.any.is_ssa; 555 if (src->is_ssa) { 556 src->ssa = read_lookup_object(ctx, header.any.object_idx); 557 } else { 558 src->reg.reg = read_lookup_object(ctx, header.any.object_idx); 559 src->reg.base_offset = blob_read_uint32(ctx->blob); 560 if (header.any.is_indirect) { 561 src->reg.indirect = malloc(sizeof(nir_src)); 562 read_src(ctx, src->reg.indirect, mem_ctx); 563 } else { 564 src->reg.indirect = NULL; 565 } 566 } 567 return header; 568} 569 570union packed_dest { 571 uint8_t u8; 572 struct { 573 uint8_t is_ssa:1; 574 uint8_t num_components:3; 575 uint8_t bit_size:3; 576 uint8_t divergent:1; 577 } ssa; 578 struct { 579 uint8_t is_ssa:1; 580 uint8_t is_indirect:1; 581 uint8_t _pad:6; 582 } reg; 583}; 584 585enum intrinsic_const_indices_encoding { 586 /* Use packed_const_indices to store tightly packed indices. 587 * 588 * The common case for load_ubo is 0, 0, 0, which is trivially represented. 589 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 590 */ 591 const_indices_all_combined, 592 593 const_indices_8bit, /* 8 bits per element */ 594 const_indices_16bit, /* 16 bits per element */ 595 const_indices_32bit, /* 32 bits per element */ 596}; 597 598enum load_const_packing { 599 /* Constants are not packed and are stored in following dwords. */ 600 load_const_full, 601 602 /* packed_value contains high 19 bits, low bits are 0, 603 * good for floating-point decimals 604 */ 605 load_const_scalar_hi_19bits, 606 607 /* packed_value contains low 19 bits, high bits are sign-extended */ 608 load_const_scalar_lo_19bits_sext, 609}; 610 611union packed_instr { 612 uint32_t u32; 613 struct { 614 unsigned instr_type:4; /* always present */ 615 unsigned _pad:20; 616 unsigned dest:8; /* always last */ 617 } any; 618 struct { 619 unsigned instr_type:4; 620 unsigned exact:1; 621 unsigned no_signed_wrap:1; 622 unsigned no_unsigned_wrap:1; 623 unsigned saturate:1; 624 /* Reg: writemask; SSA: swizzles for 2 srcs */ 625 unsigned writemask_or_two_swizzles:4; 626 unsigned op:9; 627 unsigned packed_src_ssa_16bit:1; 628 /* Scalarized ALUs always have the same header. 
*/ 629 unsigned num_followup_alu_sharing_header:2; 630 unsigned dest:8; 631 } alu; 632 struct { 633 unsigned instr_type:4; 634 unsigned deref_type:3; 635 unsigned cast_type_same_as_last:1; 636 unsigned modes:5; /* See (de|en)code_deref_modes() */ 637 unsigned _pad:9; 638 unsigned in_bounds:1; 639 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */ 640 unsigned dest:8; 641 } deref; 642 struct { 643 unsigned instr_type:4; 644 unsigned deref_type:3; 645 unsigned _pad:1; 646 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */ 647 unsigned dest:8; 648 } deref_var; 649 struct { 650 unsigned instr_type:4; 651 unsigned intrinsic:10; 652 unsigned const_indices_encoding:2; 653 unsigned packed_const_indices:8; 654 unsigned dest:8; 655 } intrinsic; 656 struct { 657 unsigned instr_type:4; 658 unsigned last_component:4; 659 unsigned bit_size:3; 660 unsigned packing:2; /* enum load_const_packing */ 661 unsigned packed_value:19; /* meaning determined by packing */ 662 } load_const; 663 struct { 664 unsigned instr_type:4; 665 unsigned last_component:4; 666 unsigned bit_size:3; 667 unsigned _pad:21; 668 } undef; 669 struct { 670 unsigned instr_type:4; 671 unsigned num_srcs:4; 672 unsigned op:5; 673 unsigned _pad:11; 674 unsigned dest:8; 675 } tex; 676 struct { 677 unsigned instr_type:4; 678 unsigned num_srcs:20; 679 unsigned dest:8; 680 } phi; 681 struct { 682 unsigned instr_type:4; 683 unsigned type:2; 684 unsigned _pad:26; 685 } jump; 686}; 687 688/* Write "lo24" as low 24 bits in the first uint32. */ 689static void 690write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header, 691 nir_instr_type instr_type) 692{ 693 STATIC_ASSERT(sizeof(union packed_dest) == 1); 694 union packed_dest dest; 695 dest.u8 = 0; 696 697 dest.ssa.is_ssa = dst->is_ssa; 698 if (dst->is_ssa) { 699 dest.ssa.num_components = 700 encode_num_components_in_3bits(dst->ssa.num_components); 701 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size); 702 dest.ssa.divergent = dst->ssa.divergent; 703 } else { 704 dest.reg.is_indirect = !!(dst->reg.indirect); 705 } 706 header.any.dest = dest.u8; 707 708 /* Check if the current ALU instruction has the same header as the previous 709 * instruction that is also ALU. If it is, we don't have to write 710 * the current header. This is a typical occurence after scalarization. 711 */ 712 if (instr_type == nir_instr_type_alu) { 713 bool equal_header = false; 714 715 if (ctx->last_instr_type == nir_instr_type_alu) { 716 assert(ctx->last_alu_header_offset); 717 union packed_instr last_header; 718 last_header.u32 = ctx->last_alu_header; 719 720 /* Clear the field that counts ALUs with equal headers. */ 721 union packed_instr clean_header; 722 clean_header.u32 = last_header.u32; 723 clean_header.alu.num_followup_alu_sharing_header = 0; 724 725 /* There can be at most 4 consecutive ALU instructions 726 * sharing the same header. 
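          * For example, in a fully scalarized shader a run of fadds on scalar
          * SSA operands produces identical headers (same op, flags, dest size
          * and all-zero packed swizzles), so only the first of up to four such
          * instructions pays for a header dword; the rest just bump this
          * 2-bit counter, which is where the limit of 4 comes from.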
727 */ 728 if (last_header.alu.num_followup_alu_sharing_header < 3 && 729 header.u32 == clean_header.u32) { 730 last_header.alu.num_followup_alu_sharing_header++; 731 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, 732 last_header.u32); 733 ctx->last_alu_header = last_header.u32; 734 equal_header = true; 735 } 736 } 737 738 if (!equal_header) { 739 ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob); 740 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32); 741 ctx->last_alu_header = header.u32; 742 } 743 } else { 744 blob_write_uint32(ctx->blob, header.u32); 745 } 746 747 if (dest.ssa.is_ssa && 748 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 749 blob_write_uint32(ctx->blob, dst->ssa.num_components); 750 751 if (dst->is_ssa) { 752 write_add_object(ctx, &dst->ssa); 753 } else { 754 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg)); 755 blob_write_uint32(ctx->blob, dst->reg.base_offset); 756 if (dst->reg.indirect) 757 write_src(ctx, dst->reg.indirect); 758 } 759} 760 761static void 762read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr, 763 union packed_instr header) 764{ 765 union packed_dest dest; 766 dest.u8 = header.any.dest; 767 768 if (dest.ssa.is_ssa) { 769 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size); 770 unsigned num_components; 771 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 772 num_components = blob_read_uint32(ctx->blob); 773 else 774 num_components = decode_num_components_in_3bits(dest.ssa.num_components); 775 nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL); 776 dst->ssa.divergent = dest.ssa.divergent; 777 read_add_object(ctx, &dst->ssa); 778 } else { 779 dst->reg.reg = read_object(ctx); 780 dst->reg.base_offset = blob_read_uint32(ctx->blob); 781 if (dest.reg.is_indirect) { 782 dst->reg.indirect = malloc(sizeof(nir_src)); 783 read_src(ctx, dst->reg.indirect, instr); 784 } 785 } 786} 787 788static bool 789are_object_ids_16bit(write_ctx *ctx) 790{ 791 /* Check the highest object ID, because they are monotonic. */ 792 return ctx->next_idx < (1 << 16); 793} 794 795static bool 796is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) 797{ 798 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 799 800 for (unsigned i = 0; i < num_srcs; i++) { 801 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate) 802 return false; 803 804 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 805 806 for (unsigned chan = 0; chan < src_components; chan++) { 807 /* The swizzles for src0.x and src1.x are stored 808 * in writemask_or_two_swizzles for SSA ALUs. 
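          * For example, a scalar fmul of a.z and b.w still qualifies for the
          * packed 16-bit-source form: the two first-channel swizzles (2 and 3)
          * travel in the header as 2 | (3 << 2), while every remaining channel
          * must use the identity swizzle.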
809 */ 810 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 && 811 alu->src[i].swizzle[chan] < 4) 812 continue; 813 814 if (alu->src[i].swizzle[chan] != chan) 815 return false; 816 } 817 } 818 819 return are_object_ids_16bit(ctx); 820} 821 822static void 823write_alu(write_ctx *ctx, const nir_alu_instr *alu) 824{ 825 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 826 unsigned dst_components = nir_dest_num_components(alu->dest.dest); 827 828 /* 9 bits for nir_op */ 829 STATIC_ASSERT(nir_num_opcodes <= 512); 830 union packed_instr header; 831 header.u32 = 0; 832 833 header.alu.instr_type = alu->instr.type; 834 header.alu.exact = alu->exact; 835 header.alu.no_signed_wrap = alu->no_signed_wrap; 836 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; 837 header.alu.saturate = alu->dest.saturate; 838 header.alu.op = alu->op; 839 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); 840 841 if (header.alu.packed_src_ssa_16bit && 842 alu->dest.dest.is_ssa) { 843 /* For packed srcs of SSA ALUs, this field stores the swizzles. */ 844 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0]; 845 if (num_srcs > 1) 846 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2; 847 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) { 848 /* For vec4 registers, this field is a writemask. */ 849 header.alu.writemask_or_two_swizzles = alu->dest.write_mask; 850 } 851 852 write_dest(ctx, &alu->dest.dest, header, alu->instr.type); 853 854 if (!alu->dest.dest.is_ssa && dst_components > 4) 855 blob_write_uint32(ctx->blob, alu->dest.write_mask); 856 857 if (header.alu.packed_src_ssa_16bit) { 858 for (unsigned i = 0; i < num_srcs; i++) { 859 assert(alu->src[i].src.is_ssa); 860 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); 861 assert(idx < (1 << 16)); 862 blob_write_uint16(ctx->blob, idx); 863 } 864 } else { 865 for (unsigned i = 0; i < num_srcs; i++) { 866 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 867 unsigned src_components = nir_src_num_components(alu->src[i].src); 868 union packed_src src; 869 bool packed = src_components <= 4 && src_channels <= 4; 870 src.u32 = 0; 871 872 src.alu.negate = alu->src[i].negate; 873 src.alu.abs = alu->src[i].abs; 874 875 if (packed) { 876 src.alu.swizzle_x = alu->src[i].swizzle[0]; 877 src.alu.swizzle_y = alu->src[i].swizzle[1]; 878 src.alu.swizzle_z = alu->src[i].swizzle[2]; 879 src.alu.swizzle_w = alu->src[i].swizzle[3]; 880 } 881 882 write_src_full(ctx, &alu->src[i].src, src); 883 884 /* Store swizzles for vec8 and vec16. 
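          * Each channel takes 4 bits and 8 channels share a uint32, so e.g. an
          * 8-component source with the identity swizzle 0,1,...,7 is written
          * as the single dword 0x76543210 (channel 0 in the lowest nibble).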
*/ 885 if (!packed) { 886 for (unsigned o = 0; o < src_channels; o += 8) { 887 unsigned value = 0; 888 889 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 890 value |= (uint32_t)alu->src[i].swizzle[o + j] << 891 (4 * j); /* 4 bits per swizzle */ 892 } 893 894 blob_write_uint32(ctx->blob, value); 895 } 896 } 897 } 898 } 899} 900 901static nir_alu_instr * 902read_alu(read_ctx *ctx, union packed_instr header) 903{ 904 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; 905 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); 906 907 alu->exact = header.alu.exact; 908 alu->no_signed_wrap = header.alu.no_signed_wrap; 909 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; 910 alu->dest.saturate = header.alu.saturate; 911 912 read_dest(ctx, &alu->dest.dest, &alu->instr, header); 913 914 unsigned dst_components = nir_dest_num_components(alu->dest.dest); 915 916 if (alu->dest.dest.is_ssa) { 917 alu->dest.write_mask = u_bit_consecutive(0, dst_components); 918 } else if (dst_components <= 4) { 919 alu->dest.write_mask = header.alu.writemask_or_two_swizzles; 920 } else { 921 alu->dest.write_mask = blob_read_uint32(ctx->blob); 922 } 923 924 if (header.alu.packed_src_ssa_16bit) { 925 for (unsigned i = 0; i < num_srcs; i++) { 926 nir_alu_src *src = &alu->src[i]; 927 src->src.is_ssa = true; 928 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 929 930 memset(&src->swizzle, 0, sizeof(src->swizzle)); 931 932 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 933 934 for (unsigned chan = 0; chan < src_components; chan++) 935 src->swizzle[chan] = chan; 936 } 937 } else { 938 for (unsigned i = 0; i < num_srcs; i++) { 939 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr); 940 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 941 unsigned src_components = nir_src_num_components(alu->src[i].src); 942 bool packed = src_components <= 4 && src_channels <= 4; 943 944 alu->src[i].negate = src.alu.negate; 945 alu->src[i].abs = src.alu.abs; 946 947 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); 948 949 if (packed) { 950 alu->src[i].swizzle[0] = src.alu.swizzle_x; 951 alu->src[i].swizzle[1] = src.alu.swizzle_y; 952 alu->src[i].swizzle[2] = src.alu.swizzle_z; 953 alu->src[i].swizzle[3] = src.alu.swizzle_w; 954 } else { 955 /* Load swizzles for vec8 and vec16. */ 956 for (unsigned o = 0; o < src_channels; o += 8) { 957 unsigned value = blob_read_uint32(ctx->blob); 958 959 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 960 alu->src[i].swizzle[o + j] = 961 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ 962 } 963 } 964 } 965 } 966 } 967 968 if (header.alu.packed_src_ssa_16bit && 969 alu->dest.dest.is_ssa) { 970 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3; 971 if (num_srcs > 1) 972 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2; 973 } 974 975 return alu; 976} 977 978#define MODE_ENC_GENERIC_BIT (1 << 4) 979 980static nir_variable_mode 981decode_deref_modes(unsigned modes) 982{ 983 if (modes & MODE_ENC_GENERIC_BIT) { 984 modes &= ~MODE_ENC_GENERIC_BIT; 985 return modes << (ffs(nir_var_mem_generic) - 1); 986 } else { 987 return 1 << modes; 988 } 989} 990 991static unsigned 992encode_deref_modes(nir_variable_mode modes) 993{ 994 /* Mode sets on derefs generally come in two forms. For certain OpenCL 995 * cases, we can have more than one of the generic modes set. In this 996 * case, we need the full bitfield. 
Fortunately, there are only 4 of 997 * these. For all other modes, we can only have one mode at a time so we 998 * can compress them by only storing the bit position. This, plus one bit 999 * to select encoding, lets us pack the entire bitfield in 5 bits. 1000 */ 1001 STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) < 1002 (1 << MODE_ENC_GENERIC_BIT)); 1003 1004 unsigned enc; 1005 if (modes == 0 || (modes & nir_var_mem_generic)) { 1006 assert(!(modes & ~nir_var_mem_generic)); 1007 enc = modes >> (ffs(nir_var_mem_generic) - 1); 1008 assert(enc < MODE_ENC_GENERIC_BIT); 1009 enc |= MODE_ENC_GENERIC_BIT; 1010 } else { 1011 assert(util_is_power_of_two_nonzero(modes)); 1012 enc = ffs(modes) - 1; 1013 assert(enc < MODE_ENC_GENERIC_BIT); 1014 } 1015 assert(modes == decode_deref_modes(enc)); 1016 return enc; 1017} 1018 1019static void 1020write_deref(write_ctx *ctx, const nir_deref_instr *deref) 1021{ 1022 assert(deref->deref_type < 8); 1023 1024 union packed_instr header; 1025 header.u32 = 0; 1026 1027 header.deref.instr_type = deref->instr.type; 1028 header.deref.deref_type = deref->deref_type; 1029 1030 if (deref->deref_type == nir_deref_type_cast) { 1031 header.deref.modes = encode_deref_modes(deref->modes); 1032 header.deref.cast_type_same_as_last = deref->type == ctx->last_type; 1033 } 1034 1035 unsigned var_idx = 0; 1036 if (deref->deref_type == nir_deref_type_var) { 1037 var_idx = write_lookup_object(ctx, deref->var); 1038 if (var_idx && var_idx < (1 << 16)) 1039 header.deref_var.object_idx = var_idx; 1040 } 1041 1042 if (deref->deref_type == nir_deref_type_array || 1043 deref->deref_type == nir_deref_type_ptr_as_array) { 1044 header.deref.packed_src_ssa_16bit = 1045 deref->parent.is_ssa && deref->arr.index.is_ssa && 1046 are_object_ids_16bit(ctx); 1047 1048 header.deref.in_bounds = deref->arr.in_bounds; 1049 } 1050 1051 write_dest(ctx, &deref->dest, header, deref->instr.type); 1052 1053 switch (deref->deref_type) { 1054 case nir_deref_type_var: 1055 if (!header.deref_var.object_idx) 1056 blob_write_uint32(ctx->blob, var_idx); 1057 break; 1058 1059 case nir_deref_type_struct: 1060 write_src(ctx, &deref->parent); 1061 blob_write_uint32(ctx->blob, deref->strct.index); 1062 break; 1063 1064 case nir_deref_type_array: 1065 case nir_deref_type_ptr_as_array: 1066 if (header.deref.packed_src_ssa_16bit) { 1067 blob_write_uint16(ctx->blob, 1068 write_lookup_object(ctx, deref->parent.ssa)); 1069 blob_write_uint16(ctx->blob, 1070 write_lookup_object(ctx, deref->arr.index.ssa)); 1071 } else { 1072 write_src(ctx, &deref->parent); 1073 write_src(ctx, &deref->arr.index); 1074 } 1075 break; 1076 1077 case nir_deref_type_cast: 1078 write_src(ctx, &deref->parent); 1079 blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 1080 blob_write_uint32(ctx->blob, deref->cast.align_mul); 1081 blob_write_uint32(ctx->blob, deref->cast.align_offset); 1082 if (!header.deref.cast_type_same_as_last) { 1083 encode_type_to_blob(ctx->blob, deref->type); 1084 ctx->last_type = deref->type; 1085 } 1086 break; 1087 1088 case nir_deref_type_array_wildcard: 1089 write_src(ctx, &deref->parent); 1090 break; 1091 1092 default: 1093 unreachable("Invalid deref type"); 1094 } 1095} 1096 1097static nir_deref_instr * 1098read_deref(read_ctx *ctx, union packed_instr header) 1099{ 1100 nir_deref_type deref_type = header.deref.deref_type; 1101 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 1102 1103 read_dest(ctx, &deref->dest, &deref->instr, header); 1104 1105 nir_deref_instr *parent; 1106 1107 switch 
(deref->deref_type) { 1108 case nir_deref_type_var: 1109 if (header.deref_var.object_idx) 1110 deref->var = read_lookup_object(ctx, header.deref_var.object_idx); 1111 else 1112 deref->var = read_object(ctx); 1113 1114 deref->type = deref->var->type; 1115 break; 1116 1117 case nir_deref_type_struct: 1118 read_src(ctx, &deref->parent, &deref->instr); 1119 parent = nir_src_as_deref(deref->parent); 1120 deref->strct.index = blob_read_uint32(ctx->blob); 1121 deref->type = glsl_get_struct_field(parent->type, deref->strct.index); 1122 break; 1123 1124 case nir_deref_type_array: 1125 case nir_deref_type_ptr_as_array: 1126 if (header.deref.packed_src_ssa_16bit) { 1127 deref->parent.is_ssa = true; 1128 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1129 deref->arr.index.is_ssa = true; 1130 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1131 } else { 1132 read_src(ctx, &deref->parent, &deref->instr); 1133 read_src(ctx, &deref->arr.index, &deref->instr); 1134 } 1135 1136 deref->arr.in_bounds = header.deref.in_bounds; 1137 1138 parent = nir_src_as_deref(deref->parent); 1139 if (deref->deref_type == nir_deref_type_array) 1140 deref->type = glsl_get_array_element(parent->type); 1141 else 1142 deref->type = parent->type; 1143 break; 1144 1145 case nir_deref_type_cast: 1146 read_src(ctx, &deref->parent, &deref->instr); 1147 deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 1148 deref->cast.align_mul = blob_read_uint32(ctx->blob); 1149 deref->cast.align_offset = blob_read_uint32(ctx->blob); 1150 if (header.deref.cast_type_same_as_last) { 1151 deref->type = ctx->last_type; 1152 } else { 1153 deref->type = decode_type_from_blob(ctx->blob); 1154 ctx->last_type = deref->type; 1155 } 1156 break; 1157 1158 case nir_deref_type_array_wildcard: 1159 read_src(ctx, &deref->parent, &deref->instr); 1160 parent = nir_src_as_deref(deref->parent); 1161 deref->type = glsl_get_array_element(parent->type); 1162 break; 1163 1164 default: 1165 unreachable("Invalid deref type"); 1166 } 1167 1168 if (deref_type == nir_deref_type_var) { 1169 deref->modes = deref->var->data.mode; 1170 } else if (deref->deref_type == nir_deref_type_cast) { 1171 deref->modes = decode_deref_modes(header.deref.modes); 1172 } else { 1173 assert(deref->parent.is_ssa); 1174 deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; 1175 } 1176 1177 return deref; 1178} 1179 1180static void 1181write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 1182{ 1183 /* 10 bits for nir_intrinsic_op */ 1184 STATIC_ASSERT(nir_num_intrinsics <= 1024); 1185 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 1186 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 1187 assert(intrin->intrinsic < 1024); 1188 1189 union packed_instr header; 1190 header.u32 = 0; 1191 1192 header.intrinsic.instr_type = intrin->instr.type; 1193 header.intrinsic.intrinsic = intrin->intrinsic; 1194 1195 /* Analyze constant indices to decide how to encode them. */ 1196 if (num_indices) { 1197 unsigned max_bits = 0; 1198 for (unsigned i = 0; i < num_indices; i++) { 1199 unsigned max = util_last_bit(intrin->const_index[i]); 1200 max_bits = MAX2(max_bits, max); 1201 } 1202 1203 if (max_bits * num_indices <= 8) { 1204 header.intrinsic.const_indices_encoding = const_indices_all_combined; 1205 1206 /* Pack all const indices into 8 bits. 
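       * For example, load_interpolated_input with const indices 7 and 3
       * (num_indices == 2) gets bit_size = 4 and packed_const_indices =
       * 7 | (3 << 4) = 0x37, and nothing extra is written after the header.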
       */
         unsigned bit_size = 8 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics take their num_components from the dst or src
    * that has 0 components in the info. Find it.
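    * For load_ubo, for instance, the info declares 0 destination components
    * because the intrinsic can be vectorized, so num_components is taken from
    * the destination that was just read.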
1263 */ 1264 if (nir_intrinsic_infos[op].has_dest && 1265 nir_intrinsic_infos[op].dest_components == 0) { 1266 intrin->num_components = nir_dest_num_components(intrin->dest); 1267 } else { 1268 for (unsigned i = 0; i < num_srcs; i++) { 1269 if (nir_intrinsic_infos[op].src_components[i] == 0) { 1270 intrin->num_components = nir_src_num_components(intrin->src[i]); 1271 break; 1272 } 1273 } 1274 } 1275 1276 if (num_indices) { 1277 switch (header.intrinsic.const_indices_encoding) { 1278 case const_indices_all_combined: { 1279 unsigned bit_size = 8 / num_indices; 1280 unsigned bit_mask = u_bit_consecutive(0, bit_size); 1281 for (unsigned i = 0; i < num_indices; i++) { 1282 intrin->const_index[i] = 1283 (header.intrinsic.packed_const_indices >> (i * bit_size)) & 1284 bit_mask; 1285 } 1286 break; 1287 } 1288 case const_indices_8bit: 1289 for (unsigned i = 0; i < num_indices; i++) 1290 intrin->const_index[i] = blob_read_uint8(ctx->blob); 1291 break; 1292 case const_indices_16bit: 1293 for (unsigned i = 0; i < num_indices; i++) 1294 intrin->const_index[i] = blob_read_uint16(ctx->blob); 1295 break; 1296 case const_indices_32bit: 1297 for (unsigned i = 0; i < num_indices; i++) 1298 intrin->const_index[i] = blob_read_uint32(ctx->blob); 1299 break; 1300 } 1301 } 1302 1303 return intrin; 1304} 1305 1306static void 1307write_load_const(write_ctx *ctx, const nir_load_const_instr *lc) 1308{ 1309 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16); 1310 union packed_instr header; 1311 header.u32 = 0; 1312 1313 header.load_const.instr_type = lc->instr.type; 1314 header.load_const.last_component = lc->def.num_components - 1; 1315 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size); 1316 header.load_const.packing = load_const_full; 1317 1318 /* Try to pack 1-component constants into the 19 free bits in the header. 
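    * For example, the 32-bit integer 5 is stored as
    * load_const_scalar_lo_19bits_sext with packed_value = 5, and 1.0f
    * (0x3f800000, whose low 13 bits are zero) as load_const_scalar_hi_19bits
    * with packed_value = 0x3f800000 >> 13; neither needs a dword beyond the
    * header.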
*/ 1319 if (lc->def.num_components == 1) { 1320 switch (lc->def.bit_size) { 1321 case 64: 1322 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) { 1323 /* packed_value contains high 19 bits, low bits are 0 */ 1324 header.load_const.packing = load_const_scalar_hi_19bits; 1325 header.load_const.packed_value = lc->value[0].u64 >> 45; 1326 } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) { 1327 /* packed_value contains low 19 bits, high bits are sign-extended */ 1328 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1329 header.load_const.packed_value = lc->value[0].u64; 1330 } 1331 break; 1332 1333 case 32: 1334 if ((lc->value[0].u32 & 0x1fff) == 0) { 1335 header.load_const.packing = load_const_scalar_hi_19bits; 1336 header.load_const.packed_value = lc->value[0].u32 >> 13; 1337 } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) { 1338 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1339 header.load_const.packed_value = lc->value[0].u32; 1340 } 1341 break; 1342 1343 case 16: 1344 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1345 header.load_const.packed_value = lc->value[0].u16; 1346 break; 1347 case 8: 1348 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1349 header.load_const.packed_value = lc->value[0].u8; 1350 break; 1351 case 1: 1352 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1353 header.load_const.packed_value = lc->value[0].b; 1354 break; 1355 default: 1356 unreachable("invalid bit_size"); 1357 } 1358 } 1359 1360 blob_write_uint32(ctx->blob, header.u32); 1361 1362 if (header.load_const.packing == load_const_full) { 1363 switch (lc->def.bit_size) { 1364 case 64: 1365 blob_write_bytes(ctx->blob, lc->value, 1366 sizeof(*lc->value) * lc->def.num_components); 1367 break; 1368 1369 case 32: 1370 for (unsigned i = 0; i < lc->def.num_components; i++) 1371 blob_write_uint32(ctx->blob, lc->value[i].u32); 1372 break; 1373 1374 case 16: 1375 for (unsigned i = 0; i < lc->def.num_components; i++) 1376 blob_write_uint16(ctx->blob, lc->value[i].u16); 1377 break; 1378 1379 default: 1380 assert(lc->def.bit_size <= 8); 1381 for (unsigned i = 0; i < lc->def.num_components; i++) 1382 blob_write_uint8(ctx->blob, lc->value[i].u8); 1383 break; 1384 } 1385 } 1386 1387 write_add_object(ctx, &lc->def); 1388} 1389 1390static nir_load_const_instr * 1391read_load_const(read_ctx *ctx, union packed_instr header) 1392{ 1393 nir_load_const_instr *lc = 1394 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1, 1395 decode_bit_size_3bits(header.load_const.bit_size)); 1396 lc->def.divergent = false; 1397 1398 switch (header.load_const.packing) { 1399 case load_const_scalar_hi_19bits: 1400 switch (lc->def.bit_size) { 1401 case 64: 1402 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45; 1403 break; 1404 case 32: 1405 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13; 1406 break; 1407 default: 1408 unreachable("invalid bit_size"); 1409 } 1410 break; 1411 1412 case load_const_scalar_lo_19bits_sext: 1413 switch (lc->def.bit_size) { 1414 case 64: 1415 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45; 1416 break; 1417 case 32: 1418 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13; 1419 break; 1420 case 16: 1421 lc->value[0].u16 = header.load_const.packed_value; 1422 break; 1423 case 8: 1424 lc->value[0].u8 = header.load_const.packed_value; 1425 break; 1426 case 1: 1427 lc->value[0].b = 
header.load_const.packed_value; 1428 break; 1429 default: 1430 unreachable("invalid bit_size"); 1431 } 1432 break; 1433 1434 case load_const_full: 1435 switch (lc->def.bit_size) { 1436 case 64: 1437 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 1438 break; 1439 1440 case 32: 1441 for (unsigned i = 0; i < lc->def.num_components; i++) 1442 lc->value[i].u32 = blob_read_uint32(ctx->blob); 1443 break; 1444 1445 case 16: 1446 for (unsigned i = 0; i < lc->def.num_components; i++) 1447 lc->value[i].u16 = blob_read_uint16(ctx->blob); 1448 break; 1449 1450 default: 1451 assert(lc->def.bit_size <= 8); 1452 for (unsigned i = 0; i < lc->def.num_components; i++) 1453 lc->value[i].u8 = blob_read_uint8(ctx->blob); 1454 break; 1455 } 1456 break; 1457 } 1458 1459 read_add_object(ctx, &lc->def); 1460 return lc; 1461} 1462 1463static void 1464write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef) 1465{ 1466 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); 1467 1468 union packed_instr header; 1469 header.u32 = 0; 1470 1471 header.undef.instr_type = undef->instr.type; 1472 header.undef.last_component = undef->def.num_components - 1; 1473 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size); 1474 1475 blob_write_uint32(ctx->blob, header.u32); 1476 write_add_object(ctx, &undef->def); 1477} 1478 1479static nir_ssa_undef_instr * 1480read_ssa_undef(read_ctx *ctx, union packed_instr header) 1481{ 1482 nir_ssa_undef_instr *undef = 1483 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1, 1484 decode_bit_size_3bits(header.undef.bit_size)); 1485 1486 undef->def.divergent = false; 1487 1488 read_add_object(ctx, &undef->def); 1489 return undef; 1490} 1491 1492union packed_tex_data { 1493 uint32_t u32; 1494 struct { 1495 unsigned sampler_dim:4; 1496 unsigned dest_type:8; 1497 unsigned coord_components:3; 1498 unsigned is_array:1; 1499 unsigned is_shadow:1; 1500 unsigned is_new_style_shadow:1; 1501 unsigned is_sparse:1; 1502 unsigned component:2; 1503 unsigned texture_non_uniform:1; 1504 unsigned sampler_non_uniform:1; 1505 unsigned array_is_lowered_cube:1; 1506 unsigned unused:6; /* Mark unused for valgrind. 
*/ 1507 } u; 1508}; 1509 1510static void 1511write_tex(write_ctx *ctx, const nir_tex_instr *tex) 1512{ 1513 assert(tex->num_srcs < 16); 1514 assert(tex->op < 32); 1515 1516 union packed_instr header; 1517 header.u32 = 0; 1518 1519 header.tex.instr_type = tex->instr.type; 1520 header.tex.num_srcs = tex->num_srcs; 1521 header.tex.op = tex->op; 1522 1523 write_dest(ctx, &tex->dest, header, tex->instr.type); 1524 1525 blob_write_uint32(ctx->blob, tex->texture_index); 1526 blob_write_uint32(ctx->blob, tex->sampler_index); 1527 if (tex->op == nir_texop_tg4) 1528 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1529 1530 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); 1531 union packed_tex_data packed = { 1532 .u.sampler_dim = tex->sampler_dim, 1533 .u.dest_type = tex->dest_type, 1534 .u.coord_components = tex->coord_components, 1535 .u.is_array = tex->is_array, 1536 .u.is_shadow = tex->is_shadow, 1537 .u.is_new_style_shadow = tex->is_new_style_shadow, 1538 .u.is_sparse = tex->is_sparse, 1539 .u.component = tex->component, 1540 .u.texture_non_uniform = tex->texture_non_uniform, 1541 .u.sampler_non_uniform = tex->sampler_non_uniform, 1542 .u.array_is_lowered_cube = tex->array_is_lowered_cube, 1543 }; 1544 blob_write_uint32(ctx->blob, packed.u32); 1545 1546 for (unsigned i = 0; i < tex->num_srcs; i++) { 1547 union packed_src src; 1548 src.u32 = 0; 1549 src.tex.src_type = tex->src[i].src_type; 1550 write_src_full(ctx, &tex->src[i].src, src); 1551 } 1552} 1553 1554static nir_tex_instr * 1555read_tex(read_ctx *ctx, union packed_instr header) 1556{ 1557 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); 1558 1559 read_dest(ctx, &tex->dest, &tex->instr, header); 1560 1561 tex->op = header.tex.op; 1562 tex->texture_index = blob_read_uint32(ctx->blob); 1563 tex->sampler_index = blob_read_uint32(ctx->blob); 1564 if (tex->op == nir_texop_tg4) 1565 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1566 1567 union packed_tex_data packed; 1568 packed.u32 = blob_read_uint32(ctx->blob); 1569 tex->sampler_dim = packed.u.sampler_dim; 1570 tex->dest_type = packed.u.dest_type; 1571 tex->coord_components = packed.u.coord_components; 1572 tex->is_array = packed.u.is_array; 1573 tex->is_shadow = packed.u.is_shadow; 1574 tex->is_new_style_shadow = packed.u.is_new_style_shadow; 1575 tex->is_sparse = packed.u.is_sparse; 1576 tex->component = packed.u.component; 1577 tex->texture_non_uniform = packed.u.texture_non_uniform; 1578 tex->sampler_non_uniform = packed.u.sampler_non_uniform; 1579 tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; 1580 1581 for (unsigned i = 0; i < tex->num_srcs; i++) { 1582 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr); 1583 tex->src[i].src_type = src.tex.src_type; 1584 } 1585 1586 return tex; 1587} 1588 1589static void 1590write_phi(write_ctx *ctx, const nir_phi_instr *phi) 1591{ 1592 union packed_instr header; 1593 header.u32 = 0; 1594 1595 header.phi.instr_type = phi->instr.type; 1596 header.phi.num_srcs = exec_list_length(&phi->srcs); 1597 1598 /* Phi nodes are special, since they may reference SSA definitions and 1599 * basic blocks that don't exist yet. We leave two empty uint32_t's here, 1600 * and then store enough information so that a later fixup pass can fill 1601 * them in correctly. 
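    * A phi at the top of a loop, for instance, can name an SSA value that is
    * produced in the loop's final block and so has no index yet;
    * write_fixup_phis() later overwrites the two reserved dwords with the
    * value's and the predecessor block's object indices once the whole impl
    * has been written.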
1602 */ 1603 write_dest(ctx, &phi->dest, header, phi->instr.type); 1604 1605 nir_foreach_phi_src(src, phi) { 1606 assert(src->src.is_ssa); 1607 size_t blob_offset = blob_reserve_uint32(ctx->blob); 1608 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob); 1609 assert(blob_offset + sizeof(uint32_t) == blob_offset2); 1610 write_phi_fixup fixup = { 1611 .blob_offset = blob_offset, 1612 .src = src->src.ssa, 1613 .block = src->pred, 1614 }; 1615 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); 1616 } 1617} 1618 1619static void 1620write_fixup_phis(write_ctx *ctx) 1621{ 1622 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { 1623 blob_overwrite_uint32(ctx->blob, fixup->blob_offset, 1624 write_lookup_object(ctx, fixup->src)); 1625 blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t), 1626 write_lookup_object(ctx, fixup->block)); 1627 } 1628 1629 util_dynarray_clear(&ctx->phi_fixups); 1630} 1631 1632static nir_phi_instr * 1633read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header) 1634{ 1635 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); 1636 1637 read_dest(ctx, &phi->dest, &phi->instr, header); 1638 1639 /* For similar reasons as before, we just store the index directly into the 1640 * pointer, and let a later pass resolve the phi sources. 1641 * 1642 * In order to ensure that the copied sources (which are just the indices 1643 * from the blob for now) don't get inserted into the old shader's use-def 1644 * lists, we have to add the phi instruction *before* we set up its 1645 * sources. 1646 */ 1647 nir_instr_insert_after_block(blk, &phi->instr); 1648 1649 for (unsigned i = 0; i < header.phi.num_srcs; i++) { 1650 nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob); 1651 nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob); 1652 nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def)); 1653 1654 /* Since we're not letting nir_insert_instr handle use/def stuff for us, 1655 * we have to set the parent_instr manually. It doesn't really matter 1656 * when we do it, so we might as well do it here. 1657 */ 1658 src->src.parent_instr = &phi->instr; 1659 1660 /* Stash it in the list of phi sources. We'll walk this list and fix up 1661 * sources at the very end of read_function_impl. 
1662 */ 1663 list_add(&src->src.use_link, &ctx->phi_srcs); 1664 } 1665 1666 return phi; 1667} 1668 1669static void 1670read_fixup_phis(read_ctx *ctx) 1671{ 1672 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { 1673 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); 1674 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); 1675 1676 /* Remove from this list */ 1677 list_del(&src->src.use_link); 1678 1679 list_addtail(&src->src.use_link, &src->src.ssa->uses); 1680 } 1681 assert(list_is_empty(&ctx->phi_srcs)); 1682} 1683 1684static void 1685write_jump(write_ctx *ctx, const nir_jump_instr *jmp) 1686{ 1687 /* These aren't handled because they require special block linking */ 1688 assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if); 1689 1690 assert(jmp->type < 4); 1691 1692 union packed_instr header; 1693 header.u32 = 0; 1694 1695 header.jump.instr_type = jmp->instr.type; 1696 header.jump.type = jmp->type; 1697 1698 blob_write_uint32(ctx->blob, header.u32); 1699} 1700 1701static nir_jump_instr * 1702read_jump(read_ctx *ctx, union packed_instr header) 1703{ 1704 /* These aren't handled because they require special block linking */ 1705 assert(header.jump.type != nir_jump_goto && 1706 header.jump.type != nir_jump_goto_if); 1707 1708 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); 1709 return jmp; 1710} 1711 1712static void 1713write_call(write_ctx *ctx, const nir_call_instr *call) 1714{ 1715 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); 1716 1717 for (unsigned i = 0; i < call->num_params; i++) 1718 write_src(ctx, &call->params[i]); 1719} 1720 1721static nir_call_instr * 1722read_call(read_ctx *ctx) 1723{ 1724 nir_function *callee = read_object(ctx); 1725 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); 1726 1727 for (unsigned i = 0; i < call->num_params; i++) 1728 read_src(ctx, &call->params[i], call); 1729 1730 return call; 1731} 1732 1733static void 1734write_instr(write_ctx *ctx, const nir_instr *instr) 1735{ 1736 /* We have only 4 bits for the instruction type. */ 1737 assert(instr->type < 16); 1738 1739 switch (instr->type) { 1740 case nir_instr_type_alu: 1741 write_alu(ctx, nir_instr_as_alu(instr)); 1742 break; 1743 case nir_instr_type_deref: 1744 write_deref(ctx, nir_instr_as_deref(instr)); 1745 break; 1746 case nir_instr_type_intrinsic: 1747 write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 1748 break; 1749 case nir_instr_type_load_const: 1750 write_load_const(ctx, nir_instr_as_load_const(instr)); 1751 break; 1752 case nir_instr_type_ssa_undef: 1753 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 1754 break; 1755 case nir_instr_type_tex: 1756 write_tex(ctx, nir_instr_as_tex(instr)); 1757 break; 1758 case nir_instr_type_phi: 1759 write_phi(ctx, nir_instr_as_phi(instr)); 1760 break; 1761 case nir_instr_type_jump: 1762 write_jump(ctx, nir_instr_as_jump(instr)); 1763 break; 1764 case nir_instr_type_call: 1765 blob_write_uint32(ctx->blob, instr->type); 1766 write_call(ctx, nir_instr_as_call(instr)); 1767 break; 1768 case nir_instr_type_parallel_copy: 1769 unreachable("Cannot write parallel copies"); 1770 default: 1771 unreachable("bad instr type"); 1772 } 1773} 1774 1775/* Return the number of instructions read. 
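 * This is normally 1, but a packed ALU header shared by up to 4 scalarized
 * instructions makes read_instr() materialize all of them at once and return
 * the full count.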
*/ 1776static unsigned 1777read_instr(read_ctx *ctx, nir_block *block) 1778{ 1779 STATIC_ASSERT(sizeof(union packed_instr) == 4); 1780 union packed_instr header; 1781 header.u32 = blob_read_uint32(ctx->blob); 1782 nir_instr *instr; 1783 1784 switch (header.any.instr_type) { 1785 case nir_instr_type_alu: 1786 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) 1787 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); 1788 return header.alu.num_followup_alu_sharing_header + 1; 1789 case nir_instr_type_deref: 1790 instr = &read_deref(ctx, header)->instr; 1791 break; 1792 case nir_instr_type_intrinsic: 1793 instr = &read_intrinsic(ctx, header)->instr; 1794 break; 1795 case nir_instr_type_load_const: 1796 instr = &read_load_const(ctx, header)->instr; 1797 break; 1798 case nir_instr_type_ssa_undef: 1799 instr = &read_ssa_undef(ctx, header)->instr; 1800 break; 1801 case nir_instr_type_tex: 1802 instr = &read_tex(ctx, header)->instr; 1803 break; 1804 case nir_instr_type_phi: 1805 /* Phi instructions are a bit of a special case when reading because we 1806 * don't want inserting the instruction to automatically handle use/defs 1807 * for us. Instead, we need to wait until all the blocks/instructions 1808 * are read so that we can set their sources up. 1809 */ 1810 read_phi(ctx, block, header); 1811 return 1; 1812 case nir_instr_type_jump: 1813 instr = &read_jump(ctx, header)->instr; 1814 break; 1815 case nir_instr_type_call: 1816 instr = &read_call(ctx)->instr; 1817 break; 1818 case nir_instr_type_parallel_copy: 1819 unreachable("Cannot read parallel copies"); 1820 default: 1821 unreachable("bad instr type"); 1822 } 1823 1824 nir_instr_insert_after_block(block, instr); 1825 return 1; 1826} 1827 1828static void 1829write_block(write_ctx *ctx, const nir_block *block) 1830{ 1831 write_add_object(ctx, block); 1832 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); 1833 1834 ctx->last_instr_type = ~0; 1835 ctx->last_alu_header_offset = 0; 1836 1837 nir_foreach_instr(instr, block) { 1838 write_instr(ctx, instr); 1839 ctx->last_instr_type = instr->type; 1840 } 1841} 1842 1843static void 1844read_block(read_ctx *ctx, struct exec_list *cf_list) 1845{ 1846 /* Don't actually create a new block. Just use the one from the tail of 1847 * the list. NIR guarantees that the tail of the list is a block and that 1848 * no two blocks are side-by-side in the IR; It should be empty. 
1849 */ 1850 nir_block *block = 1851 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 1852 1853 read_add_object(ctx, block); 1854 unsigned num_instrs = blob_read_uint32(ctx->blob); 1855 for (unsigned i = 0; i < num_instrs;) { 1856 i += read_instr(ctx, block); 1857 } 1858} 1859 1860static void 1861write_cf_list(write_ctx *ctx, const struct exec_list *cf_list); 1862 1863static void 1864read_cf_list(read_ctx *ctx, struct exec_list *cf_list); 1865 1866static void 1867write_if(write_ctx *ctx, nir_if *nif) 1868{ 1869 write_src(ctx, &nif->condition); 1870 blob_write_uint8(ctx->blob, nif->control); 1871 1872 write_cf_list(ctx, &nif->then_list); 1873 write_cf_list(ctx, &nif->else_list); 1874} 1875 1876static void 1877read_if(read_ctx *ctx, struct exec_list *cf_list) 1878{ 1879 nir_if *nif = nir_if_create(ctx->nir); 1880 1881 read_src(ctx, &nif->condition, nif); 1882 nif->control = blob_read_uint8(ctx->blob); 1883 1884 nir_cf_node_insert_end(cf_list, &nif->cf_node); 1885 1886 read_cf_list(ctx, &nif->then_list); 1887 read_cf_list(ctx, &nif->else_list); 1888} 1889 1890static void 1891write_loop(write_ctx *ctx, nir_loop *loop) 1892{ 1893 blob_write_uint8(ctx->blob, loop->control); 1894 blob_write_uint8(ctx->blob, loop->divergent); 1895 write_cf_list(ctx, &loop->body); 1896} 1897 1898static void 1899read_loop(read_ctx *ctx, struct exec_list *cf_list) 1900{ 1901 nir_loop *loop = nir_loop_create(ctx->nir); 1902 1903 nir_cf_node_insert_end(cf_list, &loop->cf_node); 1904 1905 loop->control = blob_read_uint8(ctx->blob); 1906 loop->divergent = blob_read_uint8(ctx->blob); 1907 read_cf_list(ctx, &loop->body); 1908} 1909 1910static void 1911write_cf_node(write_ctx *ctx, nir_cf_node *cf) 1912{ 1913 blob_write_uint32(ctx->blob, cf->type); 1914 1915 switch (cf->type) { 1916 case nir_cf_node_block: 1917 write_block(ctx, nir_cf_node_as_block(cf)); 1918 break; 1919 case nir_cf_node_if: 1920 write_if(ctx, nir_cf_node_as_if(cf)); 1921 break; 1922 case nir_cf_node_loop: 1923 write_loop(ctx, nir_cf_node_as_loop(cf)); 1924 break; 1925 default: 1926 unreachable("bad cf type"); 1927 } 1928} 1929 1930static void 1931read_cf_node(read_ctx *ctx, struct exec_list *list) 1932{ 1933 nir_cf_node_type type = blob_read_uint32(ctx->blob); 1934 1935 switch (type) { 1936 case nir_cf_node_block: 1937 read_block(ctx, list); 1938 break; 1939 case nir_cf_node_if: 1940 read_if(ctx, list); 1941 break; 1942 case nir_cf_node_loop: 1943 read_loop(ctx, list); 1944 break; 1945 default: 1946 unreachable("bad cf type"); 1947 } 1948} 1949 1950static void 1951write_cf_list(write_ctx *ctx, const struct exec_list *cf_list) 1952{ 1953 blob_write_uint32(ctx->blob, exec_list_length(cf_list)); 1954 foreach_list_typed(nir_cf_node, cf, node, cf_list) { 1955 write_cf_node(ctx, cf); 1956 } 1957} 1958 1959static void 1960read_cf_list(read_ctx *ctx, struct exec_list *cf_list) 1961{ 1962 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); 1963 for (unsigned i = 0; i < num_cf_nodes; i++) 1964 read_cf_node(ctx, cf_list); 1965} 1966 1967static void 1968write_function_impl(write_ctx *ctx, const nir_function_impl *fi) 1969{ 1970 blob_write_uint8(ctx->blob, fi->structured); 1971 blob_write_uint8(ctx->blob, !!fi->preamble); 1972 1973 if (fi->preamble) 1974 blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble)); 1975 1976 write_var_list(ctx, &fi->locals); 1977 write_reg_list(ctx, &fi->registers); 1978 blob_write_uint32(ctx->blob, fi->reg_alloc); 1979 1980 write_cf_list(ctx, &fi->body); 1981 write_fixup_phis(ctx); 1982} 1983 1984static 
nir_function_impl * 1985read_function_impl(read_ctx *ctx, nir_function *fxn) 1986{ 1987 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); 1988 fi->function = fxn; 1989 1990 fi->structured = blob_read_uint8(ctx->blob); 1991 bool preamble = blob_read_uint8(ctx->blob); 1992 1993 if (preamble) 1994 fi->preamble = read_object(ctx); 1995 1996 read_var_list(ctx, &fi->locals); 1997 read_reg_list(ctx, &fi->registers); 1998 fi->reg_alloc = blob_read_uint32(ctx->blob); 1999 2000 read_cf_list(ctx, &fi->body); 2001 read_fixup_phis(ctx); 2002 2003 fi->valid_metadata = 0; 2004 2005 return fi; 2006} 2007 2008static void 2009write_function(write_ctx *ctx, const nir_function *fxn) 2010{ 2011 uint32_t flags = 0; 2012 if (fxn->is_entrypoint) 2013 flags |= 0x1; 2014 if (fxn->is_preamble) 2015 flags |= 0x2; 2016 if (fxn->name) 2017 flags |= 0x4; 2018 if (fxn->impl) 2019 flags |= 0x8; 2020 blob_write_uint32(ctx->blob, flags); 2021 if (fxn->name) 2022 blob_write_string(ctx->blob, fxn->name); 2023 2024 write_add_object(ctx, fxn); 2025 2026 blob_write_uint32(ctx->blob, fxn->num_params); 2027 for (unsigned i = 0; i < fxn->num_params; i++) { 2028 uint32_t val = 2029 ((uint32_t)fxn->params[i].num_components) | 2030 ((uint32_t)fxn->params[i].bit_size) << 8; 2031 blob_write_uint32(ctx->blob, val); 2032 } 2033 2034 /* At first glance, it looks like we should write the function_impl here. 2035 * However, call instructions need to be able to reference at least the 2036 * function and those will get processed as we write the function_impls. 2037 * We stop here and write function_impls as a second pass. 2038 */ 2039} 2040 2041static void 2042read_function(read_ctx *ctx) 2043{ 2044 uint32_t flags = blob_read_uint32(ctx->blob); 2045 bool has_name = flags & 0x4; 2046 char *name = has_name ? blob_read_string(ctx->blob) : NULL; 2047 2048 nir_function *fxn = nir_function_create(ctx->nir, name); 2049 2050 read_add_object(ctx, fxn); 2051 2052 fxn->num_params = blob_read_uint32(ctx->blob); 2053 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params); 2054 for (unsigned i = 0; i < fxn->num_params; i++) { 2055 uint32_t val = blob_read_uint32(ctx->blob); 2056 fxn->params[i].num_components = val & 0xff; 2057 fxn->params[i].bit_size = (val >> 8) & 0xff; 2058 } 2059 2060 fxn->is_entrypoint = flags & 0x1; 2061 fxn->is_preamble = flags & 0x2; 2062 if (flags & 0x8) 2063 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL; 2064} 2065 2066static void 2067write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb) 2068{ 2069 if (xfb == NULL) { 2070 blob_write_uint32(ctx->blob, 0); 2071 } else { 2072 size_t size = nir_xfb_info_size(xfb->output_count); 2073 assert(size <= UINT32_MAX); 2074 blob_write_uint32(ctx->blob, size); 2075 blob_write_bytes(ctx->blob, xfb, size); 2076 } 2077} 2078 2079static nir_xfb_info * 2080read_xfb_info(read_ctx *ctx) 2081{ 2082 uint32_t size = blob_read_uint32(ctx->blob); 2083 if (size == 0) 2084 return NULL; 2085 2086 struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size); 2087 blob_copy_bytes(ctx->blob, (void *)xfb, size); 2088 2089 return xfb; 2090} 2091 2092/** 2093 * Serialize NIR into a binary blob. 2094 * 2095 * \param strip Don't serialize information only useful for debugging, 2096 * such as variable names, making cache hits from similar 2097 * shaders more likely. 
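/* nir_serialize() below needs to record how many object indices it handed
 * out, but that count is only known once the whole shader has been written,
 * so it reserves a placeholder uint32 up front and patches it at the end.
 * The function below is a minimal, self-contained sketch of that
 * reserve-then-overwrite blob pattern; the example_* name is illustrative
 * only and is not part of the serializer.
 */
static UNUSED void
example_write_counted_items(struct blob *blob, const uint32_t *items,
                            uint32_t num_items)
{
   /* Reserve space for the count before its final value is known. */
   size_t count_offset = blob_reserve_uint32(blob);

   uint32_t count = 0;
   for (uint32_t i = 0; i < num_items; i++) {
      /* Suppose only some of the items end up being written. */
      if (items[i] != 0) {
         blob_write_uint32(blob, items[i]);
         count++;
      }
   }

   /* Patch the placeholder now that the real count is known. */
   blob_overwrite_uint32(blob, count_offset, count);
}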
/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   write_xfb_info(&ctx, nir->xfb_info);

   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   ctx.nir->xfb_info = read_xfb_info(&ctx);

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}
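
/* A usage sketch, not part of this file's API: how a caller such as a
 * driver-side shader cache might round-trip a shader through the public
 * entry points above.  Passing strip=true drops names and labels so that
 * otherwise-identical shaders produce identical blobs; the example_* name
 * is illustrative only.
 */
static UNUSED nir_shader *
example_serialize_roundtrip(void *mem_ctx, const nir_shader *nir)
{
   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, nir, true /* strip */);

   /* writer.data / writer.size is the cacheable binary blob. */

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(mem_ctx, nir->options, &reader);

   blob_finish(&writer);
   return copy;
}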