1 /*
2 * Copyright © 2017 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "nir_xfb_info.h"
27 #include "util/u_dynarray.h"
28 #include "util/u_math.h"
29
30 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
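/* Object indices must fit in the 20-bit object_idx field of packed_src, so
 * the remap table may never grow past this limit.
 */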
31 #define MAX_OBJECT_IDS (1 << 20)
32
33 typedef struct {
34 size_t blob_offset;
35 nir_ssa_def *src;
36 nir_block *block;
37 } write_phi_fixup;
38
39 typedef struct {
40 const nir_shader *nir;
41
42 struct blob *blob;
43
44 /* maps pointer to index */
45 struct hash_table *remap_table;
46
47 /* the next index to assign to a NIR in-memory object */
48 uint32_t next_idx;
49
50 /* Array of write_phi_fixup structs representing phi sources that need to
51 * be resolved in the second pass.
52 */
53 struct util_dynarray phi_fixups;
54
55 /* The last serialized type. */
56 const struct glsl_type *last_type;
57 const struct glsl_type *last_interface_type;
58 struct nir_variable_data last_var_data;
59
60 /* For skipping equal ALU headers (typical after scalarization). */
61 nir_instr_type last_instr_type;
62 uintptr_t last_alu_header_offset;
63 uint32_t last_alu_header;
64
65 /* Don't write optional data such as variable names. */
66 bool strip;
67 } write_ctx;
68
69 typedef struct {
70 nir_shader *nir;
71
72 struct blob_reader *blob;
73
74 /* the next index to assign to a NIR in-memory object */
75 uint32_t next_idx;
76
77 /* The length of the index -> object table */
78 uint32_t idx_table_len;
79
80 /* map from index to deserialized pointer */
81 void **idx_table;
82
83 /* List of phi sources. */
84 struct list_head phi_srcs;
85
86 /* The last deserialized type. */
87 const struct glsl_type *last_type;
88 const struct glsl_type *last_interface_type;
89 struct nir_variable_data last_var_data;
90 } read_ctx;
91
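/* Objects (variables, registers, SSA defs, blocks, functions) are assigned
 * dense indices in the order they are written; the reader adds objects to
 * idx_table in the same order, so a uint32 index in the blob is enough to
 * find the corresponding in-memory object again.
 */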
92 static void
93 write_add_object(write_ctx *ctx, const void *obj)
94 {
95 uint32_t index = ctx->next_idx++;
96 assert(index != MAX_OBJECT_IDS);
97 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
98 }
99
100 static uint32_t
101 write_lookup_object(write_ctx *ctx, const void *obj)
102 {
103 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
104 assert(entry);
105 return (uint32_t)(uintptr_t) entry->data;
106 }
107
108 static void
109 read_add_object(read_ctx *ctx, void *obj)
110 {
111 assert(ctx->next_idx < ctx->idx_table_len);
112 ctx->idx_table[ctx->next_idx++] = obj;
113 }
114
115 static void *
116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118 assert(idx < ctx->idx_table_len);
119 return ctx->idx_table[idx];
120 }
121
122 static void *
123 read_object(read_ctx *ctx)
124 {
125 return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127
128 static uint32_t
129 encode_bit_size_3bits(uint8_t bit_size)
130 {
131 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
132 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
133 if (bit_size)
134 return util_logbase2(bit_size) + 1;
135 return 0;
136 }
137
138 static uint8_t
139 decode_bit_size_3bits(uint8_t bit_size)
140 {
141 if (bit_size)
142 return 1 << (bit_size - 1);
143 return 0;
144 }
145
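/* 3-bit num_components encoding: values 0-4 are stored literally, 5 means
 * vec8, 6 means vec16, and 7 means the real count follows as a separate
 * uint32.
 */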
146 #define NUM_COMPONENTS_IS_SEPARATE_7 7
147
148 static uint8_t
149 encode_num_components_in_3bits(uint8_t num_components)
150 {
151 if (num_components <= 4)
152 return num_components;
153 if (num_components == 8)
154 return 5;
155 if (num_components == 16)
156 return 6;
157
158 /* special value indicating that num_components is in the next uint32 */
159 return NUM_COMPONENTS_IS_SEPARATE_7;
160 }
161
162 static uint8_t
163 decode_num_components_in_3bits(uint8_t value)
164 {
165 if (value <= 4)
166 return value;
167 if (value == 5)
168 return 8;
169 if (value == 6)
170 return 16;
171
172 unreachable("invalid num_components encoding");
173 return 0;
174 }
175
176 static void
177 write_constant(write_ctx *ctx, const nir_constant *c)
178 {
179 blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
180 blob_write_uint32(ctx->blob, c->num_elements);
181 for (unsigned i = 0; i < c->num_elements; i++)
182 write_constant(ctx, c->elements[i]);
183 }
184
185 static nir_constant *
186 read_constant(read_ctx *ctx, nir_variable *nvar)
187 {
188 nir_constant *c = ralloc(nvar, nir_constant);
189
190 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
191 c->num_elements = blob_read_uint32(ctx->blob);
192 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
193 for (unsigned i = 0; i < c->num_elements; i++)
194 c->elements[i] = read_constant(ctx, nvar);
195
196 return c;
197 }
198
199 enum var_data_encoding {
200 var_encode_full,
201 var_encode_shader_temp,
202 var_encode_function_temp,
203 var_encode_location_diff,
204 };
205
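/* Per-variable flags packed into one dword. data_encoding selects how
 * var->data is stored (see enum var_data_encoding above), and the
 * *_same_as_last bits let a variable reuse the previously (de)serialized
 * type instead of encoding it again.
 */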
206 union packed_var {
207 uint32_t u32;
208 struct {
209 unsigned has_name:1;
210 unsigned has_constant_initializer:1;
211 unsigned has_pointer_initializer:1;
212 unsigned has_interface_type:1;
213 unsigned num_state_slots:7;
214 unsigned data_encoding:2;
215 unsigned type_same_as_last:1;
216 unsigned interface_type_same_as_last:1;
217 unsigned ray_query:1;
218 unsigned num_members:16;
219 } u;
220 };
221
222 union packed_var_data_diff {
223 uint32_t u32;
224 struct {
225 int location:13;
226 int location_frac:3;
227 int driver_location:16;
228 } u;
229 };
230
231 static void
232 write_variable(write_ctx *ctx, const nir_variable *var)
233 {
234 write_add_object(ctx, var);
235
236 assert(var->num_state_slots < (1 << 7));
237
238 STATIC_ASSERT(sizeof(union packed_var) == 4);
239 union packed_var flags;
240 flags.u32 = 0;
241
242 flags.u.has_name = !ctx->strip && var->name;
243 flags.u.has_constant_initializer = !!(var->constant_initializer);
244 flags.u.has_pointer_initializer = !!(var->pointer_initializer);
245 flags.u.has_interface_type = !!(var->interface_type);
246 flags.u.type_same_as_last = var->type == ctx->last_type;
247 flags.u.interface_type_same_as_last =
248 var->interface_type && var->interface_type == ctx->last_interface_type;
249 flags.u.num_state_slots = var->num_state_slots;
250 flags.u.num_members = var->num_members;
251
252 struct nir_variable_data data = var->data;
253
254 /* When stripping, we expect that the location is no longer needed,
255 * which is typically after shaders are linked.
256 */
257 if (ctx->strip &&
258 data.mode != nir_var_system_value &&
259 data.mode != nir_var_shader_in &&
260 data.mode != nir_var_shader_out)
261 data.location = 0;
262
263 /* Temporary variables don't serialize var->data. */
264 if (data.mode == nir_var_shader_temp)
265 flags.u.data_encoding = var_encode_shader_temp;
266 else if (data.mode == nir_var_function_temp)
267 flags.u.data_encoding = var_encode_function_temp;
268 else {
269 struct nir_variable_data tmp = data;
270
271 tmp.location = ctx->last_var_data.location;
272 tmp.location_frac = ctx->last_var_data.location_frac;
273 tmp.driver_location = ctx->last_var_data.driver_location;
274
275 /* See if we can encode only the difference in locations from the last
276 * variable.
277 */
278 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
279 abs((int)data.location -
280 (int)ctx->last_var_data.location) < (1 << 12) &&
281 abs((int)data.driver_location -
282 (int)ctx->last_var_data.driver_location) < (1 << 15))
283 flags.u.data_encoding = var_encode_location_diff;
284 else
285 flags.u.data_encoding = var_encode_full;
286 }
287
288 flags.u.ray_query = var->data.ray_query;
289
290 blob_write_uint32(ctx->blob, flags.u32);
291
292 if (!flags.u.type_same_as_last) {
293 encode_type_to_blob(ctx->blob, var->type);
294 ctx->last_type = var->type;
295 }
296
297 if (var->interface_type && !flags.u.interface_type_same_as_last) {
298 encode_type_to_blob(ctx->blob, var->interface_type);
299 ctx->last_interface_type = var->interface_type;
300 }
301
302 if (flags.u.has_name)
303 blob_write_string(ctx->blob, var->name);
304
305 if (flags.u.data_encoding == var_encode_full ||
306 flags.u.data_encoding == var_encode_location_diff) {
307 if (flags.u.data_encoding == var_encode_full) {
308 blob_write_bytes(ctx->blob, &data, sizeof(data));
309 } else {
310 /* Serialize only the difference in locations from the last variable.
311 */
312 union packed_var_data_diff diff;
313
314 diff.u.location = data.location - ctx->last_var_data.location;
315 diff.u.location_frac = data.location_frac -
316 ctx->last_var_data.location_frac;
317 diff.u.driver_location = data.driver_location -
318 ctx->last_var_data.driver_location;
319
320 blob_write_uint32(ctx->blob, diff.u32);
321 }
322
323 ctx->last_var_data = data;
324 }
325
326 for (unsigned i = 0; i < var->num_state_slots; i++) {
327 blob_write_bytes(ctx->blob, &var->state_slots[i],
328 sizeof(var->state_slots[i]));
329 }
330 if (var->constant_initializer)
331 write_constant(ctx, var->constant_initializer);
332 if (var->pointer_initializer)
333 write_lookup_object(ctx, var->pointer_initializer);
334 if (var->num_members > 0) {
335 blob_write_bytes(ctx->blob, (uint8_t *) var->members,
336 var->num_members * sizeof(*var->members));
337 }
338 }
339
340 static nir_variable *
341 read_variable(read_ctx *ctx)
342 {
343 nir_variable *var = rzalloc(ctx->nir, nir_variable);
344 read_add_object(ctx, var);
345
346 union packed_var flags;
347 flags.u32 = blob_read_uint32(ctx->blob);
348
349 if (flags.u.type_same_as_last) {
350 var->type = ctx->last_type;
351 } else {
352 var->type = decode_type_from_blob(ctx->blob);
353 ctx->last_type = var->type;
354 }
355
356 if (flags.u.has_interface_type) {
357 if (flags.u.interface_type_same_as_last) {
358 var->interface_type = ctx->last_interface_type;
359 } else {
360 var->interface_type = decode_type_from_blob(ctx->blob);
361 ctx->last_interface_type = var->interface_type;
362 }
363 }
364
365 if (flags.u.has_name) {
366 const char *name = blob_read_string(ctx->blob);
367 var->name = ralloc_strdup(var, name);
368 } else {
369 var->name = NULL;
370 }
371
372 if (flags.u.data_encoding == var_encode_shader_temp)
373 var->data.mode = nir_var_shader_temp;
374 else if (flags.u.data_encoding == var_encode_function_temp)
375 var->data.mode = nir_var_function_temp;
376 else if (flags.u.data_encoding == var_encode_full) {
377 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
378 ctx->last_var_data = var->data;
379 } else { /* var_encode_location_diff */
380 union packed_var_data_diff diff;
381 diff.u32 = blob_read_uint32(ctx->blob);
382
383 var->data = ctx->last_var_data;
384 var->data.location += diff.u.location;
385 var->data.location_frac += diff.u.location_frac;
386 var->data.driver_location += diff.u.driver_location;
387
388 ctx->last_var_data = var->data;
389 }
390
391 var->data.ray_query = flags.u.ray_query;
392
393 var->num_state_slots = flags.u.num_state_slots;
394 if (var->num_state_slots != 0) {
395 var->state_slots = ralloc_array(var, nir_state_slot,
396 var->num_state_slots);
397 for (unsigned i = 0; i < var->num_state_slots; i++) {
398 blob_copy_bytes(ctx->blob, &var->state_slots[i],
399 sizeof(var->state_slots[i]));
400 }
401 }
402 if (flags.u.has_constant_initializer)
403 var->constant_initializer = read_constant(ctx, var);
404 else
405 var->constant_initializer = NULL;
406
407 if (flags.u.has_pointer_initializer)
408 var->pointer_initializer = read_object(ctx);
409 else
410 var->pointer_initializer = NULL;
411
412 var->num_members = flags.u.num_members;
413 if (var->num_members > 0) {
414 var->members = ralloc_array(var, struct nir_variable_data,
415 var->num_members);
416 blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
417 var->num_members * sizeof(*var->members));
418 }
419
420 return var;
421 }
422
423 static void
424 write_var_list(write_ctx *ctx, const struct exec_list *src)
425 {
426 blob_write_uint32(ctx->blob, exec_list_length(src));
427 foreach_list_typed(nir_variable, var, node, src) {
428 write_variable(ctx, var);
429 }
430 }
431
432 static void
433 read_var_list(read_ctx *ctx, struct exec_list *dst)
434 {
435 exec_list_make_empty(dst);
436 unsigned num_vars = blob_read_uint32(ctx->blob);
437 for (unsigned i = 0; i < num_vars; i++) {
438 nir_variable *var = read_variable(ctx);
439 exec_list_push_tail(dst, &var->node);
440 }
441 }
442
443 static void
444 write_register(write_ctx *ctx, const nir_register *reg)
445 {
446 write_add_object(ctx, reg);
447 blob_write_uint32(ctx->blob, reg->num_components);
448 blob_write_uint32(ctx->blob, reg->bit_size);
449 blob_write_uint32(ctx->blob, reg->num_array_elems);
450 blob_write_uint32(ctx->blob, reg->index);
451 blob_write_uint8(ctx->blob, reg->divergent);
452 }
453
454 static nir_register *
455 read_register(read_ctx *ctx)
456 {
457 nir_register *reg = ralloc(ctx->nir, nir_register);
458 read_add_object(ctx, reg);
459 reg->num_components = blob_read_uint32(ctx->blob);
460 reg->bit_size = blob_read_uint32(ctx->blob);
461 reg->num_array_elems = blob_read_uint32(ctx->blob);
462 reg->index = blob_read_uint32(ctx->blob);
463 reg->divergent = blob_read_uint8(ctx->blob);
464
465 list_inithead(&reg->uses);
466 list_inithead(&reg->defs);
467 list_inithead(&reg->if_uses);
468
469 return reg;
470 }
471
472 static void
473 write_reg_list(write_ctx *ctx, const struct exec_list *src)
474 {
475 blob_write_uint32(ctx->blob, exec_list_length(src));
476 foreach_list_typed(nir_register, reg, node, src)
477 write_register(ctx, reg);
478 }
479
480 static void
481 read_reg_list(read_ctx *ctx, struct exec_list *dst)
482 {
483 exec_list_make_empty(dst);
484 unsigned num_regs = blob_read_uint32(ctx->blob);
485 for (unsigned i = 0; i < num_regs; i++) {
486 nir_register *reg = read_register(ctx);
487 exec_list_push_tail(dst, &reg->node);
488 }
489 }
490
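/* A source is written as one uint32: the low 22 bits (the "header") say
 * whether it is SSA, whether a register source is indirect, and give the
 * object index; the high 10 bits (the "footer") carry per-use data such as
 * ALU modifiers/swizzles or the tex source type.
 */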
491 union packed_src {
492 uint32_t u32;
493 struct {
494 unsigned is_ssa:1; /* <-- Header */
495 unsigned is_indirect:1;
496 unsigned object_idx:20;
497 unsigned _footer:10; /* <-- Footer */
498 } any;
499 struct {
500 unsigned _header:22; /* <-- Header */
501 unsigned negate:1; /* <-- Footer */
502 unsigned abs:1;
503 unsigned swizzle_x:2;
504 unsigned swizzle_y:2;
505 unsigned swizzle_z:2;
506 unsigned swizzle_w:2;
507 } alu;
508 struct {
509 unsigned _header:22; /* <-- Header */
510 unsigned src_type:5; /* <-- Footer */
511 unsigned _pad:5;
512 } tex;
513 };
514
515 static void
516 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
517 {
518 /* Since sources are very frequent, we try to save some space when storing
519 * them. In particular, we store whether the source is a register and
520 * whether the register has an indirect index in the low two bits. We can
521 * assume that the high two bits of the index are zero, since otherwise our
522 * address space would've been exhausted allocating the remap table!
523 */
524 header.any.is_ssa = src->is_ssa;
525 if (src->is_ssa) {
526 header.any.object_idx = write_lookup_object(ctx, src->ssa);
527 blob_write_uint32(ctx->blob, header.u32);
528 } else {
529 header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
530 header.any.is_indirect = !!src->reg.indirect;
531 blob_write_uint32(ctx->blob, header.u32);
532 blob_write_uint32(ctx->blob, src->reg.base_offset);
533 if (src->reg.indirect) {
534 union packed_src header = {0};
535 write_src_full(ctx, src->reg.indirect, header);
536 }
537 }
538 }
539
540 static void
541 write_src(write_ctx *ctx, const nir_src *src)
542 {
543 union packed_src header = {0};
544 write_src_full(ctx, src, header);
545 }
546
547 static union packed_src
548 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
549 {
550 STATIC_ASSERT(sizeof(union packed_src) == 4);
551 union packed_src header;
552 header.u32 = blob_read_uint32(ctx->blob);
553
554 src->is_ssa = header.any.is_ssa;
555 if (src->is_ssa) {
556 src->ssa = read_lookup_object(ctx, header.any.object_idx);
557 } else {
558 src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
559 src->reg.base_offset = blob_read_uint32(ctx->blob);
560 if (header.any.is_indirect) {
561 src->reg.indirect = malloc(sizeof(nir_src));
562 read_src(ctx, src->reg.indirect, mem_ctx);
563 } else {
564 src->reg.indirect = NULL;
565 }
566 }
567 return header;
568 }
569
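/* The destination is packed into the 8-bit "dest" field of packed_instr.
 * SSA destinations encode num_components and bit_size in 3 bits each;
 * register destinations only record whether an indirect index follows.
 */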
570 union packed_dest {
571 uint8_t u8;
572 struct {
573 uint8_t is_ssa:1;
574 uint8_t num_components:3;
575 uint8_t bit_size:3;
576 uint8_t divergent:1;
577 } ssa;
578 struct {
579 uint8_t is_ssa:1;
580 uint8_t is_indirect:1;
581 uint8_t _pad:6;
582 } reg;
583 };
584
585 enum intrinsic_const_indices_encoding {
586 /* Use packed_const_indices to store tightly packed indices.
587 *
588 * The common case for load_ubo is 0, 0, 0, which is trivially represented.
589 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
590 */
591 const_indices_all_combined,
592
593 const_indices_8bit, /* 8 bits per element */
594 const_indices_16bit, /* 16 bits per element */
595 const_indices_32bit, /* 32 bits per element */
596 };
597
598 enum load_const_packing {
599 /* Constants are not packed and are stored in the following dwords. */
600 load_const_full,
601
602 /* packed_value contains high 19 bits, low bits are 0,
603 * good for floating-point decimals
604 */
605 load_const_scalar_hi_19bits,
606
607 /* packed_value contains low 19 bits, high bits are sign-extended */
608 load_const_scalar_lo_19bits_sext,
609 };
610
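/* Every instruction starts with a 32-bit header. The low 4 bits always give
 * the instruction type; the remaining bits are interpreted according to that
 * type, with the packed destination stored in the top 8 bits where present.
 */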
611 union packed_instr {
612 uint32_t u32;
613 struct {
614 unsigned instr_type:4; /* always present */
615 unsigned _pad:20;
616 unsigned dest:8; /* always last */
617 } any;
618 struct {
619 unsigned instr_type:4;
620 unsigned exact:1;
621 unsigned no_signed_wrap:1;
622 unsigned no_unsigned_wrap:1;
623 unsigned saturate:1;
624 /* Reg: writemask; SSA: swizzles for 2 srcs */
625 unsigned writemask_or_two_swizzles:4;
626 unsigned op:9;
627 unsigned packed_src_ssa_16bit:1;
628 /* Scalarized ALUs always have the same header. */
629 unsigned num_followup_alu_sharing_header:2;
630 unsigned dest:8;
631 } alu;
632 struct {
633 unsigned instr_type:4;
634 unsigned deref_type:3;
635 unsigned cast_type_same_as_last:1;
636 unsigned modes:5; /* See (de|en)code_deref_modes() */
637 unsigned _pad:9;
638 unsigned in_bounds:1;
639 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
640 unsigned dest:8;
641 } deref;
642 struct {
643 unsigned instr_type:4;
644 unsigned deref_type:3;
645 unsigned _pad:1;
646 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
647 unsigned dest:8;
648 } deref_var;
649 struct {
650 unsigned instr_type:4;
651 unsigned intrinsic:10;
652 unsigned const_indices_encoding:2;
653 unsigned packed_const_indices:8;
654 unsigned dest:8;
655 } intrinsic;
656 struct {
657 unsigned instr_type:4;
658 unsigned last_component:4;
659 unsigned bit_size:3;
660 unsigned packing:2; /* enum load_const_packing */
661 unsigned packed_value:19; /* meaning determined by packing */
662 } load_const;
663 struct {
664 unsigned instr_type:4;
665 unsigned last_component:4;
666 unsigned bit_size:3;
667 unsigned _pad:21;
668 } undef;
669 struct {
670 unsigned instr_type:4;
671 unsigned num_srcs:4;
672 unsigned op:5;
673 unsigned _pad:11;
674 unsigned dest:8;
675 } tex;
676 struct {
677 unsigned instr_type:4;
678 unsigned num_srcs:20;
679 unsigned dest:8;
680 } phi;
681 struct {
682 unsigned instr_type:4;
683 unsigned type:2;
684 unsigned _pad:26;
685 } jump;
686 };
687
688 /* Write "lo24" as low 24 bits in the first uint32. */
689 static void
690 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
691 nir_instr_type instr_type)
692 {
693 STATIC_ASSERT(sizeof(union packed_dest) == 1);
694 union packed_dest dest;
695 dest.u8 = 0;
696
697 dest.ssa.is_ssa = dst->is_ssa;
698 if (dst->is_ssa) {
699 dest.ssa.num_components =
700 encode_num_components_in_3bits(dst->ssa.num_components);
701 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
702 dest.ssa.divergent = dst->ssa.divergent;
703 } else {
704 dest.reg.is_indirect = !!(dst->reg.indirect);
705 }
706 header.any.dest = dest.u8;
707
708 /* Check if the current ALU instruction has the same header as the previous
709 * instruction, if that was also an ALU. If so, we don't have to write
710 * the current header. This is a typical occurrence after scalarization.
711 */
712 if (instr_type == nir_instr_type_alu) {
713 bool equal_header = false;
714
715 if (ctx->last_instr_type == nir_instr_type_alu) {
716 assert(ctx->last_alu_header_offset);
717 union packed_instr last_header;
718 last_header.u32 = ctx->last_alu_header;
719
720 /* Clear the field that counts ALUs with equal headers. */
721 union packed_instr clean_header;
722 clean_header.u32 = last_header.u32;
723 clean_header.alu.num_followup_alu_sharing_header = 0;
724
725 /* There can be at most 4 consecutive ALU instructions
726 * sharing the same header.
727 */
728 if (last_header.alu.num_followup_alu_sharing_header < 3 &&
729 header.u32 == clean_header.u32) {
730 last_header.alu.num_followup_alu_sharing_header++;
731 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
732 last_header.u32);
733 ctx->last_alu_header = last_header.u32;
734 equal_header = true;
735 }
736 }
737
738 if (!equal_header) {
739 ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
740 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
741 ctx->last_alu_header = header.u32;
742 }
743 } else {
744 blob_write_uint32(ctx->blob, header.u32);
745 }
746
747 if (dest.ssa.is_ssa &&
748 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
749 blob_write_uint32(ctx->blob, dst->ssa.num_components);
750
751 if (dst->is_ssa) {
752 write_add_object(ctx, &dst->ssa);
753 } else {
754 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
755 blob_write_uint32(ctx->blob, dst->reg.base_offset);
756 if (dst->reg.indirect)
757 write_src(ctx, dst->reg.indirect);
758 }
759 }
760
761 static void
762 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
763 union packed_instr header)
764 {
765 union packed_dest dest;
766 dest.u8 = header.any.dest;
767
768 if (dest.ssa.is_ssa) {
769 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
770 unsigned num_components;
771 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
772 num_components = blob_read_uint32(ctx->blob);
773 else
774 num_components = decode_num_components_in_3bits(dest.ssa.num_components);
775 nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
776 dst->ssa.divergent = dest.ssa.divergent;
777 read_add_object(ctx, &dst->ssa);
778 } else {
779 dst->reg.reg = read_object(ctx);
780 dst->reg.base_offset = blob_read_uint32(ctx->blob);
781 if (dest.reg.is_indirect) {
782 dst->reg.indirect = malloc(sizeof(nir_src));
783 read_src(ctx, dst->reg.indirect, instr);
784 }
785 }
786 }
787
788 static bool
789 are_object_ids_16bit(write_ctx *ctx)
790 {
791 /* Check the highest object ID, because they are monotonic. */
792 return ctx->next_idx < (1 << 16);
793 }
794
795 static bool
796 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
797 {
798 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
799
800 for (unsigned i = 0; i < num_srcs; i++) {
801 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
802 return false;
803
804 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
805
806 for (unsigned chan = 0; chan < src_components; chan++) {
807 /* The swizzles for src0.x and src1.x are stored
808 * in writemask_or_two_swizzles for SSA ALUs.
809 */
810 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
811 alu->src[i].swizzle[chan] < 4)
812 continue;
813
814 if (alu->src[i].swizzle[chan] != chan)
815 return false;
816 }
817 }
818
819 return are_object_ids_16bit(ctx);
820 }
821
822 static void
823 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
824 {
825 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
826 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
827
828 /* 9 bits for nir_op */
829 STATIC_ASSERT(nir_num_opcodes <= 512);
830 union packed_instr header;
831 header.u32 = 0;
832
833 header.alu.instr_type = alu->instr.type;
834 header.alu.exact = alu->exact;
835 header.alu.no_signed_wrap = alu->no_signed_wrap;
836 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
837 header.alu.saturate = alu->dest.saturate;
838 header.alu.op = alu->op;
839 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
840
841 if (header.alu.packed_src_ssa_16bit &&
842 alu->dest.dest.is_ssa) {
843 /* For packed srcs of SSA ALUs, this field stores the swizzles. */
844 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
845 if (num_srcs > 1)
846 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
847 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
848 /* For vec4 registers, this field is a writemask. */
849 header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
850 }
851
852 write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
853
854 if (!alu->dest.dest.is_ssa && dst_components > 4)
855 blob_write_uint32(ctx->blob, alu->dest.write_mask);
856
857 if (header.alu.packed_src_ssa_16bit) {
858 for (unsigned i = 0; i < num_srcs; i++) {
859 assert(alu->src[i].src.is_ssa);
860 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
861 assert(idx < (1 << 16));
862 blob_write_uint16(ctx->blob, idx);
863 }
864 } else {
865 for (unsigned i = 0; i < num_srcs; i++) {
866 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
867 unsigned src_components = nir_src_num_components(alu->src[i].src);
868 union packed_src src;
869 bool packed = src_components <= 4 && src_channels <= 4;
870 src.u32 = 0;
871
872 src.alu.negate = alu->src[i].negate;
873 src.alu.abs = alu->src[i].abs;
874
875 if (packed) {
876 src.alu.swizzle_x = alu->src[i].swizzle[0];
877 src.alu.swizzle_y = alu->src[i].swizzle[1];
878 src.alu.swizzle_z = alu->src[i].swizzle[2];
879 src.alu.swizzle_w = alu->src[i].swizzle[3];
880 }
881
882 write_src_full(ctx, &alu->src[i].src, src);
883
884 /* Store swizzles for vec8 and vec16. */
885 if (!packed) {
886 for (unsigned o = 0; o < src_channels; o += 8) {
887 unsigned value = 0;
888
889 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
890 value |= (uint32_t)alu->src[i].swizzle[o + j] <<
891 (4 * j); /* 4 bits per swizzle */
892 }
893
894 blob_write_uint32(ctx->blob, value);
895 }
896 }
897 }
898 }
899 }
900
901 static nir_alu_instr *
902 read_alu(read_ctx *ctx, union packed_instr header)
903 {
904 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
905 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
906
907 alu->exact = header.alu.exact;
908 alu->no_signed_wrap = header.alu.no_signed_wrap;
909 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
910 alu->dest.saturate = header.alu.saturate;
911
912 read_dest(ctx, &alu->dest.dest, &alu->instr, header);
913
914 unsigned dst_components = nir_dest_num_components(alu->dest.dest);
915
916 if (alu->dest.dest.is_ssa) {
917 alu->dest.write_mask = u_bit_consecutive(0, dst_components);
918 } else if (dst_components <= 4) {
919 alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
920 } else {
921 alu->dest.write_mask = blob_read_uint32(ctx->blob);
922 }
923
924 if (header.alu.packed_src_ssa_16bit) {
925 for (unsigned i = 0; i < num_srcs; i++) {
926 nir_alu_src *src = &alu->src[i];
927 src->src.is_ssa = true;
928 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
929
930 memset(&src->swizzle, 0, sizeof(src->swizzle));
931
932 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
933
934 for (unsigned chan = 0; chan < src_components; chan++)
935 src->swizzle[chan] = chan;
936 }
937 } else {
938 for (unsigned i = 0; i < num_srcs; i++) {
939 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
940 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
941 unsigned src_components = nir_src_num_components(alu->src[i].src);
942 bool packed = src_components <= 4 && src_channels <= 4;
943
944 alu->src[i].negate = src.alu.negate;
945 alu->src[i].abs = src.alu.abs;
946
947 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
948
949 if (packed) {
950 alu->src[i].swizzle[0] = src.alu.swizzle_x;
951 alu->src[i].swizzle[1] = src.alu.swizzle_y;
952 alu->src[i].swizzle[2] = src.alu.swizzle_z;
953 alu->src[i].swizzle[3] = src.alu.swizzle_w;
954 } else {
955 /* Load swizzles for vec8 and vec16. */
956 for (unsigned o = 0; o < src_channels; o += 8) {
957 unsigned value = blob_read_uint32(ctx->blob);
958
959 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
960 alu->src[i].swizzle[o + j] =
961 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
962 }
963 }
964 }
965 }
966 }
967
968 if (header.alu.packed_src_ssa_16bit &&
969 alu->dest.dest.is_ssa) {
970 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
971 if (num_srcs > 1)
972 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
973 }
974
975 return alu;
976 }
977
978 #define MODE_ENC_GENERIC_BIT (1 << 4)
979
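/* Inverse of encode_deref_modes() below: values with MODE_ENC_GENERIC_BIT
 * set hold a small bitfield of the generic memory modes (shifted down to
 * bit 0), anything else is the bit position of a single mode.
 */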
980 static nir_variable_mode
981 decode_deref_modes(unsigned modes)
982 {
983 if (modes & MODE_ENC_GENERIC_BIT) {
984 modes &= ~MODE_ENC_GENERIC_BIT;
985 return modes << (ffs(nir_var_mem_generic) - 1);
986 } else {
987 return 1 << modes;
988 }
989 }
990
991 static unsigned
992 encode_deref_modes(nir_variable_mode modes)
993 {
994 /* Mode sets on derefs generally come in two forms. For certain OpenCL
995 * cases, we can have more than one of the generic modes set. In this
996 * case, we need the full bitfield. Fortunately, there are only 4 of
997 * these. For all other modes, we can only have one mode at a time so we
998 * can compress them by only storing the bit position. This, plus one bit
999 * to select encoding, lets us pack the entire bitfield in 5 bits.
1000 */
1001 STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
1002 (1 << MODE_ENC_GENERIC_BIT));
1003
1004 unsigned enc;
1005 if (modes == 0 || (modes & nir_var_mem_generic)) {
1006 assert(!(modes & ~nir_var_mem_generic));
1007 enc = modes >> (ffs(nir_var_mem_generic) - 1);
1008 assert(enc < MODE_ENC_GENERIC_BIT);
1009 enc |= MODE_ENC_GENERIC_BIT;
1010 } else {
1011 assert(util_is_power_of_two_nonzero(modes));
1012 enc = ffs(modes) - 1;
1013 assert(enc < MODE_ENC_GENERIC_BIT);
1014 }
1015 assert(modes == decode_deref_modes(enc));
1016 return enc;
1017 }
1018
1019 static void
1020 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
1021 {
1022 assert(deref->deref_type < 8);
1023
1024 union packed_instr header;
1025 header.u32 = 0;
1026
1027 header.deref.instr_type = deref->instr.type;
1028 header.deref.deref_type = deref->deref_type;
1029
1030 if (deref->deref_type == nir_deref_type_cast) {
1031 header.deref.modes = encode_deref_modes(deref->modes);
1032 header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
1033 }
1034
1035 unsigned var_idx = 0;
1036 if (deref->deref_type == nir_deref_type_var) {
1037 var_idx = write_lookup_object(ctx, deref->var);
1038 if (var_idx && var_idx < (1 << 16))
1039 header.deref_var.object_idx = var_idx;
1040 }
1041
1042 if (deref->deref_type == nir_deref_type_array ||
1043 deref->deref_type == nir_deref_type_ptr_as_array) {
1044 header.deref.packed_src_ssa_16bit =
1045 deref->parent.is_ssa && deref->arr.index.is_ssa &&
1046 are_object_ids_16bit(ctx);
1047
1048 header.deref.in_bounds = deref->arr.in_bounds;
1049 }
1050
1051 write_dest(ctx, &deref->dest, header, deref->instr.type);
1052
1053 switch (deref->deref_type) {
1054 case nir_deref_type_var:
1055 if (!header.deref_var.object_idx)
1056 blob_write_uint32(ctx->blob, var_idx);
1057 break;
1058
1059 case nir_deref_type_struct:
1060 write_src(ctx, &deref->parent);
1061 blob_write_uint32(ctx->blob, deref->strct.index);
1062 break;
1063
1064 case nir_deref_type_array:
1065 case nir_deref_type_ptr_as_array:
1066 if (header.deref.packed_src_ssa_16bit) {
1067 blob_write_uint16(ctx->blob,
1068 write_lookup_object(ctx, deref->parent.ssa));
1069 blob_write_uint16(ctx->blob,
1070 write_lookup_object(ctx, deref->arr.index.ssa));
1071 } else {
1072 write_src(ctx, &deref->parent);
1073 write_src(ctx, &deref->arr.index);
1074 }
1075 break;
1076
1077 case nir_deref_type_cast:
1078 write_src(ctx, &deref->parent);
1079 blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1080 blob_write_uint32(ctx->blob, deref->cast.align_mul);
1081 blob_write_uint32(ctx->blob, deref->cast.align_offset);
1082 if (!header.deref.cast_type_same_as_last) {
1083 encode_type_to_blob(ctx->blob, deref->type);
1084 ctx->last_type = deref->type;
1085 }
1086 break;
1087
1088 case nir_deref_type_array_wildcard:
1089 write_src(ctx, &deref->parent);
1090 break;
1091
1092 default:
1093 unreachable("Invalid deref type");
1094 }
1095 }
1096
1097 static nir_deref_instr *
1098 read_deref(read_ctx *ctx, union packed_instr header)
1099 {
1100 nir_deref_type deref_type = header.deref.deref_type;
1101 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1102
1103 read_dest(ctx, &deref->dest, &deref->instr, header);
1104
1105 nir_deref_instr *parent;
1106
1107 switch (deref->deref_type) {
1108 case nir_deref_type_var:
1109 if (header.deref_var.object_idx)
1110 deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1111 else
1112 deref->var = read_object(ctx);
1113
1114 deref->type = deref->var->type;
1115 break;
1116
1117 case nir_deref_type_struct:
1118 read_src(ctx, &deref->parent, &deref->instr);
1119 parent = nir_src_as_deref(deref->parent);
1120 deref->strct.index = blob_read_uint32(ctx->blob);
1121 deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1122 break;
1123
1124 case nir_deref_type_array:
1125 case nir_deref_type_ptr_as_array:
1126 if (header.deref.packed_src_ssa_16bit) {
1127 deref->parent.is_ssa = true;
1128 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1129 deref->arr.index.is_ssa = true;
1130 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1131 } else {
1132 read_src(ctx, &deref->parent, &deref->instr);
1133 read_src(ctx, &deref->arr.index, &deref->instr);
1134 }
1135
1136 deref->arr.in_bounds = header.deref.in_bounds;
1137
1138 parent = nir_src_as_deref(deref->parent);
1139 if (deref->deref_type == nir_deref_type_array)
1140 deref->type = glsl_get_array_element(parent->type);
1141 else
1142 deref->type = parent->type;
1143 break;
1144
1145 case nir_deref_type_cast:
1146 read_src(ctx, &deref->parent, &deref->instr);
1147 deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1148 deref->cast.align_mul = blob_read_uint32(ctx->blob);
1149 deref->cast.align_offset = blob_read_uint32(ctx->blob);
1150 if (header.deref.cast_type_same_as_last) {
1151 deref->type = ctx->last_type;
1152 } else {
1153 deref->type = decode_type_from_blob(ctx->blob);
1154 ctx->last_type = deref->type;
1155 }
1156 break;
1157
1158 case nir_deref_type_array_wildcard:
1159 read_src(ctx, &deref->parent, &deref->instr);
1160 parent = nir_src_as_deref(deref->parent);
1161 deref->type = glsl_get_array_element(parent->type);
1162 break;
1163
1164 default:
1165 unreachable("Invalid deref type");
1166 }
1167
1168 if (deref_type == nir_deref_type_var) {
1169 deref->modes = deref->var->data.mode;
1170 } else if (deref->deref_type == nir_deref_type_cast) {
1171 deref->modes = decode_deref_modes(header.deref.modes);
1172 } else {
1173 assert(deref->parent.is_ssa);
1174 deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
1175 }
1176
1177 return deref;
1178 }
1179
1180 static void
1181 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1182 {
1183 /* 10 bits for nir_intrinsic_op */
1184 STATIC_ASSERT(nir_num_intrinsics <= 1024);
1185 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1186 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1187 assert(intrin->intrinsic < 1024);
1188
1189 union packed_instr header;
1190 header.u32 = 0;
1191
1192 header.intrinsic.instr_type = intrin->instr.type;
1193 header.intrinsic.intrinsic = intrin->intrinsic;
1194
1195 /* Analyze constant indices to decide how to encode them. */
1196 if (num_indices) {
1197 unsigned max_bits = 0;
1198 for (unsigned i = 0; i < num_indices; i++) {
1199 unsigned max = util_last_bit(intrin->const_index[i]);
1200 max_bits = MAX2(max_bits, max);
1201 }
1202
1203 if (max_bits * num_indices <= 8) {
1204 header.intrinsic.const_indices_encoding = const_indices_all_combined;
1205
1206 /* Pack all const indices into 8 bits. */
1207 unsigned bit_size = 8 / num_indices;
1208 for (unsigned i = 0; i < num_indices; i++) {
1209 header.intrinsic.packed_const_indices |=
1210 intrin->const_index[i] << (i * bit_size);
1211 }
1212 } else if (max_bits <= 8)
1213 header.intrinsic.const_indices_encoding = const_indices_8bit;
1214 else if (max_bits <= 16)
1215 header.intrinsic.const_indices_encoding = const_indices_16bit;
1216 else
1217 header.intrinsic.const_indices_encoding = const_indices_32bit;
1218 }
1219
1220 if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1221 write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1222 else
1223 blob_write_uint32(ctx->blob, header.u32);
1224
1225 for (unsigned i = 0; i < num_srcs; i++)
1226 write_src(ctx, &intrin->src[i]);
1227
1228 if (num_indices) {
1229 switch (header.intrinsic.const_indices_encoding) {
1230 case const_indices_8bit:
1231 for (unsigned i = 0; i < num_indices; i++)
1232 blob_write_uint8(ctx->blob, intrin->const_index[i]);
1233 break;
1234 case const_indices_16bit:
1235 for (unsigned i = 0; i < num_indices; i++)
1236 blob_write_uint16(ctx->blob, intrin->const_index[i]);
1237 break;
1238 case const_indices_32bit:
1239 for (unsigned i = 0; i < num_indices; i++)
1240 blob_write_uint32(ctx->blob, intrin->const_index[i]);
1241 break;
1242 }
1243 }
1244 }
1245
1246 static nir_intrinsic_instr *
1247 read_intrinsic(read_ctx *ctx, union packed_instr header)
1248 {
1249 nir_intrinsic_op op = header.intrinsic.intrinsic;
1250 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1251
1252 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1253 unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1254
1255 if (nir_intrinsic_infos[op].has_dest)
1256 read_dest(ctx, &intrin->dest, &intrin->instr, header);
1257
1258 for (unsigned i = 0; i < num_srcs; i++)
1259 read_src(ctx, &intrin->src[i], &intrin->instr);
1260
1261 /* Vectorized intrinsics take num_components from the dst or src whose
1262 * component count is 0 in the intrinsic info. Find it.
1263 */
1264 if (nir_intrinsic_infos[op].has_dest &&
1265 nir_intrinsic_infos[op].dest_components == 0) {
1266 intrin->num_components = nir_dest_num_components(intrin->dest);
1267 } else {
1268 for (unsigned i = 0; i < num_srcs; i++) {
1269 if (nir_intrinsic_infos[op].src_components[i] == 0) {
1270 intrin->num_components = nir_src_num_components(intrin->src[i]);
1271 break;
1272 }
1273 }
1274 }
1275
1276 if (num_indices) {
1277 switch (header.intrinsic.const_indices_encoding) {
1278 case const_indices_all_combined: {
1279 unsigned bit_size = 8 / num_indices;
1280 unsigned bit_mask = u_bit_consecutive(0, bit_size);
1281 for (unsigned i = 0; i < num_indices; i++) {
1282 intrin->const_index[i] =
1283 (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1284 bit_mask;
1285 }
1286 break;
1287 }
1288 case const_indices_8bit:
1289 for (unsigned i = 0; i < num_indices; i++)
1290 intrin->const_index[i] = blob_read_uint8(ctx->blob);
1291 break;
1292 case const_indices_16bit:
1293 for (unsigned i = 0; i < num_indices; i++)
1294 intrin->const_index[i] = blob_read_uint16(ctx->blob);
1295 break;
1296 case const_indices_32bit:
1297 for (unsigned i = 0; i < num_indices; i++)
1298 intrin->const_index[i] = blob_read_uint32(ctx->blob);
1299 break;
1300 }
1301 }
1302
1303 return intrin;
1304 }
1305
1306 static void
1307 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1308 {
1309 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1310 union packed_instr header;
1311 header.u32 = 0;
1312
1313 header.load_const.instr_type = lc->instr.type;
1314 header.load_const.last_component = lc->def.num_components - 1;
1315 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1316 header.load_const.packing = load_const_full;
1317
1318 /* Try to pack 1-component constants into the 19 free bits in the header. */
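/* For example, 1.0f (0x3f800000) has its low 13 bits clear, so it packs as
 * load_const_scalar_hi_19bits with packed_value = 0x3f800000 >> 13, while a
 * small integer like 5 fits load_const_scalar_lo_19bits_sext directly.
 */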
1319 if (lc->def.num_components == 1) {
1320 switch (lc->def.bit_size) {
1321 case 64:
1322 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1323 /* packed_value contains high 19 bits, low bits are 0 */
1324 header.load_const.packing = load_const_scalar_hi_19bits;
1325 header.load_const.packed_value = lc->value[0].u64 >> 45;
1326 } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
1327 /* packed_value contains low 19 bits, high bits are sign-extended */
1328 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1329 header.load_const.packed_value = lc->value[0].u64;
1330 }
1331 break;
1332
1333 case 32:
1334 if ((lc->value[0].u32 & 0x1fff) == 0) {
1335 header.load_const.packing = load_const_scalar_hi_19bits;
1336 header.load_const.packed_value = lc->value[0].u32 >> 13;
1337 } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
1338 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1339 header.load_const.packed_value = lc->value[0].u32;
1340 }
1341 break;
1342
1343 case 16:
1344 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1345 header.load_const.packed_value = lc->value[0].u16;
1346 break;
1347 case 8:
1348 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1349 header.load_const.packed_value = lc->value[0].u8;
1350 break;
1351 case 1:
1352 header.load_const.packing = load_const_scalar_lo_19bits_sext;
1353 header.load_const.packed_value = lc->value[0].b;
1354 break;
1355 default:
1356 unreachable("invalid bit_size");
1357 }
1358 }
1359
1360 blob_write_uint32(ctx->blob, header.u32);
1361
1362 if (header.load_const.packing == load_const_full) {
1363 switch (lc->def.bit_size) {
1364 case 64:
1365 blob_write_bytes(ctx->blob, lc->value,
1366 sizeof(*lc->value) * lc->def.num_components);
1367 break;
1368
1369 case 32:
1370 for (unsigned i = 0; i < lc->def.num_components; i++)
1371 blob_write_uint32(ctx->blob, lc->value[i].u32);
1372 break;
1373
1374 case 16:
1375 for (unsigned i = 0; i < lc->def.num_components; i++)
1376 blob_write_uint16(ctx->blob, lc->value[i].u16);
1377 break;
1378
1379 default:
1380 assert(lc->def.bit_size <= 8);
1381 for (unsigned i = 0; i < lc->def.num_components; i++)
1382 blob_write_uint8(ctx->blob, lc->value[i].u8);
1383 break;
1384 }
1385 }
1386
1387 write_add_object(ctx, &lc->def);
1388 }
1389
1390 static nir_load_const_instr *
1391 read_load_const(read_ctx *ctx, union packed_instr header)
1392 {
1393 nir_load_const_instr *lc =
1394 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1395 decode_bit_size_3bits(header.load_const.bit_size));
1396 lc->def.divergent = false;
1397
1398 switch (header.load_const.packing) {
1399 case load_const_scalar_hi_19bits:
1400 switch (lc->def.bit_size) {
1401 case 64:
1402 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1403 break;
1404 case 32:
1405 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1406 break;
1407 default:
1408 unreachable("invalid bit_size");
1409 }
1410 break;
1411
1412 case load_const_scalar_lo_19bits_sext:
1413 switch (lc->def.bit_size) {
1414 case 64:
1415 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1416 break;
1417 case 32:
1418 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1419 break;
1420 case 16:
1421 lc->value[0].u16 = header.load_const.packed_value;
1422 break;
1423 case 8:
1424 lc->value[0].u8 = header.load_const.packed_value;
1425 break;
1426 case 1:
1427 lc->value[0].b = header.load_const.packed_value;
1428 break;
1429 default:
1430 unreachable("invalid bit_size");
1431 }
1432 break;
1433
1434 case load_const_full:
1435 switch (lc->def.bit_size) {
1436 case 64:
1437 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1438 break;
1439
1440 case 32:
1441 for (unsigned i = 0; i < lc->def.num_components; i++)
1442 lc->value[i].u32 = blob_read_uint32(ctx->blob);
1443 break;
1444
1445 case 16:
1446 for (unsigned i = 0; i < lc->def.num_components; i++)
1447 lc->value[i].u16 = blob_read_uint16(ctx->blob);
1448 break;
1449
1450 default:
1451 assert(lc->def.bit_size <= 8);
1452 for (unsigned i = 0; i < lc->def.num_components; i++)
1453 lc->value[i].u8 = blob_read_uint8(ctx->blob);
1454 break;
1455 }
1456 break;
1457 }
1458
1459 read_add_object(ctx, &lc->def);
1460 return lc;
1461 }
1462
1463 static void
1464 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1465 {
1466 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1467
1468 union packed_instr header;
1469 header.u32 = 0;
1470
1471 header.undef.instr_type = undef->instr.type;
1472 header.undef.last_component = undef->def.num_components - 1;
1473 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1474
1475 blob_write_uint32(ctx->blob, header.u32);
1476 write_add_object(ctx, &undef->def);
1477 }
1478
1479 static nir_ssa_undef_instr *
1480 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1481 {
1482 nir_ssa_undef_instr *undef =
1483 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1484 decode_bit_size_3bits(header.undef.bit_size));
1485
1486 undef->def.divergent = false;
1487
1488 read_add_object(ctx, &undef->def);
1489 return undef;
1490 }
1491
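/* nir_tex_instr fields that fit in a single dword, written after the
 * texture/sampler indices (and the tg4 offsets, when present).
 */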
1492 union packed_tex_data {
1493 uint32_t u32;
1494 struct {
1495 unsigned sampler_dim:4;
1496 unsigned dest_type:8;
1497 unsigned coord_components:3;
1498 unsigned is_array:1;
1499 unsigned is_shadow:1;
1500 unsigned is_new_style_shadow:1;
1501 unsigned is_sparse:1;
1502 unsigned component:2;
1503 unsigned texture_non_uniform:1;
1504 unsigned sampler_non_uniform:1;
1505 unsigned array_is_lowered_cube:1;
1506 unsigned unused:6; /* Mark unused for valgrind. */
1507 } u;
1508 };
1509
1510 static void
1511 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1512 {
1513 assert(tex->num_srcs < 16);
1514 assert(tex->op < 32);
1515
1516 union packed_instr header;
1517 header.u32 = 0;
1518
1519 header.tex.instr_type = tex->instr.type;
1520 header.tex.num_srcs = tex->num_srcs;
1521 header.tex.op = tex->op;
1522
1523 write_dest(ctx, &tex->dest, header, tex->instr.type);
1524
1525 blob_write_uint32(ctx->blob, tex->texture_index);
1526 blob_write_uint32(ctx->blob, tex->sampler_index);
1527 if (tex->op == nir_texop_tg4)
1528 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1529
1530 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1531 union packed_tex_data packed = {
1532 .u.sampler_dim = tex->sampler_dim,
1533 .u.dest_type = tex->dest_type,
1534 .u.coord_components = tex->coord_components,
1535 .u.is_array = tex->is_array,
1536 .u.is_shadow = tex->is_shadow,
1537 .u.is_new_style_shadow = tex->is_new_style_shadow,
1538 .u.is_sparse = tex->is_sparse,
1539 .u.component = tex->component,
1540 .u.texture_non_uniform = tex->texture_non_uniform,
1541 .u.sampler_non_uniform = tex->sampler_non_uniform,
1542 .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1543 };
1544 blob_write_uint32(ctx->blob, packed.u32);
1545
1546 for (unsigned i = 0; i < tex->num_srcs; i++) {
1547 union packed_src src;
1548 src.u32 = 0;
1549 src.tex.src_type = tex->src[i].src_type;
1550 write_src_full(ctx, &tex->src[i].src, src);
1551 }
1552 }
1553
1554 static nir_tex_instr *
1555 read_tex(read_ctx *ctx, union packed_instr header)
1556 {
1557 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1558
1559 read_dest(ctx, &tex->dest, &tex->instr, header);
1560
1561 tex->op = header.tex.op;
1562 tex->texture_index = blob_read_uint32(ctx->blob);
1563 tex->sampler_index = blob_read_uint32(ctx->blob);
1564 if (tex->op == nir_texop_tg4)
1565 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1566
1567 union packed_tex_data packed;
1568 packed.u32 = blob_read_uint32(ctx->blob);
1569 tex->sampler_dim = packed.u.sampler_dim;
1570 tex->dest_type = packed.u.dest_type;
1571 tex->coord_components = packed.u.coord_components;
1572 tex->is_array = packed.u.is_array;
1573 tex->is_shadow = packed.u.is_shadow;
1574 tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1575 tex->is_sparse = packed.u.is_sparse;
1576 tex->component = packed.u.component;
1577 tex->texture_non_uniform = packed.u.texture_non_uniform;
1578 tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1579 tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1580
1581 for (unsigned i = 0; i < tex->num_srcs; i++) {
1582 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1583 tex->src[i].src_type = src.tex.src_type;
1584 }
1585
1586 return tex;
1587 }
1588
1589 static void
1590 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1591 {
1592 union packed_instr header;
1593 header.u32 = 0;
1594
1595 header.phi.instr_type = phi->instr.type;
1596 header.phi.num_srcs = exec_list_length(&phi->srcs);
1597
1598 /* Phi nodes are special, since they may reference SSA definitions and
1599 * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1600 * and then store enough information so that a later fixup pass can fill
1601 * them in correctly.
1602 */
1603 write_dest(ctx, &phi->dest, header, phi->instr.type);
1604
1605 nir_foreach_phi_src(src, phi) {
1606 assert(src->src.is_ssa);
1607 size_t blob_offset = blob_reserve_uint32(ctx->blob);
1608 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1609 assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1610 write_phi_fixup fixup = {
1611 .blob_offset = blob_offset,
1612 .src = src->src.ssa,
1613 .block = src->pred,
1614 };
1615 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1616 }
1617 }
1618
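/* Second pass over the blob: now that every SSA def and block has an index,
 * overwrite the two placeholder dwords reserved for each phi source in
 * write_phi().
 */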
1619 static void
1620 write_fixup_phis(write_ctx *ctx)
1621 {
1622 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1623 blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1624 write_lookup_object(ctx, fixup->src));
1625 blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1626 write_lookup_object(ctx, fixup->block));
1627 }
1628
1629 util_dynarray_clear(&ctx->phi_fixups);
1630 }
1631
1632 static nir_phi_instr *
1633 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1634 {
1635 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1636
1637 read_dest(ctx, &phi->dest, &phi->instr, header);
1638
1639 /* For similar reasons as before, we just store the index directly into the
1640 * pointer, and let a later pass resolve the phi sources.
1641 *
1642 * In order to ensure that the copied sources (which are just the indices
1643 * from the blob for now) don't get inserted into the old shader's use-def
1644 * lists, we have to add the phi instruction *before* we set up its
1645 * sources.
1646 */
1647 nir_instr_insert_after_block(blk, &phi->instr);
1648
1649 for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1650 nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1651 nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1652 nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
1653
1654 /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1655 * we have to set the parent_instr manually. It doesn't really matter
1656 * when we do it, so we might as well do it here.
1657 */
1658 src->src.parent_instr = &phi->instr;
1659
1660 /* Stash it in the list of phi sources. We'll walk this list and fix up
1661 * sources at the very end of read_function_impl.
1662 */
1663 list_add(&src->src.use_link, &ctx->phi_srcs);
1664 }
1665
1666 return phi;
1667 }
1668
1669 static void
1670 read_fixup_phis(read_ctx *ctx)
1671 {
1672 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1673 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1674 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1675
1676 /* Remove from this list */
1677 list_del(&src->src.use_link);
1678
1679 list_addtail(&src->src.use_link, &src->src.ssa->uses);
1680 }
1681 assert(list_is_empty(&ctx->phi_srcs));
1682 }
1683
1684 static void
1685 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1686 {
1687 /* These aren't handled because they require special block linking */
1688 assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1689
1690 assert(jmp->type < 4);
1691
1692 union packed_instr header;
1693 header.u32 = 0;
1694
1695 header.jump.instr_type = jmp->instr.type;
1696 header.jump.type = jmp->type;
1697
1698 blob_write_uint32(ctx->blob, header.u32);
1699 }
1700
1701 static nir_jump_instr *
1702 read_jump(read_ctx *ctx, union packed_instr header)
1703 {
1704 /* These aren't handled because they require special block linking */
1705 assert(header.jump.type != nir_jump_goto &&
1706 header.jump.type != nir_jump_goto_if);
1707
1708 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1709 return jmp;
1710 }
1711
1712 static void
1713 write_call(write_ctx *ctx, const nir_call_instr *call)
1714 {
1715 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1716
1717 for (unsigned i = 0; i < call->num_params; i++)
1718 write_src(ctx, &call->params[i]);
1719 }
1720
1721 static nir_call_instr *
1722 read_call(read_ctx *ctx)
1723 {
1724 nir_function *callee = read_object(ctx);
1725 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1726
1727 for (unsigned i = 0; i < call->num_params; i++)
1728 read_src(ctx, &call->params[i], call);
1729
1730 return call;
1731 }
1732
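/* Every instruction begins with a 32-bit packed_instr header that encodes
 * the instruction type (4 bits, see the assert below).  Most of the
 * type-specific writers emit that header themselves; calls have no extra
 * header fields, so the bare type is written here before write_call().
 */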
1733 static void
1734 write_instr(write_ctx *ctx, const nir_instr *instr)
1735 {
1736 /* We have only 4 bits for the instruction type. */
1737 assert(instr->type < 16);
1738
1739 switch (instr->type) {
1740 case nir_instr_type_alu:
1741 write_alu(ctx, nir_instr_as_alu(instr));
1742 break;
1743 case nir_instr_type_deref:
1744 write_deref(ctx, nir_instr_as_deref(instr));
1745 break;
1746 case nir_instr_type_intrinsic:
1747 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1748 break;
1749 case nir_instr_type_load_const:
1750 write_load_const(ctx, nir_instr_as_load_const(instr));
1751 break;
1752 case nir_instr_type_ssa_undef:
1753 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1754 break;
1755 case nir_instr_type_tex:
1756 write_tex(ctx, nir_instr_as_tex(instr));
1757 break;
1758 case nir_instr_type_phi:
1759 write_phi(ctx, nir_instr_as_phi(instr));
1760 break;
1761 case nir_instr_type_jump:
1762 write_jump(ctx, nir_instr_as_jump(instr));
1763 break;
1764 case nir_instr_type_call:
1765 blob_write_uint32(ctx->blob, instr->type);
1766 write_call(ctx, nir_instr_as_call(instr));
1767 break;
1768 case nir_instr_type_parallel_copy:
1769 unreachable("Cannot write parallel copies");
1770 default:
1771 unreachable("bad instr type");
1772 }
1773 }
1774
1775 /* Return the number of instructions read; a run of ALU instructions sharing one header counts as more than one. */
1776 static unsigned
1777 read_instr(read_ctx *ctx, nir_block *block)
1778 {
1779 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1780 union packed_instr header;
1781 header.u32 = blob_read_uint32(ctx->blob);
1782 nir_instr *instr;
1783
1784 switch (header.any.instr_type) {
1785 case nir_instr_type_alu:
1786 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1787 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1788 return header.alu.num_followup_alu_sharing_header + 1;
1789 case nir_instr_type_deref:
1790 instr = &read_deref(ctx, header)->instr;
1791 break;
1792 case nir_instr_type_intrinsic:
1793 instr = &read_intrinsic(ctx, header)->instr;
1794 break;
1795 case nir_instr_type_load_const:
1796 instr = &read_load_const(ctx, header)->instr;
1797 break;
1798 case nir_instr_type_ssa_undef:
1799 instr = &read_ssa_undef(ctx, header)->instr;
1800 break;
1801 case nir_instr_type_tex:
1802 instr = &read_tex(ctx, header)->instr;
1803 break;
1804 case nir_instr_type_phi:
1805 /* Phi instructions are a bit of a special case when reading because we
1806 * don't want inserting the instruction to automatically set up its
1807 * use/defs for us. Instead, we have to wait until all the blocks and
1808 * instructions are read so that we can set up their sources.
1809 */
1810 read_phi(ctx, block, header);
1811 return 1;
1812 case nir_instr_type_jump:
1813 instr = &read_jump(ctx, header)->instr;
1814 break;
1815 case nir_instr_type_call:
1816 instr = &read_call(ctx)->instr;
1817 break;
1818 case nir_instr_type_parallel_copy:
1819 unreachable("Cannot read parallel copies");
1820 default:
1821 unreachable("bad instr type");
1822 }
1823
1824 nir_instr_insert_after_block(block, instr);
1825 return 1;
1826 }
1827
1828 static void
1829 write_block(write_ctx *ctx, const nir_block *block)
1830 {
1831 write_add_object(ctx, block);
1832 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1833
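/* Reset the ALU-header-sharing state so a header is never reused across a
 * block boundary.
 */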
1834 ctx->last_instr_type = ~0;
1835 ctx->last_alu_header_offset = 0;
1836
1837 nir_foreach_instr(instr, block) {
1838 write_instr(ctx, instr);
1839 ctx->last_instr_type = instr->type;
1840 }
1841 }
1842
1843 static void
1844 read_block(read_ctx *ctx, struct exec_list *cf_list)
1845 {
1846 /* Don't actually create a new block. Just use the one from the tail of
1847 * the list. NIR guarantees that the tail of the list is a block and that
1848 * no two blocks are side-by-side in the IR, so it should be empty.
1849 */
1850 nir_block *block =
1851 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1852
1853 read_add_object(ctx, block);
1854 unsigned num_instrs = blob_read_uint32(ctx->blob);
1855 for (unsigned i = 0; i < num_instrs;) {
1856 i += read_instr(ctx, block);
1857 }
1858 }
1859
1860 static void
1861 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1862
1863 static void
1864 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1865
1866 static void
1867 write_if(write_ctx *ctx, nir_if *nif)
1868 {
1869 write_src(ctx, &nif->condition);
1870 blob_write_uint8(ctx->blob, nif->control);
1871
1872 write_cf_list(ctx, &nif->then_list);
1873 write_cf_list(ctx, &nif->else_list);
1874 }
1875
1876 static void
1877 read_if(read_ctx *ctx, struct exec_list *cf_list)
1878 {
1879 nir_if *nif = nir_if_create(ctx->nir);
1880
1881 read_src(ctx, &nif->condition, nif);
1882 nif->control = blob_read_uint8(ctx->blob);
1883
1884 nir_cf_node_insert_end(cf_list, &nif->cf_node);
1885
1886 read_cf_list(ctx, &nif->then_list);
1887 read_cf_list(ctx, &nif->else_list);
1888 }
1889
1890 static void
1891 write_loop(write_ctx *ctx, nir_loop *loop)
1892 {
1893 blob_write_uint8(ctx->blob, loop->control);
1894 blob_write_uint8(ctx->blob, loop->divergent);
1895 write_cf_list(ctx, &loop->body);
1896 }
1897
1898 static void
1899 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1900 {
1901 nir_loop *loop = nir_loop_create(ctx->nir);
1902
1903 nir_cf_node_insert_end(cf_list, &loop->cf_node);
1904
1905 loop->control = blob_read_uint8(ctx->blob);
1906 loop->divergent = blob_read_uint8(ctx->blob);
1907 read_cf_list(ctx, &loop->body);
1908 }
1909
1910 static void
1911 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1912 {
1913 blob_write_uint32(ctx->blob, cf->type);
1914
1915 switch (cf->type) {
1916 case nir_cf_node_block:
1917 write_block(ctx, nir_cf_node_as_block(cf));
1918 break;
1919 case nir_cf_node_if:
1920 write_if(ctx, nir_cf_node_as_if(cf));
1921 break;
1922 case nir_cf_node_loop:
1923 write_loop(ctx, nir_cf_node_as_loop(cf));
1924 break;
1925 default:
1926 unreachable("bad cf type");
1927 }
1928 }
1929
1930 static void
1931 read_cf_node(read_ctx *ctx, struct exec_list *list)
1932 {
1933 nir_cf_node_type type = blob_read_uint32(ctx->blob);
1934
1935 switch (type) {
1936 case nir_cf_node_block:
1937 read_block(ctx, list);
1938 break;
1939 case nir_cf_node_if:
1940 read_if(ctx, list);
1941 break;
1942 case nir_cf_node_loop:
1943 read_loop(ctx, list);
1944 break;
1945 default:
1946 unreachable("bad cf type");
1947 }
1948 }
1949
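/* A control-flow list is written as a node count followed by each node;
 * write_cf_node() recurses back into write_cf_list() for the bodies of ifs
 * and loops.
 */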
1950 static void
1951 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1952 {
1953 blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1954 foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1955 write_cf_node(ctx, cf);
1956 }
1957 }
1958
1959 static void
1960 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1961 {
1962 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1963 for (unsigned i = 0; i < num_cf_nodes; i++)
1964 read_cf_node(ctx, cf_list);
1965 }
1966
1967 static void
1968 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1969 {
1970 blob_write_uint8(ctx->blob, fi->structured);
1971 blob_write_uint8(ctx->blob, !!fi->preamble);
1972
1973 if (fi->preamble)
1974 blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));
1975
1976 write_var_list(ctx, &fi->locals);
1977 write_reg_list(ctx, &fi->registers);
1978 blob_write_uint32(ctx->blob, fi->reg_alloc);
1979
1980 write_cf_list(ctx, &fi->body);
1981 write_fixup_phis(ctx);
1982 }
1983
1984 static nir_function_impl *
1985 read_function_impl(read_ctx *ctx, nir_function *fxn)
1986 {
1987 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1988 fi->function = fxn;
1989
1990 fi->structured = blob_read_uint8(ctx->blob);
1991 bool preamble = blob_read_uint8(ctx->blob);
1992
1993 if (preamble)
1994 fi->preamble = read_object(ctx);
1995
1996 read_var_list(ctx, &fi->locals);
1997 read_reg_list(ctx, &fi->registers);
1998 fi->reg_alloc = blob_read_uint32(ctx->blob);
1999
2000 read_cf_list(ctx, &fi->body);
2001 read_fixup_phis(ctx);
2002
2003 fi->valid_metadata = 0;
2004
2005 return fi;
2006 }
2007
2008 static void
2009 write_function(write_ctx *ctx, const nir_function *fxn)
2010 {
2011 uint32_t flags = 0;
2012 if (fxn->is_entrypoint)
2013 flags |= 0x1;
2014 if (fxn->is_preamble)
2015 flags |= 0x2;
2016 if (fxn->name)
2017 flags |= 0x4;
2018 if (fxn->impl)
2019 flags |= 0x8;
2020 blob_write_uint32(ctx->blob, flags);
2021 if (fxn->name)
2022 blob_write_string(ctx->blob, fxn->name);
2023
2024 write_add_object(ctx, fxn);
2025
2026 blob_write_uint32(ctx->blob, fxn->num_params);
2027 for (unsigned i = 0; i < fxn->num_params; i++) {
2028 uint32_t val =
2029 ((uint32_t)fxn->params[i].num_components) |
2030 ((uint32_t)fxn->params[i].bit_size) << 8;
2031 blob_write_uint32(ctx->blob, val);
2032 }
2033
2034 /* At first glance, it looks like we should write the function_impl here.
2035 * However, call instructions need to be able to reference at least the
2036 * function, and calls are processed as we write the function_impls, so
2037 * we stop here and write the function_impls in a second pass.
2038 */
2039 }
2040
2041 static void
2042 read_function(read_ctx *ctx)
2043 {
2044 uint32_t flags = blob_read_uint32(ctx->blob);
2045 bool has_name = flags & 0x4;
2046 char *name = has_name ? blob_read_string(ctx->blob) : NULL;
2047
2048 nir_function *fxn = nir_function_create(ctx->nir, name);
2049
2050 read_add_object(ctx, fxn);
2051
2052 fxn->num_params = blob_read_uint32(ctx->blob);
2053 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
2054 for (unsigned i = 0; i < fxn->num_params; i++) {
2055 uint32_t val = blob_read_uint32(ctx->blob);
2056 fxn->params[i].num_components = val & 0xff;
2057 fxn->params[i].bit_size = (val >> 8) & 0xff;
2058 }
2059
2060 fxn->is_entrypoint = flags & 0x1;
2061 fxn->is_preamble = flags & 0x2;
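/* The impl itself is read in a second pass in nir_deserialize(); for now
 * just flag that this function has one.
 */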
2062 if (flags & 0x8)
2063 fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
2064 }
2065
2066 static void
2067 write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
2068 {
2069 if (xfb == NULL) {
2070 blob_write_uint32(ctx->blob, 0);
2071 } else {
2072 size_t size = nir_xfb_info_size(xfb->output_count);
2073 assert(size <= UINT32_MAX);
2074 blob_write_uint32(ctx->blob, size);
2075 blob_write_bytes(ctx->blob, xfb, size);
2076 }
2077 }
2078
2079 static nir_xfb_info *
2080 read_xfb_info(read_ctx *ctx)
2081 {
2082 uint32_t size = blob_read_uint32(ctx->blob);
2083 if (size == 0)
2084 return NULL;
2085
2086 struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
2087 blob_copy_bytes(ctx->blob, (void *)xfb, size);
2088
2089 return xfb;
2090 }
2091
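/* A typical round trip through the serializer (see
 * nir_shader_serialize_deserialize() at the end of this file) looks roughly
 * like this:
 *
 *    struct blob writer;
 *    blob_init(&writer);
 *    nir_serialize(&writer, shader, true);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, writer.data, writer.size);
 *    nir_shader *clone = nir_deserialize(NULL, shader->options, &reader);
 *
 *    blob_finish(&writer);
 */
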
2092 /**
2093 * Serialize NIR into a binary blob.
2094 *
2095 * \param strip Don't serialize information only useful for debugging,
2096 * such as variable names, making cache hits from similar
2097 * shaders more likely.
2098 */
2099 void
2100 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2101 {
2102 write_ctx ctx = {0};
2103 ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2104 ctx.blob = blob;
2105 ctx.nir = nir;
2106 ctx.strip = strip;
2107 util_dynarray_init(&ctx.phi_fixups, NULL);
2108
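/* The number of remap-table entries is only known once everything has been
 * written, so reserve space for it here and patch it in with
 * blob_overwrite_uint32() at the end.
 */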
2109 size_t idx_size_offset = blob_reserve_uint32(blob);
2110
2111 struct shader_info info = nir->info;
2112 uint32_t strings = 0;
2113 if (!strip && info.name)
2114 strings |= 0x1;
2115 if (!strip && info.label)
2116 strings |= 0x2;
2117 blob_write_uint32(blob, strings);
2118 if (!strip && info.name)
2119 blob_write_string(blob, info.name);
2120 if (!strip && info.label)
2121 blob_write_string(blob, info.label);
2122 info.name = info.label = NULL;
2123 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2124
2125 write_var_list(&ctx, &nir->variables);
2126
2127 blob_write_uint32(blob, nir->num_inputs);
2128 blob_write_uint32(blob, nir->num_uniforms);
2129 blob_write_uint32(blob, nir->num_outputs);
2130 blob_write_uint32(blob, nir->scratch_size);
2131
2132 blob_write_uint32(blob, exec_list_length(&nir->functions));
2133 nir_foreach_function(fxn, nir) {
2134 write_function(&ctx, fxn);
2135 }
2136
2137 nir_foreach_function(fxn, nir) {
2138 if (fxn->impl)
2139 write_function_impl(&ctx, fxn->impl);
2140 }
2141
2142 blob_write_uint32(blob, nir->constant_data_size);
2143 if (nir->constant_data_size > 0)
2144 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2145
2146 write_xfb_info(&ctx, nir->xfb_info);
2147
2148 blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2149
2150 _mesa_hash_table_destroy(ctx.remap_table, NULL);
2151 util_dynarray_fini(&ctx.phi_fixups);
2152 }
2153
2154 nir_shader *
2155 nir_deserialize(void *mem_ctx,
2156 const struct nir_shader_compiler_options *options,
2157 struct blob_reader *blob)
2158 {
2159 read_ctx ctx = {0};
2160 ctx.blob = blob;
2161 list_inithead(&ctx.phi_srcs);
2162 ctx.idx_table_len = blob_read_uint32(blob);
2163 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2164
2165 uint32_t strings = blob_read_uint32(blob);
2166 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2167 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2168
2169 struct shader_info info;
2170 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2171
2172 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2173
2174 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2175 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2176
2177 ctx.nir->info = info;
2178
2179 read_var_list(&ctx, &ctx.nir->variables);
2180
2181 ctx.nir->num_inputs = blob_read_uint32(blob);
2182 ctx.nir->num_uniforms = blob_read_uint32(blob);
2183 ctx.nir->num_outputs = blob_read_uint32(blob);
2184 ctx.nir->scratch_size = blob_read_uint32(blob);
2185
2186 unsigned num_functions = blob_read_uint32(blob);
2187 for (unsigned i = 0; i < num_functions; i++)
2188 read_function(&ctx);
2189
2190 nir_foreach_function(fxn, ctx.nir) {
2191 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2192 fxn->impl = read_function_impl(&ctx, fxn);
2193 }
2194
2195 ctx.nir->constant_data_size = blob_read_uint32(blob);
2196 if (ctx.nir->constant_data_size > 0) {
2197 ctx.nir->constant_data =
2198 ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2199 blob_copy_bytes(blob, ctx.nir->constant_data,
2200 ctx.nir->constant_data_size);
2201 }
2202
2203 ctx.nir->xfb_info = read_xfb_info(&ctx);
2204
2205 free(ctx.idx_table);
2206
2207 nir_validate_shader(ctx.nir, "after deserialize");
2208
2209 return ctx.nir;
2210 }
2211
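/* Round-trip a shader through the serializer in place: serialize it, free
 * the old contents, deserialize the blob, and replace the shader with the
 * copy.
 */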
2212 void
2213 nir_shader_serialize_deserialize(nir_shader *shader)
2214 {
2215 const struct nir_shader_compiler_options *options = shader->options;
2216
2217 struct blob writer;
2218 blob_init(&writer);
2219 nir_serialize(&writer, shader, false);
2220
2221 /* Delete all of the shader's ralloc children but leave the shader itself alone */
2222 void *dead_ctx = ralloc_context(NULL);
2223 ralloc_adopt(dead_ctx, shader);
2224 ralloc_free(dead_ctx);
2225
2226 dead_ctx = ralloc_context(NULL);
2227
2228 struct blob_reader reader;
2229 blob_reader_init(&reader, writer.data, writer.size);
2230 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2231
2232 blob_finish(&writer);
2233
2234 nir_shader_replace(shader, copy);
2235 ralloc_free(dead_ctx);
2236 }
2237