1 /*
2  * Copyright © 2017 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_serialize.h"
25 #include "nir_control_flow.h"
26 #include "nir_xfb_info.h"
27 #include "util/u_dynarray.h"
28 #include "util/u_math.h"
29 
30 #define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
31 #define MAX_OBJECT_IDS (1 << 20)
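/* Note: object indices must fit in the 20-bit object_idx field of union
 * packed_src below; write_add_object() asserts the index never reaches this
 * limit.
 */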
32 
33 typedef struct {
34    size_t blob_offset;
35    nir_ssa_def *src;
36    nir_block *block;
37 } write_phi_fixup;
38 
39 typedef struct {
40    const nir_shader *nir;
41 
42    struct blob *blob;
43 
44    /* maps pointer to index */
45    struct hash_table *remap_table;
46 
47    /* the next index to assign to a NIR in-memory object */
48    uint32_t next_idx;
49 
50    /* Array of write_phi_fixup structs representing phi sources that need to
51     * be resolved in the second pass.
52     */
53    struct util_dynarray phi_fixups;
54 
55    /* The last serialized type. */
56    const struct glsl_type *last_type;
57    const struct glsl_type *last_interface_type;
58    struct nir_variable_data last_var_data;
59 
60    /* For skipping equal ALU headers (typical after scalarization). */
61    nir_instr_type last_instr_type;
62    uintptr_t last_alu_header_offset;
63    uint32_t last_alu_header;
64 
65    /* Don't write optional data such as variable names. */
66    bool strip;
67 } write_ctx;
68 
69 typedef struct {
70    nir_shader *nir;
71 
72    struct blob_reader *blob;
73 
74    /* the next index to assign to a NIR in-memory object */
75    uint32_t next_idx;
76 
77    /* The length of the index -> object table */
78    uint32_t idx_table_len;
79 
80    /* map from index to deserialized pointer */
81    void **idx_table;
82 
83    /* List of phi sources. */
84    struct list_head phi_srcs;
85 
86    /* The last deserialized type. */
87    const struct glsl_type *last_type;
88    const struct glsl_type *last_interface_type;
89    struct nir_variable_data last_var_data;
90 } read_ctx;
91 
92 static void
93 write_add_object(write_ctx *ctx, const void *obj)
94 {
95    uint32_t index = ctx->next_idx++;
96    assert(index != MAX_OBJECT_IDS);
97    _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
98 }
99 
100 static uint32_t
101 write_lookup_object(write_ctx *ctx, const void *obj)
102 {
103    struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
104    assert(entry);
105    return (uint32_t)(uintptr_t) entry->data;
106 }
107 
108 static void
109 read_add_object(read_ctx *ctx, void *obj)
110 {
111    assert(ctx->next_idx < ctx->idx_table_len);
112    ctx->idx_table[ctx->next_idx++] = obj;
113 }
114 
115 static void *
116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118    assert(idx < ctx->idx_table_len);
119    return ctx->idx_table[idx];
120 }
121 
122 static void *
123 read_object(read_ctx *ctx)
124 {
125    return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127 
128 static uint32_t
129 encode_bit_size_3bits(uint8_t bit_size)
130 {
131    /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
132    assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
133    if (bit_size)
134       return util_logbase2(bit_size) + 1;
135    return 0;
136 }
137 
138 static uint8_t
139 decode_bit_size_3bits(uint8_t bit_size)
140 {
141    if (bit_size)
142       return 1 << (bit_size - 1);
143    return 0;
144 }
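/* For illustration, the 3-bit encoding above maps:
 *   bit_size:  0  1  2  4  8  16  32  64
 *   encoded:   0  1  2  3  4   5   6   7
 * e.g. encode_bit_size_3bits(32) == 6 and decode_bit_size_3bits(6) == 32.
 */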
145 
146 #define NUM_COMPONENTS_IS_SEPARATE_7   7
147 
148 static uint8_t
149 encode_num_components_in_3bits(uint8_t num_components)
150 {
151    if (num_components <= 4)
152       return num_components;
153    if (num_components == 8)
154       return 5;
155    if (num_components == 16)
156       return 6;
157 
158    /* special value indicating that num_components is in the next uint32 */
159    return NUM_COMPONENTS_IS_SEPARATE_7;
160 }
161 
162 static uint8_t
163 decode_num_components_in_3bits(uint8_t value)
164 {
165    if (value <= 4)
166       return value;
167    if (value == 5)
168       return 8;
169    if (value == 6)
170       return 16;
171 
172    unreachable("invalid num_components encoding");
173    return 0;
174 }
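/* For illustration, the 3-bit encoding above maps 0-4 to themselves, 8 to 5
 * and 16 to 6.  Any other component count is encoded as
 * NUM_COMPONENTS_IS_SEPARATE_7 and the real value is written as a separate
 * uint32 right after the instruction header (see write_dest()).
 */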
175 
176 static void
177 write_constant(write_ctx *ctx, const nir_constant *c)
178 {
179    blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
180    blob_write_uint32(ctx->blob, c->num_elements);
181    for (unsigned i = 0; i < c->num_elements; i++)
182       write_constant(ctx, c->elements[i]);
183 }
184 
185 static nir_constant *
186 read_constant(read_ctx *ctx, nir_variable *nvar)
187 {
188    nir_constant *c = ralloc(nvar, nir_constant);
189 
190    blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
191    c->num_elements = blob_read_uint32(ctx->blob);
192    c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
193    for (unsigned i = 0; i < c->num_elements; i++)
194       c->elements[i] = read_constant(ctx, nvar);
195 
196    return c;
197 }
198 
199 enum var_data_encoding {
200    var_encode_full,
201    var_encode_shader_temp,
202    var_encode_function_temp,
203    var_encode_location_diff,
204 };
205 
206 union packed_var {
207    uint32_t u32;
208    struct {
209       unsigned has_name:1;
210       unsigned has_constant_initializer:1;
211       unsigned has_pointer_initializer:1;
212       unsigned has_interface_type:1;
213       unsigned num_state_slots:7;
214       unsigned data_encoding:2;
215       unsigned type_same_as_last:1;
216       unsigned interface_type_same_as_last:1;
217       unsigned ray_query:1;
218       unsigned num_members:16;
219    } u;
220 };
221 
222 union packed_var_data_diff {
223    uint32_t u32;
224    struct {
225       int location:13;
226       int location_frac:3;
227       int driver_location:16;
228    } u;
229 };
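/* The diff encoding above is only usable when every field of
 * nir_variable_data other than location, location_frac and driver_location
 * matches the previously serialized variable, and the location deltas fit
 * the signed bit-fields: |delta location| < (1 << 12) and
 * |delta driver_location| < (1 << 15) (see write_variable()).
 */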
230 
231 static void
232 write_variable(write_ctx *ctx, const nir_variable *var)
233 {
234    write_add_object(ctx, var);
235 
236    assert(var->num_state_slots < (1 << 7));
237 
238    STATIC_ASSERT(sizeof(union packed_var) == 4);
239    union packed_var flags;
240    flags.u32 = 0;
241 
242    flags.u.has_name = !ctx->strip && var->name;
243    flags.u.has_constant_initializer = !!(var->constant_initializer);
244    flags.u.has_pointer_initializer = !!(var->pointer_initializer);
245    flags.u.has_interface_type = !!(var->interface_type);
246    flags.u.type_same_as_last = var->type == ctx->last_type;
247    flags.u.interface_type_same_as_last =
248       var->interface_type && var->interface_type == ctx->last_interface_type;
249    flags.u.num_state_slots = var->num_state_slots;
250    flags.u.num_members = var->num_members;
251 
252    struct nir_variable_data data = var->data;
253 
254    /* When stripping, we expect that the location is no longer needed,
255     * which is typically the case once shaders have been linked.
256     */
257    if (ctx->strip &&
258        data.mode != nir_var_system_value &&
259        data.mode != nir_var_shader_in &&
260        data.mode != nir_var_shader_out)
261       data.location = 0;
262 
263    /* Temporary variables don't serialize var->data. */
264    if (data.mode == nir_var_shader_temp)
265       flags.u.data_encoding = var_encode_shader_temp;
266    else if (data.mode == nir_var_function_temp)
267       flags.u.data_encoding = var_encode_function_temp;
268    else {
269       struct nir_variable_data tmp = data;
270 
271       tmp.location = ctx->last_var_data.location;
272       tmp.location_frac = ctx->last_var_data.location_frac;
273       tmp.driver_location = ctx->last_var_data.driver_location;
274 
275       /* See if we can encode only the difference in locations from the last
276        * variable.
277        */
278       if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
279           abs((int)data.location -
280               (int)ctx->last_var_data.location) < (1 << 12) &&
281           abs((int)data.driver_location -
282               (int)ctx->last_var_data.driver_location) < (1 << 15))
283          flags.u.data_encoding = var_encode_location_diff;
284       else
285          flags.u.data_encoding = var_encode_full;
286    }
287 
288    flags.u.ray_query = var->data.ray_query;
289 
290    blob_write_uint32(ctx->blob, flags.u32);
291 
292    if (!flags.u.type_same_as_last) {
293       encode_type_to_blob(ctx->blob, var->type);
294       ctx->last_type = var->type;
295    }
296 
297    if (var->interface_type && !flags.u.interface_type_same_as_last) {
298       encode_type_to_blob(ctx->blob, var->interface_type);
299       ctx->last_interface_type = var->interface_type;
300    }
301 
302    if (flags.u.has_name)
303       blob_write_string(ctx->blob, var->name);
304 
305    if (flags.u.data_encoding == var_encode_full ||
306        flags.u.data_encoding == var_encode_location_diff) {
307       if (flags.u.data_encoding == var_encode_full) {
308          blob_write_bytes(ctx->blob, &data, sizeof(data));
309       } else {
310          /* Serialize only the difference in locations from the last variable.
311           */
312          union packed_var_data_diff diff;
313 
314          diff.u.location = data.location - ctx->last_var_data.location;
315          diff.u.location_frac = data.location_frac -
316                                 ctx->last_var_data.location_frac;
317          diff.u.driver_location = data.driver_location -
318                                   ctx->last_var_data.driver_location;
319 
320          blob_write_uint32(ctx->blob, diff.u32);
321       }
322 
323       ctx->last_var_data = data;
324    }
325 
326    for (unsigned i = 0; i < var->num_state_slots; i++) {
327       blob_write_bytes(ctx->blob, &var->state_slots[i],
328                        sizeof(var->state_slots[i]));
329    }
330    if (var->constant_initializer)
331       write_constant(ctx, var->constant_initializer);
332    if (var->pointer_initializer)
333       write_lookup_object(ctx, var->pointer_initializer);
334    if (var->num_members > 0) {
335       blob_write_bytes(ctx->blob, (uint8_t *) var->members,
336                        var->num_members * sizeof(*var->members));
337    }
338 }
339 
340 static nir_variable *
341 read_variable(read_ctx *ctx)
342 {
343    nir_variable *var = rzalloc(ctx->nir, nir_variable);
344    read_add_object(ctx, var);
345 
346    union packed_var flags;
347    flags.u32 = blob_read_uint32(ctx->blob);
348 
349    if (flags.u.type_same_as_last) {
350       var->type = ctx->last_type;
351    } else {
352       var->type = decode_type_from_blob(ctx->blob);
353       ctx->last_type = var->type;
354    }
355 
356    if (flags.u.has_interface_type) {
357       if (flags.u.interface_type_same_as_last) {
358          var->interface_type = ctx->last_interface_type;
359       } else {
360          var->interface_type = decode_type_from_blob(ctx->blob);
361          ctx->last_interface_type = var->interface_type;
362       }
363    }
364 
365    if (flags.u.has_name) {
366       const char *name = blob_read_string(ctx->blob);
367       var->name = ralloc_strdup(var, name);
368    } else {
369       var->name = NULL;
370    }
371 
372    if (flags.u.data_encoding == var_encode_shader_temp)
373       var->data.mode = nir_var_shader_temp;
374    else if (flags.u.data_encoding == var_encode_function_temp)
375       var->data.mode = nir_var_function_temp;
376    else if (flags.u.data_encoding == var_encode_full) {
377       blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
378       ctx->last_var_data = var->data;
379    } else { /* var_encode_location_diff */
380       union packed_var_data_diff diff;
381       diff.u32 = blob_read_uint32(ctx->blob);
382 
383       var->data = ctx->last_var_data;
384       var->data.location += diff.u.location;
385       var->data.location_frac += diff.u.location_frac;
386       var->data.driver_location += diff.u.driver_location;
387 
388       ctx->last_var_data = var->data;
389    }
390 
391    var->data.ray_query = flags.u.ray_query;
392 
393    var->num_state_slots = flags.u.num_state_slots;
394    if (var->num_state_slots != 0) {
395       var->state_slots = ralloc_array(var, nir_state_slot,
396                                       var->num_state_slots);
397       for (unsigned i = 0; i < var->num_state_slots; i++) {
398          blob_copy_bytes(ctx->blob, &var->state_slots[i],
399                          sizeof(var->state_slots[i]));
400       }
401    }
402    if (flags.u.has_constant_initializer)
403       var->constant_initializer = read_constant(ctx, var);
404    else
405       var->constant_initializer = NULL;
406 
407    if (flags.u.has_pointer_initializer)
408       var->pointer_initializer = read_object(ctx);
409    else
410       var->pointer_initializer = NULL;
411 
412    var->num_members = flags.u.num_members;
413    if (var->num_members > 0) {
414       var->members = ralloc_array(var, struct nir_variable_data,
415                                   var->num_members);
416       blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
417                       var->num_members * sizeof(*var->members));
418    }
419 
420    return var;
421 }
422 
423 static void
424 write_var_list(write_ctx *ctx, const struct exec_list *src)
425 {
426    blob_write_uint32(ctx->blob, exec_list_length(src));
427    foreach_list_typed(nir_variable, var, node, src) {
428       write_variable(ctx, var);
429    }
430 }
431 
432 static void
433 read_var_list(read_ctx *ctx, struct exec_list *dst)
434 {
435    exec_list_make_empty(dst);
436    unsigned num_vars = blob_read_uint32(ctx->blob);
437    for (unsigned i = 0; i < num_vars; i++) {
438       nir_variable *var = read_variable(ctx);
439       exec_list_push_tail(dst, &var->node);
440    }
441 }
442 
443 static void
444 write_register(write_ctx *ctx, const nir_register *reg)
445 {
446    write_add_object(ctx, reg);
447    blob_write_uint32(ctx->blob, reg->num_components);
448    blob_write_uint32(ctx->blob, reg->bit_size);
449    blob_write_uint32(ctx->blob, reg->num_array_elems);
450    blob_write_uint32(ctx->blob, reg->index);
451    blob_write_uint8(ctx->blob, reg->divergent);
452 }
453 
454 static nir_register *
455 read_register(read_ctx *ctx)
456 {
457    nir_register *reg = ralloc(ctx->nir, nir_register);
458    read_add_object(ctx, reg);
459    reg->num_components = blob_read_uint32(ctx->blob);
460    reg->bit_size = blob_read_uint32(ctx->blob);
461    reg->num_array_elems = blob_read_uint32(ctx->blob);
462    reg->index = blob_read_uint32(ctx->blob);
463    reg->divergent = blob_read_uint8(ctx->blob);
464 
465    list_inithead(&reg->uses);
466    list_inithead(&reg->defs);
467    list_inithead(&reg->if_uses);
468 
469    return reg;
470 }
471 
472 static void
473 write_reg_list(write_ctx *ctx, const struct exec_list *src)
474 {
475    blob_write_uint32(ctx->blob, exec_list_length(src));
476    foreach_list_typed(nir_register, reg, node, src)
477       write_register(ctx, reg);
478 }
479 
480 static void
481 read_reg_list(read_ctx *ctx, struct exec_list *dst)
482 {
483    exec_list_make_empty(dst);
484    unsigned num_regs = blob_read_uint32(ctx->blob);
485    for (unsigned i = 0; i < num_regs; i++) {
486       nir_register *reg = read_register(ctx);
487       exec_list_push_tail(dst, &reg->node);
488    }
489 }
490 
491 union packed_src {
492    uint32_t u32;
493    struct {
494       unsigned is_ssa:1;   /* <-- Header */
495       unsigned is_indirect:1;
496       unsigned object_idx:20;
497       unsigned _footer:10; /* <-- Footer */
498    } any;
499    struct {
500       unsigned _header:22; /* <-- Header */
501       unsigned negate:1;   /* <-- Footer */
502       unsigned abs:1;
503       unsigned swizzle_x:2;
504       unsigned swizzle_y:2;
505       unsigned swizzle_z:2;
506       unsigned swizzle_w:2;
507    } alu;
508    struct {
509       unsigned _header:22; /* <-- Header */
510       unsigned src_type:5; /* <-- Footer */
511       unsigned _pad:5;
512    } tex;
513 };
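/* The low 22 bits of a packed_src (is_ssa, is_indirect, object_idx) form a
 * header shared by all users; the top 10 bits are a footer whose meaning
 * depends on the consumer: ALU modifiers and swizzles for ALU sources, or
 * the source type for texture sources.
 */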
514 
515 static void
516 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
517 {
518    /* Since sources are very frequent, we try to save some space when storing
519     * them. In particular, we store whether the source is a register and
520     * whether the register has an indirect index in the low two bits. We can
521     * assume that the high two bits of the index are zero, since otherwise our
522     * address space would've been exhausted allocating the remap table!
523     */
524    header.any.is_ssa = src->is_ssa;
525    if (src->is_ssa) {
526       header.any.object_idx = write_lookup_object(ctx, src->ssa);
527       blob_write_uint32(ctx->blob, header.u32);
528    } else {
529       header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
530       header.any.is_indirect = !!src->reg.indirect;
531       blob_write_uint32(ctx->blob, header.u32);
532       blob_write_uint32(ctx->blob, src->reg.base_offset);
533       if (src->reg.indirect) {
534          union packed_src header = {0};
535          write_src_full(ctx, src->reg.indirect, header);
536       }
537    }
538 }
539 
540 static void
541 write_src(write_ctx *ctx, const nir_src *src)
542 {
543    union packed_src header = {0};
544    write_src_full(ctx, src, header);
545 }
546 
547 static union packed_src
548 read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
549 {
550    STATIC_ASSERT(sizeof(union packed_src) == 4);
551    union packed_src header;
552    header.u32 = blob_read_uint32(ctx->blob);
553 
554    src->is_ssa = header.any.is_ssa;
555    if (src->is_ssa) {
556       src->ssa = read_lookup_object(ctx, header.any.object_idx);
557    } else {
558       src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
559       src->reg.base_offset = blob_read_uint32(ctx->blob);
560       if (header.any.is_indirect) {
561          src->reg.indirect = malloc(sizeof(nir_src));
562          read_src(ctx, src->reg.indirect, mem_ctx);
563       } else {
564          src->reg.indirect = NULL;
565       }
566    }
567    return header;
568 }
569 
570 union packed_dest {
571    uint8_t u8;
572    struct {
573       uint8_t is_ssa:1;
574       uint8_t num_components:3;
575       uint8_t bit_size:3;
576       uint8_t divergent:1;
577    } ssa;
578    struct {
579       uint8_t is_ssa:1;
580       uint8_t is_indirect:1;
581       uint8_t _pad:6;
582    } reg;
583 };
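/* A packed_dest is not written on its own; it is folded into the top 8 bits
 * of the instruction header (header.any.dest below).  When an SSA dest has a
 * component count that doesn't fit the 3-bit encoding, the count follows as
 * a separate uint32 (see write_dest()).
 */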
584 
585 enum intrinsic_const_indices_encoding {
586    /* Use packed_const_indices to store tightly packed indices.
587     *
588     * The common case for load_ubo is 0, 0, 0, which is trivially represented.
589     * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
590     */
591    const_indices_all_combined,
592 
593    const_indices_8bit,  /* 8 bits per element */
594    const_indices_16bit, /* 16 bits per element */
595    const_indices_32bit, /* 32 bits per element */
596 };
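/* For example, the const indices {7, 3} need at most 3 bits each, so with
 * two indices (3 * 2 <= 8) they use const_indices_all_combined with 4 bits
 * per element: packed_const_indices = 7 | (3 << 4).
 */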
597 
598 enum load_const_packing {
599    /* Constants are not packed and are stored in the following dwords. */
600    load_const_full,
601 
602    /* packed_value contains high 19 bits, low bits are 0,
603     * good for floating-point decimals
604     */
605    load_const_scalar_hi_19bits,
606 
607    /* packed_value contains low 19 bits, high bits are sign-extended */
608    load_const_scalar_lo_19bits_sext,
609 };
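/* For illustration: a 1-component 32-bit constant such as 1.0f (0x3f800000)
 * has its low 13 bits clear, so it packs as load_const_scalar_hi_19bits with
 * packed_value = 0x3f800000 >> 13.  Small integers in [-2^18, 2^18 - 1] pack
 * as load_const_scalar_lo_19bits_sext instead.
 */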
610 
611 union packed_instr {
612    uint32_t u32;
613    struct {
614       unsigned instr_type:4; /* always present */
615       unsigned _pad:20;
616       unsigned dest:8;       /* always last */
617    } any;
618    struct {
619       unsigned instr_type:4;
620       unsigned exact:1;
621       unsigned no_signed_wrap:1;
622       unsigned no_unsigned_wrap:1;
623       unsigned saturate:1;
624       /* Reg: writemask; SSA: swizzles for 2 srcs */
625       unsigned writemask_or_two_swizzles:4;
626       unsigned op:9;
627       unsigned packed_src_ssa_16bit:1;
628       /* Scalarized ALUs always have the same header. */
629       unsigned num_followup_alu_sharing_header:2;
630       unsigned dest:8;
631    } alu;
632    struct {
633       unsigned instr_type:4;
634       unsigned deref_type:3;
635       unsigned cast_type_same_as_last:1;
636       unsigned modes:5; /* See (de|en)code_deref_modes() */
637       unsigned _pad:9;
638       unsigned in_bounds:1;
639       unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
640       unsigned dest:8;
641    } deref;
642    struct {
643       unsigned instr_type:4;
644       unsigned deref_type:3;
645       unsigned _pad:1;
646       unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
647       unsigned dest:8;
648    } deref_var;
649    struct {
650       unsigned instr_type:4;
651       unsigned intrinsic:10;
652       unsigned const_indices_encoding:2;
653       unsigned packed_const_indices:8;
654       unsigned dest:8;
655    } intrinsic;
656    struct {
657       unsigned instr_type:4;
658       unsigned last_component:4;
659       unsigned bit_size:3;
660       unsigned packing:2; /* enum load_const_packing */
661       unsigned packed_value:19; /* meaning determined by packing */
662    } load_const;
663    struct {
664       unsigned instr_type:4;
665       unsigned last_component:4;
666       unsigned bit_size:3;
667       unsigned _pad:21;
668    } undef;
669    struct {
670       unsigned instr_type:4;
671       unsigned num_srcs:4;
672       unsigned op:5;
673       unsigned _pad:11;
674       unsigned dest:8;
675    } tex;
676    struct {
677       unsigned instr_type:4;
678       unsigned num_srcs:20;
679       unsigned dest:8;
680    } phi;
681    struct {
682       unsigned instr_type:4;
683       unsigned type:2;
684       unsigned _pad:26;
685    } jump;
686 };
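/* In every variant above, instr_type occupies the low 4 bits, so the reader
 * can decode it before knowing which struct applies, and the packed dest
 * byte occupies the top 8 bits in variants that have a destination.
 */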
687 
688 /* Write "lo24" as low 24 bits in the first uint32. */
689 static void
690 write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
691            nir_instr_type instr_type)
692 {
693    STATIC_ASSERT(sizeof(union packed_dest) == 1);
694    union packed_dest dest;
695    dest.u8 = 0;
696 
697    dest.ssa.is_ssa = dst->is_ssa;
698    if (dst->is_ssa) {
699       dest.ssa.num_components =
700          encode_num_components_in_3bits(dst->ssa.num_components);
701       dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
702       dest.ssa.divergent = dst->ssa.divergent;
703    } else {
704       dest.reg.is_indirect = !!(dst->reg.indirect);
705    }
706    header.any.dest = dest.u8;
707 
708    /* Check if the current ALU instruction has the same header as the previous
709     * instruction that is also ALU. If it is, we don't have to write
710     * the current header. This is a typical occurrence after scalarization.
711     */
712    if (instr_type == nir_instr_type_alu) {
713       bool equal_header = false;
714 
715       if (ctx->last_instr_type == nir_instr_type_alu) {
716          assert(ctx->last_alu_header_offset);
717          union packed_instr last_header;
718          last_header.u32 = ctx->last_alu_header;
719 
720          /* Clear the field that counts ALUs with equal headers. */
721          union packed_instr clean_header;
722          clean_header.u32 = last_header.u32;
723          clean_header.alu.num_followup_alu_sharing_header = 0;
724 
725          /* There can be at most 4 consecutive ALU instructions
726           * sharing the same header.
727           */
728          if (last_header.alu.num_followup_alu_sharing_header < 3 &&
729              header.u32 == clean_header.u32) {
730             last_header.alu.num_followup_alu_sharing_header++;
731             blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
732                                   last_header.u32);
733             ctx->last_alu_header = last_header.u32;
734             equal_header = true;
735          }
736       }
737 
738       if (!equal_header) {
739          ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
740          blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
741          ctx->last_alu_header = header.u32;
742       }
743    } else {
744       blob_write_uint32(ctx->blob, header.u32);
745    }
746 
747    if (dest.ssa.is_ssa &&
748        dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
749       blob_write_uint32(ctx->blob, dst->ssa.num_components);
750 
751    if (dst->is_ssa) {
752       write_add_object(ctx, &dst->ssa);
753    } else {
754       blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
755       blob_write_uint32(ctx->blob, dst->reg.base_offset);
756       if (dst->reg.indirect)
757          write_src(ctx, dst->reg.indirect);
758    }
759 }
760 
761 static void
762 read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
763           union packed_instr header)
764 {
765    union packed_dest dest;
766    dest.u8 = header.any.dest;
767 
768    if (dest.ssa.is_ssa) {
769       unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
770       unsigned num_components;
771       if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
772          num_components = blob_read_uint32(ctx->blob);
773       else
774          num_components = decode_num_components_in_3bits(dest.ssa.num_components);
775       nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
776       dst->ssa.divergent = dest.ssa.divergent;
777       read_add_object(ctx, &dst->ssa);
778    } else {
779       dst->reg.reg = read_object(ctx);
780       dst->reg.base_offset = blob_read_uint32(ctx->blob);
781       if (dest.reg.is_indirect) {
782          dst->reg.indirect = malloc(sizeof(nir_src));
783          read_src(ctx, dst->reg.indirect, instr);
784       }
785    }
786 }
787 
788 static bool
789 are_object_ids_16bit(write_ctx *ctx)
790 {
791    /* Check the highest object ID, because they are monotonic. */
792    return ctx->next_idx < (1 << 16);
793 }
794 
795 static bool
796 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
797 {
798    unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
799 
800    for (unsigned i = 0; i < num_srcs; i++) {
801       if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
802          return false;
803 
804       unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
805 
806       for (unsigned chan = 0; chan < src_components; chan++) {
807          /* The swizzles for src0.x and src1.x are stored
808           * in writemask_or_two_swizzles for SSA ALUs.
809           */
810          if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
811              alu->src[i].swizzle[chan] < 4)
812             continue;
813 
814          if (alu->src[i].swizzle[chan] != chan)
815             return false;
816       }
817    }
818 
819    return are_object_ids_16bit(ctx);
820 }
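/* Summary of the fast path checked above: when every source is SSA with no
 * abs/negate and identity swizzles (channel 0 of src0/src1 may differ, since
 * it is stored in writemask_or_two_swizzles), and all object IDs still fit
 * in 16 bits, each ALU source can be written as a single uint16 index.
 */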
821 
822 static void
823 write_alu(write_ctx *ctx, const nir_alu_instr *alu)
824 {
825    unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
826    unsigned dst_components = nir_dest_num_components(alu->dest.dest);
827 
828    /* 9 bits for nir_op */
829    STATIC_ASSERT(nir_num_opcodes <= 512);
830    union packed_instr header;
831    header.u32 = 0;
832 
833    header.alu.instr_type = alu->instr.type;
834    header.alu.exact = alu->exact;
835    header.alu.no_signed_wrap = alu->no_signed_wrap;
836    header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
837    header.alu.saturate = alu->dest.saturate;
838    header.alu.op = alu->op;
839    header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
840 
841    if (header.alu.packed_src_ssa_16bit &&
842        alu->dest.dest.is_ssa) {
843       /* For packed srcs of SSA ALUs, this field stores the swizzles. */
844       header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
845       if (num_srcs > 1)
846          header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
847    } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
848       /* For vec4 registers, this field is a writemask. */
849       header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
850    }
851 
852    write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
853 
854    if (!alu->dest.dest.is_ssa && dst_components > 4)
855       blob_write_uint32(ctx->blob, alu->dest.write_mask);
856 
857    if (header.alu.packed_src_ssa_16bit) {
858       for (unsigned i = 0; i < num_srcs; i++) {
859          assert(alu->src[i].src.is_ssa);
860          unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
861          assert(idx < (1 << 16));
862          blob_write_uint16(ctx->blob, idx);
863       }
864    } else {
865       for (unsigned i = 0; i < num_srcs; i++) {
866          unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
867          unsigned src_components = nir_src_num_components(alu->src[i].src);
868          union packed_src src;
869          bool packed = src_components <= 4 && src_channels <= 4;
870          src.u32 = 0;
871 
872          src.alu.negate = alu->src[i].negate;
873          src.alu.abs = alu->src[i].abs;
874 
875          if (packed) {
876             src.alu.swizzle_x = alu->src[i].swizzle[0];
877             src.alu.swizzle_y = alu->src[i].swizzle[1];
878             src.alu.swizzle_z = alu->src[i].swizzle[2];
879             src.alu.swizzle_w = alu->src[i].swizzle[3];
880          }
881 
882          write_src_full(ctx, &alu->src[i].src, src);
883 
884          /* Store swizzles for vec8 and vec16. */
885          if (!packed) {
886             for (unsigned o = 0; o < src_channels; o += 8) {
887                unsigned value = 0;
888 
889                for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
890                   value |= (uint32_t)alu->src[i].swizzle[o + j] <<
891                            (4 * j); /* 4 bits per swizzle */
892                }
893 
894                blob_write_uint32(ctx->blob, value);
895             }
896          }
897       }
898    }
899 }
900 
901 static nir_alu_instr *
902 read_alu(read_ctx *ctx, union packed_instr header)
903 {
904    unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
905    nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
906 
907    alu->exact = header.alu.exact;
908    alu->no_signed_wrap = header.alu.no_signed_wrap;
909    alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
910    alu->dest.saturate = header.alu.saturate;
911 
912    read_dest(ctx, &alu->dest.dest, &alu->instr, header);
913 
914    unsigned dst_components = nir_dest_num_components(alu->dest.dest);
915 
916    if (alu->dest.dest.is_ssa) {
917       alu->dest.write_mask = u_bit_consecutive(0, dst_components);
918    } else if (dst_components <= 4) {
919       alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
920    } else {
921       alu->dest.write_mask = blob_read_uint32(ctx->blob);
922    }
923 
924    if (header.alu.packed_src_ssa_16bit) {
925       for (unsigned i = 0; i < num_srcs; i++) {
926          nir_alu_src *src = &alu->src[i];
927          src->src.is_ssa = true;
928          src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
929 
930          memset(&src->swizzle, 0, sizeof(src->swizzle));
931 
932          unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
933 
934          for (unsigned chan = 0; chan < src_components; chan++)
935             src->swizzle[chan] = chan;
936       }
937    } else {
938       for (unsigned i = 0; i < num_srcs; i++) {
939          union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
940          unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
941          unsigned src_components = nir_src_num_components(alu->src[i].src);
942          bool packed = src_components <= 4 && src_channels <= 4;
943 
944          alu->src[i].negate = src.alu.negate;
945          alu->src[i].abs = src.alu.abs;
946 
947          memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
948 
949          if (packed) {
950             alu->src[i].swizzle[0] = src.alu.swizzle_x;
951             alu->src[i].swizzle[1] = src.alu.swizzle_y;
952             alu->src[i].swizzle[2] = src.alu.swizzle_z;
953             alu->src[i].swizzle[3] = src.alu.swizzle_w;
954          } else {
955             /* Load swizzles for vec8 and vec16. */
956             for (unsigned o = 0; o < src_channels; o += 8) {
957                unsigned value = blob_read_uint32(ctx->blob);
958 
959                for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
960                   alu->src[i].swizzle[o + j] =
961                      (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
962                }
963             }
964          }
965       }
966    }
967 
968    if (header.alu.packed_src_ssa_16bit &&
969        alu->dest.dest.is_ssa) {
970       alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
971       if (num_srcs > 1)
972          alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
973    }
974 
975    return alu;
976 }
977 
978 #define MODE_ENC_GENERIC_BIT (1 << 4)
979 
980 static nir_variable_mode
981 decode_deref_modes(unsigned modes)
982 {
983    if (modes & MODE_ENC_GENERIC_BIT) {
984       modes &= ~MODE_ENC_GENERIC_BIT;
985       return modes << (ffs(nir_var_mem_generic) - 1);
986    } else {
987       return 1 << modes;
988    }
989 }
990 
991 static unsigned
992 encode_deref_modes(nir_variable_mode modes)
993 {
994    /* Mode sets on derefs generally come in two forms.  For certain OpenCL
995     * cases, we can have more than one of the generic modes set.  In this
996     * case, we need the full bitfield.  Fortunately, there are only 4 of
997     * these.  For all other modes, we can only have one mode at a time so we
998     * can compress them by only storing the bit position.  This, plus one bit
999     * to select encoding, lets us pack the entire bitfield in 5 bits.
1000     */
1001    STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
1002                  (1 << MODE_ENC_GENERIC_BIT));
1003 
1004    unsigned enc;
1005    if (modes == 0 || (modes & nir_var_mem_generic)) {
1006       assert(!(modes & ~nir_var_mem_generic));
1007       enc = modes >> (ffs(nir_var_mem_generic) - 1);
1008       assert(enc < MODE_ENC_GENERIC_BIT);
1009       enc |= MODE_ENC_GENERIC_BIT;
1010    } else {
1011       assert(util_is_power_of_two_nonzero(modes));
1012       enc = ffs(modes) - 1;
1013       assert(enc < MODE_ENC_GENERIC_BIT);
1014    }
1015    assert(modes == decode_deref_modes(enc));
1016    return enc;
1017 }
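/* For example, a single non-generic mode is encoded as its bit position
 * (ffs(modes) - 1), while a subset of nir_var_mem_generic is shifted down
 * into the low bits and tagged with MODE_ENC_GENERIC_BIT;
 * decode_deref_modes() inverts either form.
 */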
1018 
1019 static void
1020 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
1021 {
1022    assert(deref->deref_type < 8);
1023 
1024    union packed_instr header;
1025    header.u32 = 0;
1026 
1027    header.deref.instr_type = deref->instr.type;
1028    header.deref.deref_type = deref->deref_type;
1029 
1030    if (deref->deref_type == nir_deref_type_cast) {
1031       header.deref.modes = encode_deref_modes(deref->modes);
1032       header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
1033    }
1034 
1035    unsigned var_idx = 0;
1036    if (deref->deref_type == nir_deref_type_var) {
1037       var_idx = write_lookup_object(ctx, deref->var);
1038       if (var_idx && var_idx < (1 << 16))
1039          header.deref_var.object_idx = var_idx;
1040    }
1041 
1042    if (deref->deref_type == nir_deref_type_array ||
1043        deref->deref_type == nir_deref_type_ptr_as_array) {
1044       header.deref.packed_src_ssa_16bit =
1045          deref->parent.is_ssa && deref->arr.index.is_ssa &&
1046          are_object_ids_16bit(ctx);
1047 
1048       header.deref.in_bounds = deref->arr.in_bounds;
1049    }
1050 
1051    write_dest(ctx, &deref->dest, header, deref->instr.type);
1052 
1053    switch (deref->deref_type) {
1054    case nir_deref_type_var:
1055       if (!header.deref_var.object_idx)
1056          blob_write_uint32(ctx->blob, var_idx);
1057       break;
1058 
1059    case nir_deref_type_struct:
1060       write_src(ctx, &deref->parent);
1061       blob_write_uint32(ctx->blob, deref->strct.index);
1062       break;
1063 
1064    case nir_deref_type_array:
1065    case nir_deref_type_ptr_as_array:
1066       if (header.deref.packed_src_ssa_16bit) {
1067          blob_write_uint16(ctx->blob,
1068                            write_lookup_object(ctx, deref->parent.ssa));
1069          blob_write_uint16(ctx->blob,
1070                            write_lookup_object(ctx, deref->arr.index.ssa));
1071       } else {
1072          write_src(ctx, &deref->parent);
1073          write_src(ctx, &deref->arr.index);
1074       }
1075       break;
1076 
1077    case nir_deref_type_cast:
1078       write_src(ctx, &deref->parent);
1079       blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1080       blob_write_uint32(ctx->blob, deref->cast.align_mul);
1081       blob_write_uint32(ctx->blob, deref->cast.align_offset);
1082       if (!header.deref.cast_type_same_as_last) {
1083          encode_type_to_blob(ctx->blob, deref->type);
1084          ctx->last_type = deref->type;
1085       }
1086       break;
1087 
1088    case nir_deref_type_array_wildcard:
1089       write_src(ctx, &deref->parent);
1090       break;
1091 
1092    default:
1093       unreachable("Invalid deref type");
1094    }
1095 }
1096 
1097 static nir_deref_instr *
1098 read_deref(read_ctx *ctx, union packed_instr header)
1099 {
1100    nir_deref_type deref_type = header.deref.deref_type;
1101    nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1102 
1103    read_dest(ctx, &deref->dest, &deref->instr, header);
1104 
1105    nir_deref_instr *parent;
1106 
1107    switch (deref->deref_type) {
1108    case nir_deref_type_var:
1109       if (header.deref_var.object_idx)
1110          deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1111       else
1112          deref->var = read_object(ctx);
1113 
1114       deref->type = deref->var->type;
1115       break;
1116 
1117    case nir_deref_type_struct:
1118       read_src(ctx, &deref->parent, &deref->instr);
1119       parent = nir_src_as_deref(deref->parent);
1120       deref->strct.index = blob_read_uint32(ctx->blob);
1121       deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1122       break;
1123 
1124    case nir_deref_type_array:
1125    case nir_deref_type_ptr_as_array:
1126       if (header.deref.packed_src_ssa_16bit) {
1127          deref->parent.is_ssa = true;
1128          deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1129          deref->arr.index.is_ssa = true;
1130          deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1131       } else {
1132          read_src(ctx, &deref->parent, &deref->instr);
1133          read_src(ctx, &deref->arr.index, &deref->instr);
1134       }
1135 
1136       deref->arr.in_bounds = header.deref.in_bounds;
1137 
1138       parent = nir_src_as_deref(deref->parent);
1139       if (deref->deref_type == nir_deref_type_array)
1140          deref->type = glsl_get_array_element(parent->type);
1141       else
1142          deref->type = parent->type;
1143       break;
1144 
1145    case nir_deref_type_cast:
1146       read_src(ctx, &deref->parent, &deref->instr);
1147       deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1148       deref->cast.align_mul = blob_read_uint32(ctx->blob);
1149       deref->cast.align_offset = blob_read_uint32(ctx->blob);
1150       if (header.deref.cast_type_same_as_last) {
1151          deref->type = ctx->last_type;
1152       } else {
1153          deref->type = decode_type_from_blob(ctx->blob);
1154          ctx->last_type = deref->type;
1155       }
1156       break;
1157 
1158    case nir_deref_type_array_wildcard:
1159       read_src(ctx, &deref->parent, &deref->instr);
1160       parent = nir_src_as_deref(deref->parent);
1161       deref->type = glsl_get_array_element(parent->type);
1162       break;
1163 
1164    default:
1165       unreachable("Invalid deref type");
1166    }
1167 
1168    if (deref_type == nir_deref_type_var) {
1169       deref->modes = deref->var->data.mode;
1170    } else if (deref->deref_type == nir_deref_type_cast) {
1171       deref->modes = decode_deref_modes(header.deref.modes);
1172    } else {
1173       assert(deref->parent.is_ssa);
1174       deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
1175    }
1176 
1177    return deref;
1178 }
1179 
1180 static void
1181 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1182 {
1183    /* 10 bits for nir_intrinsic_op */
1184    STATIC_ASSERT(nir_num_intrinsics <= 1024);
1185    unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1186    unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1187    assert(intrin->intrinsic < 1024);
1188 
1189    union packed_instr header;
1190    header.u32 = 0;
1191 
1192    header.intrinsic.instr_type = intrin->instr.type;
1193    header.intrinsic.intrinsic = intrin->intrinsic;
1194 
1195    /* Analyze constant indices to decide how to encode them. */
1196    if (num_indices) {
1197       unsigned max_bits = 0;
1198       for (unsigned i = 0; i < num_indices; i++) {
1199          unsigned max = util_last_bit(intrin->const_index[i]);
1200          max_bits = MAX2(max_bits, max);
1201       }
1202 
1203       if (max_bits * num_indices <= 8) {
1204          header.intrinsic.const_indices_encoding = const_indices_all_combined;
1205 
1206          /* Pack all const indices into 8 bits. */
1207          unsigned bit_size = 8 / num_indices;
1208          for (unsigned i = 0; i < num_indices; i++) {
1209             header.intrinsic.packed_const_indices |=
1210                intrin->const_index[i] << (i * bit_size);
1211          }
1212       } else if (max_bits <= 8)
1213          header.intrinsic.const_indices_encoding = const_indices_8bit;
1214       else if (max_bits <= 16)
1215          header.intrinsic.const_indices_encoding = const_indices_16bit;
1216       else
1217          header.intrinsic.const_indices_encoding = const_indices_32bit;
1218    }
1219 
1220    if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1221       write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1222    else
1223       blob_write_uint32(ctx->blob, header.u32);
1224 
1225    for (unsigned i = 0; i < num_srcs; i++)
1226       write_src(ctx, &intrin->src[i]);
1227 
1228    if (num_indices) {
1229       switch (header.intrinsic.const_indices_encoding) {
1230       case const_indices_8bit:
1231          for (unsigned i = 0; i < num_indices; i++)
1232             blob_write_uint8(ctx->blob, intrin->const_index[i]);
1233          break;
1234       case const_indices_16bit:
1235          for (unsigned i = 0; i < num_indices; i++)
1236             blob_write_uint16(ctx->blob, intrin->const_index[i]);
1237          break;
1238       case const_indices_32bit:
1239          for (unsigned i = 0; i < num_indices; i++)
1240             blob_write_uint32(ctx->blob, intrin->const_index[i]);
1241          break;
1242       }
1243    }
1244 }
1245 
1246 static nir_intrinsic_instr *
1247 read_intrinsic(read_ctx *ctx, union packed_instr header)
1248 {
1249    nir_intrinsic_op op = header.intrinsic.intrinsic;
1250    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1251 
1252    unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1253    unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1254 
1255    if (nir_intrinsic_infos[op].has_dest)
1256       read_dest(ctx, &intrin->dest, &intrin->instr, header);
1257 
1258    for (unsigned i = 0; i < num_srcs; i++)
1259       read_src(ctx, &intrin->src[i], &intrin->instr);
1260 
1261    /* Vectorized intrinsics have num_components equal to that of the dst or
1262     * src that has 0 components in the info. Find it.
1263     */
1264    if (nir_intrinsic_infos[op].has_dest &&
1265        nir_intrinsic_infos[op].dest_components == 0) {
1266       intrin->num_components = nir_dest_num_components(intrin->dest);
1267    } else {
1268       for (unsigned i = 0; i < num_srcs; i++) {
1269          if (nir_intrinsic_infos[op].src_components[i] == 0) {
1270             intrin->num_components = nir_src_num_components(intrin->src[i]);
1271             break;
1272          }
1273       }
1274    }
1275 
1276    if (num_indices) {
1277       switch (header.intrinsic.const_indices_encoding) {
1278       case const_indices_all_combined: {
1279          unsigned bit_size = 8 / num_indices;
1280          unsigned bit_mask = u_bit_consecutive(0, bit_size);
1281          for (unsigned i = 0; i < num_indices; i++) {
1282             intrin->const_index[i] =
1283                (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1284                bit_mask;
1285          }
1286          break;
1287       }
1288       case const_indices_8bit:
1289          for (unsigned i = 0; i < num_indices; i++)
1290             intrin->const_index[i] = blob_read_uint8(ctx->blob);
1291          break;
1292       case const_indices_16bit:
1293          for (unsigned i = 0; i < num_indices; i++)
1294             intrin->const_index[i] = blob_read_uint16(ctx->blob);
1295          break;
1296       case const_indices_32bit:
1297          for (unsigned i = 0; i < num_indices; i++)
1298             intrin->const_index[i] = blob_read_uint32(ctx->blob);
1299          break;
1300       }
1301    }
1302 
1303    return intrin;
1304 }
1305 
1306 static void
1307 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1308 {
1309    assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1310    union packed_instr header;
1311    header.u32 = 0;
1312 
1313    header.load_const.instr_type = lc->instr.type;
1314    header.load_const.last_component = lc->def.num_components - 1;
1315    header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1316    header.load_const.packing = load_const_full;
1317 
1318    /* Try to pack 1-component constants into the 19 free bits in the header. */
1319    if (lc->def.num_components == 1) {
1320       switch (lc->def.bit_size) {
1321       case 64:
1322          if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1323             /* packed_value contains high 19 bits, low bits are 0 */
1324             header.load_const.packing = load_const_scalar_hi_19bits;
1325             header.load_const.packed_value = lc->value[0].u64 >> 45;
1326          } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
1327             /* packed_value contains low 19 bits, high bits are sign-extended */
1328             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1329             header.load_const.packed_value = lc->value[0].u64;
1330          }
1331          break;
1332 
1333       case 32:
1334          if ((lc->value[0].u32 & 0x1fff) == 0) {
1335             header.load_const.packing = load_const_scalar_hi_19bits;
1336             header.load_const.packed_value = lc->value[0].u32 >> 13;
1337          } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
1338             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1339             header.load_const.packed_value = lc->value[0].u32;
1340          }
1341          break;
1342 
1343       case 16:
1344          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1345          header.load_const.packed_value = lc->value[0].u16;
1346          break;
1347       case 8:
1348          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1349          header.load_const.packed_value = lc->value[0].u8;
1350          break;
1351       case 1:
1352          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1353          header.load_const.packed_value = lc->value[0].b;
1354          break;
1355       default:
1356          unreachable("invalid bit_size");
1357       }
1358    }
1359 
1360    blob_write_uint32(ctx->blob, header.u32);
1361 
1362    if (header.load_const.packing == load_const_full) {
1363       switch (lc->def.bit_size) {
1364       case 64:
1365          blob_write_bytes(ctx->blob, lc->value,
1366                           sizeof(*lc->value) * lc->def.num_components);
1367          break;
1368 
1369       case 32:
1370          for (unsigned i = 0; i < lc->def.num_components; i++)
1371             blob_write_uint32(ctx->blob, lc->value[i].u32);
1372          break;
1373 
1374       case 16:
1375          for (unsigned i = 0; i < lc->def.num_components; i++)
1376             blob_write_uint16(ctx->blob, lc->value[i].u16);
1377          break;
1378 
1379       default:
1380          assert(lc->def.bit_size <= 8);
1381          for (unsigned i = 0; i < lc->def.num_components; i++)
1382             blob_write_uint8(ctx->blob, lc->value[i].u8);
1383          break;
1384       }
1385    }
1386 
1387    write_add_object(ctx, &lc->def);
1388 }
1389 
1390 static nir_load_const_instr *
1391 read_load_const(read_ctx *ctx, union packed_instr header)
1392 {
1393    nir_load_const_instr *lc =
1394       nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1395                                   decode_bit_size_3bits(header.load_const.bit_size));
1396    lc->def.divergent = false;
1397 
1398    switch (header.load_const.packing) {
1399    case load_const_scalar_hi_19bits:
1400       switch (lc->def.bit_size) {
1401       case 64:
1402          lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1403          break;
1404       case 32:
1405          lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1406          break;
1407       default:
1408          unreachable("invalid bit_size");
1409       }
1410       break;
1411 
1412    case load_const_scalar_lo_19bits_sext:
1413       switch (lc->def.bit_size) {
1414       case 64:
1415          lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1416          break;
1417       case 32:
1418          lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1419          break;
1420       case 16:
1421          lc->value[0].u16 = header.load_const.packed_value;
1422          break;
1423       case 8:
1424          lc->value[0].u8 = header.load_const.packed_value;
1425          break;
1426       case 1:
1427          lc->value[0].b = header.load_const.packed_value;
1428          break;
1429       default:
1430          unreachable("invalid bit_size");
1431       }
1432       break;
1433 
1434    case load_const_full:
1435       switch (lc->def.bit_size) {
1436       case 64:
1437          blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1438          break;
1439 
1440       case 32:
1441          for (unsigned i = 0; i < lc->def.num_components; i++)
1442             lc->value[i].u32 = blob_read_uint32(ctx->blob);
1443          break;
1444 
1445       case 16:
1446          for (unsigned i = 0; i < lc->def.num_components; i++)
1447             lc->value[i].u16 = blob_read_uint16(ctx->blob);
1448          break;
1449 
1450       default:
1451          assert(lc->def.bit_size <= 8);
1452          for (unsigned i = 0; i < lc->def.num_components; i++)
1453             lc->value[i].u8 = blob_read_uint8(ctx->blob);
1454          break;
1455       }
1456       break;
1457    }
1458 
1459    read_add_object(ctx, &lc->def);
1460    return lc;
1461 }
1462 
1463 static void
1464 write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1465 {
1466    assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1467 
1468    union packed_instr header;
1469    header.u32 = 0;
1470 
1471    header.undef.instr_type = undef->instr.type;
1472    header.undef.last_component = undef->def.num_components - 1;
1473    header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1474 
1475    blob_write_uint32(ctx->blob, header.u32);
1476    write_add_object(ctx, &undef->def);
1477 }
1478 
1479 static nir_ssa_undef_instr *
1480 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1481 {
1482    nir_ssa_undef_instr *undef =
1483       nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1484                                  decode_bit_size_3bits(header.undef.bit_size));
1485 
1486    undef->def.divergent = false;
1487 
1488    read_add_object(ctx, &undef->def);
1489    return undef;
1490 }
1491 
1492 union packed_tex_data {
1493    uint32_t u32;
1494    struct {
1495       unsigned sampler_dim:4;
1496       unsigned dest_type:8;
1497       unsigned coord_components:3;
1498       unsigned is_array:1;
1499       unsigned is_shadow:1;
1500       unsigned is_new_style_shadow:1;
1501       unsigned is_sparse:1;
1502       unsigned component:2;
1503       unsigned texture_non_uniform:1;
1504       unsigned sampler_non_uniform:1;
1505       unsigned array_is_lowered_cube:1;
1506       unsigned unused:6; /* Mark unused for valgrind. */
1507    } u;
1508 };
1509 
1510 static void
1511 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1512 {
1513    assert(tex->num_srcs < 16);
1514    assert(tex->op < 32);
1515 
1516    union packed_instr header;
1517    header.u32 = 0;
1518 
1519    header.tex.instr_type = tex->instr.type;
1520    header.tex.num_srcs = tex->num_srcs;
1521    header.tex.op = tex->op;
1522 
1523    write_dest(ctx, &tex->dest, header, tex->instr.type);
1524 
1525    blob_write_uint32(ctx->blob, tex->texture_index);
1526    blob_write_uint32(ctx->blob, tex->sampler_index);
1527    if (tex->op == nir_texop_tg4)
1528       blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1529 
1530    STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1531    union packed_tex_data packed = {
1532       .u.sampler_dim = tex->sampler_dim,
1533       .u.dest_type = tex->dest_type,
1534       .u.coord_components = tex->coord_components,
1535       .u.is_array = tex->is_array,
1536       .u.is_shadow = tex->is_shadow,
1537       .u.is_new_style_shadow = tex->is_new_style_shadow,
1538       .u.is_sparse = tex->is_sparse,
1539       .u.component = tex->component,
1540       .u.texture_non_uniform = tex->texture_non_uniform,
1541       .u.sampler_non_uniform = tex->sampler_non_uniform,
1542       .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1543    };
1544    blob_write_uint32(ctx->blob, packed.u32);
1545 
1546    for (unsigned i = 0; i < tex->num_srcs; i++) {
1547       union packed_src src;
1548       src.u32 = 0;
1549       src.tex.src_type = tex->src[i].src_type;
1550       write_src_full(ctx, &tex->src[i].src, src);
1551    }
1552 }
1553 
1554 static nir_tex_instr *
1555 read_tex(read_ctx *ctx, union packed_instr header)
1556 {
1557    nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1558 
1559    read_dest(ctx, &tex->dest, &tex->instr, header);
1560 
1561    tex->op = header.tex.op;
1562    tex->texture_index = blob_read_uint32(ctx->blob);
1563    tex->sampler_index = blob_read_uint32(ctx->blob);
1564    if (tex->op == nir_texop_tg4)
1565       blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1566 
1567    union packed_tex_data packed;
1568    packed.u32 = blob_read_uint32(ctx->blob);
1569    tex->sampler_dim = packed.u.sampler_dim;
1570    tex->dest_type = packed.u.dest_type;
1571    tex->coord_components = packed.u.coord_components;
1572    tex->is_array = packed.u.is_array;
1573    tex->is_shadow = packed.u.is_shadow;
1574    tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1575    tex->is_sparse = packed.u.is_sparse;
1576    tex->component = packed.u.component;
1577    tex->texture_non_uniform = packed.u.texture_non_uniform;
1578    tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1579    tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1580 
1581    for (unsigned i = 0; i < tex->num_srcs; i++) {
1582       union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1583       tex->src[i].src_type = src.tex.src_type;
1584    }
1585 
1586    return tex;
1587 }
1588 
1589 static void
1590 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1591 {
1592    union packed_instr header;
1593    header.u32 = 0;
1594 
1595    header.phi.instr_type = phi->instr.type;
1596    header.phi.num_srcs = exec_list_length(&phi->srcs);
1597 
1598    /* Phi nodes are special, since they may reference SSA definitions and
1599     * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1600     * and then store enough information so that a later fixup pass can fill
1601     * them in correctly.
1602     */
1603    write_dest(ctx, &phi->dest, header, phi->instr.type);
1604 
1605    nir_foreach_phi_src(src, phi) {
1606       assert(src->src.is_ssa);
1607       size_t blob_offset = blob_reserve_uint32(ctx->blob);
1608       ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1609       assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1610       write_phi_fixup fixup = {
1611          .blob_offset = blob_offset,
1612          .src = src->src.ssa,
1613          .block = src->pred,
1614       };
1615       util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1616    }
1617 }
1618 
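/* Second pass over the phi sources recorded by write_phi: by now every SSA
 * def and block has an index in the remap table, so the two reserved
 * uint32_t slots can be overwritten with the real values.
 */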
1619 static void
1620 write_fixup_phis(write_ctx *ctx)
1621 {
1622    util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1623       blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1624                             write_lookup_object(ctx, fixup->src));
1625       blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1626                             write_lookup_object(ctx, fixup->block));
1627    }
1628 
1629    util_dynarray_clear(&ctx->phi_fixups);
1630 }
1631 
1632 static nir_phi_instr *
1633 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1634 {
1635    nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1636 
1637    read_dest(ctx, &phi->dest, &phi->instr, header);
1638 
1639    /* For similar reasons as before, we just store the index directly into the
1640     * pointer, and let a later pass resolve the phi sources.
1641     *
1642     * In order to ensure that the copied sources (which are just the indices
1643     * from the blob for now) don't get inserted into the old shader's use-def
1644     * lists, we have to add the phi instruction *before* we set up its
1645     * sources.
1646     */
1647    nir_instr_insert_after_block(blk, &phi->instr);
1648 
1649    for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1650       nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1651       nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1652       nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
1653 
1654       /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1655        * we have to set the parent_instr manually.  It doesn't really matter
1656        * when we do it, so we might as well do it here.
1657        */
1658       src->src.parent_instr = &phi->instr;
1659 
1660       /* Stash it in the list of phi sources.  We'll walk this list and fix up
1661        * sources at the very end of read_function_impl.
1662        */
1663       list_add(&src->src.use_link, &ctx->phi_srcs);
1664    }
1665 
1666    return phi;
1667 }
1668 
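/* Run at the end of read_function_impl, once every block and SSA def has
 * been read: swap the stashed indices for real pointers and put each phi
 * source on its SSA def's use list.
 */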
1669 static void
1670 read_fixup_phis(read_ctx *ctx)
1671 {
1672    list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1673       src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1674       src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1675 
1676       /* Remove from this list */
1677       list_del(&src->src.use_link);
1678 
1679       list_addtail(&src->src.use_link, &src->src.ssa->uses);
1680    }
1681    assert(list_is_empty(&ctx->phi_srcs));
1682 }
1683 
1684 static void
1685 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1686 {
1687    /* These aren't handled because they require special block linking */
1688    assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1689 
1690    assert(jmp->type < 4);
1691 
1692    union packed_instr header;
1693    header.u32 = 0;
1694 
1695    header.jump.instr_type = jmp->instr.type;
1696    header.jump.type = jmp->type;
1697 
1698    blob_write_uint32(ctx->blob, header.u32);
1699 }
1700 
1701 static nir_jump_instr *
1702 read_jump(read_ctx *ctx, union packed_instr header)
1703 {
1704    /* These aren't handled because they require special block linking */
1705    assert(header.jump.type != nir_jump_goto &&
1706           header.jump.type != nir_jump_goto_if);
1707 
1708    nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1709    return jmp;
1710 }
1711 
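/* Calls reference their callee through the remap table; the callee was added
 * to the table when its nir_function was written.
 */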
1712 static void
1713 write_call(write_ctx *ctx, const nir_call_instr *call)
1714 {
1715    blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1716 
1717    for (unsigned i = 0; i < call->num_params; i++)
1718       write_src(ctx, &call->params[i]);
1719 }
1720 
1721 static nir_call_instr *
1722 read_call(read_ctx *ctx)
1723 {
1724    nir_function *callee = read_object(ctx);
1725    nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1726 
1727    for (unsigned i = 0; i < call->num_params; i++)
1728       read_src(ctx, &call->params[i], call);
1729 
1730    return call;
1731 }
1732 
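/* Dispatch on the instruction type.  Each writer emits its own packed
 * instruction header; calls carry no extra header fields, so just the raw
 * type is written before the call body.
 */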
1733 static void
1734 write_instr(write_ctx *ctx, const nir_instr *instr)
1735 {
1736    /* We have only 4 bits for the instruction type. */
1737    assert(instr->type < 16);
1738 
1739    switch (instr->type) {
1740    case nir_instr_type_alu:
1741       write_alu(ctx, nir_instr_as_alu(instr));
1742       break;
1743    case nir_instr_type_deref:
1744       write_deref(ctx, nir_instr_as_deref(instr));
1745       break;
1746    case nir_instr_type_intrinsic:
1747       write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1748       break;
1749    case nir_instr_type_load_const:
1750       write_load_const(ctx, nir_instr_as_load_const(instr));
1751       break;
1752    case nir_instr_type_ssa_undef:
1753       write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1754       break;
1755    case nir_instr_type_tex:
1756       write_tex(ctx, nir_instr_as_tex(instr));
1757       break;
1758    case nir_instr_type_phi:
1759       write_phi(ctx, nir_instr_as_phi(instr));
1760       break;
1761    case nir_instr_type_jump:
1762       write_jump(ctx, nir_instr_as_jump(instr));
1763       break;
1764    case nir_instr_type_call:
1765       blob_write_uint32(ctx->blob, instr->type);
1766       write_call(ctx, nir_instr_as_call(instr));
1767       break;
1768    case nir_instr_type_parallel_copy:
1769       unreachable("Cannot write parallel copies");
1770    default:
1771       unreachable("bad instr type");
1772    }
1773 }
1774 
1775 /* Return the number of instructions read. */
1776 static unsigned
1777 read_instr(read_ctx *ctx, nir_block *block)
1778 {
1779    STATIC_ASSERT(sizeof(union packed_instr) == 4);
1780    union packed_instr header;
1781    header.u32 = blob_read_uint32(ctx->blob);
1782    nir_instr *instr;
1783 
1784    switch (header.any.instr_type) {
1785    case nir_instr_type_alu:
1786       for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1787          nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1788       return header.alu.num_followup_alu_sharing_header + 1;
1789    case nir_instr_type_deref:
1790       instr = &read_deref(ctx, header)->instr;
1791       break;
1792    case nir_instr_type_intrinsic:
1793       instr = &read_intrinsic(ctx, header)->instr;
1794       break;
1795    case nir_instr_type_load_const:
1796       instr = &read_load_const(ctx, header)->instr;
1797       break;
1798    case nir_instr_type_ssa_undef:
1799       instr = &read_ssa_undef(ctx, header)->instr;
1800       break;
1801    case nir_instr_type_tex:
1802       instr = &read_tex(ctx, header)->instr;
1803       break;
1804    case nir_instr_type_phi:
1805       /* Phi instructions are a bit of a special case when reading because we
1806        * don't want inserting the instruction to automatically handle use/defs
1807        * for us.  Instead, we need to wait until all the blocks/instructions
1808        * are read so that we can set their sources up.
1809        */
1810       read_phi(ctx, block, header);
1811       return 1;
1812    case nir_instr_type_jump:
1813       instr = &read_jump(ctx, header)->instr;
1814       break;
1815    case nir_instr_type_call:
1816       instr = &read_call(ctx)->instr;
1817       break;
1818    case nir_instr_type_parallel_copy:
1819       unreachable("Cannot read parallel copies");
1820    default:
1821       unreachable("bad instr type");
1822    }
1823 
1824    nir_instr_insert_after_block(block, instr);
1825    return 1;
1826 }
1827 
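/* A block is written as its instruction count followed by the packed
 * instructions.  The block itself goes into the remap table so phi fixups
 * can refer to it by index, and ALU-header sharing is reset at block
 * boundaries.
 */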
1828 static void
1829 write_block(write_ctx *ctx, const nir_block *block)
1830 {
1831    write_add_object(ctx, block);
1832    blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1833 
1834    ctx->last_instr_type = ~0;
1835    ctx->last_alu_header_offset = 0;
1836 
1837    nir_foreach_instr(instr, block) {
1838       write_instr(ctx, instr);
1839       ctx->last_instr_type = instr->type;
1840    }
1841 }
1842 
1843 static void
1844 read_block(read_ctx *ctx, struct exec_list *cf_list)
1845 {
1846    /* Don't actually create a new block.  Just use the one from the tail of
1847     * the list.  NIR guarantees that the tail of the list is a block and that
1848     * no two blocks are side-by-side in the IR; it should still be empty.
1849     */
1850    nir_block *block =
1851       exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1852 
1853    read_add_object(ctx, block);
1854    unsigned num_instrs = blob_read_uint32(ctx->blob);
1855    for (unsigned i = 0; i < num_instrs;) {
1856       i += read_instr(ctx, block);
1857    }
1858 }
1859 
1860 static void
1861 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1862 
1863 static void
1864 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1865 
1866 static void
1867 write_if(write_ctx *ctx, nir_if *nif)
1868 {
1869    write_src(ctx, &nif->condition);
1870    blob_write_uint8(ctx->blob, nif->control);
1871 
1872    write_cf_list(ctx, &nif->then_list);
1873    write_cf_list(ctx, &nif->else_list);
1874 }
1875 
1876 static void
1877 read_if(read_ctx *ctx, struct exec_list *cf_list)
1878 {
1879    nir_if *nif = nir_if_create(ctx->nir);
1880 
1881    read_src(ctx, &nif->condition, nif);
1882    nif->control = blob_read_uint8(ctx->blob);
1883 
1884    nir_cf_node_insert_end(cf_list, &nif->cf_node);
1885 
1886    read_cf_list(ctx, &nif->then_list);
1887    read_cf_list(ctx, &nif->else_list);
1888 }
1889 
1890 static void
1891 write_loop(write_ctx *ctx, nir_loop *loop)
1892 {
1893    blob_write_uint8(ctx->blob, loop->control);
1894    blob_write_uint8(ctx->blob, loop->divergent);
1895    write_cf_list(ctx, &loop->body);
1896 }
1897 
1898 static void
1899 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1900 {
1901    nir_loop *loop = nir_loop_create(ctx->nir);
1902 
1903    nir_cf_node_insert_end(cf_list, &loop->cf_node);
1904 
1905    loop->control = blob_read_uint8(ctx->blob);
1906    loop->divergent = blob_read_uint8(ctx->blob);
1907    read_cf_list(ctx, &loop->body);
1908 }
1909 
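/* Control flow is serialized recursively: write_cf_list emits a node count
 * and then each node, and if/loop nodes emit their nested lists in turn.
 */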
1910 static void
1911 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1912 {
1913    blob_write_uint32(ctx->blob, cf->type);
1914 
1915    switch (cf->type) {
1916    case nir_cf_node_block:
1917       write_block(ctx, nir_cf_node_as_block(cf));
1918       break;
1919    case nir_cf_node_if:
1920       write_if(ctx, nir_cf_node_as_if(cf));
1921       break;
1922    case nir_cf_node_loop:
1923       write_loop(ctx, nir_cf_node_as_loop(cf));
1924       break;
1925    default:
1926       unreachable("bad cf type");
1927    }
1928 }
1929 
1930 static void
1931 read_cf_node(read_ctx *ctx, struct exec_list *list)
1932 {
1933    nir_cf_node_type type = blob_read_uint32(ctx->blob);
1934 
1935    switch (type) {
1936    case nir_cf_node_block:
1937       read_block(ctx, list);
1938       break;
1939    case nir_cf_node_if:
1940       read_if(ctx, list);
1941       break;
1942    case nir_cf_node_loop:
1943       read_loop(ctx, list);
1944       break;
1945    default:
1946       unreachable("bad cf type");
1947    }
1948 }
1949 
1950 static void
1951 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1952 {
1953    blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1954    foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1955       write_cf_node(ctx, cf);
1956    }
1957 }
1958 
1959 static void
1960 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1961 {
1962    uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1963    for (unsigned i = 0; i < num_cf_nodes; i++)
1964       read_cf_node(ctx, cf_list);
1965 }
1966 
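/* Function implementations are written in a second pass, after every
 * nir_function has been assigned an index, so that call instructions inside
 * an impl can reference their callees (see the comment in write_function).
 */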
1967 static void
1968 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1969 {
1970    blob_write_uint8(ctx->blob, fi->structured);
1971    blob_write_uint8(ctx->blob, !!fi->preamble);
1972 
1973    if (fi->preamble)
1974       blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));
1975 
1976    write_var_list(ctx, &fi->locals);
1977    write_reg_list(ctx, &fi->registers);
1978    blob_write_uint32(ctx->blob, fi->reg_alloc);
1979 
1980    write_cf_list(ctx, &fi->body);
1981    write_fixup_phis(ctx);
1982 }
1983 
1984 static nir_function_impl *
1985 read_function_impl(read_ctx *ctx, nir_function *fxn)
1986 {
1987    nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1988    fi->function = fxn;
1989 
1990    fi->structured = blob_read_uint8(ctx->blob);
1991    bool preamble = blob_read_uint8(ctx->blob);
1992 
1993    if (preamble)
1994       fi->preamble = read_object(ctx);
1995 
1996    read_var_list(ctx, &fi->locals);
1997    read_reg_list(ctx, &fi->registers);
1998    fi->reg_alloc = blob_read_uint32(ctx->blob);
1999 
2000    read_cf_list(ctx, &fi->body);
2001    read_fixup_phis(ctx);
2002 
2003    fi->valid_metadata = 0;
2004 
2005    return fi;
2006 }
2007 
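/* Flag bits: 0x1 = entrypoint, 0x2 = preamble, 0x4 = has a name,
 * 0x8 = has an impl (the impl itself is written later).
 */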
2008 static void
2009 write_function(write_ctx *ctx, const nir_function *fxn)
2010 {
2011    uint32_t flags = 0;
2012    if (fxn->is_entrypoint)
2013       flags |= 0x1;
2014    if (fxn->is_preamble)
2015       flags |= 0x2;
2016    if (fxn->name)
2017       flags |= 0x4;
2018    if (fxn->impl)
2019       flags |= 0x8;
2020    blob_write_uint32(ctx->blob, flags);
2021    if (fxn->name)
2022       blob_write_string(ctx->blob, fxn->name);
2023 
2024    write_add_object(ctx, fxn);
2025 
2026    blob_write_uint32(ctx->blob, fxn->num_params);
2027    for (unsigned i = 0; i < fxn->num_params; i++) {
2028       uint32_t val =
2029          ((uint32_t)fxn->params[i].num_components) |
2030          ((uint32_t)fxn->params[i].bit_size) << 8;
2031       blob_write_uint32(ctx->blob, val);
2032    }
2033 
2034    /* At first glance, it looks like we should write the function_impl here.
2035     * However, call instructions need to be able to reference at least the
2036     * function and those will get processed as we write the function_impls.
2037     * We stop here and write function_impls as a second pass.
2038     */
2039 }
2040 
2041 static void
2042 read_function(read_ctx *ctx)
2043 {
2044    uint32_t flags = blob_read_uint32(ctx->blob);
2045    bool has_name = flags & 0x4;
2046    char *name = has_name ? blob_read_string(ctx->blob) : NULL;
2047 
2048    nir_function *fxn = nir_function_create(ctx->nir, name);
2049 
2050    read_add_object(ctx, fxn);
2051 
2052    fxn->num_params = blob_read_uint32(ctx->blob);
2053    fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
2054    for (unsigned i = 0; i < fxn->num_params; i++) {
2055       uint32_t val = blob_read_uint32(ctx->blob);
2056       fxn->params[i].num_components = val & 0xff;
2057       fxn->params[i].bit_size = (val >> 8) & 0xff;
2058    }
2059 
2060    fxn->is_entrypoint = flags & 0x1;
2061    fxn->is_preamble = flags & 0x2;
2062    if (flags & 0x8)
2063       fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
2064 }
2065 
2066 static void
2067 write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
2068 {
2069    if (xfb == NULL) {
2070       blob_write_uint32(ctx->blob, 0);
2071    } else {
2072       size_t size = nir_xfb_info_size(xfb->output_count);
2073       assert(size <= UINT32_MAX);
2074       blob_write_uint32(ctx->blob, size);
2075       blob_write_bytes(ctx->blob, xfb, size);
2076    }
2077 }
2078 
2079 static nir_xfb_info *
2080 read_xfb_info(read_ctx *ctx)
2081 {
2082    uint32_t size = blob_read_uint32(ctx->blob);
2083    if (size == 0)
2084       return NULL;
2085 
2086    struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
2087    blob_copy_bytes(ctx->blob, (void *)xfb, size);
2088 
2089    return xfb;
2090 }
2091 
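/* Typical round trip through a blob, as a minimal sketch (the shader cache
 * plumbing and error handling are omitted; "nir" and "clone" are just
 * placeholder names):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, blob.data, blob.size);
 *    nir_shader *clone = nir_deserialize(NULL, nir->options, &reader);
 *
 *    blob_finish(&blob);
 */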
2092 /**
2093  * Serialize NIR into a binary blob.
2094  *
2095  * \param strip  Don't serialize information only useful for debugging,
2096  *               such as variable names, making cache hits from similar
2097  *               shaders more likely.
2098  */
2099 void
2100 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2101 {
2102    write_ctx ctx = {0};
2103    ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2104    ctx.blob = blob;
2105    ctx.nir = nir;
2106    ctx.strip = strip;
2107    util_dynarray_init(&ctx.phi_fixups, NULL);
2108 
2109    size_t idx_size_offset = blob_reserve_uint32(blob);
2110 
2111    struct shader_info info = nir->info;
2112    uint32_t strings = 0;
2113    if (!strip && info.name)
2114       strings |= 0x1;
2115    if (!strip && info.label)
2116       strings |= 0x2;
2117    blob_write_uint32(blob, strings);
2118    if (!strip && info.name)
2119       blob_write_string(blob, info.name);
2120    if (!strip && info.label)
2121       blob_write_string(blob, info.label);
2122    info.name = info.label = NULL;
2123    blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2124 
2125    write_var_list(&ctx, &nir->variables);
2126 
2127    blob_write_uint32(blob, nir->num_inputs);
2128    blob_write_uint32(blob, nir->num_uniforms);
2129    blob_write_uint32(blob, nir->num_outputs);
2130    blob_write_uint32(blob, nir->scratch_size);
2131 
2132    blob_write_uint32(blob, exec_list_length(&nir->functions));
2133    nir_foreach_function(fxn, nir) {
2134       write_function(&ctx, fxn);
2135    }
2136 
2137    nir_foreach_function(fxn, nir) {
2138       if (fxn->impl)
2139          write_function_impl(&ctx, fxn->impl);
2140    }
2141 
2142    blob_write_uint32(blob, nir->constant_data_size);
2143    if (nir->constant_data_size > 0)
2144       blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2145 
2146    write_xfb_info(&ctx, nir->xfb_info);
2147 
2148    blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2149 
2150    _mesa_hash_table_destroy(ctx.remap_table, NULL);
2151    util_dynarray_fini(&ctx.phi_fixups);
2152 }
2153 
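/* Deserialize a shader previously written by nir_serialize().  The index
 * table is sized from the count stored at the start of the blob and maps
 * serialized indices back to the newly created objects.
 */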
2154 nir_shader *
2155 nir_deserialize(void *mem_ctx,
2156                 const struct nir_shader_compiler_options *options,
2157                 struct blob_reader *blob)
2158 {
2159    read_ctx ctx = {0};
2160    ctx.blob = blob;
2161    list_inithead(&ctx.phi_srcs);
2162    ctx.idx_table_len = blob_read_uint32(blob);
2163    ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2164 
2165    uint32_t strings = blob_read_uint32(blob);
2166    char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2167    char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2168 
2169    struct shader_info info;
2170    blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2171 
2172    ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2173 
2174    info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2175    info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2176 
2177    ctx.nir->info = info;
2178 
2179    read_var_list(&ctx, &ctx.nir->variables);
2180 
2181    ctx.nir->num_inputs = blob_read_uint32(blob);
2182    ctx.nir->num_uniforms = blob_read_uint32(blob);
2183    ctx.nir->num_outputs = blob_read_uint32(blob);
2184    ctx.nir->scratch_size = blob_read_uint32(blob);
2185 
2186    unsigned num_functions = blob_read_uint32(blob);
2187    for (unsigned i = 0; i < num_functions; i++)
2188       read_function(&ctx);
2189 
2190    nir_foreach_function(fxn, ctx.nir) {
2191       if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2192          fxn->impl = read_function_impl(&ctx, fxn);
2193    }
2194 
2195    ctx.nir->constant_data_size = blob_read_uint32(blob);
2196    if (ctx.nir->constant_data_size > 0) {
2197       ctx.nir->constant_data =
2198          ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2199       blob_copy_bytes(blob, ctx.nir->constant_data,
2200                       ctx.nir->constant_data_size);
2201    }
2202 
2203    ctx.nir->xfb_info = read_xfb_info(&ctx);
2204 
2205    free(ctx.idx_table);
2206 
2207    nir_validate_shader(ctx.nir, "after deserialize");
2208 
2209    return ctx.nir;
2210 }
2211 
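/* Round-trip a shader through serialization in place: the original contents
 * (and any ralloc children hanging off them) are thrown away and replaced by
 * the freshly deserialized copy.
 */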
2212 void
2213 nir_shader_serialize_deserialize(nir_shader *shader)
2214 {
2215    const struct nir_shader_compiler_options *options = shader->options;
2216 
2217    struct blob writer;
2218    blob_init(&writer);
2219    nir_serialize(&writer, shader, false);
2220 
2221    /* Delete all of dest's ralloc children but leave dest alone */
2222    void *dead_ctx = ralloc_context(NULL);
2223    ralloc_adopt(dead_ctx, shader);
2224    ralloc_free(dead_ctx);
2225 
2226    dead_ctx = ralloc_context(NULL);
2227 
2228    struct blob_reader reader;
2229    blob_reader_init(&reader, writer.data, writer.size);
2230    nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2231 
2232    blob_finish(&writer);
2233 
2234    nir_shader_replace(shader, copy);
2235    ralloc_free(dead_ctx);
2236 }
2237