/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "nir_xfb_info.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;
   uint32_t last_alu_header;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
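
/* Worked example of the 3-bit bit-size encoding (illustrative only):
 *   encode_bit_size_3bits(32) == util_logbase2(32) + 1 == 6
 *   decode_bit_size_3bits(6)  == 1 << (6 - 1)          == 32
 *   encode_bit_size_3bits(0)  == 0, which decodes back to 0.
 */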

#define NUM_COMPONENTS_IS_SEPARATE_7   7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
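
/* The resulting mapping (illustrative): 0-4 encode as themselves, 8 -> 5,
 * 16 -> 6, and anything else -> NUM_COMPONENTS_IS_SEPARATE_7, in which
 * case the writer emits the real count as an extra uint32 (see
 * write_dest/read_dest below).
 */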

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned ray_query:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
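
/* Example of the diff encoding ranges (illustrative): the 13-bit signed
 * "location" field holds diffs in [-4096, 4095] and the 16-bit signed
 * "driver_location" field holds diffs in [-32768, 32767], which is why
 * write_variable() only picks var_encode_location_diff when
 * abs(location diff) < (1 << 12) and abs(driver_location diff) < (1 << 15).
 */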

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   flags.u.ray_query = var->data.ray_query;

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer) {
      /* The reader consumes this index via read_object(), so the looked-up
       * ID must actually be written to the blob, not just computed.
       */
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   }
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->data.ray_query = flags.u.ray_query;

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
   blob_write_uint8(ctx->blob, reg->divergent);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);
   reg->divergent = blob_read_uint8(ctx->blob);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}
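
/* Worked example (illustrative): an SSA source pointing at object #5
 * serializes as a single uint32 with is_ssa = 1 and object_idx = 5; a
 * register source with an indirect adds one uint32 for base_offset, plus
 * a recursively written packed_src for the indirect index.
 */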

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
      uint8_t divergent:1;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use packed_const_indices to store tightly packed indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
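
/* Worked example (illustrative): const indices {7, 3} have
 * util_last_bit() values 3 and 2, so max_bits = 3 and num_indices = 2;
 * 3 * 2 <= 8, so const_indices_all_combined applies, each index gets
 * 8 / 2 = 4 bits, and packed_const_indices = 0x37.
 */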

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
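
/* Worked example (illustrative): the 32-bit constant 1.0f (0x3f800000)
 * has its low 13 bits clear, so it packs as load_const_scalar_hi_19bits
 * with packed_value = 0x3f800000 >> 13; a small integer like 5 survives
 * a 19-bit sign-extension round trip, so it packs as
 * load_const_scalar_lo_19bits_sext instead.
 */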

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:5; /* See (de|en)code_deref_modes() */
      unsigned _pad:9;
      unsigned in_bounds:1;
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:10;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:8;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:5;
      unsigned _pad:11;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
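
/* Note (illustrative): every variant that has a destination keeps it in
 * the top 8 bits, so header.any.dest aliases alu.dest, deref.dest, etc.,
 * and read_dest() can decode the destination without knowing which
 * variant wrote the header.
 */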

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
      dest.ssa.divergent = dst->ssa.divergent;
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the
    * previous instruction (if that one was also ALU). If so, we don't have
    * to write the current header. This is a typical occurrence after
    * scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         last_header.u32 = ctx->last_alu_header;

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
                                  last_header.u32);
            ctx->last_alu_header = last_header.u32;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
         ctx->last_alu_header = header.u32;
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
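
/* Worked example of the header-sharing path above (illustrative): four
 * consecutive scalarized fmul instructions with identical flags and
 * same-shaped dests emit one header, whose
 * num_followup_alu_sharing_header counter is bumped from 0 up to 3 via
 * blob_overwrite_uint32(); a fifth such fmul starts a fresh header.
 */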

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
      dst->ssa.divergent = dest.ssa.divergent;
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

#define MODE_ENC_GENERIC_BIT (1 << 4)

static nir_variable_mode
decode_deref_modes(unsigned modes)
{
   if (modes & MODE_ENC_GENERIC_BIT) {
      modes &= ~MODE_ENC_GENERIC_BIT;
      return modes << (ffs(nir_var_mem_generic) - 1);
   } else {
      return 1 << modes;
   }
}

static unsigned
encode_deref_modes(nir_variable_mode modes)
{
   /* Mode sets on derefs generally come in two forms.  For certain OpenCL
    * cases, we can have more than one of the generic modes set.  In this
    * case, we need the full bitfield.  Fortunately, there are only 4 of
    * these.  For all other modes, we can only have one mode at a time so we
    * can compress them by only storing the bit position.  This, plus one bit
    * to select encoding, lets us pack the entire bitfield in 5 bits.
    */
   STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
                 (1 << MODE_ENC_GENERIC_BIT));

   unsigned enc;
   if (modes == 0 || (modes & nir_var_mem_generic)) {
      assert(!(modes & ~nir_var_mem_generic));
      enc = modes >> (ffs(nir_var_mem_generic) - 1);
      assert(enc < MODE_ENC_GENERIC_BIT);
      enc |= MODE_ENC_GENERIC_BIT;
   } else {
      assert(util_is_power_of_two_nonzero(modes));
      enc = ffs(modes) - 1;
      assert(enc < MODE_ENC_GENERIC_BIT);
   }
   assert(modes == decode_deref_modes(enc));
   return enc;
}
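
/* Worked example (illustrative): a single non-generic mode such as
 * nir_var_shader_in encodes as its bit position, ffs(mode) - 1; a set of
 * generic modes (e.g. nir_var_mem_global | nir_var_mem_shared, both part
 * of nir_var_mem_generic) keeps its bits, shifted down into the low four
 * positions, with MODE_ENC_GENERIC_BIT set to select that encoding.
 */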

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = encode_deref_modes(deref->modes);
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);

      header.deref.in_bounds = deref->arr.in_bounds;
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      deref->arr.in_bounds = header.deref.in_bounds;

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = decode_deref_modes(header.deref.modes);
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 10 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 1024);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 8) {
         header.intrinsic.const_indices_encoding = const_indices_all_combined;

         /* Pack all const indices into 8 bits. */
         unsigned bit_size = 8 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics take their num_components from whichever dest
    * or src is listed with 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_all_combined: {
         unsigned bit_size = 8 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));
   lc->def.divergent = false;

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   undef->def.divergent = false;

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      unsigned unused:6; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 32);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_dest(ctx, &tex->dest, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_dest(ctx, &tex->dest, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_dest(ctx, &phi->dest, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
                            write_lookup_object(ctx, fixup->src));
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
                            write_lookup_object(ctx, fixup->block));
   }

   util_dynarray_clear(&ctx->phi_fixups);
}
1631
1632static nir_phi_instr *
1633read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1634{
1635   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1636
1637   read_dest(ctx, &phi->dest, &phi->instr, header);
1638
1639   /* For similar reasons as before, we just store the index directly into the
1640    * pointer, and let a later pass resolve the phi sources.
1641    *
1642    * In order to ensure that the copied sources (which are just the indices
1643    * from the blob for now) don't get inserted into the old shader's use-def
1644    * lists, we have to add the phi instruction *before* we set up its
1645    * sources.
1646    */
1647   nir_instr_insert_after_block(blk, &phi->instr);
1648
1649   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1650      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1651      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1652      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
1653
1654      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1655       * we have to set the parent_instr manually.  It doesn't really matter
1656       * when we do it, so we might as well do it here.
1657       */
1658      src->src.parent_instr = &phi->instr;
1659
1660      /* Stash it in the list of phi sources.  We'll walk this list and fix up
1661       * sources at the very end of read_function_impl.
1662       */
1663      list_add(&src->src.use_link, &ctx->phi_srcs);
1664   }
1665
1666   return phi;
1667}
1668
1669static void
1670read_fixup_phis(read_ctx *ctx)
1671{
1672   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
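      /* Both fields still hold the blob indices stashed by read_phi; map
       * them back to the deserialized block and SSA def.
       */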
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* These aren't handled because they require special block linking */
   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);

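   /* The jump type is stored in a narrow packed-header field, so only four
    * values fit.
    */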
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   /* These aren't handled because they require special block linking */
   assert(header.jump.type != nir_jump_goto &&
          header.jump.type != nir_jump_goto_if);

   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i], call);

   return call;
}

static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_ssa_undef:
      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
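      /* Calls have no packed-header payload of their own, so the bare
       * instruction type serves as the header dword that read_instr expects.
       */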
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
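      /* One packed ALU header may be shared by a run of subsequent ALU
       * instructions; deserialize the whole run here and report how many
       * instructions it covered.
       */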
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_ssa_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a special case when reading: inserting the
       * instruction must not automatically update use/def lists, because the
       * sources still hold raw blob indices.  Instead, we wait until all the
       * blocks and instructions have been read and fix the sources up at the
       * end (see read_fixup_phis).
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

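   /* Reset the ALU header-sharing state; a header is never shared across a
    * block boundary.
    */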
   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block.  Just use the one from the tail of
    * the list.  NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should also be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
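   /* read_instr may consume several instructions at once (a shared ALU
    * header covers a whole run), so advance by its return value.
    */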
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);
   blob_write_uint8(ctx->blob, nif->control);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition, nif);
   nif->control = blob_read_uint8(ctx->blob);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   blob_write_uint8(ctx->blob, loop->control);
   blob_write_uint8(ctx->blob, loop->divergent);
   write_cf_list(ctx, &loop->body);
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   loop->control = blob_read_uint8(ctx->blob);
   loop->divergent = blob_read_uint8(ctx->blob);
   read_cf_list(ctx, &loop->body);
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);
   blob_write_uint8(ctx->blob, !!fi->preamble);

   if (fi->preamble)
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));

   write_var_list(ctx, &fi->locals);
   write_reg_list(ctx, &fi->registers);
   blob_write_uint32(ctx->blob, fi->reg_alloc);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx, nir_function *fxn)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
   fi->function = fxn;

   fi->structured = blob_read_uint8(ctx->blob);
   bool preamble = blob_read_uint8(ctx->blob);

   if (preamble)
      fi->preamble = read_object(ctx);

   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
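   /* Pack the function metadata into a small flag word:
    *   0x1 = is_entrypoint, 0x2 = is_preamble,
    *   0x4 = has a name,    0x8 = has an impl.
    */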
   uint32_t flags = 0;
   if (fxn->is_entrypoint)
      flags |= 0x1;
   if (fxn->is_preamble)
      flags |= 0x2;
   if (fxn->name)
      flags |= 0x4;
   if (fxn->impl)
      flags |= 0x8;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
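   /* Each parameter packs num_components into bits 0-7 and bit_size into
    * bits 8-15.
    */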
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x4;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   fxn->is_preamble = flags & 0x2;
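   /* Stash a sentinel for now; the real impl is deserialized in the second
    * pass in nir_deserialize.
    */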
   if (flags & 0x8)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}

static void
write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
{
   if (xfb == NULL) {
      blob_write_uint32(ctx->blob, 0);
   } else {
      size_t size = nir_xfb_info_size(xfb->output_count);
      assert(size <= UINT32_MAX);
      blob_write_uint32(ctx->blob, size);
      blob_write_bytes(ctx->blob, xfb, size);
   }
}

static nir_xfb_info *
read_xfb_info(read_ctx *ctx)
{
   uint32_t size = blob_read_uint32(ctx->blob);
   if (size == 0)
      return NULL;

   struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
   blob_copy_bytes(ctx->blob, (void *)xfb, size);

   return xfb;
}

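/* Typical round trip through nir_serialize (a minimal sketch; cache storage
 * and error handling omitted):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);
 *    // persist blob.data / blob.size, e.g. in an on-disk shader cache
 *    blob_finish(&blob);
 */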
/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

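   /* Reserve a dword for the final object count.  The reader needs it up
    * front to size its index table, so it is patched in at the very end.
    */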
   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
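   /* Any name/label strings were already written above (unless stripped);
    * the raw pointers would be meaningless in the blob, so clear them before
    * dumping the struct.
    */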
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   write_xfb_info(&ctx, nir->xfb_info);

   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

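/* Counterpart sketch for loading (assumes data/size come from a prior
 * nir_serialize):
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, data, size);
 *    nir_shader *s = nir_deserialize(NULL, options, &reader);
 */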
nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   ctx.nir->xfb_info = read_xfb_info(&ctx);

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

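/* Round-trip a shader through serialization in place, replacing its contents
 * with the deserialized copy.  Handy for exercising the serializer.
 */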
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader itself
    * alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}