1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2017 Connor Abbott
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir_serialize.h"
25bf215546Sopenharmony_ci#include "nir_control_flow.h"
26bf215546Sopenharmony_ci#include "nir_xfb_info.h"
27bf215546Sopenharmony_ci#include "util/u_dynarray.h"
28bf215546Sopenharmony_ci#include "util/u_math.h"
29bf215546Sopenharmony_ci
/* Sentinel pointer value (the integer 1 cast to a pointer).  Its users are
 * outside this part of the file.
 */
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)

/* Limit checked by write_add_object() when handing out object indices.
 * It matches the 20-bit object_idx field of union packed_src.
 */
#define MAX_OBJECT_IDS (1 << 20)
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_citypedef struct {
34bf215546Sopenharmony_ci   size_t blob_offset;
35bf215546Sopenharmony_ci   nir_ssa_def *src;
36bf215546Sopenharmony_ci   nir_block *block;
37bf215546Sopenharmony_ci} write_phi_fixup;
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_citypedef struct {
40bf215546Sopenharmony_ci   const nir_shader *nir;
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci   struct blob *blob;
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci   /* maps pointer to index */
45bf215546Sopenharmony_ci   struct hash_table *remap_table;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci   /* the next index to assign to a NIR in-memory object */
48bf215546Sopenharmony_ci   uint32_t next_idx;
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci   /* Array of write_phi_fixup structs representing phi sources that need to
51bf215546Sopenharmony_ci    * be resolved in the second pass.
52bf215546Sopenharmony_ci    */
53bf215546Sopenharmony_ci   struct util_dynarray phi_fixups;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   /* The last serialized type. */
56bf215546Sopenharmony_ci   const struct glsl_type *last_type;
57bf215546Sopenharmony_ci   const struct glsl_type *last_interface_type;
58bf215546Sopenharmony_ci   struct nir_variable_data last_var_data;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   /* For skipping equal ALU headers (typical after scalarization). */
61bf215546Sopenharmony_ci   nir_instr_type last_instr_type;
62bf215546Sopenharmony_ci   uintptr_t last_alu_header_offset;
63bf215546Sopenharmony_ci   uint32_t last_alu_header;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   /* Don't write optional data such as variable names. */
66bf215546Sopenharmony_ci   bool strip;
67bf215546Sopenharmony_ci} write_ctx;
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_citypedef struct {
70bf215546Sopenharmony_ci   nir_shader *nir;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   struct blob_reader *blob;
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   /* the next index to assign to a NIR in-memory object */
75bf215546Sopenharmony_ci   uint32_t next_idx;
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   /* The length of the index -> object table */
78bf215546Sopenharmony_ci   uint32_t idx_table_len;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   /* map from index to deserialized pointer */
81bf215546Sopenharmony_ci   void **idx_table;
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   /* List of phi sources. */
84bf215546Sopenharmony_ci   struct list_head phi_srcs;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   /* The last deserialized type. */
87bf215546Sopenharmony_ci   const struct glsl_type *last_type;
88bf215546Sopenharmony_ci   const struct glsl_type *last_interface_type;
89bf215546Sopenharmony_ci   struct nir_variable_data last_var_data;
90bf215546Sopenharmony_ci} read_ctx;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_cistatic void
93bf215546Sopenharmony_ciwrite_add_object(write_ctx *ctx, const void *obj)
94bf215546Sopenharmony_ci{
95bf215546Sopenharmony_ci   uint32_t index = ctx->next_idx++;
96bf215546Sopenharmony_ci   assert(index != MAX_OBJECT_IDS);
97bf215546Sopenharmony_ci   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic uint32_t
101bf215546Sopenharmony_ciwrite_lookup_object(write_ctx *ctx, const void *obj)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
104bf215546Sopenharmony_ci   assert(entry);
105bf215546Sopenharmony_ci   return (uint32_t)(uintptr_t) entry->data;
106bf215546Sopenharmony_ci}
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_cistatic void
109bf215546Sopenharmony_ciread_add_object(read_ctx *ctx, void *obj)
110bf215546Sopenharmony_ci{
111bf215546Sopenharmony_ci   assert(ctx->next_idx < ctx->idx_table_len);
112bf215546Sopenharmony_ci   ctx->idx_table[ctx->next_idx++] = obj;
113bf215546Sopenharmony_ci}
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_cistatic void *
116bf215546Sopenharmony_ciread_lookup_object(read_ctx *ctx, uint32_t idx)
117bf215546Sopenharmony_ci{
118bf215546Sopenharmony_ci   assert(idx < ctx->idx_table_len);
119bf215546Sopenharmony_ci   return ctx->idx_table[idx];
120bf215546Sopenharmony_ci}
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_cistatic void *
123bf215546Sopenharmony_ciread_object(read_ctx *ctx)
124bf215546Sopenharmony_ci{
125bf215546Sopenharmony_ci   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126bf215546Sopenharmony_ci}
127bf215546Sopenharmony_ci
/* Pack a bit size of 0, 1, 2, 4, 8, 16, 32 or 64 into 3 bits:
 * 0 stays 0, every other value becomes log2(bit_size) + 1.
 * Inverse of decode_bit_size_3bits().
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));

   return bit_size ? util_logbase2(bit_size) + 1 : 0;
}
137bf215546Sopenharmony_ci
/* Expand a 3-bit encoded bit size: 0 stays 0, any other value v becomes
 * 1 << (v - 1).  Inverse of encode_bit_size_3bits().
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size == 0)
      return 0;

   return 1 << (bit_size - 1);
}
145bf215546Sopenharmony_ci
/* 3-bit code meaning "num_components is stored in the next uint32". */
#define NUM_COMPONENTS_IS_SEPARATE_7   7

/* Pack a component count into 3 bits.
 *
 * 0..4 are stored as-is, 8 becomes 5 and 16 becomes 6.  Any other count
 * yields NUM_COMPONENTS_IS_SEPARATE_7, the special value indicating that
 * num_components is in the next uint32.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return num_components;
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      return NUM_COMPONENTS_IS_SEPARATE_7;
   }
}
161bf215546Sopenharmony_ci
/* Expand a 3-bit component-count code: 0..4 map to themselves, 5 to 8 and
 * 6 to 16.  Any other code must be handled by the caller before calling
 * this function; it is treated as unreachable here.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value == 6)
      return 16;
   if (value == 5)
      return 8;
   if (value > 4) {
      unreachable("invalid num_components encoding");
      return 0;
   }

   return value;
}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_cistatic void
177bf215546Sopenharmony_ciwrite_constant(write_ctx *ctx, const nir_constant *c)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
180bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, c->num_elements);
181bf215546Sopenharmony_ci   for (unsigned i = 0; i < c->num_elements; i++)
182bf215546Sopenharmony_ci      write_constant(ctx, c->elements[i]);
183bf215546Sopenharmony_ci}
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_cistatic nir_constant *
186bf215546Sopenharmony_ciread_constant(read_ctx *ctx, nir_variable *nvar)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   nir_constant *c = ralloc(nvar, nir_constant);
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
191bf215546Sopenharmony_ci   c->num_elements = blob_read_uint32(ctx->blob);
192bf215546Sopenharmony_ci   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
193bf215546Sopenharmony_ci   for (unsigned i = 0; i < c->num_elements; i++)
194bf215546Sopenharmony_ci      c->elements[i] = read_constant(ctx, nvar);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   return c;
197bf215546Sopenharmony_ci}
198bf215546Sopenharmony_ci
/* How nir_variable::data is stored for a variable; kept in the 2-bit
 * packed_var::data_encoding field.
 */
enum var_data_encoding {
   /* The complete nir_variable_data follows. */
   var_encode_full = 0,
   /* No data follows; the mode is nir_var_shader_temp. */
   var_encode_shader_temp = 1,
   /* No data follows; the mode is nir_var_function_temp. */
   var_encode_function_temp = 2,
   /* A packed_var_data_diff relative to the previous variable follows. */
   var_encode_location_diff = 3,
};
205bf215546Sopenharmony_ci
/* First dword written for every variable: presence flags and small counts
 * that tell read_variable() which fields follow.  write_variable()
 * statically asserts that the union is 4 bytes.
 */
union packed_var {
   uint32_t u32;
   struct {
      /* A name string follows. */
      unsigned has_name:1;
      /* A serialized nir_constant follows. */
      unsigned has_constant_initializer:1;
      /* An object index for the pointer initializer follows. */
      unsigned has_pointer_initializer:1;
      /* The variable has an interface type. */
      unsigned has_interface_type:1;
      /* Number of state slots that follow. */
      unsigned num_state_slots:7;
      /* One of enum var_data_encoding. */
      unsigned data_encoding:2;
      /* The type is the previously (de)serialized one and is omitted. */
      unsigned type_same_as_last:1;
      /* Same as above, for the interface type. */
      unsigned interface_type_same_as_last:1;
      /* Copy of nir_variable_data::ray_query. */
      unsigned ray_query:1;
      /* Number of per-member data entries that follow. */
      unsigned num_members:16;
   } u;
};
221bf215546Sopenharmony_ci
/* Signed deltas of the three location fields relative to the previously
 * serialized variable; used with var_encode_location_diff.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_cistatic void
232bf215546Sopenharmony_ciwrite_variable(write_ctx *ctx, const nir_variable *var)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   write_add_object(ctx, var);
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   assert(var->num_state_slots < (1 << 7));
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union packed_var) == 4);
239bf215546Sopenharmony_ci   union packed_var flags;
240bf215546Sopenharmony_ci   flags.u32 = 0;
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   flags.u.has_name = !ctx->strip && var->name;
243bf215546Sopenharmony_ci   flags.u.has_constant_initializer = !!(var->constant_initializer);
244bf215546Sopenharmony_ci   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
245bf215546Sopenharmony_ci   flags.u.has_interface_type = !!(var->interface_type);
246bf215546Sopenharmony_ci   flags.u.type_same_as_last = var->type == ctx->last_type;
247bf215546Sopenharmony_ci   flags.u.interface_type_same_as_last =
248bf215546Sopenharmony_ci      var->interface_type && var->interface_type == ctx->last_interface_type;
249bf215546Sopenharmony_ci   flags.u.num_state_slots = var->num_state_slots;
250bf215546Sopenharmony_ci   flags.u.num_members = var->num_members;
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   struct nir_variable_data data = var->data;
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   /* When stripping, we expect that the location is no longer needed,
255bf215546Sopenharmony_ci    * which is typically after shaders are linked.
256bf215546Sopenharmony_ci    */
257bf215546Sopenharmony_ci   if (ctx->strip &&
258bf215546Sopenharmony_ci       data.mode != nir_var_system_value &&
259bf215546Sopenharmony_ci       data.mode != nir_var_shader_in &&
260bf215546Sopenharmony_ci       data.mode != nir_var_shader_out)
261bf215546Sopenharmony_ci      data.location = 0;
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   /* Temporary variables don't serialize var->data. */
264bf215546Sopenharmony_ci   if (data.mode == nir_var_shader_temp)
265bf215546Sopenharmony_ci      flags.u.data_encoding = var_encode_shader_temp;
266bf215546Sopenharmony_ci   else if (data.mode == nir_var_function_temp)
267bf215546Sopenharmony_ci      flags.u.data_encoding = var_encode_function_temp;
268bf215546Sopenharmony_ci   else {
269bf215546Sopenharmony_ci      struct nir_variable_data tmp = data;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci      tmp.location = ctx->last_var_data.location;
272bf215546Sopenharmony_ci      tmp.location_frac = ctx->last_var_data.location_frac;
273bf215546Sopenharmony_ci      tmp.driver_location = ctx->last_var_data.driver_location;
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci      /* See if we can encode only the difference in locations from the last
276bf215546Sopenharmony_ci       * variable.
277bf215546Sopenharmony_ci       */
278bf215546Sopenharmony_ci      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
279bf215546Sopenharmony_ci          abs((int)data.location -
280bf215546Sopenharmony_ci              (int)ctx->last_var_data.location) < (1 << 12) &&
281bf215546Sopenharmony_ci          abs((int)data.driver_location -
282bf215546Sopenharmony_ci              (int)ctx->last_var_data.driver_location) < (1 << 15))
283bf215546Sopenharmony_ci         flags.u.data_encoding = var_encode_location_diff;
284bf215546Sopenharmony_ci      else
285bf215546Sopenharmony_ci         flags.u.data_encoding = var_encode_full;
286bf215546Sopenharmony_ci   }
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   flags.u.ray_query = var->data.ray_query;
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, flags.u32);
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci   if (!flags.u.type_same_as_last) {
293bf215546Sopenharmony_ci      encode_type_to_blob(ctx->blob, var->type);
294bf215546Sopenharmony_ci      ctx->last_type = var->type;
295bf215546Sopenharmony_ci   }
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   if (var->interface_type && !flags.u.interface_type_same_as_last) {
298bf215546Sopenharmony_ci      encode_type_to_blob(ctx->blob, var->interface_type);
299bf215546Sopenharmony_ci      ctx->last_interface_type = var->interface_type;
300bf215546Sopenharmony_ci   }
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   if (flags.u.has_name)
303bf215546Sopenharmony_ci      blob_write_string(ctx->blob, var->name);
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   if (flags.u.data_encoding == var_encode_full ||
306bf215546Sopenharmony_ci       flags.u.data_encoding == var_encode_location_diff) {
307bf215546Sopenharmony_ci      if (flags.u.data_encoding == var_encode_full) {
308bf215546Sopenharmony_ci         blob_write_bytes(ctx->blob, &data, sizeof(data));
309bf215546Sopenharmony_ci      } else {
310bf215546Sopenharmony_ci         /* Serialize only the difference in locations from the last variable.
311bf215546Sopenharmony_ci          */
312bf215546Sopenharmony_ci         union packed_var_data_diff diff;
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci         diff.u.location = data.location - ctx->last_var_data.location;
315bf215546Sopenharmony_ci         diff.u.location_frac = data.location_frac -
316bf215546Sopenharmony_ci                                ctx->last_var_data.location_frac;
317bf215546Sopenharmony_ci         diff.u.driver_location = data.driver_location -
318bf215546Sopenharmony_ci                                  ctx->last_var_data.driver_location;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci         blob_write_uint32(ctx->blob, diff.u32);
321bf215546Sopenharmony_ci      }
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci      ctx->last_var_data = data;
324bf215546Sopenharmony_ci   }
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   for (unsigned i = 0; i < var->num_state_slots; i++) {
327bf215546Sopenharmony_ci      blob_write_bytes(ctx->blob, &var->state_slots[i],
328bf215546Sopenharmony_ci                       sizeof(var->state_slots[i]));
329bf215546Sopenharmony_ci   }
330bf215546Sopenharmony_ci   if (var->constant_initializer)
331bf215546Sopenharmony_ci      write_constant(ctx, var->constant_initializer);
332bf215546Sopenharmony_ci   if (var->pointer_initializer)
333bf215546Sopenharmony_ci      write_lookup_object(ctx, var->pointer_initializer);
334bf215546Sopenharmony_ci   if (var->num_members > 0) {
335bf215546Sopenharmony_ci      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
336bf215546Sopenharmony_ci                       var->num_members * sizeof(*var->members));
337bf215546Sopenharmony_ci   }
338bf215546Sopenharmony_ci}
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_cistatic nir_variable *
341bf215546Sopenharmony_ciread_variable(read_ctx *ctx)
342bf215546Sopenharmony_ci{
343bf215546Sopenharmony_ci   nir_variable *var = rzalloc(ctx->nir, nir_variable);
344bf215546Sopenharmony_ci   read_add_object(ctx, var);
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   union packed_var flags;
347bf215546Sopenharmony_ci   flags.u32 = blob_read_uint32(ctx->blob);
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   if (flags.u.type_same_as_last) {
350bf215546Sopenharmony_ci      var->type = ctx->last_type;
351bf215546Sopenharmony_ci   } else {
352bf215546Sopenharmony_ci      var->type = decode_type_from_blob(ctx->blob);
353bf215546Sopenharmony_ci      ctx->last_type = var->type;
354bf215546Sopenharmony_ci   }
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   if (flags.u.has_interface_type) {
357bf215546Sopenharmony_ci      if (flags.u.interface_type_same_as_last) {
358bf215546Sopenharmony_ci         var->interface_type = ctx->last_interface_type;
359bf215546Sopenharmony_ci      } else {
360bf215546Sopenharmony_ci         var->interface_type = decode_type_from_blob(ctx->blob);
361bf215546Sopenharmony_ci         ctx->last_interface_type = var->interface_type;
362bf215546Sopenharmony_ci      }
363bf215546Sopenharmony_ci   }
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   if (flags.u.has_name) {
366bf215546Sopenharmony_ci      const char *name = blob_read_string(ctx->blob);
367bf215546Sopenharmony_ci      var->name = ralloc_strdup(var, name);
368bf215546Sopenharmony_ci   } else {
369bf215546Sopenharmony_ci      var->name = NULL;
370bf215546Sopenharmony_ci   }
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci   if (flags.u.data_encoding == var_encode_shader_temp)
373bf215546Sopenharmony_ci      var->data.mode = nir_var_shader_temp;
374bf215546Sopenharmony_ci   else if (flags.u.data_encoding == var_encode_function_temp)
375bf215546Sopenharmony_ci      var->data.mode = nir_var_function_temp;
376bf215546Sopenharmony_ci   else if (flags.u.data_encoding == var_encode_full) {
377bf215546Sopenharmony_ci      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
378bf215546Sopenharmony_ci      ctx->last_var_data = var->data;
379bf215546Sopenharmony_ci   } else { /* var_encode_location_diff */
380bf215546Sopenharmony_ci      union packed_var_data_diff diff;
381bf215546Sopenharmony_ci      diff.u32 = blob_read_uint32(ctx->blob);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci      var->data = ctx->last_var_data;
384bf215546Sopenharmony_ci      var->data.location += diff.u.location;
385bf215546Sopenharmony_ci      var->data.location_frac += diff.u.location_frac;
386bf215546Sopenharmony_ci      var->data.driver_location += diff.u.driver_location;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci      ctx->last_var_data = var->data;
389bf215546Sopenharmony_ci   }
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci   var->data.ray_query = flags.u.ray_query;
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci   var->num_state_slots = flags.u.num_state_slots;
394bf215546Sopenharmony_ci   if (var->num_state_slots != 0) {
395bf215546Sopenharmony_ci      var->state_slots = ralloc_array(var, nir_state_slot,
396bf215546Sopenharmony_ci                                      var->num_state_slots);
397bf215546Sopenharmony_ci      for (unsigned i = 0; i < var->num_state_slots; i++) {
398bf215546Sopenharmony_ci         blob_copy_bytes(ctx->blob, &var->state_slots[i],
399bf215546Sopenharmony_ci                         sizeof(var->state_slots[i]));
400bf215546Sopenharmony_ci      }
401bf215546Sopenharmony_ci   }
402bf215546Sopenharmony_ci   if (flags.u.has_constant_initializer)
403bf215546Sopenharmony_ci      var->constant_initializer = read_constant(ctx, var);
404bf215546Sopenharmony_ci   else
405bf215546Sopenharmony_ci      var->constant_initializer = NULL;
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   if (flags.u.has_pointer_initializer)
408bf215546Sopenharmony_ci      var->pointer_initializer = read_object(ctx);
409bf215546Sopenharmony_ci   else
410bf215546Sopenharmony_ci      var->pointer_initializer = NULL;
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   var->num_members = flags.u.num_members;
413bf215546Sopenharmony_ci   if (var->num_members > 0) {
414bf215546Sopenharmony_ci      var->members = ralloc_array(var, struct nir_variable_data,
415bf215546Sopenharmony_ci                                  var->num_members);
416bf215546Sopenharmony_ci      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
417bf215546Sopenharmony_ci                      var->num_members * sizeof(*var->members));
418bf215546Sopenharmony_ci   }
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   return var;
421bf215546Sopenharmony_ci}
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_cistatic void
424bf215546Sopenharmony_ciwrite_var_list(write_ctx *ctx, const struct exec_list *src)
425bf215546Sopenharmony_ci{
426bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, exec_list_length(src));
427bf215546Sopenharmony_ci   foreach_list_typed(nir_variable, var, node, src) {
428bf215546Sopenharmony_ci      write_variable(ctx, var);
429bf215546Sopenharmony_ci   }
430bf215546Sopenharmony_ci}
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_cistatic void
433bf215546Sopenharmony_ciread_var_list(read_ctx *ctx, struct exec_list *dst)
434bf215546Sopenharmony_ci{
435bf215546Sopenharmony_ci   exec_list_make_empty(dst);
436bf215546Sopenharmony_ci   unsigned num_vars = blob_read_uint32(ctx->blob);
437bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_vars; i++) {
438bf215546Sopenharmony_ci      nir_variable *var = read_variable(ctx);
439bf215546Sopenharmony_ci      exec_list_push_tail(dst, &var->node);
440bf215546Sopenharmony_ci   }
441bf215546Sopenharmony_ci}
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_cistatic void
444bf215546Sopenharmony_ciwrite_register(write_ctx *ctx, const nir_register *reg)
445bf215546Sopenharmony_ci{
446bf215546Sopenharmony_ci   write_add_object(ctx, reg);
447bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, reg->num_components);
448bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, reg->bit_size);
449bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, reg->num_array_elems);
450bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, reg->index);
451bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, reg->divergent);
452bf215546Sopenharmony_ci}
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_cistatic nir_register *
455bf215546Sopenharmony_ciread_register(read_ctx *ctx)
456bf215546Sopenharmony_ci{
457bf215546Sopenharmony_ci   nir_register *reg = ralloc(ctx->nir, nir_register);
458bf215546Sopenharmony_ci   read_add_object(ctx, reg);
459bf215546Sopenharmony_ci   reg->num_components = blob_read_uint32(ctx->blob);
460bf215546Sopenharmony_ci   reg->bit_size = blob_read_uint32(ctx->blob);
461bf215546Sopenharmony_ci   reg->num_array_elems = blob_read_uint32(ctx->blob);
462bf215546Sopenharmony_ci   reg->index = blob_read_uint32(ctx->blob);
463bf215546Sopenharmony_ci   reg->divergent = blob_read_uint8(ctx->blob);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   list_inithead(&reg->uses);
466bf215546Sopenharmony_ci   list_inithead(&reg->defs);
467bf215546Sopenharmony_ci   list_inithead(&reg->if_uses);
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   return reg;
470bf215546Sopenharmony_ci}
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_cistatic void
473bf215546Sopenharmony_ciwrite_reg_list(write_ctx *ctx, const struct exec_list *src)
474bf215546Sopenharmony_ci{
475bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, exec_list_length(src));
476bf215546Sopenharmony_ci   foreach_list_typed(nir_register, reg, node, src)
477bf215546Sopenharmony_ci      write_register(ctx, reg);
478bf215546Sopenharmony_ci}
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_cistatic void
481bf215546Sopenharmony_ciread_reg_list(read_ctx *ctx, struct exec_list *dst)
482bf215546Sopenharmony_ci{
483bf215546Sopenharmony_ci   exec_list_make_empty(dst);
484bf215546Sopenharmony_ci   unsigned num_regs = blob_read_uint32(ctx->blob);
485bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_regs; i++) {
486bf215546Sopenharmony_ci      nir_register *reg = read_register(ctx);
487bf215546Sopenharmony_ci      exec_list_push_tail(dst, &reg->node);
488bf215546Sopenharmony_ci   }
489bf215546Sopenharmony_ci}
490bf215546Sopenharmony_ci
/* One dword describing a source.  The first 22 bits are a header common to
 * every kind of source; the remaining 10 bits are a footer whose meaning
 * depends on the instruction type.  read_src() statically asserts that the
 * union is 4 bytes.
 */
union packed_src {
   uint32_t u32;

   /* Generic view, the only one write_src_full()/read_src() interpret. */
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;

   /* Footer layout for ALU sources. */
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;

   /* Footer layout for texture sources. */
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_cistatic void
516bf215546Sopenharmony_ciwrite_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
517bf215546Sopenharmony_ci{
518bf215546Sopenharmony_ci   /* Since sources are very frequent, we try to save some space when storing
519bf215546Sopenharmony_ci    * them. In particular, we store whether the source is a register and
520bf215546Sopenharmony_ci    * whether the register has an indirect index in the low two bits. We can
521bf215546Sopenharmony_ci    * assume that the high two bits of the index are zero, since otherwise our
522bf215546Sopenharmony_ci    * address space would've been exhausted allocating the remap table!
523bf215546Sopenharmony_ci    */
524bf215546Sopenharmony_ci   header.any.is_ssa = src->is_ssa;
525bf215546Sopenharmony_ci   if (src->is_ssa) {
526bf215546Sopenharmony_ci      header.any.object_idx = write_lookup_object(ctx, src->ssa);
527bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, header.u32);
528bf215546Sopenharmony_ci   } else {
529bf215546Sopenharmony_ci      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
530bf215546Sopenharmony_ci      header.any.is_indirect = !!src->reg.indirect;
531bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, header.u32);
532bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, src->reg.base_offset);
533bf215546Sopenharmony_ci      if (src->reg.indirect) {
534bf215546Sopenharmony_ci         union packed_src header = {0};
535bf215546Sopenharmony_ci         write_src_full(ctx, src->reg.indirect, header);
536bf215546Sopenharmony_ci      }
537bf215546Sopenharmony_ci   }
538bf215546Sopenharmony_ci}
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_cistatic void
541bf215546Sopenharmony_ciwrite_src(write_ctx *ctx, const nir_src *src)
542bf215546Sopenharmony_ci{
543bf215546Sopenharmony_ci   union packed_src header = {0};
544bf215546Sopenharmony_ci   write_src_full(ctx, src, header);
545bf215546Sopenharmony_ci}
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_cistatic union packed_src
548bf215546Sopenharmony_ciread_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
549bf215546Sopenharmony_ci{
550bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union packed_src) == 4);
551bf215546Sopenharmony_ci   union packed_src header;
552bf215546Sopenharmony_ci   header.u32 = blob_read_uint32(ctx->blob);
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci   src->is_ssa = header.any.is_ssa;
555bf215546Sopenharmony_ci   if (src->is_ssa) {
556bf215546Sopenharmony_ci      src->ssa = read_lookup_object(ctx, header.any.object_idx);
557bf215546Sopenharmony_ci   } else {
558bf215546Sopenharmony_ci      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
559bf215546Sopenharmony_ci      src->reg.base_offset = blob_read_uint32(ctx->blob);
560bf215546Sopenharmony_ci      if (header.any.is_indirect) {
561bf215546Sopenharmony_ci         src->reg.indirect = malloc(sizeof(nir_src));
562bf215546Sopenharmony_ci         read_src(ctx, src->reg.indirect, mem_ctx);
563bf215546Sopenharmony_ci      } else {
564bf215546Sopenharmony_ci         src->reg.indirect = NULL;
565bf215546Sopenharmony_ci      }
566bf215546Sopenharmony_ci   }
567bf215546Sopenharmony_ci   return header;
568bf215546Sopenharmony_ci}
569bf215546Sopenharmony_ci
/* One byte describing a destination.  Bit 0 (is_ssa) selects which of the
 * two views applies.
 */
union packed_dest {
   uint8_t u8;

   /* SSA destination.  num_components and bit_size are 3-bit codes
    * (presumably produced by encode_num_components_in_3bits() and
    * encode_bit_size_3bits() - confirm at the use site, which is outside
    * this part of the file).
    */
   struct {
      uint8_t is_ssa:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
      uint8_t divergent:1;
   } ssa;

   /* Register destination. */
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};
584bf215546Sopenharmony_ci
/* Selects how the constant indices of an intrinsic are stored.  The four
 * values fit the 2-bit const_indices_encoding field of
 * packed_instr.intrinsic.
 */
enum intrinsic_const_indices_encoding {
   /* All indices are tightly packed into packed_const_indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined = 0,

   /* One fixed-width value per index. */
   const_indices_8bit = 1,  /* 8 bits per element */
   const_indices_16bit = 2, /* 16 bits per element */
   const_indices_32bit = 3, /* 32 bits per element */
};
597bf215546Sopenharmony_ci
/* Selects how a load_const value is stored.  Kept in the 2-bit "packing"
 * field of packed_instr.load_const and decides how packed_value is read.
 */
enum load_const_packing {
   /* Nothing is packed: the constants are stored in the following dwords. */
   load_const_full = 0,

   /* packed_value holds the high 19 bits and the low bits are 0, which is
    * good for floating-point decimals.
    */
   load_const_scalar_hi_19bits = 1,

   /* packed_value holds the low 19 bits and the high bits are
    * sign-extended.
    */
   load_const_scalar_lo_19bits_sext = 2,
};
610bf215546Sopenharmony_ci
/* 32-bit instruction header.
 *
 * Each struct below is an alternative view of the same 32 bits, one per
 * instruction kind, and the bit-field widths of every view add up to 32.
 * All views begin with the 4-bit instr_type.  Views that carry a destination
 * keep it in a trailing 8-bit "dest" field (a union packed_dest byte).
 */
union packed_instr {
   /* Raw word, used to clear, copy and compare whole headers. */
   uint32_t u32;

   /* Fields common to the views that have a destination. */
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;

   /* ALU instruction. */
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      /* Sources are written as bare 16-bit object indices. */
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header.  This counts how many
       * following ALU instructions reuse this header instead of writing
       * their own.
       */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;

   /* Deref instruction (all deref types except the fields that deref_var
    * reinterprets).
    */
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:5; /* See (de|en)code_deref_modes() */
      unsigned _pad:9;
      unsigned in_bounds:1;
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned dest:8;
   } deref;

   /* Variable deref: the middle bits hold the variable's object index. */
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;

   /* Intrinsic instruction. */
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:10;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:8;
      unsigned dest:8;
   } intrinsic;

   /* load_const instruction (no dest field in this view). */
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;

   /* Undef instruction (no dest field in this view). */
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;

   /* Texture instruction. */
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:5;
      unsigned _pad:11;
      unsigned dest:8;
   } tex;

   /* Phi instruction. */
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;

   /* Jump instruction (no dest field in this view). */
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci/* Write "lo24" as low 24 bits in the first uint32. */
689bf215546Sopenharmony_cistatic void
690bf215546Sopenharmony_ciwrite_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
691bf215546Sopenharmony_ci           nir_instr_type instr_type)
692bf215546Sopenharmony_ci{
693bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union packed_dest) == 1);
694bf215546Sopenharmony_ci   union packed_dest dest;
695bf215546Sopenharmony_ci   dest.u8 = 0;
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci   dest.ssa.is_ssa = dst->is_ssa;
698bf215546Sopenharmony_ci   if (dst->is_ssa) {
699bf215546Sopenharmony_ci      dest.ssa.num_components =
700bf215546Sopenharmony_ci         encode_num_components_in_3bits(dst->ssa.num_components);
701bf215546Sopenharmony_ci      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
702bf215546Sopenharmony_ci      dest.ssa.divergent = dst->ssa.divergent;
703bf215546Sopenharmony_ci   } else {
704bf215546Sopenharmony_ci      dest.reg.is_indirect = !!(dst->reg.indirect);
705bf215546Sopenharmony_ci   }
706bf215546Sopenharmony_ci   header.any.dest = dest.u8;
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   /* Check if the current ALU instruction has the same header as the previous
709bf215546Sopenharmony_ci    * instruction that is also ALU. If it is, we don't have to write
710bf215546Sopenharmony_ci    * the current header. This is a typical occurence after scalarization.
711bf215546Sopenharmony_ci    */
712bf215546Sopenharmony_ci   if (instr_type == nir_instr_type_alu) {
713bf215546Sopenharmony_ci      bool equal_header = false;
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci      if (ctx->last_instr_type == nir_instr_type_alu) {
716bf215546Sopenharmony_ci         assert(ctx->last_alu_header_offset);
717bf215546Sopenharmony_ci         union packed_instr last_header;
718bf215546Sopenharmony_ci         last_header.u32 = ctx->last_alu_header;
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci         /* Clear the field that counts ALUs with equal headers. */
721bf215546Sopenharmony_ci         union packed_instr clean_header;
722bf215546Sopenharmony_ci         clean_header.u32 = last_header.u32;
723bf215546Sopenharmony_ci         clean_header.alu.num_followup_alu_sharing_header = 0;
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci         /* There can be at most 4 consecutive ALU instructions
726bf215546Sopenharmony_ci          * sharing the same header.
727bf215546Sopenharmony_ci          */
728bf215546Sopenharmony_ci         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
729bf215546Sopenharmony_ci             header.u32 == clean_header.u32) {
730bf215546Sopenharmony_ci            last_header.alu.num_followup_alu_sharing_header++;
731bf215546Sopenharmony_ci            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
732bf215546Sopenharmony_ci                                  last_header.u32);
733bf215546Sopenharmony_ci            ctx->last_alu_header = last_header.u32;
734bf215546Sopenharmony_ci            equal_header = true;
735bf215546Sopenharmony_ci         }
736bf215546Sopenharmony_ci      }
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci      if (!equal_header) {
739bf215546Sopenharmony_ci         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
740bf215546Sopenharmony_ci         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
741bf215546Sopenharmony_ci         ctx->last_alu_header = header.u32;
742bf215546Sopenharmony_ci      }
743bf215546Sopenharmony_ci   } else {
744bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, header.u32);
745bf215546Sopenharmony_ci   }
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   if (dest.ssa.is_ssa &&
748bf215546Sopenharmony_ci       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
749bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, dst->ssa.num_components);
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   if (dst->is_ssa) {
752bf215546Sopenharmony_ci      write_add_object(ctx, &dst->ssa);
753bf215546Sopenharmony_ci   } else {
754bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
755bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, dst->reg.base_offset);
756bf215546Sopenharmony_ci      if (dst->reg.indirect)
757bf215546Sopenharmony_ci         write_src(ctx, dst->reg.indirect);
758bf215546Sopenharmony_ci   }
759bf215546Sopenharmony_ci}
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_cistatic void
762bf215546Sopenharmony_ciread_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
763bf215546Sopenharmony_ci          union packed_instr header)
764bf215546Sopenharmony_ci{
765bf215546Sopenharmony_ci   union packed_dest dest;
766bf215546Sopenharmony_ci   dest.u8 = header.any.dest;
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci   if (dest.ssa.is_ssa) {
769bf215546Sopenharmony_ci      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
770bf215546Sopenharmony_ci      unsigned num_components;
771bf215546Sopenharmony_ci      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
772bf215546Sopenharmony_ci         num_components = blob_read_uint32(ctx->blob);
773bf215546Sopenharmony_ci      else
774bf215546Sopenharmony_ci         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
775bf215546Sopenharmony_ci      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
776bf215546Sopenharmony_ci      dst->ssa.divergent = dest.ssa.divergent;
777bf215546Sopenharmony_ci      read_add_object(ctx, &dst->ssa);
778bf215546Sopenharmony_ci   } else {
779bf215546Sopenharmony_ci      dst->reg.reg = read_object(ctx);
780bf215546Sopenharmony_ci      dst->reg.base_offset = blob_read_uint32(ctx->blob);
781bf215546Sopenharmony_ci      if (dest.reg.is_indirect) {
782bf215546Sopenharmony_ci         dst->reg.indirect = malloc(sizeof(nir_src));
783bf215546Sopenharmony_ci         read_src(ctx, dst->reg.indirect, instr);
784bf215546Sopenharmony_ci      }
785bf215546Sopenharmony_ci   }
786bf215546Sopenharmony_ci}
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_cistatic bool
789bf215546Sopenharmony_ciare_object_ids_16bit(write_ctx *ctx)
790bf215546Sopenharmony_ci{
791bf215546Sopenharmony_ci   /* Check the highest object ID, because they are monotonic. */
792bf215546Sopenharmony_ci   return ctx->next_idx < (1 << 16);
793bf215546Sopenharmony_ci}
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_cistatic bool
796bf215546Sopenharmony_ciis_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
797bf215546Sopenharmony_ci{
798bf215546Sopenharmony_ci   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_srcs; i++) {
801bf215546Sopenharmony_ci      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
802bf215546Sopenharmony_ci         return false;
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci      for (unsigned chan = 0; chan < src_components; chan++) {
807bf215546Sopenharmony_ci         /* The swizzles for src0.x and src1.x are stored
808bf215546Sopenharmony_ci          * in writemask_or_two_swizzles for SSA ALUs.
809bf215546Sopenharmony_ci          */
810bf215546Sopenharmony_ci         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
811bf215546Sopenharmony_ci             alu->src[i].swizzle[chan] < 4)
812bf215546Sopenharmony_ci            continue;
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci         if (alu->src[i].swizzle[chan] != chan)
815bf215546Sopenharmony_ci            return false;
816bf215546Sopenharmony_ci      }
817bf215546Sopenharmony_ci   }
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   return are_object_ids_16bit(ctx);
820bf215546Sopenharmony_ci}
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_cistatic void
823bf215546Sopenharmony_ciwrite_alu(write_ctx *ctx, const nir_alu_instr *alu)
824bf215546Sopenharmony_ci{
825bf215546Sopenharmony_ci   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
826bf215546Sopenharmony_ci   unsigned dst_components = nir_dest_num_components(alu->dest.dest);
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci   /* 9 bits for nir_op */
829bf215546Sopenharmony_ci   STATIC_ASSERT(nir_num_opcodes <= 512);
830bf215546Sopenharmony_ci   union packed_instr header;
831bf215546Sopenharmony_ci   header.u32 = 0;
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   header.alu.instr_type = alu->instr.type;
834bf215546Sopenharmony_ci   header.alu.exact = alu->exact;
835bf215546Sopenharmony_ci   header.alu.no_signed_wrap = alu->no_signed_wrap;
836bf215546Sopenharmony_ci   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
837bf215546Sopenharmony_ci   header.alu.saturate = alu->dest.saturate;
838bf215546Sopenharmony_ci   header.alu.op = alu->op;
839bf215546Sopenharmony_ci   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci   if (header.alu.packed_src_ssa_16bit &&
842bf215546Sopenharmony_ci       alu->dest.dest.is_ssa) {
843bf215546Sopenharmony_ci      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
844bf215546Sopenharmony_ci      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
845bf215546Sopenharmony_ci      if (num_srcs > 1)
846bf215546Sopenharmony_ci         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
847bf215546Sopenharmony_ci   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
848bf215546Sopenharmony_ci      /* For vec4 registers, this field is a writemask. */
849bf215546Sopenharmony_ci      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
850bf215546Sopenharmony_ci   }
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci   if (!alu->dest.dest.is_ssa && dst_components > 4)
855bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, alu->dest.write_mask);
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   if (header.alu.packed_src_ssa_16bit) {
858bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
859bf215546Sopenharmony_ci         assert(alu->src[i].src.is_ssa);
860bf215546Sopenharmony_ci         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
861bf215546Sopenharmony_ci         assert(idx < (1 << 16));
862bf215546Sopenharmony_ci         blob_write_uint16(ctx->blob, idx);
863bf215546Sopenharmony_ci      }
864bf215546Sopenharmony_ci   } else {
865bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
866bf215546Sopenharmony_ci         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
867bf215546Sopenharmony_ci         unsigned src_components = nir_src_num_components(alu->src[i].src);
868bf215546Sopenharmony_ci         union packed_src src;
869bf215546Sopenharmony_ci         bool packed = src_components <= 4 && src_channels <= 4;
870bf215546Sopenharmony_ci         src.u32 = 0;
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci         src.alu.negate = alu->src[i].negate;
873bf215546Sopenharmony_ci         src.alu.abs = alu->src[i].abs;
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci         if (packed) {
876bf215546Sopenharmony_ci            src.alu.swizzle_x = alu->src[i].swizzle[0];
877bf215546Sopenharmony_ci            src.alu.swizzle_y = alu->src[i].swizzle[1];
878bf215546Sopenharmony_ci            src.alu.swizzle_z = alu->src[i].swizzle[2];
879bf215546Sopenharmony_ci            src.alu.swizzle_w = alu->src[i].swizzle[3];
880bf215546Sopenharmony_ci         }
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci         write_src_full(ctx, &alu->src[i].src, src);
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci         /* Store swizzles for vec8 and vec16. */
885bf215546Sopenharmony_ci         if (!packed) {
886bf215546Sopenharmony_ci            for (unsigned o = 0; o < src_channels; o += 8) {
887bf215546Sopenharmony_ci               unsigned value = 0;
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
890bf215546Sopenharmony_ci                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
891bf215546Sopenharmony_ci                           (4 * j); /* 4 bits per swizzle */
892bf215546Sopenharmony_ci               }
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci               blob_write_uint32(ctx->blob, value);
895bf215546Sopenharmony_ci            }
896bf215546Sopenharmony_ci         }
897bf215546Sopenharmony_ci      }
898bf215546Sopenharmony_ci   }
899bf215546Sopenharmony_ci}
900bf215546Sopenharmony_ci
901bf215546Sopenharmony_cistatic nir_alu_instr *
902bf215546Sopenharmony_ciread_alu(read_ctx *ctx, union packed_instr header)
903bf215546Sopenharmony_ci{
904bf215546Sopenharmony_ci   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
905bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci   alu->exact = header.alu.exact;
908bf215546Sopenharmony_ci   alu->no_signed_wrap = header.alu.no_signed_wrap;
909bf215546Sopenharmony_ci   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
910bf215546Sopenharmony_ci   alu->dest.saturate = header.alu.saturate;
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   read_dest(ctx, &alu->dest.dest, &alu->instr, header);
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   unsigned dst_components = nir_dest_num_components(alu->dest.dest);
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   if (alu->dest.dest.is_ssa) {
917bf215546Sopenharmony_ci      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
918bf215546Sopenharmony_ci   } else if (dst_components <= 4) {
919bf215546Sopenharmony_ci      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
920bf215546Sopenharmony_ci   } else {
921bf215546Sopenharmony_ci      alu->dest.write_mask = blob_read_uint32(ctx->blob);
922bf215546Sopenharmony_ci   }
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci   if (header.alu.packed_src_ssa_16bit) {
925bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
926bf215546Sopenharmony_ci         nir_alu_src *src = &alu->src[i];
927bf215546Sopenharmony_ci         src->src.is_ssa = true;
928bf215546Sopenharmony_ci         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
929bf215546Sopenharmony_ci
930bf215546Sopenharmony_ci         memset(&src->swizzle, 0, sizeof(src->swizzle));
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci         for (unsigned chan = 0; chan < src_components; chan++)
935bf215546Sopenharmony_ci            src->swizzle[chan] = chan;
936bf215546Sopenharmony_ci      }
937bf215546Sopenharmony_ci   } else {
938bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
939bf215546Sopenharmony_ci         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
940bf215546Sopenharmony_ci         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
941bf215546Sopenharmony_ci         unsigned src_components = nir_src_num_components(alu->src[i].src);
942bf215546Sopenharmony_ci         bool packed = src_components <= 4 && src_channels <= 4;
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci         alu->src[i].negate = src.alu.negate;
945bf215546Sopenharmony_ci         alu->src[i].abs = src.alu.abs;
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci         if (packed) {
950bf215546Sopenharmony_ci            alu->src[i].swizzle[0] = src.alu.swizzle_x;
951bf215546Sopenharmony_ci            alu->src[i].swizzle[1] = src.alu.swizzle_y;
952bf215546Sopenharmony_ci            alu->src[i].swizzle[2] = src.alu.swizzle_z;
953bf215546Sopenharmony_ci            alu->src[i].swizzle[3] = src.alu.swizzle_w;
954bf215546Sopenharmony_ci         } else {
955bf215546Sopenharmony_ci            /* Load swizzles for vec8 and vec16. */
956bf215546Sopenharmony_ci            for (unsigned o = 0; o < src_channels; o += 8) {
957bf215546Sopenharmony_ci               unsigned value = blob_read_uint32(ctx->blob);
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_ci               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
960bf215546Sopenharmony_ci                  alu->src[i].swizzle[o + j] =
961bf215546Sopenharmony_ci                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
962bf215546Sopenharmony_ci               }
963bf215546Sopenharmony_ci            }
964bf215546Sopenharmony_ci         }
965bf215546Sopenharmony_ci      }
966bf215546Sopenharmony_ci   }
967bf215546Sopenharmony_ci
968bf215546Sopenharmony_ci   if (header.alu.packed_src_ssa_16bit &&
969bf215546Sopenharmony_ci       alu->dest.dest.is_ssa) {
970bf215546Sopenharmony_ci      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
971bf215546Sopenharmony_ci      if (num_srcs > 1)
972bf215546Sopenharmony_ci         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
973bf215546Sopenharmony_ci   }
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci   return alu;
976bf215546Sopenharmony_ci}
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_ci#define MODE_ENC_GENERIC_BIT (1 << 4)
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_cistatic nir_variable_mode
981bf215546Sopenharmony_cidecode_deref_modes(unsigned modes)
982bf215546Sopenharmony_ci{
983bf215546Sopenharmony_ci   if (modes & MODE_ENC_GENERIC_BIT) {
984bf215546Sopenharmony_ci      modes &= ~MODE_ENC_GENERIC_BIT;
985bf215546Sopenharmony_ci      return modes << (ffs(nir_var_mem_generic) - 1);
986bf215546Sopenharmony_ci   } else {
987bf215546Sopenharmony_ci      return 1 << modes;
988bf215546Sopenharmony_ci   }
989bf215546Sopenharmony_ci}
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_cistatic unsigned
992bf215546Sopenharmony_ciencode_deref_modes(nir_variable_mode modes)
993bf215546Sopenharmony_ci{
994bf215546Sopenharmony_ci   /* Mode sets on derefs generally come in two forms.  For certain OpenCL
995bf215546Sopenharmony_ci    * cases, we can have more than one of the generic modes set.  In this
996bf215546Sopenharmony_ci    * case, we need the full bitfield.  Fortunately, there are only 4 of
997bf215546Sopenharmony_ci    * these.  For all other modes, we can only have one mode at a time so we
998bf215546Sopenharmony_ci    * can compress them by only storing the bit position.  This, plus one bit
999bf215546Sopenharmony_ci    * to select encoding, lets us pack the entire bitfield in 5 bits.
1000bf215546Sopenharmony_ci    */
1001bf215546Sopenharmony_ci   STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
1002bf215546Sopenharmony_ci                 (1 << MODE_ENC_GENERIC_BIT));
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci   unsigned enc;
1005bf215546Sopenharmony_ci   if (modes == 0 || (modes & nir_var_mem_generic)) {
1006bf215546Sopenharmony_ci      assert(!(modes & ~nir_var_mem_generic));
1007bf215546Sopenharmony_ci      enc = modes >> (ffs(nir_var_mem_generic) - 1);
1008bf215546Sopenharmony_ci      assert(enc < MODE_ENC_GENERIC_BIT);
1009bf215546Sopenharmony_ci      enc |= MODE_ENC_GENERIC_BIT;
1010bf215546Sopenharmony_ci   } else {
1011bf215546Sopenharmony_ci      assert(util_is_power_of_two_nonzero(modes));
1012bf215546Sopenharmony_ci      enc = ffs(modes) - 1;
1013bf215546Sopenharmony_ci      assert(enc < MODE_ENC_GENERIC_BIT);
1014bf215546Sopenharmony_ci   }
1015bf215546Sopenharmony_ci   assert(modes == decode_deref_modes(enc));
1016bf215546Sopenharmony_ci   return enc;
1017bf215546Sopenharmony_ci}
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_cistatic void
1020bf215546Sopenharmony_ciwrite_deref(write_ctx *ctx, const nir_deref_instr *deref)
1021bf215546Sopenharmony_ci{
1022bf215546Sopenharmony_ci   assert(deref->deref_type < 8);
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci   union packed_instr header;
1025bf215546Sopenharmony_ci   header.u32 = 0;
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci   header.deref.instr_type = deref->instr.type;
1028bf215546Sopenharmony_ci   header.deref.deref_type = deref->deref_type;
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_cast) {
1031bf215546Sopenharmony_ci      header.deref.modes = encode_deref_modes(deref->modes);
1032bf215546Sopenharmony_ci      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
1033bf215546Sopenharmony_ci   }
1034bf215546Sopenharmony_ci
1035bf215546Sopenharmony_ci   unsigned var_idx = 0;
1036bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_var) {
1037bf215546Sopenharmony_ci      var_idx = write_lookup_object(ctx, deref->var);
1038bf215546Sopenharmony_ci      if (var_idx && var_idx < (1 << 16))
1039bf215546Sopenharmony_ci         header.deref_var.object_idx = var_idx;
1040bf215546Sopenharmony_ci   }
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci   if (deref->deref_type == nir_deref_type_array ||
1043bf215546Sopenharmony_ci       deref->deref_type == nir_deref_type_ptr_as_array) {
1044bf215546Sopenharmony_ci      header.deref.packed_src_ssa_16bit =
1045bf215546Sopenharmony_ci         deref->parent.is_ssa && deref->arr.index.is_ssa &&
1046bf215546Sopenharmony_ci         are_object_ids_16bit(ctx);
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ci      header.deref.in_bounds = deref->arr.in_bounds;
1049bf215546Sopenharmony_ci   }
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci   write_dest(ctx, &deref->dest, header, deref->instr.type);
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_ci   switch (deref->deref_type) {
1054bf215546Sopenharmony_ci   case nir_deref_type_var:
1055bf215546Sopenharmony_ci      if (!header.deref_var.object_idx)
1056bf215546Sopenharmony_ci         blob_write_uint32(ctx->blob, var_idx);
1057bf215546Sopenharmony_ci      break;
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   case nir_deref_type_struct:
1060bf215546Sopenharmony_ci      write_src(ctx, &deref->parent);
1061bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, deref->strct.index);
1062bf215546Sopenharmony_ci      break;
1063bf215546Sopenharmony_ci
1064bf215546Sopenharmony_ci   case nir_deref_type_array:
1065bf215546Sopenharmony_ci   case nir_deref_type_ptr_as_array:
1066bf215546Sopenharmony_ci      if (header.deref.packed_src_ssa_16bit) {
1067bf215546Sopenharmony_ci         blob_write_uint16(ctx->blob,
1068bf215546Sopenharmony_ci                           write_lookup_object(ctx, deref->parent.ssa));
1069bf215546Sopenharmony_ci         blob_write_uint16(ctx->blob,
1070bf215546Sopenharmony_ci                           write_lookup_object(ctx, deref->arr.index.ssa));
1071bf215546Sopenharmony_ci      } else {
1072bf215546Sopenharmony_ci         write_src(ctx, &deref->parent);
1073bf215546Sopenharmony_ci         write_src(ctx, &deref->arr.index);
1074bf215546Sopenharmony_ci      }
1075bf215546Sopenharmony_ci      break;
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci   case nir_deref_type_cast:
1078bf215546Sopenharmony_ci      write_src(ctx, &deref->parent);
1079bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
1080bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, deref->cast.align_mul);
1081bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, deref->cast.align_offset);
1082bf215546Sopenharmony_ci      if (!header.deref.cast_type_same_as_last) {
1083bf215546Sopenharmony_ci         encode_type_to_blob(ctx->blob, deref->type);
1084bf215546Sopenharmony_ci         ctx->last_type = deref->type;
1085bf215546Sopenharmony_ci      }
1086bf215546Sopenharmony_ci      break;
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   case nir_deref_type_array_wildcard:
1089bf215546Sopenharmony_ci      write_src(ctx, &deref->parent);
1090bf215546Sopenharmony_ci      break;
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci   default:
1093bf215546Sopenharmony_ci      unreachable("Invalid deref type");
1094bf215546Sopenharmony_ci   }
1095bf215546Sopenharmony_ci}
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_cistatic nir_deref_instr *
1098bf215546Sopenharmony_ciread_deref(read_ctx *ctx, union packed_instr header)
1099bf215546Sopenharmony_ci{
1100bf215546Sopenharmony_ci   nir_deref_type deref_type = header.deref.deref_type;
1101bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci   read_dest(ctx, &deref->dest, &deref->instr, header);
1104bf215546Sopenharmony_ci
1105bf215546Sopenharmony_ci   nir_deref_instr *parent;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   switch (deref->deref_type) {
1108bf215546Sopenharmony_ci   case nir_deref_type_var:
1109bf215546Sopenharmony_ci      if (header.deref_var.object_idx)
1110bf215546Sopenharmony_ci         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
1111bf215546Sopenharmony_ci      else
1112bf215546Sopenharmony_ci         deref->var = read_object(ctx);
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci      deref->type = deref->var->type;
1115bf215546Sopenharmony_ci      break;
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci   case nir_deref_type_struct:
1118bf215546Sopenharmony_ci      read_src(ctx, &deref->parent, &deref->instr);
1119bf215546Sopenharmony_ci      parent = nir_src_as_deref(deref->parent);
1120bf215546Sopenharmony_ci      deref->strct.index = blob_read_uint32(ctx->blob);
1121bf215546Sopenharmony_ci      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
1122bf215546Sopenharmony_ci      break;
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci   case nir_deref_type_array:
1125bf215546Sopenharmony_ci   case nir_deref_type_ptr_as_array:
1126bf215546Sopenharmony_ci      if (header.deref.packed_src_ssa_16bit) {
1127bf215546Sopenharmony_ci         deref->parent.is_ssa = true;
1128bf215546Sopenharmony_ci         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1129bf215546Sopenharmony_ci         deref->arr.index.is_ssa = true;
1130bf215546Sopenharmony_ci         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
1131bf215546Sopenharmony_ci      } else {
1132bf215546Sopenharmony_ci         read_src(ctx, &deref->parent, &deref->instr);
1133bf215546Sopenharmony_ci         read_src(ctx, &deref->arr.index, &deref->instr);
1134bf215546Sopenharmony_ci      }
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_ci      deref->arr.in_bounds = header.deref.in_bounds;
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci      parent = nir_src_as_deref(deref->parent);
1139bf215546Sopenharmony_ci      if (deref->deref_type == nir_deref_type_array)
1140bf215546Sopenharmony_ci         deref->type = glsl_get_array_element(parent->type);
1141bf215546Sopenharmony_ci      else
1142bf215546Sopenharmony_ci         deref->type = parent->type;
1143bf215546Sopenharmony_ci      break;
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ci   case nir_deref_type_cast:
1146bf215546Sopenharmony_ci      read_src(ctx, &deref->parent, &deref->instr);
1147bf215546Sopenharmony_ci      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1148bf215546Sopenharmony_ci      deref->cast.align_mul = blob_read_uint32(ctx->blob);
1149bf215546Sopenharmony_ci      deref->cast.align_offset = blob_read_uint32(ctx->blob);
1150bf215546Sopenharmony_ci      if (header.deref.cast_type_same_as_last) {
1151bf215546Sopenharmony_ci         deref->type = ctx->last_type;
1152bf215546Sopenharmony_ci      } else {
1153bf215546Sopenharmony_ci         deref->type = decode_type_from_blob(ctx->blob);
1154bf215546Sopenharmony_ci         ctx->last_type = deref->type;
1155bf215546Sopenharmony_ci      }
1156bf215546Sopenharmony_ci      break;
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_ci   case nir_deref_type_array_wildcard:
1159bf215546Sopenharmony_ci      read_src(ctx, &deref->parent, &deref->instr);
1160bf215546Sopenharmony_ci      parent = nir_src_as_deref(deref->parent);
1161bf215546Sopenharmony_ci      deref->type = glsl_get_array_element(parent->type);
1162bf215546Sopenharmony_ci      break;
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci   default:
1165bf215546Sopenharmony_ci      unreachable("Invalid deref type");
1166bf215546Sopenharmony_ci   }
1167bf215546Sopenharmony_ci
1168bf215546Sopenharmony_ci   if (deref_type == nir_deref_type_var) {
1169bf215546Sopenharmony_ci      deref->modes = deref->var->data.mode;
1170bf215546Sopenharmony_ci   } else if (deref->deref_type == nir_deref_type_cast) {
1171bf215546Sopenharmony_ci      deref->modes = decode_deref_modes(header.deref.modes);
1172bf215546Sopenharmony_ci   } else {
1173bf215546Sopenharmony_ci      assert(deref->parent.is_ssa);
1174bf215546Sopenharmony_ci      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
1175bf215546Sopenharmony_ci   }
1176bf215546Sopenharmony_ci
1177bf215546Sopenharmony_ci   return deref;
1178bf215546Sopenharmony_ci}
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_cistatic void
1181bf215546Sopenharmony_ciwrite_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1182bf215546Sopenharmony_ci{
1183bf215546Sopenharmony_ci   /* 10 bits for nir_intrinsic_op */
1184bf215546Sopenharmony_ci   STATIC_ASSERT(nir_num_intrinsics <= 1024);
1185bf215546Sopenharmony_ci   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1186bf215546Sopenharmony_ci   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1187bf215546Sopenharmony_ci   assert(intrin->intrinsic < 1024);
1188bf215546Sopenharmony_ci
1189bf215546Sopenharmony_ci   union packed_instr header;
1190bf215546Sopenharmony_ci   header.u32 = 0;
1191bf215546Sopenharmony_ci
1192bf215546Sopenharmony_ci   header.intrinsic.instr_type = intrin->instr.type;
1193bf215546Sopenharmony_ci   header.intrinsic.intrinsic = intrin->intrinsic;
1194bf215546Sopenharmony_ci
1195bf215546Sopenharmony_ci   /* Analyze constant indices to decide how to encode them. */
1196bf215546Sopenharmony_ci   if (num_indices) {
1197bf215546Sopenharmony_ci      unsigned max_bits = 0;
1198bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_indices; i++) {
1199bf215546Sopenharmony_ci         unsigned max = util_last_bit(intrin->const_index[i]);
1200bf215546Sopenharmony_ci         max_bits = MAX2(max_bits, max);
1201bf215546Sopenharmony_ci      }
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_ci      if (max_bits * num_indices <= 8) {
1204bf215546Sopenharmony_ci         header.intrinsic.const_indices_encoding = const_indices_all_combined;
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci         /* Pack all const indices into 8 bits. */
1207bf215546Sopenharmony_ci         unsigned bit_size = 8 / num_indices;
1208bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++) {
1209bf215546Sopenharmony_ci            header.intrinsic.packed_const_indices |=
1210bf215546Sopenharmony_ci               intrin->const_index[i] << (i * bit_size);
1211bf215546Sopenharmony_ci         }
1212bf215546Sopenharmony_ci      } else if (max_bits <= 8)
1213bf215546Sopenharmony_ci         header.intrinsic.const_indices_encoding = const_indices_8bit;
1214bf215546Sopenharmony_ci      else if (max_bits <= 16)
1215bf215546Sopenharmony_ci         header.intrinsic.const_indices_encoding = const_indices_16bit;
1216bf215546Sopenharmony_ci      else
1217bf215546Sopenharmony_ci         header.intrinsic.const_indices_encoding = const_indices_32bit;
1218bf215546Sopenharmony_ci   }
1219bf215546Sopenharmony_ci
1220bf215546Sopenharmony_ci   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1221bf215546Sopenharmony_ci      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
1222bf215546Sopenharmony_ci   else
1223bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, header.u32);
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_srcs; i++)
1226bf215546Sopenharmony_ci      write_src(ctx, &intrin->src[i]);
1227bf215546Sopenharmony_ci
1228bf215546Sopenharmony_ci   if (num_indices) {
1229bf215546Sopenharmony_ci      switch (header.intrinsic.const_indices_encoding) {
1230bf215546Sopenharmony_ci      case const_indices_8bit:
1231bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1232bf215546Sopenharmony_ci            blob_write_uint8(ctx->blob, intrin->const_index[i]);
1233bf215546Sopenharmony_ci         break;
1234bf215546Sopenharmony_ci      case const_indices_16bit:
1235bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1236bf215546Sopenharmony_ci            blob_write_uint16(ctx->blob, intrin->const_index[i]);
1237bf215546Sopenharmony_ci         break;
1238bf215546Sopenharmony_ci      case const_indices_32bit:
1239bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1240bf215546Sopenharmony_ci            blob_write_uint32(ctx->blob, intrin->const_index[i]);
1241bf215546Sopenharmony_ci         break;
1242bf215546Sopenharmony_ci      }
1243bf215546Sopenharmony_ci   }
1244bf215546Sopenharmony_ci}
1245bf215546Sopenharmony_ci
1246bf215546Sopenharmony_cistatic nir_intrinsic_instr *
1247bf215546Sopenharmony_ciread_intrinsic(read_ctx *ctx, union packed_instr header)
1248bf215546Sopenharmony_ci{
1249bf215546Sopenharmony_ci   nir_intrinsic_op op = header.intrinsic.intrinsic;
1250bf215546Sopenharmony_ci   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1251bf215546Sopenharmony_ci
1252bf215546Sopenharmony_ci   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1253bf215546Sopenharmony_ci   unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1254bf215546Sopenharmony_ci
1255bf215546Sopenharmony_ci   if (nir_intrinsic_infos[op].has_dest)
1256bf215546Sopenharmony_ci      read_dest(ctx, &intrin->dest, &intrin->instr, header);
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_srcs; i++)
1259bf215546Sopenharmony_ci      read_src(ctx, &intrin->src[i], &intrin->instr);
1260bf215546Sopenharmony_ci
1261bf215546Sopenharmony_ci   /* Vectorized instrinsics have num_components same as dst or src that has
1262bf215546Sopenharmony_ci    * 0 components in the info. Find it.
1263bf215546Sopenharmony_ci    */
1264bf215546Sopenharmony_ci   if (nir_intrinsic_infos[op].has_dest &&
1265bf215546Sopenharmony_ci       nir_intrinsic_infos[op].dest_components == 0) {
1266bf215546Sopenharmony_ci      intrin->num_components = nir_dest_num_components(intrin->dest);
1267bf215546Sopenharmony_ci   } else {
1268bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_srcs; i++) {
1269bf215546Sopenharmony_ci         if (nir_intrinsic_infos[op].src_components[i] == 0) {
1270bf215546Sopenharmony_ci            intrin->num_components = nir_src_num_components(intrin->src[i]);
1271bf215546Sopenharmony_ci            break;
1272bf215546Sopenharmony_ci         }
1273bf215546Sopenharmony_ci      }
1274bf215546Sopenharmony_ci   }
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci   if (num_indices) {
1277bf215546Sopenharmony_ci      switch (header.intrinsic.const_indices_encoding) {
1278bf215546Sopenharmony_ci      case const_indices_all_combined: {
1279bf215546Sopenharmony_ci         unsigned bit_size = 8 / num_indices;
1280bf215546Sopenharmony_ci         unsigned bit_mask = u_bit_consecutive(0, bit_size);
1281bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++) {
1282bf215546Sopenharmony_ci            intrin->const_index[i] =
1283bf215546Sopenharmony_ci               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1284bf215546Sopenharmony_ci               bit_mask;
1285bf215546Sopenharmony_ci         }
1286bf215546Sopenharmony_ci         break;
1287bf215546Sopenharmony_ci      }
1288bf215546Sopenharmony_ci      case const_indices_8bit:
1289bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1290bf215546Sopenharmony_ci            intrin->const_index[i] = blob_read_uint8(ctx->blob);
1291bf215546Sopenharmony_ci         break;
1292bf215546Sopenharmony_ci      case const_indices_16bit:
1293bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1294bf215546Sopenharmony_ci            intrin->const_index[i] = blob_read_uint16(ctx->blob);
1295bf215546Sopenharmony_ci         break;
1296bf215546Sopenharmony_ci      case const_indices_32bit:
1297bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_indices; i++)
1298bf215546Sopenharmony_ci            intrin->const_index[i] = blob_read_uint32(ctx->blob);
1299bf215546Sopenharmony_ci         break;
1300bf215546Sopenharmony_ci      }
1301bf215546Sopenharmony_ci   }
1302bf215546Sopenharmony_ci
1303bf215546Sopenharmony_ci   return intrin;
1304bf215546Sopenharmony_ci}
1305bf215546Sopenharmony_ci
1306bf215546Sopenharmony_cistatic void
1307bf215546Sopenharmony_ciwrite_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1308bf215546Sopenharmony_ci{
1309bf215546Sopenharmony_ci   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1310bf215546Sopenharmony_ci   union packed_instr header;
1311bf215546Sopenharmony_ci   header.u32 = 0;
1312bf215546Sopenharmony_ci
1313bf215546Sopenharmony_ci   header.load_const.instr_type = lc->instr.type;
1314bf215546Sopenharmony_ci   header.load_const.last_component = lc->def.num_components - 1;
1315bf215546Sopenharmony_ci   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1316bf215546Sopenharmony_ci   header.load_const.packing = load_const_full;
1317bf215546Sopenharmony_ci
1318bf215546Sopenharmony_ci   /* Try to pack 1-component constants into the 19 free bits in the header. */
1319bf215546Sopenharmony_ci   if (lc->def.num_components == 1) {
1320bf215546Sopenharmony_ci      switch (lc->def.bit_size) {
1321bf215546Sopenharmony_ci      case 64:
1322bf215546Sopenharmony_ci         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1323bf215546Sopenharmony_ci            /* packed_value contains high 19 bits, low bits are 0 */
1324bf215546Sopenharmony_ci            header.load_const.packing = load_const_scalar_hi_19bits;
1325bf215546Sopenharmony_ci            header.load_const.packed_value = lc->value[0].u64 >> 45;
1326bf215546Sopenharmony_ci         } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
1327bf215546Sopenharmony_ci            /* packed_value contains low 19 bits, high bits are sign-extended */
1328bf215546Sopenharmony_ci            header.load_const.packing = load_const_scalar_lo_19bits_sext;
1329bf215546Sopenharmony_ci            header.load_const.packed_value = lc->value[0].u64;
1330bf215546Sopenharmony_ci         }
1331bf215546Sopenharmony_ci         break;
1332bf215546Sopenharmony_ci
1333bf215546Sopenharmony_ci      case 32:
1334bf215546Sopenharmony_ci         if ((lc->value[0].u32 & 0x1fff) == 0) {
1335bf215546Sopenharmony_ci            header.load_const.packing = load_const_scalar_hi_19bits;
1336bf215546Sopenharmony_ci            header.load_const.packed_value = lc->value[0].u32 >> 13;
1337bf215546Sopenharmony_ci         } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
1338bf215546Sopenharmony_ci            header.load_const.packing = load_const_scalar_lo_19bits_sext;
1339bf215546Sopenharmony_ci            header.load_const.packed_value = lc->value[0].u32;
1340bf215546Sopenharmony_ci         }
1341bf215546Sopenharmony_ci         break;
1342bf215546Sopenharmony_ci
1343bf215546Sopenharmony_ci      case 16:
1344bf215546Sopenharmony_ci         header.load_const.packing = load_const_scalar_lo_19bits_sext;
1345bf215546Sopenharmony_ci         header.load_const.packed_value = lc->value[0].u16;
1346bf215546Sopenharmony_ci         break;
1347bf215546Sopenharmony_ci      case 8:
1348bf215546Sopenharmony_ci         header.load_const.packing = load_const_scalar_lo_19bits_sext;
1349bf215546Sopenharmony_ci         header.load_const.packed_value = lc->value[0].u8;
1350bf215546Sopenharmony_ci         break;
1351bf215546Sopenharmony_ci      case 1:
1352bf215546Sopenharmony_ci         header.load_const.packing = load_const_scalar_lo_19bits_sext;
1353bf215546Sopenharmony_ci         header.load_const.packed_value = lc->value[0].b;
1354bf215546Sopenharmony_ci         break;
1355bf215546Sopenharmony_ci      default:
1356bf215546Sopenharmony_ci         unreachable("invalid bit_size");
1357bf215546Sopenharmony_ci      }
1358bf215546Sopenharmony_ci   }
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, header.u32);
1361bf215546Sopenharmony_ci
1362bf215546Sopenharmony_ci   if (header.load_const.packing == load_const_full) {
1363bf215546Sopenharmony_ci      switch (lc->def.bit_size) {
1364bf215546Sopenharmony_ci      case 64:
1365bf215546Sopenharmony_ci         blob_write_bytes(ctx->blob, lc->value,
1366bf215546Sopenharmony_ci                          sizeof(*lc->value) * lc->def.num_components);
1367bf215546Sopenharmony_ci         break;
1368bf215546Sopenharmony_ci
1369bf215546Sopenharmony_ci      case 32:
1370bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1371bf215546Sopenharmony_ci            blob_write_uint32(ctx->blob, lc->value[i].u32);
1372bf215546Sopenharmony_ci         break;
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci      case 16:
1375bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1376bf215546Sopenharmony_ci            blob_write_uint16(ctx->blob, lc->value[i].u16);
1377bf215546Sopenharmony_ci         break;
1378bf215546Sopenharmony_ci
1379bf215546Sopenharmony_ci      default:
1380bf215546Sopenharmony_ci         assert(lc->def.bit_size <= 8);
1381bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1382bf215546Sopenharmony_ci            blob_write_uint8(ctx->blob, lc->value[i].u8);
1383bf215546Sopenharmony_ci         break;
1384bf215546Sopenharmony_ci      }
1385bf215546Sopenharmony_ci   }
1386bf215546Sopenharmony_ci
1387bf215546Sopenharmony_ci   write_add_object(ctx, &lc->def);
1388bf215546Sopenharmony_ci}
1389bf215546Sopenharmony_ci
1390bf215546Sopenharmony_cistatic nir_load_const_instr *
1391bf215546Sopenharmony_ciread_load_const(read_ctx *ctx, union packed_instr header)
1392bf215546Sopenharmony_ci{
1393bf215546Sopenharmony_ci   nir_load_const_instr *lc =
1394bf215546Sopenharmony_ci      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1395bf215546Sopenharmony_ci                                  decode_bit_size_3bits(header.load_const.bit_size));
1396bf215546Sopenharmony_ci   lc->def.divergent = false;
1397bf215546Sopenharmony_ci
1398bf215546Sopenharmony_ci   switch (header.load_const.packing) {
1399bf215546Sopenharmony_ci   case load_const_scalar_hi_19bits:
1400bf215546Sopenharmony_ci      switch (lc->def.bit_size) {
1401bf215546Sopenharmony_ci      case 64:
1402bf215546Sopenharmony_ci         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1403bf215546Sopenharmony_ci         break;
1404bf215546Sopenharmony_ci      case 32:
1405bf215546Sopenharmony_ci         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1406bf215546Sopenharmony_ci         break;
1407bf215546Sopenharmony_ci      default:
1408bf215546Sopenharmony_ci         unreachable("invalid bit_size");
1409bf215546Sopenharmony_ci      }
1410bf215546Sopenharmony_ci      break;
1411bf215546Sopenharmony_ci
1412bf215546Sopenharmony_ci   case load_const_scalar_lo_19bits_sext:
1413bf215546Sopenharmony_ci      switch (lc->def.bit_size) {
1414bf215546Sopenharmony_ci      case 64:
1415bf215546Sopenharmony_ci         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
1416bf215546Sopenharmony_ci         break;
1417bf215546Sopenharmony_ci      case 32:
1418bf215546Sopenharmony_ci         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
1419bf215546Sopenharmony_ci         break;
1420bf215546Sopenharmony_ci      case 16:
1421bf215546Sopenharmony_ci         lc->value[0].u16 = header.load_const.packed_value;
1422bf215546Sopenharmony_ci         break;
1423bf215546Sopenharmony_ci      case 8:
1424bf215546Sopenharmony_ci         lc->value[0].u8 = header.load_const.packed_value;
1425bf215546Sopenharmony_ci         break;
1426bf215546Sopenharmony_ci      case 1:
1427bf215546Sopenharmony_ci         lc->value[0].b = header.load_const.packed_value;
1428bf215546Sopenharmony_ci         break;
1429bf215546Sopenharmony_ci      default:
1430bf215546Sopenharmony_ci         unreachable("invalid bit_size");
1431bf215546Sopenharmony_ci      }
1432bf215546Sopenharmony_ci      break;
1433bf215546Sopenharmony_ci
1434bf215546Sopenharmony_ci   case load_const_full:
1435bf215546Sopenharmony_ci      switch (lc->def.bit_size) {
1436bf215546Sopenharmony_ci      case 64:
1437bf215546Sopenharmony_ci         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1438bf215546Sopenharmony_ci         break;
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_ci      case 32:
1441bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1442bf215546Sopenharmony_ci            lc->value[i].u32 = blob_read_uint32(ctx->blob);
1443bf215546Sopenharmony_ci         break;
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci      case 16:
1446bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1447bf215546Sopenharmony_ci            lc->value[i].u16 = blob_read_uint16(ctx->blob);
1448bf215546Sopenharmony_ci         break;
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_ci      default:
1451bf215546Sopenharmony_ci         assert(lc->def.bit_size <= 8);
1452bf215546Sopenharmony_ci         for (unsigned i = 0; i < lc->def.num_components; i++)
1453bf215546Sopenharmony_ci            lc->value[i].u8 = blob_read_uint8(ctx->blob);
1454bf215546Sopenharmony_ci         break;
1455bf215546Sopenharmony_ci      }
1456bf215546Sopenharmony_ci      break;
1457bf215546Sopenharmony_ci   }
1458bf215546Sopenharmony_ci
1459bf215546Sopenharmony_ci   read_add_object(ctx, &lc->def);
1460bf215546Sopenharmony_ci   return lc;
1461bf215546Sopenharmony_ci}
1462bf215546Sopenharmony_ci
1463bf215546Sopenharmony_cistatic void
1464bf215546Sopenharmony_ciwrite_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
1465bf215546Sopenharmony_ci{
1466bf215546Sopenharmony_ci   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci   union packed_instr header;
1469bf215546Sopenharmony_ci   header.u32 = 0;
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_ci   header.undef.instr_type = undef->instr.type;
1472bf215546Sopenharmony_ci   header.undef.last_component = undef->def.num_components - 1;
1473bf215546Sopenharmony_ci   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, header.u32);
1476bf215546Sopenharmony_ci   write_add_object(ctx, &undef->def);
1477bf215546Sopenharmony_ci}
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_cistatic nir_ssa_undef_instr *
1480bf215546Sopenharmony_ciread_ssa_undef(read_ctx *ctx, union packed_instr header)
1481bf215546Sopenharmony_ci{
1482bf215546Sopenharmony_ci   nir_ssa_undef_instr *undef =
1483bf215546Sopenharmony_ci      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1484bf215546Sopenharmony_ci                                 decode_bit_size_3bits(header.undef.bit_size));
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci   undef->def.divergent = false;
1487bf215546Sopenharmony_ci
1488bf215546Sopenharmony_ci   read_add_object(ctx, &undef->def);
1489bf215546Sopenharmony_ci   return undef;
1490bf215546Sopenharmony_ci}
1491bf215546Sopenharmony_ci
/**
 * Texture-instruction state packed into a single 32-bit word for
 * serialization.  `u32` is the raw word that is written to / read from the
 * blob; `u` names the individual fields.
 *
 * The bit-field widths must add up to exactly 32 so that no anonymous
 * padding bits remain: 24 bits carry data and `unused` covers the other 8.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      unsigned unused:8; /* Mark unused for valgrind. */
   } u;
};
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_cistatic void
1511bf215546Sopenharmony_ciwrite_tex(write_ctx *ctx, const nir_tex_instr *tex)
1512bf215546Sopenharmony_ci{
1513bf215546Sopenharmony_ci   assert(tex->num_srcs < 16);
1514bf215546Sopenharmony_ci   assert(tex->op < 32);
1515bf215546Sopenharmony_ci
1516bf215546Sopenharmony_ci   union packed_instr header;
1517bf215546Sopenharmony_ci   header.u32 = 0;
1518bf215546Sopenharmony_ci
1519bf215546Sopenharmony_ci   header.tex.instr_type = tex->instr.type;
1520bf215546Sopenharmony_ci   header.tex.num_srcs = tex->num_srcs;
1521bf215546Sopenharmony_ci   header.tex.op = tex->op;
1522bf215546Sopenharmony_ci
1523bf215546Sopenharmony_ci   write_dest(ctx, &tex->dest, header, tex->instr.type);
1524bf215546Sopenharmony_ci
1525bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, tex->texture_index);
1526bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, tex->sampler_index);
1527bf215546Sopenharmony_ci   if (tex->op == nir_texop_tg4)
1528bf215546Sopenharmony_ci      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1531bf215546Sopenharmony_ci   union packed_tex_data packed = {
1532bf215546Sopenharmony_ci      .u.sampler_dim = tex->sampler_dim,
1533bf215546Sopenharmony_ci      .u.dest_type = tex->dest_type,
1534bf215546Sopenharmony_ci      .u.coord_components = tex->coord_components,
1535bf215546Sopenharmony_ci      .u.is_array = tex->is_array,
1536bf215546Sopenharmony_ci      .u.is_shadow = tex->is_shadow,
1537bf215546Sopenharmony_ci      .u.is_new_style_shadow = tex->is_new_style_shadow,
1538bf215546Sopenharmony_ci      .u.is_sparse = tex->is_sparse,
1539bf215546Sopenharmony_ci      .u.component = tex->component,
1540bf215546Sopenharmony_ci      .u.texture_non_uniform = tex->texture_non_uniform,
1541bf215546Sopenharmony_ci      .u.sampler_non_uniform = tex->sampler_non_uniform,
1542bf215546Sopenharmony_ci      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1543bf215546Sopenharmony_ci   };
1544bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, packed.u32);
1545bf215546Sopenharmony_ci
1546bf215546Sopenharmony_ci   for (unsigned i = 0; i < tex->num_srcs; i++) {
1547bf215546Sopenharmony_ci      union packed_src src;
1548bf215546Sopenharmony_ci      src.u32 = 0;
1549bf215546Sopenharmony_ci      src.tex.src_type = tex->src[i].src_type;
1550bf215546Sopenharmony_ci      write_src_full(ctx, &tex->src[i].src, src);
1551bf215546Sopenharmony_ci   }
1552bf215546Sopenharmony_ci}
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_cistatic nir_tex_instr *
1555bf215546Sopenharmony_ciread_tex(read_ctx *ctx, union packed_instr header)
1556bf215546Sopenharmony_ci{
1557bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci   read_dest(ctx, &tex->dest, &tex->instr, header);
1560bf215546Sopenharmony_ci
1561bf215546Sopenharmony_ci   tex->op = header.tex.op;
1562bf215546Sopenharmony_ci   tex->texture_index = blob_read_uint32(ctx->blob);
1563bf215546Sopenharmony_ci   tex->sampler_index = blob_read_uint32(ctx->blob);
1564bf215546Sopenharmony_ci   if (tex->op == nir_texop_tg4)
1565bf215546Sopenharmony_ci      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1566bf215546Sopenharmony_ci
1567bf215546Sopenharmony_ci   union packed_tex_data packed;
1568bf215546Sopenharmony_ci   packed.u32 = blob_read_uint32(ctx->blob);
1569bf215546Sopenharmony_ci   tex->sampler_dim = packed.u.sampler_dim;
1570bf215546Sopenharmony_ci   tex->dest_type = packed.u.dest_type;
1571bf215546Sopenharmony_ci   tex->coord_components = packed.u.coord_components;
1572bf215546Sopenharmony_ci   tex->is_array = packed.u.is_array;
1573bf215546Sopenharmony_ci   tex->is_shadow = packed.u.is_shadow;
1574bf215546Sopenharmony_ci   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1575bf215546Sopenharmony_ci   tex->is_sparse = packed.u.is_sparse;
1576bf215546Sopenharmony_ci   tex->component = packed.u.component;
1577bf215546Sopenharmony_ci   tex->texture_non_uniform = packed.u.texture_non_uniform;
1578bf215546Sopenharmony_ci   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1579bf215546Sopenharmony_ci   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1580bf215546Sopenharmony_ci
1581bf215546Sopenharmony_ci   for (unsigned i = 0; i < tex->num_srcs; i++) {
1582bf215546Sopenharmony_ci      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1583bf215546Sopenharmony_ci      tex->src[i].src_type = src.tex.src_type;
1584bf215546Sopenharmony_ci   }
1585bf215546Sopenharmony_ci
1586bf215546Sopenharmony_ci   return tex;
1587bf215546Sopenharmony_ci}
1588bf215546Sopenharmony_ci
1589bf215546Sopenharmony_cistatic void
1590bf215546Sopenharmony_ciwrite_phi(write_ctx *ctx, const nir_phi_instr *phi)
1591bf215546Sopenharmony_ci{
1592bf215546Sopenharmony_ci   union packed_instr header;
1593bf215546Sopenharmony_ci   header.u32 = 0;
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci   header.phi.instr_type = phi->instr.type;
1596bf215546Sopenharmony_ci   header.phi.num_srcs = exec_list_length(&phi->srcs);
1597bf215546Sopenharmony_ci
1598bf215546Sopenharmony_ci   /* Phi nodes are special, since they may reference SSA definitions and
1599bf215546Sopenharmony_ci    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1600bf215546Sopenharmony_ci    * and then store enough information so that a later fixup pass can fill
1601bf215546Sopenharmony_ci    * them in correctly.
1602bf215546Sopenharmony_ci    */
1603bf215546Sopenharmony_ci   write_dest(ctx, &phi->dest, header, phi->instr.type);
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
1606bf215546Sopenharmony_ci      assert(src->src.is_ssa);
1607bf215546Sopenharmony_ci      size_t blob_offset = blob_reserve_uint32(ctx->blob);
1608bf215546Sopenharmony_ci      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1609bf215546Sopenharmony_ci      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1610bf215546Sopenharmony_ci      write_phi_fixup fixup = {
1611bf215546Sopenharmony_ci         .blob_offset = blob_offset,
1612bf215546Sopenharmony_ci         .src = src->src.ssa,
1613bf215546Sopenharmony_ci         .block = src->pred,
1614bf215546Sopenharmony_ci      };
1615bf215546Sopenharmony_ci      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1616bf215546Sopenharmony_ci   }
1617bf215546Sopenharmony_ci}
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_cistatic void
1620bf215546Sopenharmony_ciwrite_fixup_phis(write_ctx *ctx)
1621bf215546Sopenharmony_ci{
1622bf215546Sopenharmony_ci   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1623bf215546Sopenharmony_ci      blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1624bf215546Sopenharmony_ci                            write_lookup_object(ctx, fixup->src));
1625bf215546Sopenharmony_ci      blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1626bf215546Sopenharmony_ci                            write_lookup_object(ctx, fixup->block));
1627bf215546Sopenharmony_ci   }
1628bf215546Sopenharmony_ci
1629bf215546Sopenharmony_ci   util_dynarray_clear(&ctx->phi_fixups);
1630bf215546Sopenharmony_ci}
1631bf215546Sopenharmony_ci
1632bf215546Sopenharmony_cistatic nir_phi_instr *
1633bf215546Sopenharmony_ciread_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1634bf215546Sopenharmony_ci{
1635bf215546Sopenharmony_ci   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1636bf215546Sopenharmony_ci
1637bf215546Sopenharmony_ci   read_dest(ctx, &phi->dest, &phi->instr, header);
1638bf215546Sopenharmony_ci
1639bf215546Sopenharmony_ci   /* For similar reasons as before, we just store the index directly into the
1640bf215546Sopenharmony_ci    * pointer, and let a later pass resolve the phi sources.
1641bf215546Sopenharmony_ci    *
1642bf215546Sopenharmony_ci    * In order to ensure that the copied sources (which are just the indices
1643bf215546Sopenharmony_ci    * from the blob for now) don't get inserted into the old shader's use-def
1644bf215546Sopenharmony_ci    * lists, we have to add the phi instruction *before* we set up its
1645bf215546Sopenharmony_ci    * sources.
1646bf215546Sopenharmony_ci    */
1647bf215546Sopenharmony_ci   nir_instr_insert_after_block(blk, &phi->instr);
1648bf215546Sopenharmony_ci
1649bf215546Sopenharmony_ci   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1650bf215546Sopenharmony_ci      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1651bf215546Sopenharmony_ci      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1652bf215546Sopenharmony_ci      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1655bf215546Sopenharmony_ci       * we have to set the parent_instr manually.  It doesn't really matter
1656bf215546Sopenharmony_ci       * when we do it, so we might as well do it here.
1657bf215546Sopenharmony_ci       */
1658bf215546Sopenharmony_ci      src->src.parent_instr = &phi->instr;
1659bf215546Sopenharmony_ci
1660bf215546Sopenharmony_ci      /* Stash it in the list of phi sources.  We'll walk this list and fix up
1661bf215546Sopenharmony_ci       * sources at the very end of read_function_impl.
1662bf215546Sopenharmony_ci       */
1663bf215546Sopenharmony_ci      list_add(&src->src.use_link, &ctx->phi_srcs);
1664bf215546Sopenharmony_ci   }
1665bf215546Sopenharmony_ci
1666bf215546Sopenharmony_ci   return phi;
1667bf215546Sopenharmony_ci}
1668bf215546Sopenharmony_ci
1669bf215546Sopenharmony_cistatic void
1670bf215546Sopenharmony_ciread_fixup_phis(read_ctx *ctx)
1671bf215546Sopenharmony_ci{
1672bf215546Sopenharmony_ci   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1673bf215546Sopenharmony_ci      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1674bf215546Sopenharmony_ci      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1675bf215546Sopenharmony_ci
1676bf215546Sopenharmony_ci      /* Remove from this list */
1677bf215546Sopenharmony_ci      list_del(&src->src.use_link);
1678bf215546Sopenharmony_ci
1679bf215546Sopenharmony_ci      list_addtail(&src->src.use_link, &src->src.ssa->uses);
1680bf215546Sopenharmony_ci   }
1681bf215546Sopenharmony_ci   assert(list_is_empty(&ctx->phi_srcs));
1682bf215546Sopenharmony_ci}
1683bf215546Sopenharmony_ci
1684bf215546Sopenharmony_cistatic void
1685bf215546Sopenharmony_ciwrite_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1686bf215546Sopenharmony_ci{
1687bf215546Sopenharmony_ci   /* These aren't handled because they require special block linking */
1688bf215546Sopenharmony_ci   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1689bf215546Sopenharmony_ci
1690bf215546Sopenharmony_ci   assert(jmp->type < 4);
1691bf215546Sopenharmony_ci
1692bf215546Sopenharmony_ci   union packed_instr header;
1693bf215546Sopenharmony_ci   header.u32 = 0;
1694bf215546Sopenharmony_ci
1695bf215546Sopenharmony_ci   header.jump.instr_type = jmp->instr.type;
1696bf215546Sopenharmony_ci   header.jump.type = jmp->type;
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, header.u32);
1699bf215546Sopenharmony_ci}
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_cistatic nir_jump_instr *
1702bf215546Sopenharmony_ciread_jump(read_ctx *ctx, union packed_instr header)
1703bf215546Sopenharmony_ci{
1704bf215546Sopenharmony_ci   /* These aren't handled because they require special block linking */
1705bf215546Sopenharmony_ci   assert(header.jump.type != nir_jump_goto &&
1706bf215546Sopenharmony_ci          header.jump.type != nir_jump_goto_if);
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_ci   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1709bf215546Sopenharmony_ci   return jmp;
1710bf215546Sopenharmony_ci}
1711bf215546Sopenharmony_ci
1712bf215546Sopenharmony_cistatic void
1713bf215546Sopenharmony_ciwrite_call(write_ctx *ctx, const nir_call_instr *call)
1714bf215546Sopenharmony_ci{
1715bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1716bf215546Sopenharmony_ci
1717bf215546Sopenharmony_ci   for (unsigned i = 0; i < call->num_params; i++)
1718bf215546Sopenharmony_ci      write_src(ctx, &call->params[i]);
1719bf215546Sopenharmony_ci}
1720bf215546Sopenharmony_ci
1721bf215546Sopenharmony_cistatic nir_call_instr *
1722bf215546Sopenharmony_ciread_call(read_ctx *ctx)
1723bf215546Sopenharmony_ci{
1724bf215546Sopenharmony_ci   nir_function *callee = read_object(ctx);
1725bf215546Sopenharmony_ci   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1726bf215546Sopenharmony_ci
1727bf215546Sopenharmony_ci   for (unsigned i = 0; i < call->num_params; i++)
1728bf215546Sopenharmony_ci      read_src(ctx, &call->params[i], call);
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci   return call;
1731bf215546Sopenharmony_ci}
1732bf215546Sopenharmony_ci
1733bf215546Sopenharmony_cistatic void
1734bf215546Sopenharmony_ciwrite_instr(write_ctx *ctx, const nir_instr *instr)
1735bf215546Sopenharmony_ci{
1736bf215546Sopenharmony_ci   /* We have only 4 bits for the instruction type. */
1737bf215546Sopenharmony_ci   assert(instr->type < 16);
1738bf215546Sopenharmony_ci
1739bf215546Sopenharmony_ci   switch (instr->type) {
1740bf215546Sopenharmony_ci   case nir_instr_type_alu:
1741bf215546Sopenharmony_ci      write_alu(ctx, nir_instr_as_alu(instr));
1742bf215546Sopenharmony_ci      break;
1743bf215546Sopenharmony_ci   case nir_instr_type_deref:
1744bf215546Sopenharmony_ci      write_deref(ctx, nir_instr_as_deref(instr));
1745bf215546Sopenharmony_ci      break;
1746bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1747bf215546Sopenharmony_ci      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1748bf215546Sopenharmony_ci      break;
1749bf215546Sopenharmony_ci   case nir_instr_type_load_const:
1750bf215546Sopenharmony_ci      write_load_const(ctx, nir_instr_as_load_const(instr));
1751bf215546Sopenharmony_ci      break;
1752bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
1753bf215546Sopenharmony_ci      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1754bf215546Sopenharmony_ci      break;
1755bf215546Sopenharmony_ci   case nir_instr_type_tex:
1756bf215546Sopenharmony_ci      write_tex(ctx, nir_instr_as_tex(instr));
1757bf215546Sopenharmony_ci      break;
1758bf215546Sopenharmony_ci   case nir_instr_type_phi:
1759bf215546Sopenharmony_ci      write_phi(ctx, nir_instr_as_phi(instr));
1760bf215546Sopenharmony_ci      break;
1761bf215546Sopenharmony_ci   case nir_instr_type_jump:
1762bf215546Sopenharmony_ci      write_jump(ctx, nir_instr_as_jump(instr));
1763bf215546Sopenharmony_ci      break;
1764bf215546Sopenharmony_ci   case nir_instr_type_call:
1765bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, instr->type);
1766bf215546Sopenharmony_ci      write_call(ctx, nir_instr_as_call(instr));
1767bf215546Sopenharmony_ci      break;
1768bf215546Sopenharmony_ci   case nir_instr_type_parallel_copy:
1769bf215546Sopenharmony_ci      unreachable("Cannot write parallel copies");
1770bf215546Sopenharmony_ci   default:
1771bf215546Sopenharmony_ci      unreachable("bad instr type");
1772bf215546Sopenharmony_ci   }
1773bf215546Sopenharmony_ci}
1774bf215546Sopenharmony_ci
1775bf215546Sopenharmony_ci/* Return the number of instructions read. */
1776bf215546Sopenharmony_cistatic unsigned
1777bf215546Sopenharmony_ciread_instr(read_ctx *ctx, nir_block *block)
1778bf215546Sopenharmony_ci{
1779bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union packed_instr) == 4);
1780bf215546Sopenharmony_ci   union packed_instr header;
1781bf215546Sopenharmony_ci   header.u32 = blob_read_uint32(ctx->blob);
1782bf215546Sopenharmony_ci   nir_instr *instr;
1783bf215546Sopenharmony_ci
1784bf215546Sopenharmony_ci   switch (header.any.instr_type) {
1785bf215546Sopenharmony_ci   case nir_instr_type_alu:
1786bf215546Sopenharmony_ci      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1787bf215546Sopenharmony_ci         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1788bf215546Sopenharmony_ci      return header.alu.num_followup_alu_sharing_header + 1;
1789bf215546Sopenharmony_ci   case nir_instr_type_deref:
1790bf215546Sopenharmony_ci      instr = &read_deref(ctx, header)->instr;
1791bf215546Sopenharmony_ci      break;
1792bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1793bf215546Sopenharmony_ci      instr = &read_intrinsic(ctx, header)->instr;
1794bf215546Sopenharmony_ci      break;
1795bf215546Sopenharmony_ci   case nir_instr_type_load_const:
1796bf215546Sopenharmony_ci      instr = &read_load_const(ctx, header)->instr;
1797bf215546Sopenharmony_ci      break;
1798bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
1799bf215546Sopenharmony_ci      instr = &read_ssa_undef(ctx, header)->instr;
1800bf215546Sopenharmony_ci      break;
1801bf215546Sopenharmony_ci   case nir_instr_type_tex:
1802bf215546Sopenharmony_ci      instr = &read_tex(ctx, header)->instr;
1803bf215546Sopenharmony_ci      break;
1804bf215546Sopenharmony_ci   case nir_instr_type_phi:
1805bf215546Sopenharmony_ci      /* Phi instructions are a bit of a special case when reading because we
1806bf215546Sopenharmony_ci       * don't want inserting the instruction to automatically handle use/defs
1807bf215546Sopenharmony_ci       * for us.  Instead, we need to wait until all the blocks/instructions
1808bf215546Sopenharmony_ci       * are read so that we can set their sources up.
1809bf215546Sopenharmony_ci       */
1810bf215546Sopenharmony_ci      read_phi(ctx, block, header);
1811bf215546Sopenharmony_ci      return 1;
1812bf215546Sopenharmony_ci   case nir_instr_type_jump:
1813bf215546Sopenharmony_ci      instr = &read_jump(ctx, header)->instr;
1814bf215546Sopenharmony_ci      break;
1815bf215546Sopenharmony_ci   case nir_instr_type_call:
1816bf215546Sopenharmony_ci      instr = &read_call(ctx)->instr;
1817bf215546Sopenharmony_ci      break;
1818bf215546Sopenharmony_ci   case nir_instr_type_parallel_copy:
1819bf215546Sopenharmony_ci      unreachable("Cannot read parallel copies");
1820bf215546Sopenharmony_ci   default:
1821bf215546Sopenharmony_ci      unreachable("bad instr type");
1822bf215546Sopenharmony_ci   }
1823bf215546Sopenharmony_ci
1824bf215546Sopenharmony_ci   nir_instr_insert_after_block(block, instr);
1825bf215546Sopenharmony_ci   return 1;
1826bf215546Sopenharmony_ci}
1827bf215546Sopenharmony_ci
1828bf215546Sopenharmony_cistatic void
1829bf215546Sopenharmony_ciwrite_block(write_ctx *ctx, const nir_block *block)
1830bf215546Sopenharmony_ci{
1831bf215546Sopenharmony_ci   write_add_object(ctx, block);
1832bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci   ctx->last_instr_type = ~0;
1835bf215546Sopenharmony_ci   ctx->last_alu_header_offset = 0;
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
1838bf215546Sopenharmony_ci      write_instr(ctx, instr);
1839bf215546Sopenharmony_ci      ctx->last_instr_type = instr->type;
1840bf215546Sopenharmony_ci   }
1841bf215546Sopenharmony_ci}
1842bf215546Sopenharmony_ci
1843bf215546Sopenharmony_cistatic void
1844bf215546Sopenharmony_ciread_block(read_ctx *ctx, struct exec_list *cf_list)
1845bf215546Sopenharmony_ci{
1846bf215546Sopenharmony_ci   /* Don't actually create a new block.  Just use the one from the tail of
1847bf215546Sopenharmony_ci    * the list.  NIR guarantees that the tail of the list is a block and that
1848bf215546Sopenharmony_ci    * no two blocks are side-by-side in the IR;  It should be empty.
1849bf215546Sopenharmony_ci    */
1850bf215546Sopenharmony_ci   nir_block *block =
1851bf215546Sopenharmony_ci      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1852bf215546Sopenharmony_ci
1853bf215546Sopenharmony_ci   read_add_object(ctx, block);
1854bf215546Sopenharmony_ci   unsigned num_instrs = blob_read_uint32(ctx->blob);
1855bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_instrs;) {
1856bf215546Sopenharmony_ci      i += read_instr(ctx, block);
1857bf215546Sopenharmony_ci   }
1858bf215546Sopenharmony_ci}
1859bf215546Sopenharmony_ci
1860bf215546Sopenharmony_cistatic void
1861bf215546Sopenharmony_ciwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1862bf215546Sopenharmony_ci
1863bf215546Sopenharmony_cistatic void
1864bf215546Sopenharmony_ciread_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1865bf215546Sopenharmony_ci
1866bf215546Sopenharmony_cistatic void
1867bf215546Sopenharmony_ciwrite_if(write_ctx *ctx, nir_if *nif)
1868bf215546Sopenharmony_ci{
1869bf215546Sopenharmony_ci   write_src(ctx, &nif->condition);
1870bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, nif->control);
1871bf215546Sopenharmony_ci
1872bf215546Sopenharmony_ci   write_cf_list(ctx, &nif->then_list);
1873bf215546Sopenharmony_ci   write_cf_list(ctx, &nif->else_list);
1874bf215546Sopenharmony_ci}
1875bf215546Sopenharmony_ci
1876bf215546Sopenharmony_cistatic void
1877bf215546Sopenharmony_ciread_if(read_ctx *ctx, struct exec_list *cf_list)
1878bf215546Sopenharmony_ci{
1879bf215546Sopenharmony_ci   nir_if *nif = nir_if_create(ctx->nir);
1880bf215546Sopenharmony_ci
1881bf215546Sopenharmony_ci   read_src(ctx, &nif->condition, nif);
1882bf215546Sopenharmony_ci   nif->control = blob_read_uint8(ctx->blob);
1883bf215546Sopenharmony_ci
1884bf215546Sopenharmony_ci   nir_cf_node_insert_end(cf_list, &nif->cf_node);
1885bf215546Sopenharmony_ci
1886bf215546Sopenharmony_ci   read_cf_list(ctx, &nif->then_list);
1887bf215546Sopenharmony_ci   read_cf_list(ctx, &nif->else_list);
1888bf215546Sopenharmony_ci}
1889bf215546Sopenharmony_ci
1890bf215546Sopenharmony_cistatic void
1891bf215546Sopenharmony_ciwrite_loop(write_ctx *ctx, nir_loop *loop)
1892bf215546Sopenharmony_ci{
1893bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, loop->control);
1894bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, loop->divergent);
1895bf215546Sopenharmony_ci   write_cf_list(ctx, &loop->body);
1896bf215546Sopenharmony_ci}
1897bf215546Sopenharmony_ci
1898bf215546Sopenharmony_cistatic void
1899bf215546Sopenharmony_ciread_loop(read_ctx *ctx, struct exec_list *cf_list)
1900bf215546Sopenharmony_ci{
1901bf215546Sopenharmony_ci   nir_loop *loop = nir_loop_create(ctx->nir);
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci   nir_cf_node_insert_end(cf_list, &loop->cf_node);
1904bf215546Sopenharmony_ci
1905bf215546Sopenharmony_ci   loop->control = blob_read_uint8(ctx->blob);
1906bf215546Sopenharmony_ci   loop->divergent = blob_read_uint8(ctx->blob);
1907bf215546Sopenharmony_ci   read_cf_list(ctx, &loop->body);
1908bf215546Sopenharmony_ci}
1909bf215546Sopenharmony_ci
1910bf215546Sopenharmony_cistatic void
1911bf215546Sopenharmony_ciwrite_cf_node(write_ctx *ctx, nir_cf_node *cf)
1912bf215546Sopenharmony_ci{
1913bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, cf->type);
1914bf215546Sopenharmony_ci
1915bf215546Sopenharmony_ci   switch (cf->type) {
1916bf215546Sopenharmony_ci   case nir_cf_node_block:
1917bf215546Sopenharmony_ci      write_block(ctx, nir_cf_node_as_block(cf));
1918bf215546Sopenharmony_ci      break;
1919bf215546Sopenharmony_ci   case nir_cf_node_if:
1920bf215546Sopenharmony_ci      write_if(ctx, nir_cf_node_as_if(cf));
1921bf215546Sopenharmony_ci      break;
1922bf215546Sopenharmony_ci   case nir_cf_node_loop:
1923bf215546Sopenharmony_ci      write_loop(ctx, nir_cf_node_as_loop(cf));
1924bf215546Sopenharmony_ci      break;
1925bf215546Sopenharmony_ci   default:
1926bf215546Sopenharmony_ci      unreachable("bad cf type");
1927bf215546Sopenharmony_ci   }
1928bf215546Sopenharmony_ci}
1929bf215546Sopenharmony_ci
1930bf215546Sopenharmony_cistatic void
1931bf215546Sopenharmony_ciread_cf_node(read_ctx *ctx, struct exec_list *list)
1932bf215546Sopenharmony_ci{
1933bf215546Sopenharmony_ci   nir_cf_node_type type = blob_read_uint32(ctx->blob);
1934bf215546Sopenharmony_ci
1935bf215546Sopenharmony_ci   switch (type) {
1936bf215546Sopenharmony_ci   case nir_cf_node_block:
1937bf215546Sopenharmony_ci      read_block(ctx, list);
1938bf215546Sopenharmony_ci      break;
1939bf215546Sopenharmony_ci   case nir_cf_node_if:
1940bf215546Sopenharmony_ci      read_if(ctx, list);
1941bf215546Sopenharmony_ci      break;
1942bf215546Sopenharmony_ci   case nir_cf_node_loop:
1943bf215546Sopenharmony_ci      read_loop(ctx, list);
1944bf215546Sopenharmony_ci      break;
1945bf215546Sopenharmony_ci   default:
1946bf215546Sopenharmony_ci      unreachable("bad cf type");
1947bf215546Sopenharmony_ci   }
1948bf215546Sopenharmony_ci}
1949bf215546Sopenharmony_ci
1950bf215546Sopenharmony_cistatic void
1951bf215546Sopenharmony_ciwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1952bf215546Sopenharmony_ci{
1953bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1954bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1955bf215546Sopenharmony_ci      write_cf_node(ctx, cf);
1956bf215546Sopenharmony_ci   }
1957bf215546Sopenharmony_ci}
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_cistatic void
1960bf215546Sopenharmony_ciread_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1961bf215546Sopenharmony_ci{
1962bf215546Sopenharmony_ci   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1963bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_cf_nodes; i++)
1964bf215546Sopenharmony_ci      read_cf_node(ctx, cf_list);
1965bf215546Sopenharmony_ci}
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_cistatic void
1968bf215546Sopenharmony_ciwrite_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1969bf215546Sopenharmony_ci{
1970bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, fi->structured);
1971bf215546Sopenharmony_ci   blob_write_uint8(ctx->blob, !!fi->preamble);
1972bf215546Sopenharmony_ci
1973bf215546Sopenharmony_ci   if (fi->preamble)
1974bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));
1975bf215546Sopenharmony_ci
1976bf215546Sopenharmony_ci   write_var_list(ctx, &fi->locals);
1977bf215546Sopenharmony_ci   write_reg_list(ctx, &fi->registers);
1978bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, fi->reg_alloc);
1979bf215546Sopenharmony_ci
1980bf215546Sopenharmony_ci   write_cf_list(ctx, &fi->body);
1981bf215546Sopenharmony_ci   write_fixup_phis(ctx);
1982bf215546Sopenharmony_ci}
1983bf215546Sopenharmony_ci
1984bf215546Sopenharmony_cistatic nir_function_impl *
1985bf215546Sopenharmony_ciread_function_impl(read_ctx *ctx, nir_function *fxn)
1986bf215546Sopenharmony_ci{
1987bf215546Sopenharmony_ci   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1988bf215546Sopenharmony_ci   fi->function = fxn;
1989bf215546Sopenharmony_ci
1990bf215546Sopenharmony_ci   fi->structured = blob_read_uint8(ctx->blob);
1991bf215546Sopenharmony_ci   bool preamble = blob_read_uint8(ctx->blob);
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci   if (preamble)
1994bf215546Sopenharmony_ci      fi->preamble = read_object(ctx);
1995bf215546Sopenharmony_ci
1996bf215546Sopenharmony_ci   read_var_list(ctx, &fi->locals);
1997bf215546Sopenharmony_ci   read_reg_list(ctx, &fi->registers);
1998bf215546Sopenharmony_ci   fi->reg_alloc = blob_read_uint32(ctx->blob);
1999bf215546Sopenharmony_ci
2000bf215546Sopenharmony_ci   read_cf_list(ctx, &fi->body);
2001bf215546Sopenharmony_ci   read_fixup_phis(ctx);
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ci   fi->valid_metadata = 0;
2004bf215546Sopenharmony_ci
2005bf215546Sopenharmony_ci   return fi;
2006bf215546Sopenharmony_ci}
2007bf215546Sopenharmony_ci
2008bf215546Sopenharmony_cistatic void
2009bf215546Sopenharmony_ciwrite_function(write_ctx *ctx, const nir_function *fxn)
2010bf215546Sopenharmony_ci{
2011bf215546Sopenharmony_ci   uint32_t flags = 0;
2012bf215546Sopenharmony_ci   if (fxn->is_entrypoint)
2013bf215546Sopenharmony_ci      flags |= 0x1;
2014bf215546Sopenharmony_ci   if (fxn->is_preamble)
2015bf215546Sopenharmony_ci      flags |= 0x2;
2016bf215546Sopenharmony_ci   if (fxn->name)
2017bf215546Sopenharmony_ci      flags |= 0x4;
2018bf215546Sopenharmony_ci   if (fxn->impl)
2019bf215546Sopenharmony_ci      flags |= 0x8;
2020bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, flags);
2021bf215546Sopenharmony_ci   if (fxn->name)
2022bf215546Sopenharmony_ci      blob_write_string(ctx->blob, fxn->name);
2023bf215546Sopenharmony_ci
2024bf215546Sopenharmony_ci   write_add_object(ctx, fxn);
2025bf215546Sopenharmony_ci
2026bf215546Sopenharmony_ci   blob_write_uint32(ctx->blob, fxn->num_params);
2027bf215546Sopenharmony_ci   for (unsigned i = 0; i < fxn->num_params; i++) {
2028bf215546Sopenharmony_ci      uint32_t val =
2029bf215546Sopenharmony_ci         ((uint32_t)fxn->params[i].num_components) |
2030bf215546Sopenharmony_ci         ((uint32_t)fxn->params[i].bit_size) << 8;
2031bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, val);
2032bf215546Sopenharmony_ci   }
2033bf215546Sopenharmony_ci
2034bf215546Sopenharmony_ci   /* At first glance, it looks like we should write the function_impl here.
2035bf215546Sopenharmony_ci    * However, call instructions need to be able to reference at least the
2036bf215546Sopenharmony_ci    * function and those will get processed as we write the function_impls.
2037bf215546Sopenharmony_ci    * We stop here and write function_impls as a second pass.
2038bf215546Sopenharmony_ci    */
2039bf215546Sopenharmony_ci}
2040bf215546Sopenharmony_ci
2041bf215546Sopenharmony_cistatic void
2042bf215546Sopenharmony_ciread_function(read_ctx *ctx)
2043bf215546Sopenharmony_ci{
2044bf215546Sopenharmony_ci   uint32_t flags = blob_read_uint32(ctx->blob);
2045bf215546Sopenharmony_ci   bool has_name = flags & 0x4;
2046bf215546Sopenharmony_ci   char *name = has_name ? blob_read_string(ctx->blob) : NULL;
2047bf215546Sopenharmony_ci
2048bf215546Sopenharmony_ci   nir_function *fxn = nir_function_create(ctx->nir, name);
2049bf215546Sopenharmony_ci
2050bf215546Sopenharmony_ci   read_add_object(ctx, fxn);
2051bf215546Sopenharmony_ci
2052bf215546Sopenharmony_ci   fxn->num_params = blob_read_uint32(ctx->blob);
2053bf215546Sopenharmony_ci   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
2054bf215546Sopenharmony_ci   for (unsigned i = 0; i < fxn->num_params; i++) {
2055bf215546Sopenharmony_ci      uint32_t val = blob_read_uint32(ctx->blob);
2056bf215546Sopenharmony_ci      fxn->params[i].num_components = val & 0xff;
2057bf215546Sopenharmony_ci      fxn->params[i].bit_size = (val >> 8) & 0xff;
2058bf215546Sopenharmony_ci   }
2059bf215546Sopenharmony_ci
2060bf215546Sopenharmony_ci   fxn->is_entrypoint = flags & 0x1;
2061bf215546Sopenharmony_ci   fxn->is_preamble = flags & 0x2;
2062bf215546Sopenharmony_ci   if (flags & 0x8)
2063bf215546Sopenharmony_ci      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
2064bf215546Sopenharmony_ci}
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_cistatic void
2067bf215546Sopenharmony_ciwrite_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
2068bf215546Sopenharmony_ci{
2069bf215546Sopenharmony_ci   if (xfb == NULL) {
2070bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, 0);
2071bf215546Sopenharmony_ci   } else {
2072bf215546Sopenharmony_ci      size_t size = nir_xfb_info_size(xfb->output_count);
2073bf215546Sopenharmony_ci      assert(size <= UINT32_MAX);
2074bf215546Sopenharmony_ci      blob_write_uint32(ctx->blob, size);
2075bf215546Sopenharmony_ci      blob_write_bytes(ctx->blob, xfb, size);
2076bf215546Sopenharmony_ci   }
2077bf215546Sopenharmony_ci}
2078bf215546Sopenharmony_ci
2079bf215546Sopenharmony_cistatic nir_xfb_info *
2080bf215546Sopenharmony_ciread_xfb_info(read_ctx *ctx)
2081bf215546Sopenharmony_ci{
2082bf215546Sopenharmony_ci   uint32_t size = blob_read_uint32(ctx->blob);
2083bf215546Sopenharmony_ci   if (size == 0)
2084bf215546Sopenharmony_ci      return NULL;
2085bf215546Sopenharmony_ci
2086bf215546Sopenharmony_ci   struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
2087bf215546Sopenharmony_ci   blob_copy_bytes(ctx->blob, (void *)xfb, size);
2088bf215546Sopenharmony_ci
2089bf215546Sopenharmony_ci   return xfb;
2090bf215546Sopenharmony_ci}
2091bf215546Sopenharmony_ci
2092bf215546Sopenharmony_ci/**
2093bf215546Sopenharmony_ci * Serialize NIR into a binary blob.
2094bf215546Sopenharmony_ci *
2095bf215546Sopenharmony_ci * \param strip  Don't serialize information only useful for debugging,
2096bf215546Sopenharmony_ci *               such as variable names, making cache hits from similar
2097bf215546Sopenharmony_ci *               shaders more likely.
2098bf215546Sopenharmony_ci */
2099bf215546Sopenharmony_civoid
2100bf215546Sopenharmony_cinir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2101bf215546Sopenharmony_ci{
2102bf215546Sopenharmony_ci   write_ctx ctx = {0};
2103bf215546Sopenharmony_ci   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2104bf215546Sopenharmony_ci   ctx.blob = blob;
2105bf215546Sopenharmony_ci   ctx.nir = nir;
2106bf215546Sopenharmony_ci   ctx.strip = strip;
2107bf215546Sopenharmony_ci   util_dynarray_init(&ctx.phi_fixups, NULL);
2108bf215546Sopenharmony_ci
2109bf215546Sopenharmony_ci   size_t idx_size_offset = blob_reserve_uint32(blob);
2110bf215546Sopenharmony_ci
2111bf215546Sopenharmony_ci   struct shader_info info = nir->info;
2112bf215546Sopenharmony_ci   uint32_t strings = 0;
2113bf215546Sopenharmony_ci   if (!strip && info.name)
2114bf215546Sopenharmony_ci      strings |= 0x1;
2115bf215546Sopenharmony_ci   if (!strip && info.label)
2116bf215546Sopenharmony_ci      strings |= 0x2;
2117bf215546Sopenharmony_ci   blob_write_uint32(blob, strings);
2118bf215546Sopenharmony_ci   if (!strip && info.name)
2119bf215546Sopenharmony_ci      blob_write_string(blob, info.name);
2120bf215546Sopenharmony_ci   if (!strip && info.label)
2121bf215546Sopenharmony_ci      blob_write_string(blob, info.label);
2122bf215546Sopenharmony_ci   info.name = info.label = NULL;
2123bf215546Sopenharmony_ci   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2124bf215546Sopenharmony_ci
2125bf215546Sopenharmony_ci   write_var_list(&ctx, &nir->variables);
2126bf215546Sopenharmony_ci
2127bf215546Sopenharmony_ci   blob_write_uint32(blob, nir->num_inputs);
2128bf215546Sopenharmony_ci   blob_write_uint32(blob, nir->num_uniforms);
2129bf215546Sopenharmony_ci   blob_write_uint32(blob, nir->num_outputs);
2130bf215546Sopenharmony_ci   blob_write_uint32(blob, nir->scratch_size);
2131bf215546Sopenharmony_ci
2132bf215546Sopenharmony_ci   blob_write_uint32(blob, exec_list_length(&nir->functions));
2133bf215546Sopenharmony_ci   nir_foreach_function(fxn, nir) {
2134bf215546Sopenharmony_ci      write_function(&ctx, fxn);
2135bf215546Sopenharmony_ci   }
2136bf215546Sopenharmony_ci
2137bf215546Sopenharmony_ci   nir_foreach_function(fxn, nir) {
2138bf215546Sopenharmony_ci      if (fxn->impl)
2139bf215546Sopenharmony_ci         write_function_impl(&ctx, fxn->impl);
2140bf215546Sopenharmony_ci   }
2141bf215546Sopenharmony_ci
2142bf215546Sopenharmony_ci   blob_write_uint32(blob, nir->constant_data_size);
2143bf215546Sopenharmony_ci   if (nir->constant_data_size > 0)
2144bf215546Sopenharmony_ci      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2145bf215546Sopenharmony_ci
2146bf215546Sopenharmony_ci   write_xfb_info(&ctx, nir->xfb_info);
2147bf215546Sopenharmony_ci
2148bf215546Sopenharmony_ci   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci   _mesa_hash_table_destroy(ctx.remap_table, NULL);
2151bf215546Sopenharmony_ci   util_dynarray_fini(&ctx.phi_fixups);
2152bf215546Sopenharmony_ci}
2153bf215546Sopenharmony_ci
2154bf215546Sopenharmony_cinir_shader *
2155bf215546Sopenharmony_cinir_deserialize(void *mem_ctx,
2156bf215546Sopenharmony_ci                const struct nir_shader_compiler_options *options,
2157bf215546Sopenharmony_ci                struct blob_reader *blob)
2158bf215546Sopenharmony_ci{
2159bf215546Sopenharmony_ci   read_ctx ctx = {0};
2160bf215546Sopenharmony_ci   ctx.blob = blob;
2161bf215546Sopenharmony_ci   list_inithead(&ctx.phi_srcs);
2162bf215546Sopenharmony_ci   ctx.idx_table_len = blob_read_uint32(blob);
2163bf215546Sopenharmony_ci   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2164bf215546Sopenharmony_ci
2165bf215546Sopenharmony_ci   uint32_t strings = blob_read_uint32(blob);
2166bf215546Sopenharmony_ci   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2167bf215546Sopenharmony_ci   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2168bf215546Sopenharmony_ci
2169bf215546Sopenharmony_ci   struct shader_info info;
2170bf215546Sopenharmony_ci   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2171bf215546Sopenharmony_ci
2172bf215546Sopenharmony_ci   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2173bf215546Sopenharmony_ci
2174bf215546Sopenharmony_ci   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2175bf215546Sopenharmony_ci   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2176bf215546Sopenharmony_ci
2177bf215546Sopenharmony_ci   ctx.nir->info = info;
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_ci   read_var_list(&ctx, &ctx.nir->variables);
2180bf215546Sopenharmony_ci
2181bf215546Sopenharmony_ci   ctx.nir->num_inputs = blob_read_uint32(blob);
2182bf215546Sopenharmony_ci   ctx.nir->num_uniforms = blob_read_uint32(blob);
2183bf215546Sopenharmony_ci   ctx.nir->num_outputs = blob_read_uint32(blob);
2184bf215546Sopenharmony_ci   ctx.nir->scratch_size = blob_read_uint32(blob);
2185bf215546Sopenharmony_ci
2186bf215546Sopenharmony_ci   unsigned num_functions = blob_read_uint32(blob);
2187bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_functions; i++)
2188bf215546Sopenharmony_ci      read_function(&ctx);
2189bf215546Sopenharmony_ci
2190bf215546Sopenharmony_ci   nir_foreach_function(fxn, ctx.nir) {
2191bf215546Sopenharmony_ci      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2192bf215546Sopenharmony_ci         fxn->impl = read_function_impl(&ctx, fxn);
2193bf215546Sopenharmony_ci   }
2194bf215546Sopenharmony_ci
2195bf215546Sopenharmony_ci   ctx.nir->constant_data_size = blob_read_uint32(blob);
2196bf215546Sopenharmony_ci   if (ctx.nir->constant_data_size > 0) {
2197bf215546Sopenharmony_ci      ctx.nir->constant_data =
2198bf215546Sopenharmony_ci         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2199bf215546Sopenharmony_ci      blob_copy_bytes(blob, ctx.nir->constant_data,
2200bf215546Sopenharmony_ci                      ctx.nir->constant_data_size);
2201bf215546Sopenharmony_ci   }
2202bf215546Sopenharmony_ci
2203bf215546Sopenharmony_ci   ctx.nir->xfb_info = read_xfb_info(&ctx);
2204bf215546Sopenharmony_ci
2205bf215546Sopenharmony_ci   free(ctx.idx_table);
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_ci   nir_validate_shader(ctx.nir, "after deserialize");
2208bf215546Sopenharmony_ci
2209bf215546Sopenharmony_ci   return ctx.nir;
2210bf215546Sopenharmony_ci}
2211bf215546Sopenharmony_ci
2212bf215546Sopenharmony_civoid
2213bf215546Sopenharmony_cinir_shader_serialize_deserialize(nir_shader *shader)
2214bf215546Sopenharmony_ci{
2215bf215546Sopenharmony_ci   const struct nir_shader_compiler_options *options = shader->options;
2216bf215546Sopenharmony_ci
2217bf215546Sopenharmony_ci   struct blob writer;
2218bf215546Sopenharmony_ci   blob_init(&writer);
2219bf215546Sopenharmony_ci   nir_serialize(&writer, shader, false);
2220bf215546Sopenharmony_ci
2221bf215546Sopenharmony_ci   /* Delete all of dest's ralloc children but leave dest alone */
2222bf215546Sopenharmony_ci   void *dead_ctx = ralloc_context(NULL);
2223bf215546Sopenharmony_ci   ralloc_adopt(dead_ctx, shader);
2224bf215546Sopenharmony_ci   ralloc_free(dead_ctx);
2225bf215546Sopenharmony_ci
2226bf215546Sopenharmony_ci   dead_ctx = ralloc_context(NULL);
2227bf215546Sopenharmony_ci
2228bf215546Sopenharmony_ci   struct blob_reader reader;
2229bf215546Sopenharmony_ci   blob_reader_init(&reader, writer.data, writer.size);
2230bf215546Sopenharmony_ci   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2231bf215546Sopenharmony_ci
2232bf215546Sopenharmony_ci   blob_finish(&writer);
2233bf215546Sopenharmony_ci
2234bf215546Sopenharmony_ci   nir_shader_replace(shader, copy);
2235bf215546Sopenharmony_ci   ralloc_free(dead_ctx);
2236bf215546Sopenharmony_ci}
2237