1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2017 Connor Abbott 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_serialize.h" 25bf215546Sopenharmony_ci#include "nir_control_flow.h" 26bf215546Sopenharmony_ci#include "nir_xfb_info.h" 27bf215546Sopenharmony_ci#include "util/u_dynarray.h" 28bf215546Sopenharmony_ci#include "util/u_math.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) 31bf215546Sopenharmony_ci#define MAX_OBJECT_IDS (1 << 20) 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_citypedef struct { 34bf215546Sopenharmony_ci size_t blob_offset; 35bf215546Sopenharmony_ci nir_ssa_def *src; 36bf215546Sopenharmony_ci nir_block *block; 37bf215546Sopenharmony_ci} write_phi_fixup; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_citypedef struct { 40bf215546Sopenharmony_ci const nir_shader *nir; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci struct blob *blob; 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci /* maps pointer to index */ 45bf215546Sopenharmony_ci struct hash_table *remap_table; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci /* the next index to assign to a NIR in-memory object */ 48bf215546Sopenharmony_ci uint32_t next_idx; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci /* Array of write_phi_fixup structs representing phi sources that need to 51bf215546Sopenharmony_ci * be resolved in the second pass. 52bf215546Sopenharmony_ci */ 53bf215546Sopenharmony_ci struct util_dynarray phi_fixups; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci /* The last serialized type. */ 56bf215546Sopenharmony_ci const struct glsl_type *last_type; 57bf215546Sopenharmony_ci const struct glsl_type *last_interface_type; 58bf215546Sopenharmony_ci struct nir_variable_data last_var_data; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci /* For skipping equal ALU headers (typical after scalarization). */ 61bf215546Sopenharmony_ci nir_instr_type last_instr_type; 62bf215546Sopenharmony_ci uintptr_t last_alu_header_offset; 63bf215546Sopenharmony_ci uint32_t last_alu_header; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci /* Don't write optional data such as variable names. */ 66bf215546Sopenharmony_ci bool strip; 67bf215546Sopenharmony_ci} write_ctx; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_citypedef struct { 70bf215546Sopenharmony_ci nir_shader *nir; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci struct blob_reader *blob; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci /* the next index to assign to a NIR in-memory object */ 75bf215546Sopenharmony_ci uint32_t next_idx; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci /* The length of the index -> object table */ 78bf215546Sopenharmony_ci uint32_t idx_table_len; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci /* map from index to deserialized pointer */ 81bf215546Sopenharmony_ci void **idx_table; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* List of phi sources. */ 84bf215546Sopenharmony_ci struct list_head phi_srcs; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* The last deserialized type. */ 87bf215546Sopenharmony_ci const struct glsl_type *last_type; 88bf215546Sopenharmony_ci const struct glsl_type *last_interface_type; 89bf215546Sopenharmony_ci struct nir_variable_data last_var_data; 90bf215546Sopenharmony_ci} read_ctx; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistatic void 93bf215546Sopenharmony_ciwrite_add_object(write_ctx *ctx, const void *obj) 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci uint32_t index = ctx->next_idx++; 96bf215546Sopenharmony_ci assert(index != MAX_OBJECT_IDS); 97bf215546Sopenharmony_ci _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic uint32_t 101bf215546Sopenharmony_ciwrite_lookup_object(write_ctx *ctx, const void *obj) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 104bf215546Sopenharmony_ci assert(entry); 105bf215546Sopenharmony_ci return (uint32_t)(uintptr_t) entry->data; 106bf215546Sopenharmony_ci} 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_cistatic void 109bf215546Sopenharmony_ciread_add_object(read_ctx *ctx, void *obj) 110bf215546Sopenharmony_ci{ 111bf215546Sopenharmony_ci assert(ctx->next_idx < ctx->idx_table_len); 112bf215546Sopenharmony_ci ctx->idx_table[ctx->next_idx++] = obj; 113bf215546Sopenharmony_ci} 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_cistatic void * 116bf215546Sopenharmony_ciread_lookup_object(read_ctx *ctx, uint32_t idx) 117bf215546Sopenharmony_ci{ 118bf215546Sopenharmony_ci assert(idx < ctx->idx_table_len); 119bf215546Sopenharmony_ci return ctx->idx_table[idx]; 120bf215546Sopenharmony_ci} 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistatic void * 123bf215546Sopenharmony_ciread_object(read_ctx *ctx) 124bf215546Sopenharmony_ci{ 125bf215546Sopenharmony_ci return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic uint32_t 129bf215546Sopenharmony_ciencode_bit_size_3bits(uint8_t bit_size) 130bf215546Sopenharmony_ci{ 131bf215546Sopenharmony_ci /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */ 132bf215546Sopenharmony_ci assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); 133bf215546Sopenharmony_ci if (bit_size) 134bf215546Sopenharmony_ci return util_logbase2(bit_size) + 1; 135bf215546Sopenharmony_ci return 0; 136bf215546Sopenharmony_ci} 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_cistatic uint8_t 139bf215546Sopenharmony_cidecode_bit_size_3bits(uint8_t bit_size) 140bf215546Sopenharmony_ci{ 141bf215546Sopenharmony_ci if (bit_size) 142bf215546Sopenharmony_ci return 1 << (bit_size - 1); 143bf215546Sopenharmony_ci return 0; 144bf215546Sopenharmony_ci} 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci#define NUM_COMPONENTS_IS_SEPARATE_7 7 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_cistatic uint8_t 149bf215546Sopenharmony_ciencode_num_components_in_3bits(uint8_t num_components) 150bf215546Sopenharmony_ci{ 151bf215546Sopenharmony_ci if (num_components <= 4) 152bf215546Sopenharmony_ci return num_components; 153bf215546Sopenharmony_ci if (num_components == 8) 154bf215546Sopenharmony_ci return 5; 155bf215546Sopenharmony_ci if (num_components == 16) 156bf215546Sopenharmony_ci return 6; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* special value indicating that num_components is in the next uint32 */ 159bf215546Sopenharmony_ci return NUM_COMPONENTS_IS_SEPARATE_7; 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic uint8_t 163bf215546Sopenharmony_cidecode_num_components_in_3bits(uint8_t value) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci if (value <= 4) 166bf215546Sopenharmony_ci return value; 167bf215546Sopenharmony_ci if (value == 5) 168bf215546Sopenharmony_ci return 8; 169bf215546Sopenharmony_ci if (value == 6) 170bf215546Sopenharmony_ci return 16; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci unreachable("invalid num_components encoding"); 173bf215546Sopenharmony_ci return 0; 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic void 177bf215546Sopenharmony_ciwrite_constant(write_ctx *ctx, const nir_constant *c) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 180bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, c->num_elements); 181bf215546Sopenharmony_ci for (unsigned i = 0; i < c->num_elements; i++) 182bf215546Sopenharmony_ci write_constant(ctx, c->elements[i]); 183bf215546Sopenharmony_ci} 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_cistatic nir_constant * 186bf215546Sopenharmony_ciread_constant(read_ctx *ctx, nir_variable *nvar) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci nir_constant *c = ralloc(nvar, nir_constant); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 191bf215546Sopenharmony_ci c->num_elements = blob_read_uint32(ctx->blob); 192bf215546Sopenharmony_ci c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 193bf215546Sopenharmony_ci for (unsigned i = 0; i < c->num_elements; i++) 194bf215546Sopenharmony_ci c->elements[i] = read_constant(ctx, nvar); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci return c; 197bf215546Sopenharmony_ci} 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_cienum var_data_encoding { 200bf215546Sopenharmony_ci var_encode_full, 201bf215546Sopenharmony_ci var_encode_shader_temp, 202bf215546Sopenharmony_ci var_encode_function_temp, 203bf215546Sopenharmony_ci var_encode_location_diff, 204bf215546Sopenharmony_ci}; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ciunion packed_var { 207bf215546Sopenharmony_ci uint32_t u32; 208bf215546Sopenharmony_ci struct { 209bf215546Sopenharmony_ci unsigned has_name:1; 210bf215546Sopenharmony_ci unsigned has_constant_initializer:1; 211bf215546Sopenharmony_ci unsigned has_pointer_initializer:1; 212bf215546Sopenharmony_ci unsigned has_interface_type:1; 213bf215546Sopenharmony_ci unsigned num_state_slots:7; 214bf215546Sopenharmony_ci unsigned data_encoding:2; 215bf215546Sopenharmony_ci unsigned type_same_as_last:1; 216bf215546Sopenharmony_ci unsigned interface_type_same_as_last:1; 217bf215546Sopenharmony_ci unsigned ray_query:1; 218bf215546Sopenharmony_ci unsigned num_members:16; 219bf215546Sopenharmony_ci } u; 220bf215546Sopenharmony_ci}; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ciunion packed_var_data_diff { 223bf215546Sopenharmony_ci uint32_t u32; 224bf215546Sopenharmony_ci struct { 225bf215546Sopenharmony_ci int location:13; 226bf215546Sopenharmony_ci int location_frac:3; 227bf215546Sopenharmony_ci int driver_location:16; 228bf215546Sopenharmony_ci } u; 229bf215546Sopenharmony_ci}; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_cistatic void 232bf215546Sopenharmony_ciwrite_variable(write_ctx *ctx, const nir_variable *var) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci write_add_object(ctx, var); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci assert(var->num_state_slots < (1 << 7)); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union packed_var) == 4); 239bf215546Sopenharmony_ci union packed_var flags; 240bf215546Sopenharmony_ci flags.u32 = 0; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci flags.u.has_name = !ctx->strip && var->name; 243bf215546Sopenharmony_ci flags.u.has_constant_initializer = !!(var->constant_initializer); 244bf215546Sopenharmony_ci flags.u.has_pointer_initializer = !!(var->pointer_initializer); 245bf215546Sopenharmony_ci flags.u.has_interface_type = !!(var->interface_type); 246bf215546Sopenharmony_ci flags.u.type_same_as_last = var->type == ctx->last_type; 247bf215546Sopenharmony_ci flags.u.interface_type_same_as_last = 248bf215546Sopenharmony_ci var->interface_type && var->interface_type == ctx->last_interface_type; 249bf215546Sopenharmony_ci flags.u.num_state_slots = var->num_state_slots; 250bf215546Sopenharmony_ci flags.u.num_members = var->num_members; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci struct nir_variable_data data = var->data; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci /* When stripping, we expect that the location is no longer needed, 255bf215546Sopenharmony_ci * which is typically after shaders are linked. 256bf215546Sopenharmony_ci */ 257bf215546Sopenharmony_ci if (ctx->strip && 258bf215546Sopenharmony_ci data.mode != nir_var_system_value && 259bf215546Sopenharmony_ci data.mode != nir_var_shader_in && 260bf215546Sopenharmony_ci data.mode != nir_var_shader_out) 261bf215546Sopenharmony_ci data.location = 0; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci /* Temporary variables don't serialize var->data. */ 264bf215546Sopenharmony_ci if (data.mode == nir_var_shader_temp) 265bf215546Sopenharmony_ci flags.u.data_encoding = var_encode_shader_temp; 266bf215546Sopenharmony_ci else if (data.mode == nir_var_function_temp) 267bf215546Sopenharmony_ci flags.u.data_encoding = var_encode_function_temp; 268bf215546Sopenharmony_ci else { 269bf215546Sopenharmony_ci struct nir_variable_data tmp = data; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci tmp.location = ctx->last_var_data.location; 272bf215546Sopenharmony_ci tmp.location_frac = ctx->last_var_data.location_frac; 273bf215546Sopenharmony_ci tmp.driver_location = ctx->last_var_data.driver_location; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci /* See if we can encode only the difference in locations from the last 276bf215546Sopenharmony_ci * variable. 277bf215546Sopenharmony_ci */ 278bf215546Sopenharmony_ci if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && 279bf215546Sopenharmony_ci abs((int)data.location - 280bf215546Sopenharmony_ci (int)ctx->last_var_data.location) < (1 << 12) && 281bf215546Sopenharmony_ci abs((int)data.driver_location - 282bf215546Sopenharmony_ci (int)ctx->last_var_data.driver_location) < (1 << 15)) 283bf215546Sopenharmony_ci flags.u.data_encoding = var_encode_location_diff; 284bf215546Sopenharmony_ci else 285bf215546Sopenharmony_ci flags.u.data_encoding = var_encode_full; 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci flags.u.ray_query = var->data.ray_query; 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, flags.u32); 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci if (!flags.u.type_same_as_last) { 293bf215546Sopenharmony_ci encode_type_to_blob(ctx->blob, var->type); 294bf215546Sopenharmony_ci ctx->last_type = var->type; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci if (var->interface_type && !flags.u.interface_type_same_as_last) { 298bf215546Sopenharmony_ci encode_type_to_blob(ctx->blob, var->interface_type); 299bf215546Sopenharmony_ci ctx->last_interface_type = var->interface_type; 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci if (flags.u.has_name) 303bf215546Sopenharmony_ci blob_write_string(ctx->blob, var->name); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci if (flags.u.data_encoding == var_encode_full || 306bf215546Sopenharmony_ci flags.u.data_encoding == var_encode_location_diff) { 307bf215546Sopenharmony_ci if (flags.u.data_encoding == var_encode_full) { 308bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, &data, sizeof(data)); 309bf215546Sopenharmony_ci } else { 310bf215546Sopenharmony_ci /* Serialize only the difference in locations from the last variable. 311bf215546Sopenharmony_ci */ 312bf215546Sopenharmony_ci union packed_var_data_diff diff; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci diff.u.location = data.location - ctx->last_var_data.location; 315bf215546Sopenharmony_ci diff.u.location_frac = data.location_frac - 316bf215546Sopenharmony_ci ctx->last_var_data.location_frac; 317bf215546Sopenharmony_ci diff.u.driver_location = data.driver_location - 318bf215546Sopenharmony_ci ctx->last_var_data.driver_location; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, diff.u32); 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci ctx->last_var_data = data; 324bf215546Sopenharmony_ci } 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci for (unsigned i = 0; i < var->num_state_slots; i++) { 327bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, &var->state_slots[i], 328bf215546Sopenharmony_ci sizeof(var->state_slots[i])); 329bf215546Sopenharmony_ci } 330bf215546Sopenharmony_ci if (var->constant_initializer) 331bf215546Sopenharmony_ci write_constant(ctx, var->constant_initializer); 332bf215546Sopenharmony_ci if (var->pointer_initializer) 333bf215546Sopenharmony_ci write_lookup_object(ctx, var->pointer_initializer); 334bf215546Sopenharmony_ci if (var->num_members > 0) { 335bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, (uint8_t *) var->members, 336bf215546Sopenharmony_ci var->num_members * sizeof(*var->members)); 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci} 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_cistatic nir_variable * 341bf215546Sopenharmony_ciread_variable(read_ctx *ctx) 342bf215546Sopenharmony_ci{ 343bf215546Sopenharmony_ci nir_variable *var = rzalloc(ctx->nir, nir_variable); 344bf215546Sopenharmony_ci read_add_object(ctx, var); 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci union packed_var flags; 347bf215546Sopenharmony_ci flags.u32 = blob_read_uint32(ctx->blob); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci if (flags.u.type_same_as_last) { 350bf215546Sopenharmony_ci var->type = ctx->last_type; 351bf215546Sopenharmony_ci } else { 352bf215546Sopenharmony_ci var->type = decode_type_from_blob(ctx->blob); 353bf215546Sopenharmony_ci ctx->last_type = var->type; 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci if (flags.u.has_interface_type) { 357bf215546Sopenharmony_ci if (flags.u.interface_type_same_as_last) { 358bf215546Sopenharmony_ci var->interface_type = ctx->last_interface_type; 359bf215546Sopenharmony_ci } else { 360bf215546Sopenharmony_ci var->interface_type = decode_type_from_blob(ctx->blob); 361bf215546Sopenharmony_ci ctx->last_interface_type = var->interface_type; 362bf215546Sopenharmony_ci } 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci if (flags.u.has_name) { 366bf215546Sopenharmony_ci const char *name = blob_read_string(ctx->blob); 367bf215546Sopenharmony_ci var->name = ralloc_strdup(var, name); 368bf215546Sopenharmony_ci } else { 369bf215546Sopenharmony_ci var->name = NULL; 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci if (flags.u.data_encoding == var_encode_shader_temp) 373bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 374bf215546Sopenharmony_ci else if (flags.u.data_encoding == var_encode_function_temp) 375bf215546Sopenharmony_ci var->data.mode = nir_var_function_temp; 376bf215546Sopenharmony_ci else if (flags.u.data_encoding == var_encode_full) { 377bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data)); 378bf215546Sopenharmony_ci ctx->last_var_data = var->data; 379bf215546Sopenharmony_ci } else { /* var_encode_location_diff */ 380bf215546Sopenharmony_ci union packed_var_data_diff diff; 381bf215546Sopenharmony_ci diff.u32 = blob_read_uint32(ctx->blob); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci var->data = ctx->last_var_data; 384bf215546Sopenharmony_ci var->data.location += diff.u.location; 385bf215546Sopenharmony_ci var->data.location_frac += diff.u.location_frac; 386bf215546Sopenharmony_ci var->data.driver_location += diff.u.driver_location; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci ctx->last_var_data = var->data; 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci var->data.ray_query = flags.u.ray_query; 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci var->num_state_slots = flags.u.num_state_slots; 394bf215546Sopenharmony_ci if (var->num_state_slots != 0) { 395bf215546Sopenharmony_ci var->state_slots = ralloc_array(var, nir_state_slot, 396bf215546Sopenharmony_ci var->num_state_slots); 397bf215546Sopenharmony_ci for (unsigned i = 0; i < var->num_state_slots; i++) { 398bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, &var->state_slots[i], 399bf215546Sopenharmony_ci sizeof(var->state_slots[i])); 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci if (flags.u.has_constant_initializer) 403bf215546Sopenharmony_ci var->constant_initializer = read_constant(ctx, var); 404bf215546Sopenharmony_ci else 405bf215546Sopenharmony_ci var->constant_initializer = NULL; 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (flags.u.has_pointer_initializer) 408bf215546Sopenharmony_ci var->pointer_initializer = read_object(ctx); 409bf215546Sopenharmony_ci else 410bf215546Sopenharmony_ci var->pointer_initializer = NULL; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci var->num_members = flags.u.num_members; 413bf215546Sopenharmony_ci if (var->num_members > 0) { 414bf215546Sopenharmony_ci var->members = ralloc_array(var, struct nir_variable_data, 415bf215546Sopenharmony_ci var->num_members); 416bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, (uint8_t *) var->members, 417bf215546Sopenharmony_ci var->num_members * sizeof(*var->members)); 418bf215546Sopenharmony_ci } 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci return var; 421bf215546Sopenharmony_ci} 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_cistatic void 424bf215546Sopenharmony_ciwrite_var_list(write_ctx *ctx, const struct exec_list *src) 425bf215546Sopenharmony_ci{ 426bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, exec_list_length(src)); 427bf215546Sopenharmony_ci foreach_list_typed(nir_variable, var, node, src) { 428bf215546Sopenharmony_ci write_variable(ctx, var); 429bf215546Sopenharmony_ci } 430bf215546Sopenharmony_ci} 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_cistatic void 433bf215546Sopenharmony_ciread_var_list(read_ctx *ctx, struct exec_list *dst) 434bf215546Sopenharmony_ci{ 435bf215546Sopenharmony_ci exec_list_make_empty(dst); 436bf215546Sopenharmony_ci unsigned num_vars = blob_read_uint32(ctx->blob); 437bf215546Sopenharmony_ci for (unsigned i = 0; i < num_vars; i++) { 438bf215546Sopenharmony_ci nir_variable *var = read_variable(ctx); 439bf215546Sopenharmony_ci exec_list_push_tail(dst, &var->node); 440bf215546Sopenharmony_ci } 441bf215546Sopenharmony_ci} 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_cistatic void 444bf215546Sopenharmony_ciwrite_register(write_ctx *ctx, const nir_register *reg) 445bf215546Sopenharmony_ci{ 446bf215546Sopenharmony_ci write_add_object(ctx, reg); 447bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, reg->num_components); 448bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, reg->bit_size); 449bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, reg->num_array_elems); 450bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, reg->index); 451bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, reg->divergent); 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_cistatic nir_register * 455bf215546Sopenharmony_ciread_register(read_ctx *ctx) 456bf215546Sopenharmony_ci{ 457bf215546Sopenharmony_ci nir_register *reg = ralloc(ctx->nir, nir_register); 458bf215546Sopenharmony_ci read_add_object(ctx, reg); 459bf215546Sopenharmony_ci reg->num_components = blob_read_uint32(ctx->blob); 460bf215546Sopenharmony_ci reg->bit_size = blob_read_uint32(ctx->blob); 461bf215546Sopenharmony_ci reg->num_array_elems = blob_read_uint32(ctx->blob); 462bf215546Sopenharmony_ci reg->index = blob_read_uint32(ctx->blob); 463bf215546Sopenharmony_ci reg->divergent = blob_read_uint8(ctx->blob); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci list_inithead(®->uses); 466bf215546Sopenharmony_ci list_inithead(®->defs); 467bf215546Sopenharmony_ci list_inithead(®->if_uses); 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci return reg; 470bf215546Sopenharmony_ci} 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_cistatic void 473bf215546Sopenharmony_ciwrite_reg_list(write_ctx *ctx, const struct exec_list *src) 474bf215546Sopenharmony_ci{ 475bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, exec_list_length(src)); 476bf215546Sopenharmony_ci foreach_list_typed(nir_register, reg, node, src) 477bf215546Sopenharmony_ci write_register(ctx, reg); 478bf215546Sopenharmony_ci} 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_cistatic void 481bf215546Sopenharmony_ciread_reg_list(read_ctx *ctx, struct exec_list *dst) 482bf215546Sopenharmony_ci{ 483bf215546Sopenharmony_ci exec_list_make_empty(dst); 484bf215546Sopenharmony_ci unsigned num_regs = blob_read_uint32(ctx->blob); 485bf215546Sopenharmony_ci for (unsigned i = 0; i < num_regs; i++) { 486bf215546Sopenharmony_ci nir_register *reg = read_register(ctx); 487bf215546Sopenharmony_ci exec_list_push_tail(dst, ®->node); 488bf215546Sopenharmony_ci } 489bf215546Sopenharmony_ci} 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ciunion packed_src { 492bf215546Sopenharmony_ci uint32_t u32; 493bf215546Sopenharmony_ci struct { 494bf215546Sopenharmony_ci unsigned is_ssa:1; /* <-- Header */ 495bf215546Sopenharmony_ci unsigned is_indirect:1; 496bf215546Sopenharmony_ci unsigned object_idx:20; 497bf215546Sopenharmony_ci unsigned _footer:10; /* <-- Footer */ 498bf215546Sopenharmony_ci } any; 499bf215546Sopenharmony_ci struct { 500bf215546Sopenharmony_ci unsigned _header:22; /* <-- Header */ 501bf215546Sopenharmony_ci unsigned negate:1; /* <-- Footer */ 502bf215546Sopenharmony_ci unsigned abs:1; 503bf215546Sopenharmony_ci unsigned swizzle_x:2; 504bf215546Sopenharmony_ci unsigned swizzle_y:2; 505bf215546Sopenharmony_ci unsigned swizzle_z:2; 506bf215546Sopenharmony_ci unsigned swizzle_w:2; 507bf215546Sopenharmony_ci } alu; 508bf215546Sopenharmony_ci struct { 509bf215546Sopenharmony_ci unsigned _header:22; /* <-- Header */ 510bf215546Sopenharmony_ci unsigned src_type:5; /* <-- Footer */ 511bf215546Sopenharmony_ci unsigned _pad:5; 512bf215546Sopenharmony_ci } tex; 513bf215546Sopenharmony_ci}; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic void 516bf215546Sopenharmony_ciwrite_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) 517bf215546Sopenharmony_ci{ 518bf215546Sopenharmony_ci /* Since sources are very frequent, we try to save some space when storing 519bf215546Sopenharmony_ci * them. In particular, we store whether the source is a register and 520bf215546Sopenharmony_ci * whether the register has an indirect index in the low two bits. We can 521bf215546Sopenharmony_ci * assume that the high two bits of the index are zero, since otherwise our 522bf215546Sopenharmony_ci * address space would've been exhausted allocating the remap table! 523bf215546Sopenharmony_ci */ 524bf215546Sopenharmony_ci header.any.is_ssa = src->is_ssa; 525bf215546Sopenharmony_ci if (src->is_ssa) { 526bf215546Sopenharmony_ci header.any.object_idx = write_lookup_object(ctx, src->ssa); 527bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 528bf215546Sopenharmony_ci } else { 529bf215546Sopenharmony_ci header.any.object_idx = write_lookup_object(ctx, src->reg.reg); 530bf215546Sopenharmony_ci header.any.is_indirect = !!src->reg.indirect; 531bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 532bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, src->reg.base_offset); 533bf215546Sopenharmony_ci if (src->reg.indirect) { 534bf215546Sopenharmony_ci union packed_src header = {0}; 535bf215546Sopenharmony_ci write_src_full(ctx, src->reg.indirect, header); 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci} 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_cistatic void 541bf215546Sopenharmony_ciwrite_src(write_ctx *ctx, const nir_src *src) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci union packed_src header = {0}; 544bf215546Sopenharmony_ci write_src_full(ctx, src, header); 545bf215546Sopenharmony_ci} 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_cistatic union packed_src 548bf215546Sopenharmony_ciread_src(read_ctx *ctx, nir_src *src, void *mem_ctx) 549bf215546Sopenharmony_ci{ 550bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union packed_src) == 4); 551bf215546Sopenharmony_ci union packed_src header; 552bf215546Sopenharmony_ci header.u32 = blob_read_uint32(ctx->blob); 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci src->is_ssa = header.any.is_ssa; 555bf215546Sopenharmony_ci if (src->is_ssa) { 556bf215546Sopenharmony_ci src->ssa = read_lookup_object(ctx, header.any.object_idx); 557bf215546Sopenharmony_ci } else { 558bf215546Sopenharmony_ci src->reg.reg = read_lookup_object(ctx, header.any.object_idx); 559bf215546Sopenharmony_ci src->reg.base_offset = blob_read_uint32(ctx->blob); 560bf215546Sopenharmony_ci if (header.any.is_indirect) { 561bf215546Sopenharmony_ci src->reg.indirect = malloc(sizeof(nir_src)); 562bf215546Sopenharmony_ci read_src(ctx, src->reg.indirect, mem_ctx); 563bf215546Sopenharmony_ci } else { 564bf215546Sopenharmony_ci src->reg.indirect = NULL; 565bf215546Sopenharmony_ci } 566bf215546Sopenharmony_ci } 567bf215546Sopenharmony_ci return header; 568bf215546Sopenharmony_ci} 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ciunion packed_dest { 571bf215546Sopenharmony_ci uint8_t u8; 572bf215546Sopenharmony_ci struct { 573bf215546Sopenharmony_ci uint8_t is_ssa:1; 574bf215546Sopenharmony_ci uint8_t num_components:3; 575bf215546Sopenharmony_ci uint8_t bit_size:3; 576bf215546Sopenharmony_ci uint8_t divergent:1; 577bf215546Sopenharmony_ci } ssa; 578bf215546Sopenharmony_ci struct { 579bf215546Sopenharmony_ci uint8_t is_ssa:1; 580bf215546Sopenharmony_ci uint8_t is_indirect:1; 581bf215546Sopenharmony_ci uint8_t _pad:6; 582bf215546Sopenharmony_ci } reg; 583bf215546Sopenharmony_ci}; 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_cienum intrinsic_const_indices_encoding { 586bf215546Sopenharmony_ci /* Use packed_const_indices to store tightly packed indices. 587bf215546Sopenharmony_ci * 588bf215546Sopenharmony_ci * The common case for load_ubo is 0, 0, 0, which is trivially represented. 589bf215546Sopenharmony_ci * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 590bf215546Sopenharmony_ci */ 591bf215546Sopenharmony_ci const_indices_all_combined, 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci const_indices_8bit, /* 8 bits per element */ 594bf215546Sopenharmony_ci const_indices_16bit, /* 16 bits per element */ 595bf215546Sopenharmony_ci const_indices_32bit, /* 32 bits per element */ 596bf215546Sopenharmony_ci}; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_cienum load_const_packing { 599bf215546Sopenharmony_ci /* Constants are not packed and are stored in following dwords. */ 600bf215546Sopenharmony_ci load_const_full, 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci /* packed_value contains high 19 bits, low bits are 0, 603bf215546Sopenharmony_ci * good for floating-point decimals 604bf215546Sopenharmony_ci */ 605bf215546Sopenharmony_ci load_const_scalar_hi_19bits, 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci /* packed_value contains low 19 bits, high bits are sign-extended */ 608bf215546Sopenharmony_ci load_const_scalar_lo_19bits_sext, 609bf215546Sopenharmony_ci}; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ciunion packed_instr { 612bf215546Sopenharmony_ci uint32_t u32; 613bf215546Sopenharmony_ci struct { 614bf215546Sopenharmony_ci unsigned instr_type:4; /* always present */ 615bf215546Sopenharmony_ci unsigned _pad:20; 616bf215546Sopenharmony_ci unsigned dest:8; /* always last */ 617bf215546Sopenharmony_ci } any; 618bf215546Sopenharmony_ci struct { 619bf215546Sopenharmony_ci unsigned instr_type:4; 620bf215546Sopenharmony_ci unsigned exact:1; 621bf215546Sopenharmony_ci unsigned no_signed_wrap:1; 622bf215546Sopenharmony_ci unsigned no_unsigned_wrap:1; 623bf215546Sopenharmony_ci unsigned saturate:1; 624bf215546Sopenharmony_ci /* Reg: writemask; SSA: swizzles for 2 srcs */ 625bf215546Sopenharmony_ci unsigned writemask_or_two_swizzles:4; 626bf215546Sopenharmony_ci unsigned op:9; 627bf215546Sopenharmony_ci unsigned packed_src_ssa_16bit:1; 628bf215546Sopenharmony_ci /* Scalarized ALUs always have the same header. */ 629bf215546Sopenharmony_ci unsigned num_followup_alu_sharing_header:2; 630bf215546Sopenharmony_ci unsigned dest:8; 631bf215546Sopenharmony_ci } alu; 632bf215546Sopenharmony_ci struct { 633bf215546Sopenharmony_ci unsigned instr_type:4; 634bf215546Sopenharmony_ci unsigned deref_type:3; 635bf215546Sopenharmony_ci unsigned cast_type_same_as_last:1; 636bf215546Sopenharmony_ci unsigned modes:5; /* See (de|en)code_deref_modes() */ 637bf215546Sopenharmony_ci unsigned _pad:9; 638bf215546Sopenharmony_ci unsigned in_bounds:1; 639bf215546Sopenharmony_ci unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */ 640bf215546Sopenharmony_ci unsigned dest:8; 641bf215546Sopenharmony_ci } deref; 642bf215546Sopenharmony_ci struct { 643bf215546Sopenharmony_ci unsigned instr_type:4; 644bf215546Sopenharmony_ci unsigned deref_type:3; 645bf215546Sopenharmony_ci unsigned _pad:1; 646bf215546Sopenharmony_ci unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */ 647bf215546Sopenharmony_ci unsigned dest:8; 648bf215546Sopenharmony_ci } deref_var; 649bf215546Sopenharmony_ci struct { 650bf215546Sopenharmony_ci unsigned instr_type:4; 651bf215546Sopenharmony_ci unsigned intrinsic:10; 652bf215546Sopenharmony_ci unsigned const_indices_encoding:2; 653bf215546Sopenharmony_ci unsigned packed_const_indices:8; 654bf215546Sopenharmony_ci unsigned dest:8; 655bf215546Sopenharmony_ci } intrinsic; 656bf215546Sopenharmony_ci struct { 657bf215546Sopenharmony_ci unsigned instr_type:4; 658bf215546Sopenharmony_ci unsigned last_component:4; 659bf215546Sopenharmony_ci unsigned bit_size:3; 660bf215546Sopenharmony_ci unsigned packing:2; /* enum load_const_packing */ 661bf215546Sopenharmony_ci unsigned packed_value:19; /* meaning determined by packing */ 662bf215546Sopenharmony_ci } load_const; 663bf215546Sopenharmony_ci struct { 664bf215546Sopenharmony_ci unsigned instr_type:4; 665bf215546Sopenharmony_ci unsigned last_component:4; 666bf215546Sopenharmony_ci unsigned bit_size:3; 667bf215546Sopenharmony_ci unsigned _pad:21; 668bf215546Sopenharmony_ci } undef; 669bf215546Sopenharmony_ci struct { 670bf215546Sopenharmony_ci unsigned instr_type:4; 671bf215546Sopenharmony_ci unsigned num_srcs:4; 672bf215546Sopenharmony_ci unsigned op:5; 673bf215546Sopenharmony_ci unsigned _pad:11; 674bf215546Sopenharmony_ci unsigned dest:8; 675bf215546Sopenharmony_ci } tex; 676bf215546Sopenharmony_ci struct { 677bf215546Sopenharmony_ci unsigned instr_type:4; 678bf215546Sopenharmony_ci unsigned num_srcs:20; 679bf215546Sopenharmony_ci unsigned dest:8; 680bf215546Sopenharmony_ci } phi; 681bf215546Sopenharmony_ci struct { 682bf215546Sopenharmony_ci unsigned instr_type:4; 683bf215546Sopenharmony_ci unsigned type:2; 684bf215546Sopenharmony_ci unsigned _pad:26; 685bf215546Sopenharmony_ci } jump; 686bf215546Sopenharmony_ci}; 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci/* Write "lo24" as low 24 bits in the first uint32. */ 689bf215546Sopenharmony_cistatic void 690bf215546Sopenharmony_ciwrite_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header, 691bf215546Sopenharmony_ci nir_instr_type instr_type) 692bf215546Sopenharmony_ci{ 693bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union packed_dest) == 1); 694bf215546Sopenharmony_ci union packed_dest dest; 695bf215546Sopenharmony_ci dest.u8 = 0; 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci dest.ssa.is_ssa = dst->is_ssa; 698bf215546Sopenharmony_ci if (dst->is_ssa) { 699bf215546Sopenharmony_ci dest.ssa.num_components = 700bf215546Sopenharmony_ci encode_num_components_in_3bits(dst->ssa.num_components); 701bf215546Sopenharmony_ci dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size); 702bf215546Sopenharmony_ci dest.ssa.divergent = dst->ssa.divergent; 703bf215546Sopenharmony_ci } else { 704bf215546Sopenharmony_ci dest.reg.is_indirect = !!(dst->reg.indirect); 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci header.any.dest = dest.u8; 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci /* Check if the current ALU instruction has the same header as the previous 709bf215546Sopenharmony_ci * instruction that is also ALU. If it is, we don't have to write 710bf215546Sopenharmony_ci * the current header. This is a typical occurence after scalarization. 711bf215546Sopenharmony_ci */ 712bf215546Sopenharmony_ci if (instr_type == nir_instr_type_alu) { 713bf215546Sopenharmony_ci bool equal_header = false; 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci if (ctx->last_instr_type == nir_instr_type_alu) { 716bf215546Sopenharmony_ci assert(ctx->last_alu_header_offset); 717bf215546Sopenharmony_ci union packed_instr last_header; 718bf215546Sopenharmony_ci last_header.u32 = ctx->last_alu_header; 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci /* Clear the field that counts ALUs with equal headers. */ 721bf215546Sopenharmony_ci union packed_instr clean_header; 722bf215546Sopenharmony_ci clean_header.u32 = last_header.u32; 723bf215546Sopenharmony_ci clean_header.alu.num_followup_alu_sharing_header = 0; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci /* There can be at most 4 consecutive ALU instructions 726bf215546Sopenharmony_ci * sharing the same header. 727bf215546Sopenharmony_ci */ 728bf215546Sopenharmony_ci if (last_header.alu.num_followup_alu_sharing_header < 3 && 729bf215546Sopenharmony_ci header.u32 == clean_header.u32) { 730bf215546Sopenharmony_ci last_header.alu.num_followup_alu_sharing_header++; 731bf215546Sopenharmony_ci blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, 732bf215546Sopenharmony_ci last_header.u32); 733bf215546Sopenharmony_ci ctx->last_alu_header = last_header.u32; 734bf215546Sopenharmony_ci equal_header = true; 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci if (!equal_header) { 739bf215546Sopenharmony_ci ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob); 740bf215546Sopenharmony_ci blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32); 741bf215546Sopenharmony_ci ctx->last_alu_header = header.u32; 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci } else { 744bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci if (dest.ssa.is_ssa && 748bf215546Sopenharmony_ci dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 749bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, dst->ssa.num_components); 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci if (dst->is_ssa) { 752bf215546Sopenharmony_ci write_add_object(ctx, &dst->ssa); 753bf215546Sopenharmony_ci } else { 754bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg)); 755bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, dst->reg.base_offset); 756bf215546Sopenharmony_ci if (dst->reg.indirect) 757bf215546Sopenharmony_ci write_src(ctx, dst->reg.indirect); 758bf215546Sopenharmony_ci } 759bf215546Sopenharmony_ci} 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_cistatic void 762bf215546Sopenharmony_ciread_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr, 763bf215546Sopenharmony_ci union packed_instr header) 764bf215546Sopenharmony_ci{ 765bf215546Sopenharmony_ci union packed_dest dest; 766bf215546Sopenharmony_ci dest.u8 = header.any.dest; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci if (dest.ssa.is_ssa) { 769bf215546Sopenharmony_ci unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size); 770bf215546Sopenharmony_ci unsigned num_components; 771bf215546Sopenharmony_ci if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 772bf215546Sopenharmony_ci num_components = blob_read_uint32(ctx->blob); 773bf215546Sopenharmony_ci else 774bf215546Sopenharmony_ci num_components = decode_num_components_in_3bits(dest.ssa.num_components); 775bf215546Sopenharmony_ci nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL); 776bf215546Sopenharmony_ci dst->ssa.divergent = dest.ssa.divergent; 777bf215546Sopenharmony_ci read_add_object(ctx, &dst->ssa); 778bf215546Sopenharmony_ci } else { 779bf215546Sopenharmony_ci dst->reg.reg = read_object(ctx); 780bf215546Sopenharmony_ci dst->reg.base_offset = blob_read_uint32(ctx->blob); 781bf215546Sopenharmony_ci if (dest.reg.is_indirect) { 782bf215546Sopenharmony_ci dst->reg.indirect = malloc(sizeof(nir_src)); 783bf215546Sopenharmony_ci read_src(ctx, dst->reg.indirect, instr); 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci} 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_cistatic bool 789bf215546Sopenharmony_ciare_object_ids_16bit(write_ctx *ctx) 790bf215546Sopenharmony_ci{ 791bf215546Sopenharmony_ci /* Check the highest object ID, because they are monotonic. */ 792bf215546Sopenharmony_ci return ctx->next_idx < (1 << 16); 793bf215546Sopenharmony_ci} 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_cistatic bool 796bf215546Sopenharmony_ciis_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) 797bf215546Sopenharmony_ci{ 798bf215546Sopenharmony_ci unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 801bf215546Sopenharmony_ci if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate) 802bf215546Sopenharmony_ci return false; 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci for (unsigned chan = 0; chan < src_components; chan++) { 807bf215546Sopenharmony_ci /* The swizzles for src0.x and src1.x are stored 808bf215546Sopenharmony_ci * in writemask_or_two_swizzles for SSA ALUs. 809bf215546Sopenharmony_ci */ 810bf215546Sopenharmony_ci if (alu->dest.dest.is_ssa && i < 2 && chan == 0 && 811bf215546Sopenharmony_ci alu->src[i].swizzle[chan] < 4) 812bf215546Sopenharmony_ci continue; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci if (alu->src[i].swizzle[chan] != chan) 815bf215546Sopenharmony_ci return false; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci return are_object_ids_16bit(ctx); 820bf215546Sopenharmony_ci} 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_cistatic void 823bf215546Sopenharmony_ciwrite_alu(write_ctx *ctx, const nir_alu_instr *alu) 824bf215546Sopenharmony_ci{ 825bf215546Sopenharmony_ci unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 826bf215546Sopenharmony_ci unsigned dst_components = nir_dest_num_components(alu->dest.dest); 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci /* 9 bits for nir_op */ 829bf215546Sopenharmony_ci STATIC_ASSERT(nir_num_opcodes <= 512); 830bf215546Sopenharmony_ci union packed_instr header; 831bf215546Sopenharmony_ci header.u32 = 0; 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci header.alu.instr_type = alu->instr.type; 834bf215546Sopenharmony_ci header.alu.exact = alu->exact; 835bf215546Sopenharmony_ci header.alu.no_signed_wrap = alu->no_signed_wrap; 836bf215546Sopenharmony_ci header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; 837bf215546Sopenharmony_ci header.alu.saturate = alu->dest.saturate; 838bf215546Sopenharmony_ci header.alu.op = alu->op; 839bf215546Sopenharmony_ci header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci if (header.alu.packed_src_ssa_16bit && 842bf215546Sopenharmony_ci alu->dest.dest.is_ssa) { 843bf215546Sopenharmony_ci /* For packed srcs of SSA ALUs, this field stores the swizzles. */ 844bf215546Sopenharmony_ci header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0]; 845bf215546Sopenharmony_ci if (num_srcs > 1) 846bf215546Sopenharmony_ci header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2; 847bf215546Sopenharmony_ci } else if (!alu->dest.dest.is_ssa && dst_components <= 4) { 848bf215546Sopenharmony_ci /* For vec4 registers, this field is a writemask. */ 849bf215546Sopenharmony_ci header.alu.writemask_or_two_swizzles = alu->dest.write_mask; 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci write_dest(ctx, &alu->dest.dest, header, alu->instr.type); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci if (!alu->dest.dest.is_ssa && dst_components > 4) 855bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, alu->dest.write_mask); 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci if (header.alu.packed_src_ssa_16bit) { 858bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 859bf215546Sopenharmony_ci assert(alu->src[i].src.is_ssa); 860bf215546Sopenharmony_ci unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); 861bf215546Sopenharmony_ci assert(idx < (1 << 16)); 862bf215546Sopenharmony_ci blob_write_uint16(ctx->blob, idx); 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci } else { 865bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 866bf215546Sopenharmony_ci unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 867bf215546Sopenharmony_ci unsigned src_components = nir_src_num_components(alu->src[i].src); 868bf215546Sopenharmony_ci union packed_src src; 869bf215546Sopenharmony_ci bool packed = src_components <= 4 && src_channels <= 4; 870bf215546Sopenharmony_ci src.u32 = 0; 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci src.alu.negate = alu->src[i].negate; 873bf215546Sopenharmony_ci src.alu.abs = alu->src[i].abs; 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci if (packed) { 876bf215546Sopenharmony_ci src.alu.swizzle_x = alu->src[i].swizzle[0]; 877bf215546Sopenharmony_ci src.alu.swizzle_y = alu->src[i].swizzle[1]; 878bf215546Sopenharmony_ci src.alu.swizzle_z = alu->src[i].swizzle[2]; 879bf215546Sopenharmony_ci src.alu.swizzle_w = alu->src[i].swizzle[3]; 880bf215546Sopenharmony_ci } 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci write_src_full(ctx, &alu->src[i].src, src); 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci /* Store swizzles for vec8 and vec16. */ 885bf215546Sopenharmony_ci if (!packed) { 886bf215546Sopenharmony_ci for (unsigned o = 0; o < src_channels; o += 8) { 887bf215546Sopenharmony_ci unsigned value = 0; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 890bf215546Sopenharmony_ci value |= (uint32_t)alu->src[i].swizzle[o + j] << 891bf215546Sopenharmony_ci (4 * j); /* 4 bits per swizzle */ 892bf215546Sopenharmony_ci } 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, value); 895bf215546Sopenharmony_ci } 896bf215546Sopenharmony_ci } 897bf215546Sopenharmony_ci } 898bf215546Sopenharmony_ci } 899bf215546Sopenharmony_ci} 900bf215546Sopenharmony_ci 901bf215546Sopenharmony_cistatic nir_alu_instr * 902bf215546Sopenharmony_ciread_alu(read_ctx *ctx, union packed_instr header) 903bf215546Sopenharmony_ci{ 904bf215546Sopenharmony_ci unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; 905bf215546Sopenharmony_ci nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci alu->exact = header.alu.exact; 908bf215546Sopenharmony_ci alu->no_signed_wrap = header.alu.no_signed_wrap; 909bf215546Sopenharmony_ci alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; 910bf215546Sopenharmony_ci alu->dest.saturate = header.alu.saturate; 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci read_dest(ctx, &alu->dest.dest, &alu->instr, header); 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci unsigned dst_components = nir_dest_num_components(alu->dest.dest); 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci if (alu->dest.dest.is_ssa) { 917bf215546Sopenharmony_ci alu->dest.write_mask = u_bit_consecutive(0, dst_components); 918bf215546Sopenharmony_ci } else if (dst_components <= 4) { 919bf215546Sopenharmony_ci alu->dest.write_mask = header.alu.writemask_or_two_swizzles; 920bf215546Sopenharmony_ci } else { 921bf215546Sopenharmony_ci alu->dest.write_mask = blob_read_uint32(ctx->blob); 922bf215546Sopenharmony_ci } 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci if (header.alu.packed_src_ssa_16bit) { 925bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 926bf215546Sopenharmony_ci nir_alu_src *src = &alu->src[i]; 927bf215546Sopenharmony_ci src->src.is_ssa = true; 928bf215546Sopenharmony_ci src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci memset(&src->swizzle, 0, sizeof(src->swizzle)); 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci for (unsigned chan = 0; chan < src_components; chan++) 935bf215546Sopenharmony_ci src->swizzle[chan] = chan; 936bf215546Sopenharmony_ci } 937bf215546Sopenharmony_ci } else { 938bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 939bf215546Sopenharmony_ci union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr); 940bf215546Sopenharmony_ci unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 941bf215546Sopenharmony_ci unsigned src_components = nir_src_num_components(alu->src[i].src); 942bf215546Sopenharmony_ci bool packed = src_components <= 4 && src_channels <= 4; 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci alu->src[i].negate = src.alu.negate; 945bf215546Sopenharmony_ci alu->src[i].abs = src.alu.abs; 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci if (packed) { 950bf215546Sopenharmony_ci alu->src[i].swizzle[0] = src.alu.swizzle_x; 951bf215546Sopenharmony_ci alu->src[i].swizzle[1] = src.alu.swizzle_y; 952bf215546Sopenharmony_ci alu->src[i].swizzle[2] = src.alu.swizzle_z; 953bf215546Sopenharmony_ci alu->src[i].swizzle[3] = src.alu.swizzle_w; 954bf215546Sopenharmony_ci } else { 955bf215546Sopenharmony_ci /* Load swizzles for vec8 and vec16. */ 956bf215546Sopenharmony_ci for (unsigned o = 0; o < src_channels; o += 8) { 957bf215546Sopenharmony_ci unsigned value = blob_read_uint32(ctx->blob); 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 960bf215546Sopenharmony_ci alu->src[i].swizzle[o + j] = 961bf215546Sopenharmony_ci (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ 962bf215546Sopenharmony_ci } 963bf215546Sopenharmony_ci } 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci if (header.alu.packed_src_ssa_16bit && 969bf215546Sopenharmony_ci alu->dest.dest.is_ssa) { 970bf215546Sopenharmony_ci alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3; 971bf215546Sopenharmony_ci if (num_srcs > 1) 972bf215546Sopenharmony_ci alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2; 973bf215546Sopenharmony_ci } 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci return alu; 976bf215546Sopenharmony_ci} 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci#define MODE_ENC_GENERIC_BIT (1 << 4) 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_cistatic nir_variable_mode 981bf215546Sopenharmony_cidecode_deref_modes(unsigned modes) 982bf215546Sopenharmony_ci{ 983bf215546Sopenharmony_ci if (modes & MODE_ENC_GENERIC_BIT) { 984bf215546Sopenharmony_ci modes &= ~MODE_ENC_GENERIC_BIT; 985bf215546Sopenharmony_ci return modes << (ffs(nir_var_mem_generic) - 1); 986bf215546Sopenharmony_ci } else { 987bf215546Sopenharmony_ci return 1 << modes; 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci} 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_cistatic unsigned 992bf215546Sopenharmony_ciencode_deref_modes(nir_variable_mode modes) 993bf215546Sopenharmony_ci{ 994bf215546Sopenharmony_ci /* Mode sets on derefs generally come in two forms. For certain OpenCL 995bf215546Sopenharmony_ci * cases, we can have more than one of the generic modes set. In this 996bf215546Sopenharmony_ci * case, we need the full bitfield. Fortunately, there are only 4 of 997bf215546Sopenharmony_ci * these. For all other modes, we can only have one mode at a time so we 998bf215546Sopenharmony_ci * can compress them by only storing the bit position. This, plus one bit 999bf215546Sopenharmony_ci * to select encoding, lets us pack the entire bitfield in 5 bits. 1000bf215546Sopenharmony_ci */ 1001bf215546Sopenharmony_ci STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) < 1002bf215546Sopenharmony_ci (1 << MODE_ENC_GENERIC_BIT)); 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci unsigned enc; 1005bf215546Sopenharmony_ci if (modes == 0 || (modes & nir_var_mem_generic)) { 1006bf215546Sopenharmony_ci assert(!(modes & ~nir_var_mem_generic)); 1007bf215546Sopenharmony_ci enc = modes >> (ffs(nir_var_mem_generic) - 1); 1008bf215546Sopenharmony_ci assert(enc < MODE_ENC_GENERIC_BIT); 1009bf215546Sopenharmony_ci enc |= MODE_ENC_GENERIC_BIT; 1010bf215546Sopenharmony_ci } else { 1011bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(modes)); 1012bf215546Sopenharmony_ci enc = ffs(modes) - 1; 1013bf215546Sopenharmony_ci assert(enc < MODE_ENC_GENERIC_BIT); 1014bf215546Sopenharmony_ci } 1015bf215546Sopenharmony_ci assert(modes == decode_deref_modes(enc)); 1016bf215546Sopenharmony_ci return enc; 1017bf215546Sopenharmony_ci} 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_cistatic void 1020bf215546Sopenharmony_ciwrite_deref(write_ctx *ctx, const nir_deref_instr *deref) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci assert(deref->deref_type < 8); 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci union packed_instr header; 1025bf215546Sopenharmony_ci header.u32 = 0; 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci header.deref.instr_type = deref->instr.type; 1028bf215546Sopenharmony_ci header.deref.deref_type = deref->deref_type; 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_cast) { 1031bf215546Sopenharmony_ci header.deref.modes = encode_deref_modes(deref->modes); 1032bf215546Sopenharmony_ci header.deref.cast_type_same_as_last = deref->type == ctx->last_type; 1033bf215546Sopenharmony_ci } 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci unsigned var_idx = 0; 1036bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) { 1037bf215546Sopenharmony_ci var_idx = write_lookup_object(ctx, deref->var); 1038bf215546Sopenharmony_ci if (var_idx && var_idx < (1 << 16)) 1039bf215546Sopenharmony_ci header.deref_var.object_idx = var_idx; 1040bf215546Sopenharmony_ci } 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_array || 1043bf215546Sopenharmony_ci deref->deref_type == nir_deref_type_ptr_as_array) { 1044bf215546Sopenharmony_ci header.deref.packed_src_ssa_16bit = 1045bf215546Sopenharmony_ci deref->parent.is_ssa && deref->arr.index.is_ssa && 1046bf215546Sopenharmony_ci are_object_ids_16bit(ctx); 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci header.deref.in_bounds = deref->arr.in_bounds; 1049bf215546Sopenharmony_ci } 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci write_dest(ctx, &deref->dest, header, deref->instr.type); 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci switch (deref->deref_type) { 1054bf215546Sopenharmony_ci case nir_deref_type_var: 1055bf215546Sopenharmony_ci if (!header.deref_var.object_idx) 1056bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, var_idx); 1057bf215546Sopenharmony_ci break; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci case nir_deref_type_struct: 1060bf215546Sopenharmony_ci write_src(ctx, &deref->parent); 1061bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, deref->strct.index); 1062bf215546Sopenharmony_ci break; 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci case nir_deref_type_array: 1065bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: 1066bf215546Sopenharmony_ci if (header.deref.packed_src_ssa_16bit) { 1067bf215546Sopenharmony_ci blob_write_uint16(ctx->blob, 1068bf215546Sopenharmony_ci write_lookup_object(ctx, deref->parent.ssa)); 1069bf215546Sopenharmony_ci blob_write_uint16(ctx->blob, 1070bf215546Sopenharmony_ci write_lookup_object(ctx, deref->arr.index.ssa)); 1071bf215546Sopenharmony_ci } else { 1072bf215546Sopenharmony_ci write_src(ctx, &deref->parent); 1073bf215546Sopenharmony_ci write_src(ctx, &deref->arr.index); 1074bf215546Sopenharmony_ci } 1075bf215546Sopenharmony_ci break; 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci case nir_deref_type_cast: 1078bf215546Sopenharmony_ci write_src(ctx, &deref->parent); 1079bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 1080bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, deref->cast.align_mul); 1081bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, deref->cast.align_offset); 1082bf215546Sopenharmony_ci if (!header.deref.cast_type_same_as_last) { 1083bf215546Sopenharmony_ci encode_type_to_blob(ctx->blob, deref->type); 1084bf215546Sopenharmony_ci ctx->last_type = deref->type; 1085bf215546Sopenharmony_ci } 1086bf215546Sopenharmony_ci break; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 1089bf215546Sopenharmony_ci write_src(ctx, &deref->parent); 1090bf215546Sopenharmony_ci break; 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci default: 1093bf215546Sopenharmony_ci unreachable("Invalid deref type"); 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci} 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_cistatic nir_deref_instr * 1098bf215546Sopenharmony_ciread_deref(read_ctx *ctx, union packed_instr header) 1099bf215546Sopenharmony_ci{ 1100bf215546Sopenharmony_ci nir_deref_type deref_type = header.deref.deref_type; 1101bf215546Sopenharmony_ci nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci read_dest(ctx, &deref->dest, &deref->instr, header); 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci nir_deref_instr *parent; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci switch (deref->deref_type) { 1108bf215546Sopenharmony_ci case nir_deref_type_var: 1109bf215546Sopenharmony_ci if (header.deref_var.object_idx) 1110bf215546Sopenharmony_ci deref->var = read_lookup_object(ctx, header.deref_var.object_idx); 1111bf215546Sopenharmony_ci else 1112bf215546Sopenharmony_ci deref->var = read_object(ctx); 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci deref->type = deref->var->type; 1115bf215546Sopenharmony_ci break; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci case nir_deref_type_struct: 1118bf215546Sopenharmony_ci read_src(ctx, &deref->parent, &deref->instr); 1119bf215546Sopenharmony_ci parent = nir_src_as_deref(deref->parent); 1120bf215546Sopenharmony_ci deref->strct.index = blob_read_uint32(ctx->blob); 1121bf215546Sopenharmony_ci deref->type = glsl_get_struct_field(parent->type, deref->strct.index); 1122bf215546Sopenharmony_ci break; 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci case nir_deref_type_array: 1125bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: 1126bf215546Sopenharmony_ci if (header.deref.packed_src_ssa_16bit) { 1127bf215546Sopenharmony_ci deref->parent.is_ssa = true; 1128bf215546Sopenharmony_ci deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1129bf215546Sopenharmony_ci deref->arr.index.is_ssa = true; 1130bf215546Sopenharmony_ci deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1131bf215546Sopenharmony_ci } else { 1132bf215546Sopenharmony_ci read_src(ctx, &deref->parent, &deref->instr); 1133bf215546Sopenharmony_ci read_src(ctx, &deref->arr.index, &deref->instr); 1134bf215546Sopenharmony_ci } 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci deref->arr.in_bounds = header.deref.in_bounds; 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci parent = nir_src_as_deref(deref->parent); 1139bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_array) 1140bf215546Sopenharmony_ci deref->type = glsl_get_array_element(parent->type); 1141bf215546Sopenharmony_ci else 1142bf215546Sopenharmony_ci deref->type = parent->type; 1143bf215546Sopenharmony_ci break; 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ci case nir_deref_type_cast: 1146bf215546Sopenharmony_ci read_src(ctx, &deref->parent, &deref->instr); 1147bf215546Sopenharmony_ci deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 1148bf215546Sopenharmony_ci deref->cast.align_mul = blob_read_uint32(ctx->blob); 1149bf215546Sopenharmony_ci deref->cast.align_offset = blob_read_uint32(ctx->blob); 1150bf215546Sopenharmony_ci if (header.deref.cast_type_same_as_last) { 1151bf215546Sopenharmony_ci deref->type = ctx->last_type; 1152bf215546Sopenharmony_ci } else { 1153bf215546Sopenharmony_ci deref->type = decode_type_from_blob(ctx->blob); 1154bf215546Sopenharmony_ci ctx->last_type = deref->type; 1155bf215546Sopenharmony_ci } 1156bf215546Sopenharmony_ci break; 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 1159bf215546Sopenharmony_ci read_src(ctx, &deref->parent, &deref->instr); 1160bf215546Sopenharmony_ci parent = nir_src_as_deref(deref->parent); 1161bf215546Sopenharmony_ci deref->type = glsl_get_array_element(parent->type); 1162bf215546Sopenharmony_ci break; 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci default: 1165bf215546Sopenharmony_ci unreachable("Invalid deref type"); 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci if (deref_type == nir_deref_type_var) { 1169bf215546Sopenharmony_ci deref->modes = deref->var->data.mode; 1170bf215546Sopenharmony_ci } else if (deref->deref_type == nir_deref_type_cast) { 1171bf215546Sopenharmony_ci deref->modes = decode_deref_modes(header.deref.modes); 1172bf215546Sopenharmony_ci } else { 1173bf215546Sopenharmony_ci assert(deref->parent.is_ssa); 1174bf215546Sopenharmony_ci deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; 1175bf215546Sopenharmony_ci } 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci return deref; 1178bf215546Sopenharmony_ci} 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_cistatic void 1181bf215546Sopenharmony_ciwrite_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 1182bf215546Sopenharmony_ci{ 1183bf215546Sopenharmony_ci /* 10 bits for nir_intrinsic_op */ 1184bf215546Sopenharmony_ci STATIC_ASSERT(nir_num_intrinsics <= 1024); 1185bf215546Sopenharmony_ci unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 1186bf215546Sopenharmony_ci unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 1187bf215546Sopenharmony_ci assert(intrin->intrinsic < 1024); 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci union packed_instr header; 1190bf215546Sopenharmony_ci header.u32 = 0; 1191bf215546Sopenharmony_ci 1192bf215546Sopenharmony_ci header.intrinsic.instr_type = intrin->instr.type; 1193bf215546Sopenharmony_ci header.intrinsic.intrinsic = intrin->intrinsic; 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci /* Analyze constant indices to decide how to encode them. */ 1196bf215546Sopenharmony_ci if (num_indices) { 1197bf215546Sopenharmony_ci unsigned max_bits = 0; 1198bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) { 1199bf215546Sopenharmony_ci unsigned max = util_last_bit(intrin->const_index[i]); 1200bf215546Sopenharmony_ci max_bits = MAX2(max_bits, max); 1201bf215546Sopenharmony_ci } 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci if (max_bits * num_indices <= 8) { 1204bf215546Sopenharmony_ci header.intrinsic.const_indices_encoding = const_indices_all_combined; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci /* Pack all const indices into 8 bits. */ 1207bf215546Sopenharmony_ci unsigned bit_size = 8 / num_indices; 1208bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) { 1209bf215546Sopenharmony_ci header.intrinsic.packed_const_indices |= 1210bf215546Sopenharmony_ci intrin->const_index[i] << (i * bit_size); 1211bf215546Sopenharmony_ci } 1212bf215546Sopenharmony_ci } else if (max_bits <= 8) 1213bf215546Sopenharmony_ci header.intrinsic.const_indices_encoding = const_indices_8bit; 1214bf215546Sopenharmony_ci else if (max_bits <= 16) 1215bf215546Sopenharmony_ci header.intrinsic.const_indices_encoding = const_indices_16bit; 1216bf215546Sopenharmony_ci else 1217bf215546Sopenharmony_ci header.intrinsic.const_indices_encoding = const_indices_32bit; 1218bf215546Sopenharmony_ci } 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci if (nir_intrinsic_infos[intrin->intrinsic].has_dest) 1221bf215546Sopenharmony_ci write_dest(ctx, &intrin->dest, header, intrin->instr.type); 1222bf215546Sopenharmony_ci else 1223bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) 1226bf215546Sopenharmony_ci write_src(ctx, &intrin->src[i]); 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci if (num_indices) { 1229bf215546Sopenharmony_ci switch (header.intrinsic.const_indices_encoding) { 1230bf215546Sopenharmony_ci case const_indices_8bit: 1231bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1232bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, intrin->const_index[i]); 1233bf215546Sopenharmony_ci break; 1234bf215546Sopenharmony_ci case const_indices_16bit: 1235bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1236bf215546Sopenharmony_ci blob_write_uint16(ctx->blob, intrin->const_index[i]); 1237bf215546Sopenharmony_ci break; 1238bf215546Sopenharmony_ci case const_indices_32bit: 1239bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1240bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, intrin->const_index[i]); 1241bf215546Sopenharmony_ci break; 1242bf215546Sopenharmony_ci } 1243bf215546Sopenharmony_ci } 1244bf215546Sopenharmony_ci} 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_cistatic nir_intrinsic_instr * 1247bf215546Sopenharmony_ciread_intrinsic(read_ctx *ctx, union packed_instr header) 1248bf215546Sopenharmony_ci{ 1249bf215546Sopenharmony_ci nir_intrinsic_op op = header.intrinsic.intrinsic; 1250bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); 1251bf215546Sopenharmony_ci 1252bf215546Sopenharmony_ci unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 1253bf215546Sopenharmony_ci unsigned num_indices = nir_intrinsic_infos[op].num_indices; 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci if (nir_intrinsic_infos[op].has_dest) 1256bf215546Sopenharmony_ci read_dest(ctx, &intrin->dest, &intrin->instr, header); 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) 1259bf215546Sopenharmony_ci read_src(ctx, &intrin->src[i], &intrin->instr); 1260bf215546Sopenharmony_ci 1261bf215546Sopenharmony_ci /* Vectorized instrinsics have num_components same as dst or src that has 1262bf215546Sopenharmony_ci * 0 components in the info. Find it. 1263bf215546Sopenharmony_ci */ 1264bf215546Sopenharmony_ci if (nir_intrinsic_infos[op].has_dest && 1265bf215546Sopenharmony_ci nir_intrinsic_infos[op].dest_components == 0) { 1266bf215546Sopenharmony_ci intrin->num_components = nir_dest_num_components(intrin->dest); 1267bf215546Sopenharmony_ci } else { 1268bf215546Sopenharmony_ci for (unsigned i = 0; i < num_srcs; i++) { 1269bf215546Sopenharmony_ci if (nir_intrinsic_infos[op].src_components[i] == 0) { 1270bf215546Sopenharmony_ci intrin->num_components = nir_src_num_components(intrin->src[i]); 1271bf215546Sopenharmony_ci break; 1272bf215546Sopenharmony_ci } 1273bf215546Sopenharmony_ci } 1274bf215546Sopenharmony_ci } 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci if (num_indices) { 1277bf215546Sopenharmony_ci switch (header.intrinsic.const_indices_encoding) { 1278bf215546Sopenharmony_ci case const_indices_all_combined: { 1279bf215546Sopenharmony_ci unsigned bit_size = 8 / num_indices; 1280bf215546Sopenharmony_ci unsigned bit_mask = u_bit_consecutive(0, bit_size); 1281bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) { 1282bf215546Sopenharmony_ci intrin->const_index[i] = 1283bf215546Sopenharmony_ci (header.intrinsic.packed_const_indices >> (i * bit_size)) & 1284bf215546Sopenharmony_ci bit_mask; 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci break; 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci case const_indices_8bit: 1289bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1290bf215546Sopenharmony_ci intrin->const_index[i] = blob_read_uint8(ctx->blob); 1291bf215546Sopenharmony_ci break; 1292bf215546Sopenharmony_ci case const_indices_16bit: 1293bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1294bf215546Sopenharmony_ci intrin->const_index[i] = blob_read_uint16(ctx->blob); 1295bf215546Sopenharmony_ci break; 1296bf215546Sopenharmony_ci case const_indices_32bit: 1297bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; i++) 1298bf215546Sopenharmony_ci intrin->const_index[i] = blob_read_uint32(ctx->blob); 1299bf215546Sopenharmony_ci break; 1300bf215546Sopenharmony_ci } 1301bf215546Sopenharmony_ci } 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci return intrin; 1304bf215546Sopenharmony_ci} 1305bf215546Sopenharmony_ci 1306bf215546Sopenharmony_cistatic void 1307bf215546Sopenharmony_ciwrite_load_const(write_ctx *ctx, const nir_load_const_instr *lc) 1308bf215546Sopenharmony_ci{ 1309bf215546Sopenharmony_ci assert(lc->def.num_components >= 1 && lc->def.num_components <= 16); 1310bf215546Sopenharmony_ci union packed_instr header; 1311bf215546Sopenharmony_ci header.u32 = 0; 1312bf215546Sopenharmony_ci 1313bf215546Sopenharmony_ci header.load_const.instr_type = lc->instr.type; 1314bf215546Sopenharmony_ci header.load_const.last_component = lc->def.num_components - 1; 1315bf215546Sopenharmony_ci header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size); 1316bf215546Sopenharmony_ci header.load_const.packing = load_const_full; 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_ci /* Try to pack 1-component constants into the 19 free bits in the header. */ 1319bf215546Sopenharmony_ci if (lc->def.num_components == 1) { 1320bf215546Sopenharmony_ci switch (lc->def.bit_size) { 1321bf215546Sopenharmony_ci case 64: 1322bf215546Sopenharmony_ci if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) { 1323bf215546Sopenharmony_ci /* packed_value contains high 19 bits, low bits are 0 */ 1324bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_hi_19bits; 1325bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u64 >> 45; 1326bf215546Sopenharmony_ci } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) { 1327bf215546Sopenharmony_ci /* packed_value contains low 19 bits, high bits are sign-extended */ 1328bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_lo_19bits_sext; 1329bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u64; 1330bf215546Sopenharmony_ci } 1331bf215546Sopenharmony_ci break; 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci case 32: 1334bf215546Sopenharmony_ci if ((lc->value[0].u32 & 0x1fff) == 0) { 1335bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_hi_19bits; 1336bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u32 >> 13; 1337bf215546Sopenharmony_ci } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) { 1338bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_lo_19bits_sext; 1339bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u32; 1340bf215546Sopenharmony_ci } 1341bf215546Sopenharmony_ci break; 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci case 16: 1344bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_lo_19bits_sext; 1345bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u16; 1346bf215546Sopenharmony_ci break; 1347bf215546Sopenharmony_ci case 8: 1348bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_lo_19bits_sext; 1349bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].u8; 1350bf215546Sopenharmony_ci break; 1351bf215546Sopenharmony_ci case 1: 1352bf215546Sopenharmony_ci header.load_const.packing = load_const_scalar_lo_19bits_sext; 1353bf215546Sopenharmony_ci header.load_const.packed_value = lc->value[0].b; 1354bf215546Sopenharmony_ci break; 1355bf215546Sopenharmony_ci default: 1356bf215546Sopenharmony_ci unreachable("invalid bit_size"); 1357bf215546Sopenharmony_ci } 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 1361bf215546Sopenharmony_ci 1362bf215546Sopenharmony_ci if (header.load_const.packing == load_const_full) { 1363bf215546Sopenharmony_ci switch (lc->def.bit_size) { 1364bf215546Sopenharmony_ci case 64: 1365bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, lc->value, 1366bf215546Sopenharmony_ci sizeof(*lc->value) * lc->def.num_components); 1367bf215546Sopenharmony_ci break; 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci case 32: 1370bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1371bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, lc->value[i].u32); 1372bf215546Sopenharmony_ci break; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci case 16: 1375bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1376bf215546Sopenharmony_ci blob_write_uint16(ctx->blob, lc->value[i].u16); 1377bf215546Sopenharmony_ci break; 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci default: 1380bf215546Sopenharmony_ci assert(lc->def.bit_size <= 8); 1381bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1382bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, lc->value[i].u8); 1383bf215546Sopenharmony_ci break; 1384bf215546Sopenharmony_ci } 1385bf215546Sopenharmony_ci } 1386bf215546Sopenharmony_ci 1387bf215546Sopenharmony_ci write_add_object(ctx, &lc->def); 1388bf215546Sopenharmony_ci} 1389bf215546Sopenharmony_ci 1390bf215546Sopenharmony_cistatic nir_load_const_instr * 1391bf215546Sopenharmony_ciread_load_const(read_ctx *ctx, union packed_instr header) 1392bf215546Sopenharmony_ci{ 1393bf215546Sopenharmony_ci nir_load_const_instr *lc = 1394bf215546Sopenharmony_ci nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1, 1395bf215546Sopenharmony_ci decode_bit_size_3bits(header.load_const.bit_size)); 1396bf215546Sopenharmony_ci lc->def.divergent = false; 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci switch (header.load_const.packing) { 1399bf215546Sopenharmony_ci case load_const_scalar_hi_19bits: 1400bf215546Sopenharmony_ci switch (lc->def.bit_size) { 1401bf215546Sopenharmony_ci case 64: 1402bf215546Sopenharmony_ci lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45; 1403bf215546Sopenharmony_ci break; 1404bf215546Sopenharmony_ci case 32: 1405bf215546Sopenharmony_ci lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13; 1406bf215546Sopenharmony_ci break; 1407bf215546Sopenharmony_ci default: 1408bf215546Sopenharmony_ci unreachable("invalid bit_size"); 1409bf215546Sopenharmony_ci } 1410bf215546Sopenharmony_ci break; 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci case load_const_scalar_lo_19bits_sext: 1413bf215546Sopenharmony_ci switch (lc->def.bit_size) { 1414bf215546Sopenharmony_ci case 64: 1415bf215546Sopenharmony_ci lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45; 1416bf215546Sopenharmony_ci break; 1417bf215546Sopenharmony_ci case 32: 1418bf215546Sopenharmony_ci lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13; 1419bf215546Sopenharmony_ci break; 1420bf215546Sopenharmony_ci case 16: 1421bf215546Sopenharmony_ci lc->value[0].u16 = header.load_const.packed_value; 1422bf215546Sopenharmony_ci break; 1423bf215546Sopenharmony_ci case 8: 1424bf215546Sopenharmony_ci lc->value[0].u8 = header.load_const.packed_value; 1425bf215546Sopenharmony_ci break; 1426bf215546Sopenharmony_ci case 1: 1427bf215546Sopenharmony_ci lc->value[0].b = header.load_const.packed_value; 1428bf215546Sopenharmony_ci break; 1429bf215546Sopenharmony_ci default: 1430bf215546Sopenharmony_ci unreachable("invalid bit_size"); 1431bf215546Sopenharmony_ci } 1432bf215546Sopenharmony_ci break; 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci case load_const_full: 1435bf215546Sopenharmony_ci switch (lc->def.bit_size) { 1436bf215546Sopenharmony_ci case 64: 1437bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 1438bf215546Sopenharmony_ci break; 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci case 32: 1441bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1442bf215546Sopenharmony_ci lc->value[i].u32 = blob_read_uint32(ctx->blob); 1443bf215546Sopenharmony_ci break; 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci case 16: 1446bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1447bf215546Sopenharmony_ci lc->value[i].u16 = blob_read_uint16(ctx->blob); 1448bf215546Sopenharmony_ci break; 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_ci default: 1451bf215546Sopenharmony_ci assert(lc->def.bit_size <= 8); 1452bf215546Sopenharmony_ci for (unsigned i = 0; i < lc->def.num_components; i++) 1453bf215546Sopenharmony_ci lc->value[i].u8 = blob_read_uint8(ctx->blob); 1454bf215546Sopenharmony_ci break; 1455bf215546Sopenharmony_ci } 1456bf215546Sopenharmony_ci break; 1457bf215546Sopenharmony_ci } 1458bf215546Sopenharmony_ci 1459bf215546Sopenharmony_ci read_add_object(ctx, &lc->def); 1460bf215546Sopenharmony_ci return lc; 1461bf215546Sopenharmony_ci} 1462bf215546Sopenharmony_ci 1463bf215546Sopenharmony_cistatic void 1464bf215546Sopenharmony_ciwrite_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef) 1465bf215546Sopenharmony_ci{ 1466bf215546Sopenharmony_ci assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci union packed_instr header; 1469bf215546Sopenharmony_ci header.u32 = 0; 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci header.undef.instr_type = undef->instr.type; 1472bf215546Sopenharmony_ci header.undef.last_component = undef->def.num_components - 1; 1473bf215546Sopenharmony_ci header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size); 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 1476bf215546Sopenharmony_ci write_add_object(ctx, &undef->def); 1477bf215546Sopenharmony_ci} 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_cistatic nir_ssa_undef_instr * 1480bf215546Sopenharmony_ciread_ssa_undef(read_ctx *ctx, union packed_instr header) 1481bf215546Sopenharmony_ci{ 1482bf215546Sopenharmony_ci nir_ssa_undef_instr *undef = 1483bf215546Sopenharmony_ci nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1, 1484bf215546Sopenharmony_ci decode_bit_size_3bits(header.undef.bit_size)); 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci undef->def.divergent = false; 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci read_add_object(ctx, &undef->def); 1489bf215546Sopenharmony_ci return undef; 1490bf215546Sopenharmony_ci} 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ciunion packed_tex_data { 1493bf215546Sopenharmony_ci uint32_t u32; 1494bf215546Sopenharmony_ci struct { 1495bf215546Sopenharmony_ci unsigned sampler_dim:4; 1496bf215546Sopenharmony_ci unsigned dest_type:8; 1497bf215546Sopenharmony_ci unsigned coord_components:3; 1498bf215546Sopenharmony_ci unsigned is_array:1; 1499bf215546Sopenharmony_ci unsigned is_shadow:1; 1500bf215546Sopenharmony_ci unsigned is_new_style_shadow:1; 1501bf215546Sopenharmony_ci unsigned is_sparse:1; 1502bf215546Sopenharmony_ci unsigned component:2; 1503bf215546Sopenharmony_ci unsigned texture_non_uniform:1; 1504bf215546Sopenharmony_ci unsigned sampler_non_uniform:1; 1505bf215546Sopenharmony_ci unsigned array_is_lowered_cube:1; 1506bf215546Sopenharmony_ci unsigned unused:6; /* Mark unused for valgrind. */ 1507bf215546Sopenharmony_ci } u; 1508bf215546Sopenharmony_ci}; 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_cistatic void 1511bf215546Sopenharmony_ciwrite_tex(write_ctx *ctx, const nir_tex_instr *tex) 1512bf215546Sopenharmony_ci{ 1513bf215546Sopenharmony_ci assert(tex->num_srcs < 16); 1514bf215546Sopenharmony_ci assert(tex->op < 32); 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci union packed_instr header; 1517bf215546Sopenharmony_ci header.u32 = 0; 1518bf215546Sopenharmony_ci 1519bf215546Sopenharmony_ci header.tex.instr_type = tex->instr.type; 1520bf215546Sopenharmony_ci header.tex.num_srcs = tex->num_srcs; 1521bf215546Sopenharmony_ci header.tex.op = tex->op; 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci write_dest(ctx, &tex->dest, header, tex->instr.type); 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, tex->texture_index); 1526bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, tex->sampler_index); 1527bf215546Sopenharmony_ci if (tex->op == nir_texop_tg4) 1528bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); 1531bf215546Sopenharmony_ci union packed_tex_data packed = { 1532bf215546Sopenharmony_ci .u.sampler_dim = tex->sampler_dim, 1533bf215546Sopenharmony_ci .u.dest_type = tex->dest_type, 1534bf215546Sopenharmony_ci .u.coord_components = tex->coord_components, 1535bf215546Sopenharmony_ci .u.is_array = tex->is_array, 1536bf215546Sopenharmony_ci .u.is_shadow = tex->is_shadow, 1537bf215546Sopenharmony_ci .u.is_new_style_shadow = tex->is_new_style_shadow, 1538bf215546Sopenharmony_ci .u.is_sparse = tex->is_sparse, 1539bf215546Sopenharmony_ci .u.component = tex->component, 1540bf215546Sopenharmony_ci .u.texture_non_uniform = tex->texture_non_uniform, 1541bf215546Sopenharmony_ci .u.sampler_non_uniform = tex->sampler_non_uniform, 1542bf215546Sopenharmony_ci .u.array_is_lowered_cube = tex->array_is_lowered_cube, 1543bf215546Sopenharmony_ci }; 1544bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, packed.u32); 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 1547bf215546Sopenharmony_ci union packed_src src; 1548bf215546Sopenharmony_ci src.u32 = 0; 1549bf215546Sopenharmony_ci src.tex.src_type = tex->src[i].src_type; 1550bf215546Sopenharmony_ci write_src_full(ctx, &tex->src[i].src, src); 1551bf215546Sopenharmony_ci } 1552bf215546Sopenharmony_ci} 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_cistatic nir_tex_instr * 1555bf215546Sopenharmony_ciread_tex(read_ctx *ctx, union packed_instr header) 1556bf215546Sopenharmony_ci{ 1557bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci read_dest(ctx, &tex->dest, &tex->instr, header); 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci tex->op = header.tex.op; 1562bf215546Sopenharmony_ci tex->texture_index = blob_read_uint32(ctx->blob); 1563bf215546Sopenharmony_ci tex->sampler_index = blob_read_uint32(ctx->blob); 1564bf215546Sopenharmony_ci if (tex->op == nir_texop_tg4) 1565bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1566bf215546Sopenharmony_ci 1567bf215546Sopenharmony_ci union packed_tex_data packed; 1568bf215546Sopenharmony_ci packed.u32 = blob_read_uint32(ctx->blob); 1569bf215546Sopenharmony_ci tex->sampler_dim = packed.u.sampler_dim; 1570bf215546Sopenharmony_ci tex->dest_type = packed.u.dest_type; 1571bf215546Sopenharmony_ci tex->coord_components = packed.u.coord_components; 1572bf215546Sopenharmony_ci tex->is_array = packed.u.is_array; 1573bf215546Sopenharmony_ci tex->is_shadow = packed.u.is_shadow; 1574bf215546Sopenharmony_ci tex->is_new_style_shadow = packed.u.is_new_style_shadow; 1575bf215546Sopenharmony_ci tex->is_sparse = packed.u.is_sparse; 1576bf215546Sopenharmony_ci tex->component = packed.u.component; 1577bf215546Sopenharmony_ci tex->texture_non_uniform = packed.u.texture_non_uniform; 1578bf215546Sopenharmony_ci tex->sampler_non_uniform = packed.u.sampler_non_uniform; 1579bf215546Sopenharmony_ci tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci for (unsigned i = 0; i < tex->num_srcs; i++) { 1582bf215546Sopenharmony_ci union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr); 1583bf215546Sopenharmony_ci tex->src[i].src_type = src.tex.src_type; 1584bf215546Sopenharmony_ci } 1585bf215546Sopenharmony_ci 1586bf215546Sopenharmony_ci return tex; 1587bf215546Sopenharmony_ci} 1588bf215546Sopenharmony_ci 1589bf215546Sopenharmony_cistatic void 1590bf215546Sopenharmony_ciwrite_phi(write_ctx *ctx, const nir_phi_instr *phi) 1591bf215546Sopenharmony_ci{ 1592bf215546Sopenharmony_ci union packed_instr header; 1593bf215546Sopenharmony_ci header.u32 = 0; 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci header.phi.instr_type = phi->instr.type; 1596bf215546Sopenharmony_ci header.phi.num_srcs = exec_list_length(&phi->srcs); 1597bf215546Sopenharmony_ci 1598bf215546Sopenharmony_ci /* Phi nodes are special, since they may reference SSA definitions and 1599bf215546Sopenharmony_ci * basic blocks that don't exist yet. We leave two empty uint32_t's here, 1600bf215546Sopenharmony_ci * and then store enough information so that a later fixup pass can fill 1601bf215546Sopenharmony_ci * them in correctly. 1602bf215546Sopenharmony_ci */ 1603bf215546Sopenharmony_ci write_dest(ctx, &phi->dest, header, phi->instr.type); 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 1606bf215546Sopenharmony_ci assert(src->src.is_ssa); 1607bf215546Sopenharmony_ci size_t blob_offset = blob_reserve_uint32(ctx->blob); 1608bf215546Sopenharmony_ci ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob); 1609bf215546Sopenharmony_ci assert(blob_offset + sizeof(uint32_t) == blob_offset2); 1610bf215546Sopenharmony_ci write_phi_fixup fixup = { 1611bf215546Sopenharmony_ci .blob_offset = blob_offset, 1612bf215546Sopenharmony_ci .src = src->src.ssa, 1613bf215546Sopenharmony_ci .block = src->pred, 1614bf215546Sopenharmony_ci }; 1615bf215546Sopenharmony_ci util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); 1616bf215546Sopenharmony_ci } 1617bf215546Sopenharmony_ci} 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_cistatic void 1620bf215546Sopenharmony_ciwrite_fixup_phis(write_ctx *ctx) 1621bf215546Sopenharmony_ci{ 1622bf215546Sopenharmony_ci util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { 1623bf215546Sopenharmony_ci blob_overwrite_uint32(ctx->blob, fixup->blob_offset, 1624bf215546Sopenharmony_ci write_lookup_object(ctx, fixup->src)); 1625bf215546Sopenharmony_ci blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t), 1626bf215546Sopenharmony_ci write_lookup_object(ctx, fixup->block)); 1627bf215546Sopenharmony_ci } 1628bf215546Sopenharmony_ci 1629bf215546Sopenharmony_ci util_dynarray_clear(&ctx->phi_fixups); 1630bf215546Sopenharmony_ci} 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_cistatic nir_phi_instr * 1633bf215546Sopenharmony_ciread_phi(read_ctx *ctx, nir_block *blk, union packed_instr header) 1634bf215546Sopenharmony_ci{ 1635bf215546Sopenharmony_ci nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci read_dest(ctx, &phi->dest, &phi->instr, header); 1638bf215546Sopenharmony_ci 1639bf215546Sopenharmony_ci /* For similar reasons as before, we just store the index directly into the 1640bf215546Sopenharmony_ci * pointer, and let a later pass resolve the phi sources. 1641bf215546Sopenharmony_ci * 1642bf215546Sopenharmony_ci * In order to ensure that the copied sources (which are just the indices 1643bf215546Sopenharmony_ci * from the blob for now) don't get inserted into the old shader's use-def 1644bf215546Sopenharmony_ci * lists, we have to add the phi instruction *before* we set up its 1645bf215546Sopenharmony_ci * sources. 1646bf215546Sopenharmony_ci */ 1647bf215546Sopenharmony_ci nir_instr_insert_after_block(blk, &phi->instr); 1648bf215546Sopenharmony_ci 1649bf215546Sopenharmony_ci for (unsigned i = 0; i < header.phi.num_srcs; i++) { 1650bf215546Sopenharmony_ci nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob); 1651bf215546Sopenharmony_ci nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob); 1652bf215546Sopenharmony_ci nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def)); 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci /* Since we're not letting nir_insert_instr handle use/def stuff for us, 1655bf215546Sopenharmony_ci * we have to set the parent_instr manually. It doesn't really matter 1656bf215546Sopenharmony_ci * when we do it, so we might as well do it here. 1657bf215546Sopenharmony_ci */ 1658bf215546Sopenharmony_ci src->src.parent_instr = &phi->instr; 1659bf215546Sopenharmony_ci 1660bf215546Sopenharmony_ci /* Stash it in the list of phi sources. We'll walk this list and fix up 1661bf215546Sopenharmony_ci * sources at the very end of read_function_impl. 1662bf215546Sopenharmony_ci */ 1663bf215546Sopenharmony_ci list_add(&src->src.use_link, &ctx->phi_srcs); 1664bf215546Sopenharmony_ci } 1665bf215546Sopenharmony_ci 1666bf215546Sopenharmony_ci return phi; 1667bf215546Sopenharmony_ci} 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_cistatic void 1670bf215546Sopenharmony_ciread_fixup_phis(read_ctx *ctx) 1671bf215546Sopenharmony_ci{ 1672bf215546Sopenharmony_ci list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { 1673bf215546Sopenharmony_ci src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); 1674bf215546Sopenharmony_ci src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); 1675bf215546Sopenharmony_ci 1676bf215546Sopenharmony_ci /* Remove from this list */ 1677bf215546Sopenharmony_ci list_del(&src->src.use_link); 1678bf215546Sopenharmony_ci 1679bf215546Sopenharmony_ci list_addtail(&src->src.use_link, &src->src.ssa->uses); 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci assert(list_is_empty(&ctx->phi_srcs)); 1682bf215546Sopenharmony_ci} 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_cistatic void 1685bf215546Sopenharmony_ciwrite_jump(write_ctx *ctx, const nir_jump_instr *jmp) 1686bf215546Sopenharmony_ci{ 1687bf215546Sopenharmony_ci /* These aren't handled because they require special block linking */ 1688bf215546Sopenharmony_ci assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if); 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_ci assert(jmp->type < 4); 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_ci union packed_instr header; 1693bf215546Sopenharmony_ci header.u32 = 0; 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_ci header.jump.instr_type = jmp->instr.type; 1696bf215546Sopenharmony_ci header.jump.type = jmp->type; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, header.u32); 1699bf215546Sopenharmony_ci} 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_cistatic nir_jump_instr * 1702bf215546Sopenharmony_ciread_jump(read_ctx *ctx, union packed_instr header) 1703bf215546Sopenharmony_ci{ 1704bf215546Sopenharmony_ci /* These aren't handled because they require special block linking */ 1705bf215546Sopenharmony_ci assert(header.jump.type != nir_jump_goto && 1706bf215546Sopenharmony_ci header.jump.type != nir_jump_goto_if); 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_ci nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); 1709bf215546Sopenharmony_ci return jmp; 1710bf215546Sopenharmony_ci} 1711bf215546Sopenharmony_ci 1712bf215546Sopenharmony_cistatic void 1713bf215546Sopenharmony_ciwrite_call(write_ctx *ctx, const nir_call_instr *call) 1714bf215546Sopenharmony_ci{ 1715bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci for (unsigned i = 0; i < call->num_params; i++) 1718bf215546Sopenharmony_ci write_src(ctx, &call->params[i]); 1719bf215546Sopenharmony_ci} 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_cistatic nir_call_instr * 1722bf215546Sopenharmony_ciread_call(read_ctx *ctx) 1723bf215546Sopenharmony_ci{ 1724bf215546Sopenharmony_ci nir_function *callee = read_object(ctx); 1725bf215546Sopenharmony_ci nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci for (unsigned i = 0; i < call->num_params; i++) 1728bf215546Sopenharmony_ci read_src(ctx, &call->params[i], call); 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci return call; 1731bf215546Sopenharmony_ci} 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_cistatic void 1734bf215546Sopenharmony_ciwrite_instr(write_ctx *ctx, const nir_instr *instr) 1735bf215546Sopenharmony_ci{ 1736bf215546Sopenharmony_ci /* We have only 4 bits for the instruction type. */ 1737bf215546Sopenharmony_ci assert(instr->type < 16); 1738bf215546Sopenharmony_ci 1739bf215546Sopenharmony_ci switch (instr->type) { 1740bf215546Sopenharmony_ci case nir_instr_type_alu: 1741bf215546Sopenharmony_ci write_alu(ctx, nir_instr_as_alu(instr)); 1742bf215546Sopenharmony_ci break; 1743bf215546Sopenharmony_ci case nir_instr_type_deref: 1744bf215546Sopenharmony_ci write_deref(ctx, nir_instr_as_deref(instr)); 1745bf215546Sopenharmony_ci break; 1746bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1747bf215546Sopenharmony_ci write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 1748bf215546Sopenharmony_ci break; 1749bf215546Sopenharmony_ci case nir_instr_type_load_const: 1750bf215546Sopenharmony_ci write_load_const(ctx, nir_instr_as_load_const(instr)); 1751bf215546Sopenharmony_ci break; 1752bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 1753bf215546Sopenharmony_ci write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 1754bf215546Sopenharmony_ci break; 1755bf215546Sopenharmony_ci case nir_instr_type_tex: 1756bf215546Sopenharmony_ci write_tex(ctx, nir_instr_as_tex(instr)); 1757bf215546Sopenharmony_ci break; 1758bf215546Sopenharmony_ci case nir_instr_type_phi: 1759bf215546Sopenharmony_ci write_phi(ctx, nir_instr_as_phi(instr)); 1760bf215546Sopenharmony_ci break; 1761bf215546Sopenharmony_ci case nir_instr_type_jump: 1762bf215546Sopenharmony_ci write_jump(ctx, nir_instr_as_jump(instr)); 1763bf215546Sopenharmony_ci break; 1764bf215546Sopenharmony_ci case nir_instr_type_call: 1765bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, instr->type); 1766bf215546Sopenharmony_ci write_call(ctx, nir_instr_as_call(instr)); 1767bf215546Sopenharmony_ci break; 1768bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 1769bf215546Sopenharmony_ci unreachable("Cannot write parallel copies"); 1770bf215546Sopenharmony_ci default: 1771bf215546Sopenharmony_ci unreachable("bad instr type"); 1772bf215546Sopenharmony_ci } 1773bf215546Sopenharmony_ci} 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci/* Return the number of instructions read. */ 1776bf215546Sopenharmony_cistatic unsigned 1777bf215546Sopenharmony_ciread_instr(read_ctx *ctx, nir_block *block) 1778bf215546Sopenharmony_ci{ 1779bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union packed_instr) == 4); 1780bf215546Sopenharmony_ci union packed_instr header; 1781bf215546Sopenharmony_ci header.u32 = blob_read_uint32(ctx->blob); 1782bf215546Sopenharmony_ci nir_instr *instr; 1783bf215546Sopenharmony_ci 1784bf215546Sopenharmony_ci switch (header.any.instr_type) { 1785bf215546Sopenharmony_ci case nir_instr_type_alu: 1786bf215546Sopenharmony_ci for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) 1787bf215546Sopenharmony_ci nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); 1788bf215546Sopenharmony_ci return header.alu.num_followup_alu_sharing_header + 1; 1789bf215546Sopenharmony_ci case nir_instr_type_deref: 1790bf215546Sopenharmony_ci instr = &read_deref(ctx, header)->instr; 1791bf215546Sopenharmony_ci break; 1792bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1793bf215546Sopenharmony_ci instr = &read_intrinsic(ctx, header)->instr; 1794bf215546Sopenharmony_ci break; 1795bf215546Sopenharmony_ci case nir_instr_type_load_const: 1796bf215546Sopenharmony_ci instr = &read_load_const(ctx, header)->instr; 1797bf215546Sopenharmony_ci break; 1798bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 1799bf215546Sopenharmony_ci instr = &read_ssa_undef(ctx, header)->instr; 1800bf215546Sopenharmony_ci break; 1801bf215546Sopenharmony_ci case nir_instr_type_tex: 1802bf215546Sopenharmony_ci instr = &read_tex(ctx, header)->instr; 1803bf215546Sopenharmony_ci break; 1804bf215546Sopenharmony_ci case nir_instr_type_phi: 1805bf215546Sopenharmony_ci /* Phi instructions are a bit of a special case when reading because we 1806bf215546Sopenharmony_ci * don't want inserting the instruction to automatically handle use/defs 1807bf215546Sopenharmony_ci * for us. Instead, we need to wait until all the blocks/instructions 1808bf215546Sopenharmony_ci * are read so that we can set their sources up. 1809bf215546Sopenharmony_ci */ 1810bf215546Sopenharmony_ci read_phi(ctx, block, header); 1811bf215546Sopenharmony_ci return 1; 1812bf215546Sopenharmony_ci case nir_instr_type_jump: 1813bf215546Sopenharmony_ci instr = &read_jump(ctx, header)->instr; 1814bf215546Sopenharmony_ci break; 1815bf215546Sopenharmony_ci case nir_instr_type_call: 1816bf215546Sopenharmony_ci instr = &read_call(ctx)->instr; 1817bf215546Sopenharmony_ci break; 1818bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 1819bf215546Sopenharmony_ci unreachable("Cannot read parallel copies"); 1820bf215546Sopenharmony_ci default: 1821bf215546Sopenharmony_ci unreachable("bad instr type"); 1822bf215546Sopenharmony_ci } 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci nir_instr_insert_after_block(block, instr); 1825bf215546Sopenharmony_ci return 1; 1826bf215546Sopenharmony_ci} 1827bf215546Sopenharmony_ci 1828bf215546Sopenharmony_cistatic void 1829bf215546Sopenharmony_ciwrite_block(write_ctx *ctx, const nir_block *block) 1830bf215546Sopenharmony_ci{ 1831bf215546Sopenharmony_ci write_add_object(ctx, block); 1832bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci ctx->last_instr_type = ~0; 1835bf215546Sopenharmony_ci ctx->last_alu_header_offset = 0; 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1838bf215546Sopenharmony_ci write_instr(ctx, instr); 1839bf215546Sopenharmony_ci ctx->last_instr_type = instr->type; 1840bf215546Sopenharmony_ci } 1841bf215546Sopenharmony_ci} 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_cistatic void 1844bf215546Sopenharmony_ciread_block(read_ctx *ctx, struct exec_list *cf_list) 1845bf215546Sopenharmony_ci{ 1846bf215546Sopenharmony_ci /* Don't actually create a new block. Just use the one from the tail of 1847bf215546Sopenharmony_ci * the list. NIR guarantees that the tail of the list is a block and that 1848bf215546Sopenharmony_ci * no two blocks are side-by-side in the IR; It should be empty. 1849bf215546Sopenharmony_ci */ 1850bf215546Sopenharmony_ci nir_block *block = 1851bf215546Sopenharmony_ci exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci read_add_object(ctx, block); 1854bf215546Sopenharmony_ci unsigned num_instrs = blob_read_uint32(ctx->blob); 1855bf215546Sopenharmony_ci for (unsigned i = 0; i < num_instrs;) { 1856bf215546Sopenharmony_ci i += read_instr(ctx, block); 1857bf215546Sopenharmony_ci } 1858bf215546Sopenharmony_ci} 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_cistatic void 1861bf215546Sopenharmony_ciwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list); 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_cistatic void 1864bf215546Sopenharmony_ciread_cf_list(read_ctx *ctx, struct exec_list *cf_list); 1865bf215546Sopenharmony_ci 1866bf215546Sopenharmony_cistatic void 1867bf215546Sopenharmony_ciwrite_if(write_ctx *ctx, nir_if *nif) 1868bf215546Sopenharmony_ci{ 1869bf215546Sopenharmony_ci write_src(ctx, &nif->condition); 1870bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, nif->control); 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_ci write_cf_list(ctx, &nif->then_list); 1873bf215546Sopenharmony_ci write_cf_list(ctx, &nif->else_list); 1874bf215546Sopenharmony_ci} 1875bf215546Sopenharmony_ci 1876bf215546Sopenharmony_cistatic void 1877bf215546Sopenharmony_ciread_if(read_ctx *ctx, struct exec_list *cf_list) 1878bf215546Sopenharmony_ci{ 1879bf215546Sopenharmony_ci nir_if *nif = nir_if_create(ctx->nir); 1880bf215546Sopenharmony_ci 1881bf215546Sopenharmony_ci read_src(ctx, &nif->condition, nif); 1882bf215546Sopenharmony_ci nif->control = blob_read_uint8(ctx->blob); 1883bf215546Sopenharmony_ci 1884bf215546Sopenharmony_ci nir_cf_node_insert_end(cf_list, &nif->cf_node); 1885bf215546Sopenharmony_ci 1886bf215546Sopenharmony_ci read_cf_list(ctx, &nif->then_list); 1887bf215546Sopenharmony_ci read_cf_list(ctx, &nif->else_list); 1888bf215546Sopenharmony_ci} 1889bf215546Sopenharmony_ci 1890bf215546Sopenharmony_cistatic void 1891bf215546Sopenharmony_ciwrite_loop(write_ctx *ctx, nir_loop *loop) 1892bf215546Sopenharmony_ci{ 1893bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, loop->control); 1894bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, loop->divergent); 1895bf215546Sopenharmony_ci write_cf_list(ctx, &loop->body); 1896bf215546Sopenharmony_ci} 1897bf215546Sopenharmony_ci 1898bf215546Sopenharmony_cistatic void 1899bf215546Sopenharmony_ciread_loop(read_ctx *ctx, struct exec_list *cf_list) 1900bf215546Sopenharmony_ci{ 1901bf215546Sopenharmony_ci nir_loop *loop = nir_loop_create(ctx->nir); 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci nir_cf_node_insert_end(cf_list, &loop->cf_node); 1904bf215546Sopenharmony_ci 1905bf215546Sopenharmony_ci loop->control = blob_read_uint8(ctx->blob); 1906bf215546Sopenharmony_ci loop->divergent = blob_read_uint8(ctx->blob); 1907bf215546Sopenharmony_ci read_cf_list(ctx, &loop->body); 1908bf215546Sopenharmony_ci} 1909bf215546Sopenharmony_ci 1910bf215546Sopenharmony_cistatic void 1911bf215546Sopenharmony_ciwrite_cf_node(write_ctx *ctx, nir_cf_node *cf) 1912bf215546Sopenharmony_ci{ 1913bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, cf->type); 1914bf215546Sopenharmony_ci 1915bf215546Sopenharmony_ci switch (cf->type) { 1916bf215546Sopenharmony_ci case nir_cf_node_block: 1917bf215546Sopenharmony_ci write_block(ctx, nir_cf_node_as_block(cf)); 1918bf215546Sopenharmony_ci break; 1919bf215546Sopenharmony_ci case nir_cf_node_if: 1920bf215546Sopenharmony_ci write_if(ctx, nir_cf_node_as_if(cf)); 1921bf215546Sopenharmony_ci break; 1922bf215546Sopenharmony_ci case nir_cf_node_loop: 1923bf215546Sopenharmony_ci write_loop(ctx, nir_cf_node_as_loop(cf)); 1924bf215546Sopenharmony_ci break; 1925bf215546Sopenharmony_ci default: 1926bf215546Sopenharmony_ci unreachable("bad cf type"); 1927bf215546Sopenharmony_ci } 1928bf215546Sopenharmony_ci} 1929bf215546Sopenharmony_ci 1930bf215546Sopenharmony_cistatic void 1931bf215546Sopenharmony_ciread_cf_node(read_ctx *ctx, struct exec_list *list) 1932bf215546Sopenharmony_ci{ 1933bf215546Sopenharmony_ci nir_cf_node_type type = blob_read_uint32(ctx->blob); 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci switch (type) { 1936bf215546Sopenharmony_ci case nir_cf_node_block: 1937bf215546Sopenharmony_ci read_block(ctx, list); 1938bf215546Sopenharmony_ci break; 1939bf215546Sopenharmony_ci case nir_cf_node_if: 1940bf215546Sopenharmony_ci read_if(ctx, list); 1941bf215546Sopenharmony_ci break; 1942bf215546Sopenharmony_ci case nir_cf_node_loop: 1943bf215546Sopenharmony_ci read_loop(ctx, list); 1944bf215546Sopenharmony_ci break; 1945bf215546Sopenharmony_ci default: 1946bf215546Sopenharmony_ci unreachable("bad cf type"); 1947bf215546Sopenharmony_ci } 1948bf215546Sopenharmony_ci} 1949bf215546Sopenharmony_ci 1950bf215546Sopenharmony_cistatic void 1951bf215546Sopenharmony_ciwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list) 1952bf215546Sopenharmony_ci{ 1953bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, exec_list_length(cf_list)); 1954bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, cf, node, cf_list) { 1955bf215546Sopenharmony_ci write_cf_node(ctx, cf); 1956bf215546Sopenharmony_ci } 1957bf215546Sopenharmony_ci} 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_cistatic void 1960bf215546Sopenharmony_ciread_cf_list(read_ctx *ctx, struct exec_list *cf_list) 1961bf215546Sopenharmony_ci{ 1962bf215546Sopenharmony_ci uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); 1963bf215546Sopenharmony_ci for (unsigned i = 0; i < num_cf_nodes; i++) 1964bf215546Sopenharmony_ci read_cf_node(ctx, cf_list); 1965bf215546Sopenharmony_ci} 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_cistatic void 1968bf215546Sopenharmony_ciwrite_function_impl(write_ctx *ctx, const nir_function_impl *fi) 1969bf215546Sopenharmony_ci{ 1970bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, fi->structured); 1971bf215546Sopenharmony_ci blob_write_uint8(ctx->blob, !!fi->preamble); 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_ci if (fi->preamble) 1974bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble)); 1975bf215546Sopenharmony_ci 1976bf215546Sopenharmony_ci write_var_list(ctx, &fi->locals); 1977bf215546Sopenharmony_ci write_reg_list(ctx, &fi->registers); 1978bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, fi->reg_alloc); 1979bf215546Sopenharmony_ci 1980bf215546Sopenharmony_ci write_cf_list(ctx, &fi->body); 1981bf215546Sopenharmony_ci write_fixup_phis(ctx); 1982bf215546Sopenharmony_ci} 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_cistatic nir_function_impl * 1985bf215546Sopenharmony_ciread_function_impl(read_ctx *ctx, nir_function *fxn) 1986bf215546Sopenharmony_ci{ 1987bf215546Sopenharmony_ci nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); 1988bf215546Sopenharmony_ci fi->function = fxn; 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ci fi->structured = blob_read_uint8(ctx->blob); 1991bf215546Sopenharmony_ci bool preamble = blob_read_uint8(ctx->blob); 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci if (preamble) 1994bf215546Sopenharmony_ci fi->preamble = read_object(ctx); 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci read_var_list(ctx, &fi->locals); 1997bf215546Sopenharmony_ci read_reg_list(ctx, &fi->registers); 1998bf215546Sopenharmony_ci fi->reg_alloc = blob_read_uint32(ctx->blob); 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci read_cf_list(ctx, &fi->body); 2001bf215546Sopenharmony_ci read_fixup_phis(ctx); 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci fi->valid_metadata = 0; 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci return fi; 2006bf215546Sopenharmony_ci} 2007bf215546Sopenharmony_ci 2008bf215546Sopenharmony_cistatic void 2009bf215546Sopenharmony_ciwrite_function(write_ctx *ctx, const nir_function *fxn) 2010bf215546Sopenharmony_ci{ 2011bf215546Sopenharmony_ci uint32_t flags = 0; 2012bf215546Sopenharmony_ci if (fxn->is_entrypoint) 2013bf215546Sopenharmony_ci flags |= 0x1; 2014bf215546Sopenharmony_ci if (fxn->is_preamble) 2015bf215546Sopenharmony_ci flags |= 0x2; 2016bf215546Sopenharmony_ci if (fxn->name) 2017bf215546Sopenharmony_ci flags |= 0x4; 2018bf215546Sopenharmony_ci if (fxn->impl) 2019bf215546Sopenharmony_ci flags |= 0x8; 2020bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, flags); 2021bf215546Sopenharmony_ci if (fxn->name) 2022bf215546Sopenharmony_ci blob_write_string(ctx->blob, fxn->name); 2023bf215546Sopenharmony_ci 2024bf215546Sopenharmony_ci write_add_object(ctx, fxn); 2025bf215546Sopenharmony_ci 2026bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, fxn->num_params); 2027bf215546Sopenharmony_ci for (unsigned i = 0; i < fxn->num_params; i++) { 2028bf215546Sopenharmony_ci uint32_t val = 2029bf215546Sopenharmony_ci ((uint32_t)fxn->params[i].num_components) | 2030bf215546Sopenharmony_ci ((uint32_t)fxn->params[i].bit_size) << 8; 2031bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, val); 2032bf215546Sopenharmony_ci } 2033bf215546Sopenharmony_ci 2034bf215546Sopenharmony_ci /* At first glance, it looks like we should write the function_impl here. 2035bf215546Sopenharmony_ci * However, call instructions need to be able to reference at least the 2036bf215546Sopenharmony_ci * function and those will get processed as we write the function_impls. 2037bf215546Sopenharmony_ci * We stop here and write function_impls as a second pass. 2038bf215546Sopenharmony_ci */ 2039bf215546Sopenharmony_ci} 2040bf215546Sopenharmony_ci 2041bf215546Sopenharmony_cistatic void 2042bf215546Sopenharmony_ciread_function(read_ctx *ctx) 2043bf215546Sopenharmony_ci{ 2044bf215546Sopenharmony_ci uint32_t flags = blob_read_uint32(ctx->blob); 2045bf215546Sopenharmony_ci bool has_name = flags & 0x4; 2046bf215546Sopenharmony_ci char *name = has_name ? blob_read_string(ctx->blob) : NULL; 2047bf215546Sopenharmony_ci 2048bf215546Sopenharmony_ci nir_function *fxn = nir_function_create(ctx->nir, name); 2049bf215546Sopenharmony_ci 2050bf215546Sopenharmony_ci read_add_object(ctx, fxn); 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_ci fxn->num_params = blob_read_uint32(ctx->blob); 2053bf215546Sopenharmony_ci fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params); 2054bf215546Sopenharmony_ci for (unsigned i = 0; i < fxn->num_params; i++) { 2055bf215546Sopenharmony_ci uint32_t val = blob_read_uint32(ctx->blob); 2056bf215546Sopenharmony_ci fxn->params[i].num_components = val & 0xff; 2057bf215546Sopenharmony_ci fxn->params[i].bit_size = (val >> 8) & 0xff; 2058bf215546Sopenharmony_ci } 2059bf215546Sopenharmony_ci 2060bf215546Sopenharmony_ci fxn->is_entrypoint = flags & 0x1; 2061bf215546Sopenharmony_ci fxn->is_preamble = flags & 0x2; 2062bf215546Sopenharmony_ci if (flags & 0x8) 2063bf215546Sopenharmony_ci fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL; 2064bf215546Sopenharmony_ci} 2065bf215546Sopenharmony_ci 2066bf215546Sopenharmony_cistatic void 2067bf215546Sopenharmony_ciwrite_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb) 2068bf215546Sopenharmony_ci{ 2069bf215546Sopenharmony_ci if (xfb == NULL) { 2070bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, 0); 2071bf215546Sopenharmony_ci } else { 2072bf215546Sopenharmony_ci size_t size = nir_xfb_info_size(xfb->output_count); 2073bf215546Sopenharmony_ci assert(size <= UINT32_MAX); 2074bf215546Sopenharmony_ci blob_write_uint32(ctx->blob, size); 2075bf215546Sopenharmony_ci blob_write_bytes(ctx->blob, xfb, size); 2076bf215546Sopenharmony_ci } 2077bf215546Sopenharmony_ci} 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_cistatic nir_xfb_info * 2080bf215546Sopenharmony_ciread_xfb_info(read_ctx *ctx) 2081bf215546Sopenharmony_ci{ 2082bf215546Sopenharmony_ci uint32_t size = blob_read_uint32(ctx->blob); 2083bf215546Sopenharmony_ci if (size == 0) 2084bf215546Sopenharmony_ci return NULL; 2085bf215546Sopenharmony_ci 2086bf215546Sopenharmony_ci struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size); 2087bf215546Sopenharmony_ci blob_copy_bytes(ctx->blob, (void *)xfb, size); 2088bf215546Sopenharmony_ci 2089bf215546Sopenharmony_ci return xfb; 2090bf215546Sopenharmony_ci} 2091bf215546Sopenharmony_ci 2092bf215546Sopenharmony_ci/** 2093bf215546Sopenharmony_ci * Serialize NIR into a binary blob. 2094bf215546Sopenharmony_ci * 2095bf215546Sopenharmony_ci * \param strip Don't serialize information only useful for debugging, 2096bf215546Sopenharmony_ci * such as variable names, making cache hits from similar 2097bf215546Sopenharmony_ci * shaders more likely. 2098bf215546Sopenharmony_ci */ 2099bf215546Sopenharmony_civoid 2100bf215546Sopenharmony_cinir_serialize(struct blob *blob, const nir_shader *nir, bool strip) 2101bf215546Sopenharmony_ci{ 2102bf215546Sopenharmony_ci write_ctx ctx = {0}; 2103bf215546Sopenharmony_ci ctx.remap_table = _mesa_pointer_hash_table_create(NULL); 2104bf215546Sopenharmony_ci ctx.blob = blob; 2105bf215546Sopenharmony_ci ctx.nir = nir; 2106bf215546Sopenharmony_ci ctx.strip = strip; 2107bf215546Sopenharmony_ci util_dynarray_init(&ctx.phi_fixups, NULL); 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_ci size_t idx_size_offset = blob_reserve_uint32(blob); 2110bf215546Sopenharmony_ci 2111bf215546Sopenharmony_ci struct shader_info info = nir->info; 2112bf215546Sopenharmony_ci uint32_t strings = 0; 2113bf215546Sopenharmony_ci if (!strip && info.name) 2114bf215546Sopenharmony_ci strings |= 0x1; 2115bf215546Sopenharmony_ci if (!strip && info.label) 2116bf215546Sopenharmony_ci strings |= 0x2; 2117bf215546Sopenharmony_ci blob_write_uint32(blob, strings); 2118bf215546Sopenharmony_ci if (!strip && info.name) 2119bf215546Sopenharmony_ci blob_write_string(blob, info.name); 2120bf215546Sopenharmony_ci if (!strip && info.label) 2121bf215546Sopenharmony_ci blob_write_string(blob, info.label); 2122bf215546Sopenharmony_ci info.name = info.label = NULL; 2123bf215546Sopenharmony_ci blob_write_bytes(blob, (uint8_t *) &info, sizeof(info)); 2124bf215546Sopenharmony_ci 2125bf215546Sopenharmony_ci write_var_list(&ctx, &nir->variables); 2126bf215546Sopenharmony_ci 2127bf215546Sopenharmony_ci blob_write_uint32(blob, nir->num_inputs); 2128bf215546Sopenharmony_ci blob_write_uint32(blob, nir->num_uniforms); 2129bf215546Sopenharmony_ci blob_write_uint32(blob, nir->num_outputs); 2130bf215546Sopenharmony_ci blob_write_uint32(blob, nir->scratch_size); 2131bf215546Sopenharmony_ci 2132bf215546Sopenharmony_ci blob_write_uint32(blob, exec_list_length(&nir->functions)); 2133bf215546Sopenharmony_ci nir_foreach_function(fxn, nir) { 2134bf215546Sopenharmony_ci write_function(&ctx, fxn); 2135bf215546Sopenharmony_ci } 2136bf215546Sopenharmony_ci 2137bf215546Sopenharmony_ci nir_foreach_function(fxn, nir) { 2138bf215546Sopenharmony_ci if (fxn->impl) 2139bf215546Sopenharmony_ci write_function_impl(&ctx, fxn->impl); 2140bf215546Sopenharmony_ci } 2141bf215546Sopenharmony_ci 2142bf215546Sopenharmony_ci blob_write_uint32(blob, nir->constant_data_size); 2143bf215546Sopenharmony_ci if (nir->constant_data_size > 0) 2144bf215546Sopenharmony_ci blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); 2145bf215546Sopenharmony_ci 2146bf215546Sopenharmony_ci write_xfb_info(&ctx, nir->xfb_info); 2147bf215546Sopenharmony_ci 2148bf215546Sopenharmony_ci blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx); 2149bf215546Sopenharmony_ci 2150bf215546Sopenharmony_ci _mesa_hash_table_destroy(ctx.remap_table, NULL); 2151bf215546Sopenharmony_ci util_dynarray_fini(&ctx.phi_fixups); 2152bf215546Sopenharmony_ci} 2153bf215546Sopenharmony_ci 2154bf215546Sopenharmony_cinir_shader * 2155bf215546Sopenharmony_cinir_deserialize(void *mem_ctx, 2156bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options, 2157bf215546Sopenharmony_ci struct blob_reader *blob) 2158bf215546Sopenharmony_ci{ 2159bf215546Sopenharmony_ci read_ctx ctx = {0}; 2160bf215546Sopenharmony_ci ctx.blob = blob; 2161bf215546Sopenharmony_ci list_inithead(&ctx.phi_srcs); 2162bf215546Sopenharmony_ci ctx.idx_table_len = blob_read_uint32(blob); 2163bf215546Sopenharmony_ci ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); 2164bf215546Sopenharmony_ci 2165bf215546Sopenharmony_ci uint32_t strings = blob_read_uint32(blob); 2166bf215546Sopenharmony_ci char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; 2167bf215546Sopenharmony_ci char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; 2168bf215546Sopenharmony_ci 2169bf215546Sopenharmony_ci struct shader_info info; 2170bf215546Sopenharmony_ci blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info)); 2171bf215546Sopenharmony_ci 2172bf215546Sopenharmony_ci ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); 2173bf215546Sopenharmony_ci 2174bf215546Sopenharmony_ci info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; 2175bf215546Sopenharmony_ci info.label = label ? ralloc_strdup(ctx.nir, label) : NULL; 2176bf215546Sopenharmony_ci 2177bf215546Sopenharmony_ci ctx.nir->info = info; 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_ci read_var_list(&ctx, &ctx.nir->variables); 2180bf215546Sopenharmony_ci 2181bf215546Sopenharmony_ci ctx.nir->num_inputs = blob_read_uint32(blob); 2182bf215546Sopenharmony_ci ctx.nir->num_uniforms = blob_read_uint32(blob); 2183bf215546Sopenharmony_ci ctx.nir->num_outputs = blob_read_uint32(blob); 2184bf215546Sopenharmony_ci ctx.nir->scratch_size = blob_read_uint32(blob); 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_ci unsigned num_functions = blob_read_uint32(blob); 2187bf215546Sopenharmony_ci for (unsigned i = 0; i < num_functions; i++) 2188bf215546Sopenharmony_ci read_function(&ctx); 2189bf215546Sopenharmony_ci 2190bf215546Sopenharmony_ci nir_foreach_function(fxn, ctx.nir) { 2191bf215546Sopenharmony_ci if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL) 2192bf215546Sopenharmony_ci fxn->impl = read_function_impl(&ctx, fxn); 2193bf215546Sopenharmony_ci } 2194bf215546Sopenharmony_ci 2195bf215546Sopenharmony_ci ctx.nir->constant_data_size = blob_read_uint32(blob); 2196bf215546Sopenharmony_ci if (ctx.nir->constant_data_size > 0) { 2197bf215546Sopenharmony_ci ctx.nir->constant_data = 2198bf215546Sopenharmony_ci ralloc_size(ctx.nir, ctx.nir->constant_data_size); 2199bf215546Sopenharmony_ci blob_copy_bytes(blob, ctx.nir->constant_data, 2200bf215546Sopenharmony_ci ctx.nir->constant_data_size); 2201bf215546Sopenharmony_ci } 2202bf215546Sopenharmony_ci 2203bf215546Sopenharmony_ci ctx.nir->xfb_info = read_xfb_info(&ctx); 2204bf215546Sopenharmony_ci 2205bf215546Sopenharmony_ci free(ctx.idx_table); 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_ci nir_validate_shader(ctx.nir, "after deserialize"); 2208bf215546Sopenharmony_ci 2209bf215546Sopenharmony_ci return ctx.nir; 2210bf215546Sopenharmony_ci} 2211bf215546Sopenharmony_ci 2212bf215546Sopenharmony_civoid 2213bf215546Sopenharmony_cinir_shader_serialize_deserialize(nir_shader *shader) 2214bf215546Sopenharmony_ci{ 2215bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options = shader->options; 2216bf215546Sopenharmony_ci 2217bf215546Sopenharmony_ci struct blob writer; 2218bf215546Sopenharmony_ci blob_init(&writer); 2219bf215546Sopenharmony_ci nir_serialize(&writer, shader, false); 2220bf215546Sopenharmony_ci 2221bf215546Sopenharmony_ci /* Delete all of dest's ralloc children but leave dest alone */ 2222bf215546Sopenharmony_ci void *dead_ctx = ralloc_context(NULL); 2223bf215546Sopenharmony_ci ralloc_adopt(dead_ctx, shader); 2224bf215546Sopenharmony_ci ralloc_free(dead_ctx); 2225bf215546Sopenharmony_ci 2226bf215546Sopenharmony_ci dead_ctx = ralloc_context(NULL); 2227bf215546Sopenharmony_ci 2228bf215546Sopenharmony_ci struct blob_reader reader; 2229bf215546Sopenharmony_ci blob_reader_init(&reader, writer.data, writer.size); 2230bf215546Sopenharmony_ci nir_shader *copy = nir_deserialize(dead_ctx, options, &reader); 2231bf215546Sopenharmony_ci 2232bf215546Sopenharmony_ci blob_finish(&writer); 2233bf215546Sopenharmony_ci 2234bf215546Sopenharmony_ci nir_shader_replace(shader, copy); 2235bf215546Sopenharmony_ci ralloc_free(dead_ctx); 2236bf215546Sopenharmony_ci} 2237