1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <inttypes.h> 25#include "util/format/u_format.h" 26#include "util/u_math.h" 27#include "util/u_memory.h" 28#include "util/ralloc.h" 29#include "util/hash_table.h" 30#include "util/u_upload_mgr.h" 31#include "tgsi/tgsi_dump.h" 32#include "tgsi/tgsi_parse.h" 33#include "compiler/nir/nir.h" 34#include "compiler/nir/nir_builder.h" 35#include "nir/tgsi_to_nir.h" 36#include "compiler/v3d_compiler.h" 37#include "v3d_context.h" 38#include "broadcom/cle/v3d_packet_v33_pack.h" 39 40static struct v3d_compiled_shader * 41v3d_get_compiled_shader(struct v3d_context *v3d, 42 struct v3d_key *key, size_t key_size); 43static void 44v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 45 struct v3d_key *key); 46 47static gl_varying_slot 48v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) 49{ 50 nir_foreach_shader_out_variable(var, s) { 51 if (var->data.driver_location == driver_location) { 52 return var->data.location; 53 } 54 } 55 56 return -1; 57} 58 59/** 60 * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. 61 * 62 * A shader can have 16 of these specs, and each one of them can write up to 63 * 16 dwords. Since we allow a total of 64 transform feedback output 64 * components (not 16 vectors), we have to group the writes of multiple 65 * varyings together in a single data spec. 66 */ 67static void 68v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, 69 const struct pipe_stream_output_info *stream_output) 70{ 71 if (!stream_output->num_outputs) 72 return; 73 74 struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; 75 int slot_count = 0; 76 77 for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { 78 uint32_t buffer_offset = 0; 79 uint32_t vpm_start = slot_count; 80 81 for (int i = 0; i < stream_output->num_outputs; i++) { 82 const struct pipe_stream_output *output = 83 &stream_output->output[i]; 84 85 if (output->output_buffer != buffer) 86 continue; 87 88 /* We assume that the SO outputs appear in increasing 89 * order in the buffer. 90 */ 91 assert(output->dst_offset >= buffer_offset); 92 93 /* Pad any undefined slots in the output */ 94 for (int j = buffer_offset; j < output->dst_offset; j++) { 95 slots[slot_count] = 96 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); 97 slot_count++; 98 buffer_offset++; 99 } 100 101 /* Set the coordinate shader up to output the 102 * components of this varying. 103 */ 104 for (int j = 0; j < output->num_components; j++) { 105 gl_varying_slot slot = 106 v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index); 107 108 slots[slot_count] = 109 v3d_slot_from_slot_and_component(slot, 110 output->start_component + j); 111 slot_count++; 112 buffer_offset++; 113 } 114 } 115 116 uint32_t vpm_size = slot_count - vpm_start; 117 if (!vpm_size) 118 continue; 119 120 uint32_t vpm_start_offset = vpm_start + 6; 121 122 while (vpm_size) { 123 uint32_t write_size = MIN2(vpm_size, 1 << 4); 124 125 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { 126 /* We need the offset from the coordinate shader's VPM 127 * output block, which has the [X, Y, Z, W, Xs, Ys] 128 * values at the start. 129 */ 130 .first_shaded_vertex_value_to_output = vpm_start_offset, 131 .number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size, 132 .output_buffer_to_write_to = buffer, 133 }; 134 135 /* GFXH-1559 */ 136 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 137 so->num_tf_specs != 0); 138 139 assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); 140 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 141 (void *)&so->tf_specs[so->num_tf_specs], 142 &unpacked); 143 144 /* If point size is being written by the shader, then 145 * all the VPM start offsets are shifted up by one. 146 * We won't know that until the variant is compiled, 147 * though. 148 */ 149 unpacked.first_shaded_vertex_value_to_output++; 150 151 /* GFXH-1559 */ 152 assert(unpacked.first_shaded_vertex_value_to_output != 8 || 153 so->num_tf_specs != 0); 154 155 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, 156 (void *)&so->tf_specs_psiz[so->num_tf_specs], 157 &unpacked); 158 so->num_tf_specs++; 159 vpm_start_offset += write_size; 160 vpm_size -= write_size; 161 } 162 so->base.stream_output.stride[buffer] = 163 stream_output->stride[buffer]; 164 } 165 166 so->num_tf_outputs = slot_count; 167 so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, 168 slot_count); 169 memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); 170} 171 172static int 173type_size(const struct glsl_type *type, bool bindless) 174{ 175 return glsl_count_attribute_slots(type, false); 176} 177 178static void 179precompile_all_outputs(nir_shader *s, 180 struct v3d_varying_slot *outputs, 181 uint8_t *num_outputs) 182{ 183 nir_foreach_shader_out_variable(var, s) { 184 const int array_len = MAX2(glsl_get_length(var->type), 1); 185 for (int j = 0; j < array_len; j++) { 186 const int slot = var->data.location + j; 187 const int num_components = 188 glsl_get_components(var->type); 189 for (int i = 0; i < num_components; i++) { 190 const int swiz = var->data.location_frac + i; 191 outputs[(*num_outputs)++] = 192 v3d_slot_from_slot_and_component(slot, 193 swiz); 194 } 195 } 196 } 197} 198 199/** 200 * Precompiles a shader variant at shader state creation time if 201 * V3D_DEBUG=precompile is set. Used for shader-db 202 * (https://gitlab.freedesktop.org/mesa/shader-db) 203 */ 204static void 205v3d_shader_precompile(struct v3d_context *v3d, 206 struct v3d_uncompiled_shader *so) 207{ 208 nir_shader *s = so->base.ir.nir; 209 210 if (s->info.stage == MESA_SHADER_FRAGMENT) { 211 struct v3d_fs_key key = { 212 .base.shader_state = so, 213 }; 214 215 nir_foreach_shader_out_variable(var, s) { 216 if (var->data.location == FRAG_RESULT_COLOR) { 217 key.cbufs |= 1 << 0; 218 } else if (var->data.location >= FRAG_RESULT_DATA0) { 219 key.cbufs |= 1 << (var->data.location - 220 FRAG_RESULT_DATA0); 221 } 222 } 223 224 key.logicop_func = PIPE_LOGICOP_COPY; 225 226 v3d_setup_shared_precompile_key(so, &key.base); 227 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 228 } else if (s->info.stage == MESA_SHADER_GEOMETRY) { 229 struct v3d_gs_key key = { 230 .base.shader_state = so, 231 .base.is_last_geometry_stage = true, 232 }; 233 234 v3d_setup_shared_precompile_key(so, &key.base); 235 236 precompile_all_outputs(s, 237 key.used_outputs, 238 &key.num_used_outputs); 239 240 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 241 242 /* Compile GS bin shader: only position (XXX: include TF) */ 243 key.is_coord = true; 244 key.num_used_outputs = 0; 245 for (int i = 0; i < 4; i++) { 246 key.used_outputs[key.num_used_outputs++] = 247 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 248 i); 249 } 250 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 251 } else { 252 assert(s->info.stage == MESA_SHADER_VERTEX); 253 struct v3d_vs_key key = { 254 .base.shader_state = so, 255 /* Emit fixed function outputs */ 256 .base.is_last_geometry_stage = true, 257 }; 258 259 v3d_setup_shared_precompile_key(so, &key.base); 260 261 precompile_all_outputs(s, 262 key.used_outputs, 263 &key.num_used_outputs); 264 265 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 266 267 /* Compile VS bin shader: only position (XXX: include TF) */ 268 key.is_coord = true; 269 key.num_used_outputs = 0; 270 for (int i = 0; i < 4; i++) { 271 key.used_outputs[key.num_used_outputs++] = 272 v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 273 i); 274 } 275 v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); 276 } 277} 278 279static void * 280v3d_uncompiled_shader_create(struct pipe_context *pctx, 281 enum pipe_shader_ir type, void *ir) 282{ 283 struct v3d_context *v3d = v3d_context(pctx); 284 struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); 285 if (!so) 286 return NULL; 287 288 so->program_id = v3d->next_uncompiled_program_id++; 289 290 nir_shader *s; 291 292 if (type == PIPE_SHADER_IR_NIR) { 293 /* The backend takes ownership of the NIR shader on state 294 * creation. 295 */ 296 s = ir; 297 } else { 298 assert(type == PIPE_SHADER_IR_TGSI); 299 300 if (unlikely(V3D_DEBUG & V3D_DEBUG_TGSI)) { 301 fprintf(stderr, "prog %d TGSI:\n", 302 so->program_id); 303 tgsi_dump(ir, 0); 304 fprintf(stderr, "\n"); 305 } 306 s = tgsi_to_nir(ir, pctx->screen, false); 307 } 308 309 if (s->info.stage != MESA_SHADER_VERTEX && 310 s->info.stage != MESA_SHADER_GEOMETRY) { 311 NIR_PASS(_, s, nir_lower_io, 312 nir_var_shader_in | nir_var_shader_out, 313 type_size, (nir_lower_io_options)0); 314 } 315 316 NIR_PASS(_, s, nir_lower_regs_to_ssa); 317 NIR_PASS(_, s, nir_normalize_cubemap_coords); 318 319 NIR_PASS(_, s, nir_lower_load_const_to_scalar); 320 321 v3d_optimize_nir(NULL, s); 322 323 NIR_PASS(_, s, nir_remove_dead_variables, nir_var_function_temp, NULL); 324 325 /* Garbage collect dead instructions */ 326 nir_sweep(s); 327 328 so->base.type = PIPE_SHADER_IR_NIR; 329 so->base.ir.nir = s; 330 331 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | 332 v3d_debug_flag_for_shader_stage(s->info.stage)))) { 333 fprintf(stderr, "%s prog %d NIR:\n", 334 gl_shader_stage_name(s->info.stage), 335 so->program_id); 336 nir_print_shader(s, stderr); 337 fprintf(stderr, "\n"); 338 } 339 340 if (unlikely(V3D_DEBUG & V3D_DEBUG_PRECOMPILE)) 341 v3d_shader_precompile(v3d, so); 342 343 return so; 344} 345 346static void 347v3d_shader_debug_output(const char *message, void *data) 348{ 349 struct v3d_context *v3d = data; 350 351 util_debug_message(&v3d->debug, SHADER_INFO, "%s", message); 352} 353 354static void * 355v3d_shader_state_create(struct pipe_context *pctx, 356 const struct pipe_shader_state *cso) 357{ 358 struct v3d_uncompiled_shader *so = 359 v3d_uncompiled_shader_create(pctx, 360 cso->type, 361 (cso->type == PIPE_SHADER_IR_TGSI ? 362 (void *)cso->tokens : 363 cso->ir.nir)); 364 365 v3d_set_transform_feedback_outputs(so, &cso->stream_output); 366 367 return so; 368} 369 370struct v3d_compiled_shader * 371v3d_get_compiled_shader(struct v3d_context *v3d, 372 struct v3d_key *key, 373 size_t key_size) 374{ 375 struct v3d_uncompiled_shader *shader_state = key->shader_state; 376 nir_shader *s = shader_state->base.ir.nir; 377 378 struct hash_table *ht = v3d->prog.cache[s->info.stage]; 379 struct hash_entry *entry = _mesa_hash_table_search(ht, key); 380 if (entry) 381 return entry->data; 382 383 int variant_id = 384 p_atomic_inc_return(&shader_state->compiled_variant_count); 385 386 struct v3d_compiled_shader *shader = NULL; 387 388#ifdef ENABLE_SHADER_CACHE 389 shader = v3d_disk_cache_retrieve(v3d, key); 390#endif 391 392 if (!shader) { 393 shader = rzalloc(NULL, struct v3d_compiled_shader); 394 395 int program_id = shader_state->program_id; 396 uint64_t *qpu_insts; 397 uint32_t shader_size; 398 399 qpu_insts = v3d_compile(v3d->screen->compiler, key, 400 &shader->prog_data.base, s, 401 v3d_shader_debug_output, 402 v3d, 403 program_id, variant_id, &shader_size); 404 ralloc_steal(shader, shader->prog_data.base); 405 406 if (shader_size) { 407 u_upload_data(v3d->state_uploader, 0, shader_size, 8, 408 qpu_insts, &shader->offset, &shader->resource); 409 } 410 411#ifdef ENABLE_SHADER_CACHE 412 v3d_disk_cache_store(v3d, key, shader, qpu_insts, shader_size); 413#endif 414 415 free(qpu_insts); 416 } 417 418 v3d_set_shader_uniform_dirty_flags(shader); 419 420 if (ht) { 421 struct v3d_key *dup_key; 422 dup_key = ralloc_size(shader, key_size); 423 memcpy(dup_key, key, key_size); 424 _mesa_hash_table_insert(ht, dup_key, shader); 425 } 426 427 if (shader->prog_data.base->spill_size > 428 v3d->prog.spill_size_per_thread) { 429 /* The TIDX register we use for choosing the area to access 430 * for scratch space is: (core << 6) | (qpu << 2) | thread. 431 * Even at minimum threadcount in a particular shader, that 432 * means we still multiply by qpus by 4. 433 */ 434 int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * 435 shader->prog_data.base->spill_size); 436 437 v3d_bo_unreference(&v3d->prog.spill_bo); 438 v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, 439 total_spill_size, "spill"); 440 v3d->prog.spill_size_per_thread = 441 shader->prog_data.base->spill_size; 442 } 443 444 return shader; 445} 446 447static void 448v3d_free_compiled_shader(struct v3d_compiled_shader *shader) 449{ 450 pipe_resource_reference(&shader->resource, NULL); 451 ralloc_free(shader); 452} 453 454static void 455v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, 456 struct v3d_texture_stateobj *texstate) 457{ 458 const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 459 460 key->num_tex_used = texstate->num_textures; 461 key->num_samplers_used = texstate->num_textures; 462 assert(key->num_tex_used == key->num_samplers_used); 463 for (int i = 0; i < texstate->num_textures; i++) { 464 struct pipe_sampler_view *sampler = texstate->textures[i]; 465 struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); 466 struct pipe_sampler_state *sampler_state = 467 texstate->samplers[i]; 468 469 if (!sampler) 470 continue; 471 472 assert(sampler->target == PIPE_BUFFER || sampler_state); 473 474 unsigned compare_mode = sampler_state ? 475 sampler_state->compare_mode : 476 PIPE_TEX_COMPARE_NONE; 477 478 key->sampler[i].return_size = 479 v3d_get_tex_return_size(devinfo, 480 sampler->format, 481 compare_mode); 482 483 /* For 16-bit, we set up the sampler to always return 2 484 * channels (meaning no recompiles for most statechanges), 485 * while for 32 we actually scale the returns with channels. 486 */ 487 if (key->sampler[i].return_size == 16) { 488 key->sampler[i].return_channels = 2; 489 } else if (devinfo->ver > 40) { 490 key->sampler[i].return_channels = 4; 491 } else { 492 key->sampler[i].return_channels = 493 v3d_get_tex_return_channels(devinfo, 494 sampler->format); 495 } 496 497 if (key->sampler[i].return_size == 32 && devinfo->ver < 40) { 498 memcpy(key->tex[i].swizzle, 499 v3d_sampler->swizzle, 500 sizeof(v3d_sampler->swizzle)); 501 } else { 502 /* For 16-bit returns, we let the sampler state handle 503 * the swizzle. 504 */ 505 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 506 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 507 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 508 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 509 } 510 } 511} 512 513static void 514v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, 515 struct v3d_key *key) 516{ 517 nir_shader *s = uncompiled->base.ir.nir; 518 519 /* Note that below we access they key's texture and sampler fields 520 * using the same index. On OpenGL they are the same (they are 521 * combined) 522 */ 523 key->num_tex_used = s->info.num_textures; 524 key->num_samplers_used = s->info.num_textures; 525 for (int i = 0; i < s->info.num_textures; i++) { 526 key->sampler[i].return_size = 16; 527 key->sampler[i].return_channels = 2; 528 529 key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; 530 key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; 531 key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; 532 key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; 533 } 534} 535 536static void 537v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) 538{ 539 struct v3d_job *job = v3d->job; 540 struct v3d_fs_key local_key; 541 struct v3d_fs_key *key = &local_key; 542 nir_shader *s = v3d->prog.bind_fs->base.ir.nir; 543 544 if (!(v3d->dirty & (V3D_DIRTY_PRIM_MODE | 545 V3D_DIRTY_BLEND | 546 V3D_DIRTY_FRAMEBUFFER | 547 V3D_DIRTY_ZSA | 548 V3D_DIRTY_RASTERIZER | 549 V3D_DIRTY_SAMPLE_STATE | 550 V3D_DIRTY_FRAGTEX | 551 V3D_DIRTY_UNCOMPILED_FS))) { 552 return; 553 } 554 555 memset(key, 0, sizeof(*key)); 556 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); 557 key->base.shader_state = v3d->prog.bind_fs; 558 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 559 key->is_points = (prim_mode == PIPE_PRIM_POINTS); 560 key->is_lines = (prim_mode >= PIPE_PRIM_LINES && 561 prim_mode <= PIPE_PRIM_LINE_STRIP); 562 key->line_smoothing = (key->is_lines && 563 v3d_line_smoothing_enabled(v3d)); 564 key->has_gs = v3d->prog.bind_gs != NULL; 565 if (v3d->blend->base.logicop_enable) { 566 key->logicop_func = v3d->blend->base.logicop_func; 567 } else { 568 key->logicop_func = PIPE_LOGICOP_COPY; 569 } 570 if (job->msaa) { 571 key->msaa = v3d->rasterizer->base.multisample; 572 key->sample_coverage = (v3d->rasterizer->base.multisample && 573 v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); 574 key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; 575 key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; 576 } 577 578 key->swap_color_rb = v3d->swap_color_rb; 579 580 for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) { 581 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; 582 if (!cbuf) 583 continue; 584 585 /* gl_FragColor's propagation to however many bound color 586 * buffers there are means that the shader compile needs to 587 * know what buffers are present. 588 */ 589 key->cbufs |= 1 << i; 590 591 /* If logic operations are enabled then we might emit color 592 * reads and we need to know the color buffer format and 593 * swizzle for that. 594 */ 595 if (key->logicop_func != PIPE_LOGICOP_COPY) { 596 key->color_fmt[i].format = cbuf->format; 597 memcpy(key->color_fmt[i].swizzle, 598 v3d_get_format_swizzle(&v3d->screen->devinfo, 599 cbuf->format), 600 sizeof(key->color_fmt[i].swizzle)); 601 } 602 603 const struct util_format_description *desc = 604 util_format_description(cbuf->format); 605 606 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && 607 desc->channel[0].size == 32) { 608 key->f32_color_rb |= 1 << i; 609 } 610 611 if (s->info.fs.untyped_color_outputs) { 612 if (util_format_is_pure_uint(cbuf->format)) 613 key->uint_color_rb |= 1 << i; 614 else if (util_format_is_pure_sint(cbuf->format)) 615 key->int_color_rb |= 1 << i; 616 } 617 } 618 619 if (key->is_points) { 620 key->point_sprite_mask = 621 v3d->rasterizer->base.sprite_coord_enable; 622 /* this is handled by lower_wpos_pntc */ 623 key->point_coord_upper_left = false; 624 } 625 626 struct v3d_compiled_shader *old_fs = v3d->prog.fs; 627 v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 628 if (v3d->prog.fs == old_fs) 629 return; 630 631 v3d->dirty |= V3D_DIRTY_COMPILED_FS; 632 633 if (old_fs) { 634 if (v3d->prog.fs->prog_data.fs->flat_shade_flags != 635 old_fs->prog_data.fs->flat_shade_flags) { 636 v3d->dirty |= V3D_DIRTY_FLAT_SHADE_FLAGS; 637 } 638 639 if (v3d->prog.fs->prog_data.fs->noperspective_flags != 640 old_fs->prog_data.fs->noperspective_flags) { 641 v3d->dirty |= V3D_DIRTY_NOPERSPECTIVE_FLAGS; 642 } 643 644 if (v3d->prog.fs->prog_data.fs->centroid_flags != 645 old_fs->prog_data.fs->centroid_flags) { 646 v3d->dirty |= V3D_DIRTY_CENTROID_FLAGS; 647 } 648 } 649 650 if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots, 651 old_fs->prog_data.fs->input_slots, 652 sizeof(v3d->prog.fs->prog_data.fs->input_slots))) { 653 v3d->dirty |= V3D_DIRTY_FS_INPUTS; 654 } 655} 656 657static void 658v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode) 659{ 660 struct v3d_gs_key local_key; 661 struct v3d_gs_key *key = &local_key; 662 663 if (!(v3d->dirty & (V3D_DIRTY_GEOMTEX | 664 V3D_DIRTY_RASTERIZER | 665 V3D_DIRTY_UNCOMPILED_GS | 666 V3D_DIRTY_PRIM_MODE | 667 V3D_DIRTY_FS_INPUTS))) { 668 return; 669 } 670 671 if (!v3d->prog.bind_gs) { 672 v3d->prog.gs = NULL; 673 v3d->prog.gs_bin = NULL; 674 return; 675 } 676 677 memset(key, 0, sizeof(*key)); 678 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]); 679 key->base.shader_state = v3d->prog.bind_gs; 680 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 681 key->base.is_last_geometry_stage = true; 682 key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 683 STATIC_ASSERT(sizeof(key->used_outputs) == 684 sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 685 memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 686 sizeof(key->used_outputs)); 687 688 key->per_vertex_point_size = 689 (prim_mode == PIPE_PRIM_POINTS && 690 v3d->rasterizer->base.point_size_per_vertex); 691 692 struct v3d_compiled_shader *gs = 693 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 694 if (gs != v3d->prog.gs) { 695 v3d->prog.gs = gs; 696 v3d->dirty |= V3D_DIRTY_COMPILED_GS; 697 } 698 699 key->is_coord = true; 700 701 /* The last bin-mode shader in the geometry pipeline only outputs 702 * varyings used by transform feedback. 703 */ 704 struct v3d_uncompiled_shader *shader_state = key->base.shader_state; 705 memcpy(key->used_outputs, shader_state->tf_outputs, 706 sizeof(*key->used_outputs) * shader_state->num_tf_outputs); 707 if (shader_state->num_tf_outputs < key->num_used_outputs) { 708 uint32_t size = sizeof(*key->used_outputs) * 709 (key->num_used_outputs - 710 shader_state->num_tf_outputs); 711 memset(&key->used_outputs[shader_state->num_tf_outputs], 712 0, size); 713 } 714 key->num_used_outputs = shader_state->num_tf_outputs; 715 716 struct v3d_compiled_shader *old_gs = v3d->prog.gs; 717 struct v3d_compiled_shader *gs_bin = 718 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 719 if (gs_bin != old_gs) { 720 v3d->prog.gs_bin = gs_bin; 721 v3d->dirty |= V3D_DIRTY_COMPILED_GS_BIN; 722 } 723 724 if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots, 725 old_gs->prog_data.gs->input_slots, 726 sizeof(v3d->prog.gs->prog_data.gs->input_slots))) { 727 v3d->dirty |= V3D_DIRTY_GS_INPUTS; 728 } 729} 730 731static void 732v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) 733{ 734 struct v3d_vs_key local_key; 735 struct v3d_vs_key *key = &local_key; 736 737 if (!(v3d->dirty & (V3D_DIRTY_VERTTEX | 738 V3D_DIRTY_VTXSTATE | 739 V3D_DIRTY_UNCOMPILED_VS | 740 (v3d->prog.bind_gs ? 0 : V3D_DIRTY_RASTERIZER) | 741 (v3d->prog.bind_gs ? 0 : V3D_DIRTY_PRIM_MODE) | 742 (v3d->prog.bind_gs ? V3D_DIRTY_GS_INPUTS : 743 V3D_DIRTY_FS_INPUTS)))) { 744 return; 745 } 746 747 memset(key, 0, sizeof(*key)); 748 v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); 749 key->base.shader_state = v3d->prog.bind_vs; 750 key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; 751 key->base.is_last_geometry_stage = !v3d->prog.bind_gs; 752 753 if (!v3d->prog.bind_gs) { 754 key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; 755 STATIC_ASSERT(sizeof(key->used_outputs) == 756 sizeof(v3d->prog.fs->prog_data.fs->input_slots)); 757 memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, 758 sizeof(key->used_outputs)); 759 } else { 760 key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs; 761 STATIC_ASSERT(sizeof(key->used_outputs) == 762 sizeof(v3d->prog.gs->prog_data.gs->input_slots)); 763 memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots, 764 sizeof(key->used_outputs)); 765 } 766 767 key->per_vertex_point_size = 768 (prim_mode == PIPE_PRIM_POINTS && 769 v3d->rasterizer->base.point_size_per_vertex); 770 771 nir_shader *s = v3d->prog.bind_vs->base.ir.nir; 772 uint64_t inputs_read = s->info.inputs_read; 773 assert(util_bitcount(inputs_read) <= v3d->vtx->num_elements); 774 775 while (inputs_read) { 776 int location = u_bit_scan64(&inputs_read); 777 nir_variable *var = 778 nir_find_variable_with_location(s, nir_var_shader_in, location); 779 assert (var != NULL); 780 int driver_location = var->data.driver_location; 781 switch (v3d->vtx->pipe[driver_location].src_format) { 782 case PIPE_FORMAT_B8G8R8A8_UNORM: 783 case PIPE_FORMAT_B10G10R10A2_UNORM: 784 case PIPE_FORMAT_B10G10R10A2_SNORM: 785 case PIPE_FORMAT_B10G10R10A2_USCALED: 786 case PIPE_FORMAT_B10G10R10A2_SSCALED: 787 key->va_swap_rb_mask |= 1 << location; 788 break; 789 default: 790 break; 791 } 792 } 793 794 struct v3d_compiled_shader *vs = 795 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 796 if (vs != v3d->prog.vs) { 797 v3d->prog.vs = vs; 798 v3d->dirty |= V3D_DIRTY_COMPILED_VS; 799 } 800 801 key->is_coord = true; 802 803 /* Coord shaders only output varyings used by transform feedback, 804 * unless they are linked to other shaders in the geometry side 805 * of the pipeline, since in that case any of the output varyings 806 * could be required in later geometry stages to compute 807 * gl_Position or TF outputs. 808 */ 809 if (!v3d->prog.bind_gs) { 810 struct v3d_uncompiled_shader *shader_state = 811 key->base.shader_state; 812 memcpy(key->used_outputs, shader_state->tf_outputs, 813 sizeof(*key->used_outputs) * 814 shader_state->num_tf_outputs); 815 if (shader_state->num_tf_outputs < key->num_used_outputs) { 816 uint32_t tail_bytes = 817 sizeof(*key->used_outputs) * 818 (key->num_used_outputs - 819 shader_state->num_tf_outputs); 820 memset(&key->used_outputs[shader_state->num_tf_outputs], 821 0, tail_bytes); 822 } 823 key->num_used_outputs = shader_state->num_tf_outputs; 824 } else { 825 key->num_used_outputs = v3d->prog.gs_bin->prog_data.gs->num_inputs; 826 STATIC_ASSERT(sizeof(key->used_outputs) == 827 sizeof(v3d->prog.gs_bin->prog_data.gs->input_slots)); 828 memcpy(key->used_outputs, v3d->prog.gs_bin->prog_data.gs->input_slots, 829 sizeof(key->used_outputs)); 830 } 831 832 struct v3d_compiled_shader *cs = 833 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); 834 if (cs != v3d->prog.cs) { 835 v3d->prog.cs = cs; 836 v3d->dirty |= V3D_DIRTY_COMPILED_CS; 837 } 838} 839 840void 841v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) 842{ 843 v3d_update_compiled_fs(v3d, prim_mode); 844 v3d_update_compiled_gs(v3d, prim_mode); 845 v3d_update_compiled_vs(v3d, prim_mode); 846} 847 848void 849v3d_update_compiled_cs(struct v3d_context *v3d) 850{ 851 struct v3d_key local_key; 852 struct v3d_key *key = &local_key; 853 854 if (!(v3d->dirty & (V3D_DIRTY_UNCOMPILED_CS | 855 V3D_DIRTY_COMPTEX))) { 856 return; 857 } 858 859 memset(key, 0, sizeof(*key)); 860 v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); 861 key->shader_state = v3d->prog.bind_compute; 862 863 struct v3d_compiled_shader *cs = 864 v3d_get_compiled_shader(v3d, key, sizeof(*key)); 865 if (cs != v3d->prog.compute) { 866 v3d->prog.compute = cs; 867 v3d->dirty |= V3D_DIRTY_COMPILED_CS; /* XXX */ 868 } 869} 870 871static uint32_t 872fs_cache_hash(const void *key) 873{ 874 return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); 875} 876 877static uint32_t 878gs_cache_hash(const void *key) 879{ 880 return _mesa_hash_data(key, sizeof(struct v3d_gs_key)); 881} 882 883static uint32_t 884vs_cache_hash(const void *key) 885{ 886 return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); 887} 888 889static uint32_t 890cs_cache_hash(const void *key) 891{ 892 return _mesa_hash_data(key, sizeof(struct v3d_key)); 893} 894 895static bool 896fs_cache_compare(const void *key1, const void *key2) 897{ 898 return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; 899} 900 901static bool 902gs_cache_compare(const void *key1, const void *key2) 903{ 904 return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0; 905} 906 907static bool 908vs_cache_compare(const void *key1, const void *key2) 909{ 910 return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; 911} 912 913static bool 914cs_cache_compare(const void *key1, const void *key2) 915{ 916 return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; 917} 918 919static void 920v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) 921{ 922 struct v3d_context *v3d = v3d_context(pctx); 923 struct v3d_uncompiled_shader *so = hwcso; 924 nir_shader *s = so->base.ir.nir; 925 926 hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { 927 const struct v3d_key *key = entry->key; 928 struct v3d_compiled_shader *shader = entry->data; 929 930 if (key->shader_state != so) 931 continue; 932 933 if (v3d->prog.fs == shader) 934 v3d->prog.fs = NULL; 935 if (v3d->prog.vs == shader) 936 v3d->prog.vs = NULL; 937 if (v3d->prog.cs == shader) 938 v3d->prog.cs = NULL; 939 if (v3d->prog.compute == shader) 940 v3d->prog.compute = NULL; 941 942 _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); 943 v3d_free_compiled_shader(shader); 944 } 945 946 ralloc_free(so->base.ir.nir); 947 free(so); 948} 949 950static void 951v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso) 952{ 953 struct v3d_context *v3d = v3d_context(pctx); 954 v3d->prog.bind_fs = hwcso; 955 v3d->dirty |= V3D_DIRTY_UNCOMPILED_FS; 956} 957 958static void 959v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso) 960{ 961 struct v3d_context *v3d = v3d_context(pctx); 962 v3d->prog.bind_gs = hwcso; 963 v3d->dirty |= V3D_DIRTY_UNCOMPILED_GS; 964} 965 966static void 967v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) 968{ 969 struct v3d_context *v3d = v3d_context(pctx); 970 v3d->prog.bind_vs = hwcso; 971 v3d->dirty |= V3D_DIRTY_UNCOMPILED_VS; 972} 973 974static void 975v3d_compute_state_bind(struct pipe_context *pctx, void *state) 976{ 977 struct v3d_context *v3d = v3d_context(pctx); 978 979 v3d->prog.bind_compute = state; 980 v3d->dirty |= V3D_DIRTY_UNCOMPILED_CS; 981} 982 983static void * 984v3d_create_compute_state(struct pipe_context *pctx, 985 const struct pipe_compute_state *cso) 986{ 987 return v3d_uncompiled_shader_create(pctx, cso->ir_type, 988 (void *)cso->prog); 989} 990 991void 992v3d_program_init(struct pipe_context *pctx) 993{ 994 struct v3d_context *v3d = v3d_context(pctx); 995 996 pctx->create_vs_state = v3d_shader_state_create; 997 pctx->delete_vs_state = v3d_shader_state_delete; 998 999 pctx->create_gs_state = v3d_shader_state_create; 1000 pctx->delete_gs_state = v3d_shader_state_delete; 1001 1002 pctx->create_fs_state = v3d_shader_state_create; 1003 pctx->delete_fs_state = v3d_shader_state_delete; 1004 1005 pctx->bind_fs_state = v3d_fp_state_bind; 1006 pctx->bind_gs_state = v3d_gp_state_bind; 1007 pctx->bind_vs_state = v3d_vp_state_bind; 1008 1009 if (v3d->screen->has_csd) { 1010 pctx->create_compute_state = v3d_create_compute_state; 1011 pctx->delete_compute_state = v3d_shader_state_delete; 1012 pctx->bind_compute_state = v3d_compute_state_bind; 1013 } 1014 1015 v3d->prog.cache[MESA_SHADER_VERTEX] = 1016 _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); 1017 v3d->prog.cache[MESA_SHADER_GEOMETRY] = 1018 _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare); 1019 v3d->prog.cache[MESA_SHADER_FRAGMENT] = 1020 _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); 1021 v3d->prog.cache[MESA_SHADER_COMPUTE] = 1022 _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); 1023} 1024 1025void 1026v3d_program_fini(struct pipe_context *pctx) 1027{ 1028 struct v3d_context *v3d = v3d_context(pctx); 1029 1030 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 1031 struct hash_table *cache = v3d->prog.cache[i]; 1032 if (!cache) 1033 continue; 1034 1035 hash_table_foreach(cache, entry) { 1036 struct v3d_compiled_shader *shader = entry->data; 1037 v3d_free_compiled_shader(shader); 1038 _mesa_hash_table_remove(cache, entry); 1039 } 1040 } 1041 1042 v3d_bo_unreference(&v3d->prog.spill_bo); 1043} 1044