/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Shared for nir/variants */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, "  NIR cache entries:       %d\n", cache->nir_stats.count);
   fprintf(stderr, "  NIR cache miss count:    %d\n", cache->nir_stats.miss);
   fprintf(stderr, "  NIR cache hit count:     %d\n", cache->nir_stats.hit);

   fprintf(stderr, "  cache entries:           %d\n", cache->stats.count);
   fprintf(stderr, "  cache miss count:        %d\n", cache->stats.miss);
   fprintf(stderr, "  cache hit count:         %d\n", cache->stats.hit);

   fprintf(stderr, "  on-disk cache hit count: %d\n", cache->stats.on_disk_hit);
}

static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_unlock(&cache->mutex);
}

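/* Serializes the given NIR shader and adds it to the cache's NIR table,
 * keyed by sha1_key. It is a no-op if the cache is disabled, already full,
 * or already contains an entry for that key.
 */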
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock. We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

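/* Looks up a serialized NIR shader by sha1_key and, on a hit, deserializes
 * it and returns a fresh nir_shader owned by the caller. Returns NULL on a
 * miss or if the cached blob fails to deserialize.
 */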
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use context NULL as we want the p_stage to keep the reference to
       * nir, as we keep open the possibility of providing a shader variant
       * after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches the cache for pipeline data and, on a hit, returns the
 * corresponding v3dv_pipeline_shared_data, or NULL if it is not cached.
 * On a hit the ref_count is increased, so the caller is responsible for
 * unreferencing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk-cache can be independently disabled, while keeping
    * the pipeline cache working, by using the environment variable
    * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
    * will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
         char sha1buf[41];
         _mesa_sha1_format(sha1buf, cache_key);
         fprintf(stderr, "[v3dv on-disk cache] %s %s\n",
                 buffer ? "hit" : "miss",
                 sha1buf);
      }

      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            /* Technically we could increase on_disk_hit as soon as we have a
             * buffer, but we are more interested in hits that yield a valid
             * shared_data.
             */
            cache->stats.on_disk_hit++;
            if (cache)
               pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      }
   }
#endif

   return NULL;
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

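/* Allocates a new shared-data entry from the device allocator, takes
 * ownership of the given descriptor maps and shader variants, and copies
 * the shaders' assembly code into a freshly allocated and mapped BO.
 * Returns NULL (after unreferencing the partially built entry) on failure.
 */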
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * ref'd and unref'd by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will be available on the
    * last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shaders assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      goto fail;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}

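/* Adds shared_data to the in-memory pipeline cache, taking a reference on
 * it, and, unless it came from an on-disk cache hit, also serializes it to
 * the on-disk cache when that is available.
 */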
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from the disk cache, we already know that the
    * entry is not in the hash table.
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char* snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

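/* Reads one shader variant back from the blob: its stage, prog_data
 * (including the uniform list), its offset into the shared assembly BO and
 * the size of its QPU instructions. This mirrors
 * shader_variant_write_to_blob() below.
 */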
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* shader_variant_create expects a newly created prog_data of its own, as
    * that is what the v3d compiler returns. So we also allocate one here
    * (including the uniform list) and fill it with the data that we read
    * from the blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         goto fail;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         goto fail;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      goto fail;

   struct v3dv_pipeline_shared_data *data =
      v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                    total_assembly, total_assembly_size);

   if (!data)
      goto fail;

   return data;

fail:
   for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
      if (maps[i])
         vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
      if (variants[i])
         v3dv_shader_variant_destroy(cache->device, variants[i]);
   }
   return NULL;
}

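/* Loads serialized cache data passed through
 * VkPipelineCacheCreateInfo::pInitialData. The layout matches what
 * v3dv_GetPipelineCacheData() produces: a vk_pipeline_cache_header, a
 * uint32 count of serialized NIR shaders followed by their payloads, and a
 * uint32 count of pipeline shared-data entries followed by their payloads.
 * Data with a mismatched header (version, vendor/device id or UUID) is
 * silently ignored.
 */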
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   blob_copy_bytes(&blob, &header, sizeof(header));
   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

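/* vkMergePipelineCaches. NIR entries are deep-copied into the destination
 * cache (each copy is ralloc'ed against its owning cache), while pipeline
 * shared-data entries are reference counted, so merging those only takes an
 * extra reference.
 */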
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we are using serialized nir shaders because they are
          * convenient to create and store in the cache, but this requires
          * copying the serialized NIR here (and in some other places).
          * Perhaps it would make sense to handle the NIR shaders with
          * ref-counted shared structures, as we do for the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

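/* Serializes one shader variant: its stage, prog_data (plus the uniform
 * list contents and data), the variant's offset into the shared assembly BO
 * and its QPU instruction size. Returns false if the blob ran out of
 * memory.
 */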
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, graphics pipelines may
    * have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages take the
    * descriptor map from the render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and
    * compute pipelines only have 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

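/* vkGetPipelineCacheData. When pData is NULL, only the required size is
 * returned in pDataSize. The serialized layout is the standard
 * vk_pipeline_cache_header followed by the NIR entries and then the
 * pipeline shared-data entries, each group prefixed by a uint32 count.
 */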
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}