1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Raspberry Pi Ltd 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "v3dv_private.h" 25bf215546Sopenharmony_ci#include "vulkan/util/vk_util.h" 26bf215546Sopenharmony_ci#include "util/blob.h" 27bf215546Sopenharmony_ci#include "nir/nir_serialize.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_cistatic const bool debug_cache = false; 30bf215546Sopenharmony_cistatic const bool dump_stats = false; 31bf215546Sopenharmony_cistatic const bool dump_stats_on_destroy = false; 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci/* Shared for nir/variants */ 34bf215546Sopenharmony_ci#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cistatic uint32_t 37bf215546Sopenharmony_cisha1_hash_func(const void *sha1) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci return _mesa_hash_data(sha1, 20); 40bf215546Sopenharmony_ci} 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic bool 43bf215546Sopenharmony_cisha1_compare_func(const void *sha1_a, const void *sha1_b) 44bf215546Sopenharmony_ci{ 45bf215546Sopenharmony_ci return memcmp(sha1_a, sha1_b, 20) == 0; 46bf215546Sopenharmony_ci} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistruct serialized_nir { 49bf215546Sopenharmony_ci unsigned char sha1_key[20]; 50bf215546Sopenharmony_ci size_t size; 51bf215546Sopenharmony_ci char data[0]; 52bf215546Sopenharmony_ci}; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cistatic void 55bf215546Sopenharmony_cicache_dump_stats(struct v3dv_pipeline_cache *cache) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count); 58bf215546Sopenharmony_ci fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss); 59bf215546Sopenharmony_ci fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci fprintf(stderr, " cache entries: %d\n", cache->stats.count); 62bf215546Sopenharmony_ci fprintf(stderr, " cache miss count: %d\n", cache->stats.miss); 63bf215546Sopenharmony_ci fprintf(stderr, " cache hit count: %d\n", cache->stats.hit); 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci fprintf(stderr, " on-disk cache hit count: %d\n", cache->stats.on_disk_hit); 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic void 69bf215546Sopenharmony_cipipeline_cache_lock(struct v3dv_pipeline_cache *cache) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci if (!cache->externally_synchronized) 72bf215546Sopenharmony_ci mtx_lock(&cache->mutex); 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_cistatic void 76bf215546Sopenharmony_cipipeline_cache_unlock(struct v3dv_pipeline_cache *cache) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci if (!cache->externally_synchronized) 79bf215546Sopenharmony_ci mtx_unlock(&cache->mutex); 80bf215546Sopenharmony_ci} 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_civoid 83bf215546Sopenharmony_civ3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, 84bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 85bf215546Sopenharmony_ci nir_shader *nir, 86bf215546Sopenharmony_ci unsigned char sha1_key[20]) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci if (!cache || !cache->nir_cache) 89bf215546Sopenharmony_ci return; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) 92bf215546Sopenharmony_ci return; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci pipeline_cache_lock(cache); 95bf215546Sopenharmony_ci struct hash_entry *entry = 96bf215546Sopenharmony_ci _mesa_hash_table_search(cache->nir_cache, sha1_key); 97bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 98bf215546Sopenharmony_ci if (entry) 99bf215546Sopenharmony_ci return; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci struct blob blob; 102bf215546Sopenharmony_ci blob_init(&blob); 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci nir_serialize(&blob, nir, false); 105bf215546Sopenharmony_ci if (blob.out_of_memory) { 106bf215546Sopenharmony_ci blob_finish(&blob); 107bf215546Sopenharmony_ci return; 108bf215546Sopenharmony_ci } 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci pipeline_cache_lock(cache); 111bf215546Sopenharmony_ci /* Because ralloc isn't thread-safe, we have to do all this inside the 112bf215546Sopenharmony_ci * lock. We could unlock for the big memcpy but it's probably not worth 113bf215546Sopenharmony_ci * the hassle. 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); 116bf215546Sopenharmony_ci if (entry) { 117bf215546Sopenharmony_ci blob_finish(&blob); 118bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 119bf215546Sopenharmony_ci return; 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci struct serialized_nir *snir = 123bf215546Sopenharmony_ci ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size); 124bf215546Sopenharmony_ci memcpy(snir->sha1_key, sha1_key, 20); 125bf215546Sopenharmony_ci snir->size = blob.size; 126bf215546Sopenharmony_ci memcpy(snir->data, blob.data, blob.size); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci blob_finish(&blob); 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci cache->nir_stats.count++; 131bf215546Sopenharmony_ci if (debug_cache) { 132bf215546Sopenharmony_ci char sha1buf[41]; 133bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, snir->sha1_key); 134bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf); 135bf215546Sopenharmony_ci if (dump_stats) 136bf215546Sopenharmony_ci cache_dump_stats(cache); 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cinir_shader* 145bf215546Sopenharmony_civ3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, 146bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache, 147bf215546Sopenharmony_ci const nir_shader_compiler_options *nir_options, 148bf215546Sopenharmony_ci unsigned char sha1_key[20]) 149bf215546Sopenharmony_ci{ 150bf215546Sopenharmony_ci if (!cache || !cache->nir_cache) 151bf215546Sopenharmony_ci return NULL; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci if (debug_cache) { 154bf215546Sopenharmony_ci char sha1buf[41]; 155bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, sha1_key); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf); 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci const struct serialized_nir *snir = NULL; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci pipeline_cache_lock(cache); 163bf215546Sopenharmony_ci struct hash_entry *entry = 164bf215546Sopenharmony_ci _mesa_hash_table_search(cache->nir_cache, sha1_key); 165bf215546Sopenharmony_ci if (entry) 166bf215546Sopenharmony_ci snir = entry->data; 167bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci if (snir) { 170bf215546Sopenharmony_ci struct blob_reader blob; 171bf215546Sopenharmony_ci blob_reader_init(&blob, snir->data, snir->size); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci /* We use context NULL as we want the p_stage to keep the reference to 174bf215546Sopenharmony_ci * nir, as we keep open the possibility of provide a shader variant 175bf215546Sopenharmony_ci * after cache creation 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci nir_shader *nir = nir_deserialize(NULL, nir_options, &blob); 178bf215546Sopenharmony_ci if (blob.overrun) { 179bf215546Sopenharmony_ci ralloc_free(nir); 180bf215546Sopenharmony_ci } else { 181bf215546Sopenharmony_ci cache->nir_stats.hit++; 182bf215546Sopenharmony_ci if (debug_cache) { 183bf215546Sopenharmony_ci fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir); 184bf215546Sopenharmony_ci if (dump_stats) 185bf215546Sopenharmony_ci cache_dump_stats(cache); 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci return nir; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci cache->nir_stats.miss++; 192bf215546Sopenharmony_ci if (debug_cache) { 193bf215546Sopenharmony_ci fprintf(stderr, "[v3dv nir cache] miss\n"); 194bf215546Sopenharmony_ci if (dump_stats) 195bf215546Sopenharmony_ci cache_dump_stats(cache); 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci return NULL; 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_civoid 202bf215546Sopenharmony_civ3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache, 203bf215546Sopenharmony_ci struct v3dv_device *device, 204bf215546Sopenharmony_ci VkPipelineCacheCreateFlags flags, 205bf215546Sopenharmony_ci bool cache_enabled) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci cache->device = device; 208bf215546Sopenharmony_ci mtx_init(&cache->mutex, mtx_plain); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (cache_enabled) { 211bf215546Sopenharmony_ci cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func, 212bf215546Sopenharmony_ci sha1_compare_func); 213bf215546Sopenharmony_ci cache->nir_stats.miss = 0; 214bf215546Sopenharmony_ci cache->nir_stats.hit = 0; 215bf215546Sopenharmony_ci cache->nir_stats.count = 0; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func, 218bf215546Sopenharmony_ci sha1_compare_func); 219bf215546Sopenharmony_ci cache->stats.miss = 0; 220bf215546Sopenharmony_ci cache->stats.hit = 0; 221bf215546Sopenharmony_ci cache->stats.count = 0; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci cache->externally_synchronized = flags & 224bf215546Sopenharmony_ci VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; 225bf215546Sopenharmony_ci } else { 226bf215546Sopenharmony_ci cache->nir_cache = NULL; 227bf215546Sopenharmony_ci cache->cache = NULL; 228bf215546Sopenharmony_ci } 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci} 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_cistatic struct v3dv_pipeline_shared_data * 233bf215546Sopenharmony_civ3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache, 234bf215546Sopenharmony_ci struct blob_reader *blob); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_cistatic void 237bf215546Sopenharmony_cipipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache, 238bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *shared_data, 239bf215546Sopenharmony_ci bool from_disk_cache); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_cistatic bool 242bf215546Sopenharmony_civ3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry, 243bf215546Sopenharmony_ci struct blob *blob); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci/** 246bf215546Sopenharmony_ci * It searchs for pipeline cached data, and returns a v3dv_pipeline_shared_data with 247bf215546Sopenharmony_ci * it, or NULL if doesn't have it cached. On the former, it will increases the 248bf215546Sopenharmony_ci * ref_count, so caller is responsible to unref it. 249bf215546Sopenharmony_ci */ 250bf215546Sopenharmony_cistruct v3dv_pipeline_shared_data * 251bf215546Sopenharmony_civ3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, 252bf215546Sopenharmony_ci unsigned char sha1_key[20], 253bf215546Sopenharmony_ci bool *cache_hit) 254bf215546Sopenharmony_ci{ 255bf215546Sopenharmony_ci if (!cache || !cache->cache) 256bf215546Sopenharmony_ci return NULL; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci if (debug_cache) { 259bf215546Sopenharmony_ci char sha1buf[41]; 260bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, sha1_key); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf); 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci pipeline_cache_lock(cache); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci struct hash_entry *entry = 268bf215546Sopenharmony_ci _mesa_hash_table_search(cache->cache, sha1_key); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (entry) { 271bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *cache_entry = 272bf215546Sopenharmony_ci (struct v3dv_pipeline_shared_data *) entry->data; 273bf215546Sopenharmony_ci assert(cache_entry); 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci cache->stats.hit++; 276bf215546Sopenharmony_ci *cache_hit = true; 277bf215546Sopenharmony_ci if (debug_cache) { 278bf215546Sopenharmony_ci fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry); 279bf215546Sopenharmony_ci if (dump_stats) 280bf215546Sopenharmony_ci cache_dump_stats(cache); 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci v3dv_pipeline_shared_data_ref(cache_entry); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci return cache_entry; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci cache->stats.miss++; 292bf215546Sopenharmony_ci if (debug_cache) { 293bf215546Sopenharmony_ci fprintf(stderr, "[v3dv cache] miss\n"); 294bf215546Sopenharmony_ci if (dump_stats) 295bf215546Sopenharmony_ci cache_dump_stats(cache); 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci#ifdef ENABLE_SHADER_CACHE 301bf215546Sopenharmony_ci struct v3dv_device *device = cache->device; 302bf215546Sopenharmony_ci struct disk_cache *disk_cache = device->pdevice->disk_cache; 303bf215546Sopenharmony_ci /* Note that the on-disk-cache can be independently disabled, while keeping 304bf215546Sopenharmony_ci * the pipeline cache working, by using the environment variable 305bf215546Sopenharmony_ci * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get 306bf215546Sopenharmony_ci * will not do anything. 307bf215546Sopenharmony_ci */ 308bf215546Sopenharmony_ci if (disk_cache && device->instance->pipeline_cache_enabled) { 309bf215546Sopenharmony_ci cache_key cache_key; 310bf215546Sopenharmony_ci disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key); 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci size_t buffer_size; 313bf215546Sopenharmony_ci uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size); 314bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) { 315bf215546Sopenharmony_ci char sha1buf[41]; 316bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, cache_key); 317bf215546Sopenharmony_ci fprintf(stderr, "[v3dv on-disk cache] %s %s\n", 318bf215546Sopenharmony_ci buffer ? "hit" : "miss", 319bf215546Sopenharmony_ci sha1buf); 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci if (buffer) { 323bf215546Sopenharmony_ci struct blob_reader blob; 324bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *shared_data; 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci blob_reader_init(&blob, buffer, buffer_size); 327bf215546Sopenharmony_ci shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob); 328bf215546Sopenharmony_ci free(buffer); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci if (shared_data) { 331bf215546Sopenharmony_ci /* Technically we could increase on_disk_hit as soon as we have a 332bf215546Sopenharmony_ci * buffer, but we are more interested on hits that got a valid 333bf215546Sopenharmony_ci * shared_data 334bf215546Sopenharmony_ci */ 335bf215546Sopenharmony_ci cache->stats.on_disk_hit++; 336bf215546Sopenharmony_ci if (cache) 337bf215546Sopenharmony_ci pipeline_cache_upload_shared_data(cache, shared_data, true); 338bf215546Sopenharmony_ci return shared_data; 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci } 342bf215546Sopenharmony_ci#endif 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci return NULL; 345bf215546Sopenharmony_ci} 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_civoid 348bf215546Sopenharmony_civ3dv_pipeline_shared_data_destroy(struct v3dv_device *device, 349bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *shared_data) 350bf215546Sopenharmony_ci{ 351bf215546Sopenharmony_ci assert(shared_data->ref_cnt == 0); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 354bf215546Sopenharmony_ci if (shared_data->variants[stage] != NULL) 355bf215546Sopenharmony_ci v3dv_shader_variant_destroy(device, shared_data->variants[stage]); 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci /* We don't free binning descriptor maps as we are sharing them 358bf215546Sopenharmony_ci * with the render shaders. 359bf215546Sopenharmony_ci */ 360bf215546Sopenharmony_ci if (shared_data->maps[stage] != NULL && 361bf215546Sopenharmony_ci !broadcom_shader_stage_is_binning(stage)) { 362bf215546Sopenharmony_ci vk_free(&device->vk.alloc, shared_data->maps[stage]); 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci if (shared_data->assembly_bo) 367bf215546Sopenharmony_ci v3dv_bo_free(device, shared_data->assembly_bo); 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci vk_free(&device->vk.alloc, shared_data); 370bf215546Sopenharmony_ci} 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_cistatic struct v3dv_pipeline_shared_data * 373bf215546Sopenharmony_civ3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache, 374bf215546Sopenharmony_ci const unsigned char sha1_key[20], 375bf215546Sopenharmony_ci struct v3dv_descriptor_maps **maps, 376bf215546Sopenharmony_ci struct v3dv_shader_variant **variants, 377bf215546Sopenharmony_ci const uint64_t *total_assembly, 378bf215546Sopenharmony_ci const uint32_t total_assembly_size) 379bf215546Sopenharmony_ci{ 380bf215546Sopenharmony_ci size_t size = sizeof(struct v3dv_pipeline_shared_data); 381bf215546Sopenharmony_ci /* We create new_entry using the device alloc. Right now shared_data is ref 382bf215546Sopenharmony_ci * and unref by both the pipeline and the pipeline cache, so we can't 383bf215546Sopenharmony_ci * ensure that the cache or pipeline alloc will be available on the last 384bf215546Sopenharmony_ci * unref. 385bf215546Sopenharmony_ci */ 386bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *new_entry = 387bf215546Sopenharmony_ci vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8, 388bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci if (new_entry == NULL) 391bf215546Sopenharmony_ci return NULL; 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci new_entry->ref_cnt = 1; 394bf215546Sopenharmony_ci memcpy(new_entry->sha1_key, sha1_key, 20); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 397bf215546Sopenharmony_ci new_entry->maps[stage] = maps[stage]; 398bf215546Sopenharmony_ci new_entry->variants[stage] = variants[stage]; 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size, 402bf215546Sopenharmony_ci "pipeline shader assembly", true); 403bf215546Sopenharmony_ci if (!bo) { 404bf215546Sopenharmony_ci fprintf(stderr, "failed to allocate memory for shaders assembly\n"); 405bf215546Sopenharmony_ci goto fail; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size); 409bf215546Sopenharmony_ci if (!ok) { 410bf215546Sopenharmony_ci fprintf(stderr, "failed to map source shader buffer\n"); 411bf215546Sopenharmony_ci goto fail; 412bf215546Sopenharmony_ci } 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci memcpy(bo->map, total_assembly, total_assembly_size); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci new_entry->assembly_bo = bo; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci return new_entry; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_cifail: 421bf215546Sopenharmony_ci v3dv_pipeline_shared_data_unref(cache->device, new_entry); 422bf215546Sopenharmony_ci return NULL; 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_cistatic void 426bf215546Sopenharmony_cipipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache, 427bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *shared_data, 428bf215546Sopenharmony_ci bool from_disk_cache) 429bf215546Sopenharmony_ci{ 430bf215546Sopenharmony_ci assert(shared_data); 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci if (!cache || !cache->cache) 433bf215546Sopenharmony_ci return; 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) 436bf215546Sopenharmony_ci return; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci pipeline_cache_lock(cache); 439bf215546Sopenharmony_ci struct hash_entry *entry = NULL; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci /* If this is being called from the disk cache, we already know that the 442bf215546Sopenharmony_ci * entry is not on the hash table 443bf215546Sopenharmony_ci */ 444bf215546Sopenharmony_ci if (!from_disk_cache) 445bf215546Sopenharmony_ci entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key); 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci if (entry) { 448bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 449bf215546Sopenharmony_ci return; 450bf215546Sopenharmony_ci } 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci v3dv_pipeline_shared_data_ref(shared_data); 453bf215546Sopenharmony_ci _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data); 454bf215546Sopenharmony_ci cache->stats.count++; 455bf215546Sopenharmony_ci if (debug_cache) { 456bf215546Sopenharmony_ci char sha1buf[41]; 457bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, shared_data->sha1_key); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n", 460bf215546Sopenharmony_ci cache, sha1buf, shared_data); 461bf215546Sopenharmony_ci if (dump_stats) 462bf215546Sopenharmony_ci cache_dump_stats(cache); 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci#ifdef ENABLE_SHADER_CACHE 468bf215546Sopenharmony_ci /* If we are being called from a on-disk-cache hit, we can skip writing to 469bf215546Sopenharmony_ci * the disk cache 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_ci if (from_disk_cache) 472bf215546Sopenharmony_ci return; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci struct v3dv_device *device = cache->device; 475bf215546Sopenharmony_ci struct disk_cache *disk_cache = device->pdevice->disk_cache; 476bf215546Sopenharmony_ci if (disk_cache) { 477bf215546Sopenharmony_ci struct blob binary; 478bf215546Sopenharmony_ci blob_init(&binary); 479bf215546Sopenharmony_ci if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) { 480bf215546Sopenharmony_ci cache_key cache_key; 481bf215546Sopenharmony_ci disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) { 484bf215546Sopenharmony_ci char sha1buf[41]; 485bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, shared_data->sha1_key); 486bf215546Sopenharmony_ci fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL); 489bf215546Sopenharmony_ci } 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci blob_finish(&binary); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci#endif 494bf215546Sopenharmony_ci} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci/* Uploads all the "cacheable" or shared data from the pipeline */ 497bf215546Sopenharmony_civoid 498bf215546Sopenharmony_civ3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline, 499bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false); 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_cistatic struct serialized_nir* 505bf215546Sopenharmony_ciserialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache, 506bf215546Sopenharmony_ci struct blob_reader *blob) 507bf215546Sopenharmony_ci{ 508bf215546Sopenharmony_ci const unsigned char *sha1_key = blob_read_bytes(blob, 20); 509bf215546Sopenharmony_ci uint32_t snir_size = blob_read_uint32(blob); 510bf215546Sopenharmony_ci const char* snir_data = blob_read_bytes(blob, snir_size); 511bf215546Sopenharmony_ci if (blob->overrun) 512bf215546Sopenharmony_ci return NULL; 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci struct serialized_nir *snir = 515bf215546Sopenharmony_ci ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size); 516bf215546Sopenharmony_ci memcpy(snir->sha1_key, sha1_key, 20); 517bf215546Sopenharmony_ci snir->size = snir_size; 518bf215546Sopenharmony_ci memcpy(snir->data, snir_data, snir_size); 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci return snir; 521bf215546Sopenharmony_ci} 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_cistatic struct v3dv_shader_variant* 524bf215546Sopenharmony_cishader_variant_create_from_blob(struct v3dv_device *device, 525bf215546Sopenharmony_ci struct blob_reader *blob) 526bf215546Sopenharmony_ci{ 527bf215546Sopenharmony_ci VkResult result; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci enum broadcom_shader_stage stage = blob_read_uint32(blob); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci uint32_t prog_data_size = blob_read_uint32(blob); 532bf215546Sopenharmony_ci /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */ 533bf215546Sopenharmony_ci assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage))); 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci const void *prog_data = blob_read_bytes(blob, prog_data_size); 536bf215546Sopenharmony_ci if (blob->overrun) 537bf215546Sopenharmony_ci return NULL; 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci uint32_t ulist_count = blob_read_uint32(blob); 540bf215546Sopenharmony_ci uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count; 541bf215546Sopenharmony_ci const void *contents_data = blob_read_bytes(blob, contents_size); 542bf215546Sopenharmony_ci if (blob->overrun) 543bf215546Sopenharmony_ci return NULL; 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci uint ulist_data_size = sizeof(uint32_t) * ulist_count; 546bf215546Sopenharmony_ci const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size); 547bf215546Sopenharmony_ci if (blob->overrun) 548bf215546Sopenharmony_ci return NULL; 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci uint32_t assembly_offset = blob_read_uint32(blob); 551bf215546Sopenharmony_ci uint32_t qpu_insts_size = blob_read_uint32(blob); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci /* shader_variant_create expects a newly created prog_data for their own, 554bf215546Sopenharmony_ci * as it is what the v3d compiler returns. So we are also allocating one 555bf215546Sopenharmony_ci * (including the uniform list) and filled it up with the data that we read 556bf215546Sopenharmony_ci * from the blob 557bf215546Sopenharmony_ci */ 558bf215546Sopenharmony_ci struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size); 559bf215546Sopenharmony_ci memcpy(new_prog_data, prog_data, prog_data_size); 560bf215546Sopenharmony_ci struct v3d_uniform_list *ulist = &new_prog_data->uniforms; 561bf215546Sopenharmony_ci ulist->count = ulist_count; 562bf215546Sopenharmony_ci ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count); 563bf215546Sopenharmony_ci memcpy(ulist->contents, contents_data, contents_size); 564bf215546Sopenharmony_ci ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count); 565bf215546Sopenharmony_ci memcpy(ulist->data, ulist_data_data, ulist_data_size); 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci return v3dv_shader_variant_create(device, stage, 568bf215546Sopenharmony_ci new_prog_data, prog_data_size, 569bf215546Sopenharmony_ci assembly_offset, 570bf215546Sopenharmony_ci NULL, qpu_insts_size, 571bf215546Sopenharmony_ci &result); 572bf215546Sopenharmony_ci} 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_cistatic struct v3dv_pipeline_shared_data * 575bf215546Sopenharmony_civ3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache, 576bf215546Sopenharmony_ci struct blob_reader *blob) 577bf215546Sopenharmony_ci{ 578bf215546Sopenharmony_ci const unsigned char *sha1_key = blob_read_bytes(blob, 20); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 }; 581bf215546Sopenharmony_ci struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 }; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci uint8_t descriptor_maps_count = blob_read_uint8(blob); 584bf215546Sopenharmony_ci for (uint8_t count = 0; count < descriptor_maps_count; count++) { 585bf215546Sopenharmony_ci uint8_t stage = blob_read_uint8(blob); 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci const struct v3dv_descriptor_maps *current_maps = 588bf215546Sopenharmony_ci blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps)); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci if (blob->overrun) 591bf215546Sopenharmony_ci goto fail; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL, 594bf215546Sopenharmony_ci sizeof(struct v3dv_descriptor_maps), 8, 595bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci if (maps[stage] == NULL) 598bf215546Sopenharmony_ci goto fail; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps)); 601bf215546Sopenharmony_ci if (broadcom_shader_stage_is_render_with_binning(stage)) { 602bf215546Sopenharmony_ci enum broadcom_shader_stage bin_stage = 603bf215546Sopenharmony_ci broadcom_binning_shader_stage_for_render_stage(stage); 604bf215546Sopenharmony_ci maps[bin_stage] = maps[stage]; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci } 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci uint8_t variant_count = blob_read_uint8(blob); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci for (uint8_t count = 0; count < variant_count; count++) { 611bf215546Sopenharmony_ci uint8_t stage = blob_read_uint8(blob); 612bf215546Sopenharmony_ci struct v3dv_shader_variant *variant = 613bf215546Sopenharmony_ci shader_variant_create_from_blob(cache->device, blob); 614bf215546Sopenharmony_ci variants[stage] = variant; 615bf215546Sopenharmony_ci } 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci uint32_t total_assembly_size = blob_read_uint32(blob); 618bf215546Sopenharmony_ci const uint64_t *total_assembly = 619bf215546Sopenharmony_ci blob_read_bytes(blob, total_assembly_size); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci if (blob->overrun) 622bf215546Sopenharmony_ci goto fail; 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *data = 625bf215546Sopenharmony_ci v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants, 626bf215546Sopenharmony_ci total_assembly, total_assembly_size); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci if (!data) 629bf215546Sopenharmony_ci goto fail; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci return data; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_cifail: 634bf215546Sopenharmony_ci for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) { 635bf215546Sopenharmony_ci if (maps[i]) 636bf215546Sopenharmony_ci vk_free2(&cache->device->vk.alloc, NULL, maps[i]); 637bf215546Sopenharmony_ci if (variants[i]) 638bf215546Sopenharmony_ci v3dv_shader_variant_destroy(cache->device, variants[i]); 639bf215546Sopenharmony_ci } 640bf215546Sopenharmony_ci return NULL; 641bf215546Sopenharmony_ci} 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_cistatic void 644bf215546Sopenharmony_cipipeline_cache_load(struct v3dv_pipeline_cache *cache, 645bf215546Sopenharmony_ci size_t size, 646bf215546Sopenharmony_ci const void *data) 647bf215546Sopenharmony_ci{ 648bf215546Sopenharmony_ci struct v3dv_device *device = cache->device; 649bf215546Sopenharmony_ci struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; 650bf215546Sopenharmony_ci struct vk_pipeline_cache_header header; 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci if (cache->cache == NULL || cache->nir_cache == NULL) 653bf215546Sopenharmony_ci return; 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci struct blob_reader blob; 656bf215546Sopenharmony_ci blob_reader_init(&blob, data, size); 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci blob_copy_bytes(&blob, &header, sizeof(header)); 659bf215546Sopenharmony_ci if (size < sizeof(header)) 660bf215546Sopenharmony_ci return; 661bf215546Sopenharmony_ci memcpy(&header, data, sizeof(header)); 662bf215546Sopenharmony_ci if (header.header_size < sizeof(header)) 663bf215546Sopenharmony_ci return; 664bf215546Sopenharmony_ci if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) 665bf215546Sopenharmony_ci return; 666bf215546Sopenharmony_ci if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice)) 667bf215546Sopenharmony_ci return; 668bf215546Sopenharmony_ci if (header.device_id != v3dv_physical_device_device_id(pdevice)) 669bf215546Sopenharmony_ci return; 670bf215546Sopenharmony_ci if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0) 671bf215546Sopenharmony_ci return; 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci uint32_t nir_count = blob_read_uint32(&blob); 674bf215546Sopenharmony_ci if (blob.overrun) 675bf215546Sopenharmony_ci return; 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci for (uint32_t i = 0; i < nir_count; i++) { 678bf215546Sopenharmony_ci struct serialized_nir *snir = 679bf215546Sopenharmony_ci serialized_nir_create_from_blob(cache, &blob); 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci if (!snir) 682bf215546Sopenharmony_ci break; 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); 685bf215546Sopenharmony_ci cache->nir_stats.count++; 686bf215546Sopenharmony_ci } 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci uint32_t count = blob_read_uint32(&blob); 689bf215546Sopenharmony_ci if (blob.overrun) 690bf215546Sopenharmony_ci return; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci for (uint32_t i = 0; i < count; i++) { 693bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *cache_entry = 694bf215546Sopenharmony_ci v3dv_pipeline_shared_data_create_from_blob(cache, &blob); 695bf215546Sopenharmony_ci if (!cache_entry) 696bf215546Sopenharmony_ci break; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry); 699bf215546Sopenharmony_ci cache->stats.count++; 700bf215546Sopenharmony_ci } 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci if (debug_cache) { 703bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and " 704bf215546Sopenharmony_ci "%i entries\n", cache, nir_count, count); 705bf215546Sopenharmony_ci if (dump_stats) 706bf215546Sopenharmony_ci cache_dump_stats(cache); 707bf215546Sopenharmony_ci } 708bf215546Sopenharmony_ci} 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 711bf215546Sopenharmony_civ3dv_CreatePipelineCache(VkDevice _device, 712bf215546Sopenharmony_ci const VkPipelineCacheCreateInfo *pCreateInfo, 713bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 714bf215546Sopenharmony_ci VkPipelineCache *pPipelineCache) 715bf215546Sopenharmony_ci{ 716bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 717bf215546Sopenharmony_ci struct v3dv_pipeline_cache *cache; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci cache = vk_object_zalloc(&device->vk, pAllocator, 722bf215546Sopenharmony_ci sizeof(*cache), 723bf215546Sopenharmony_ci VK_OBJECT_TYPE_PIPELINE_CACHE); 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci if (cache == NULL) 726bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags, 729bf215546Sopenharmony_ci device->instance->pipeline_cache_enabled); 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci if (pCreateInfo->initialDataSize > 0) { 732bf215546Sopenharmony_ci pipeline_cache_load(cache, 733bf215546Sopenharmony_ci pCreateInfo->initialDataSize, 734bf215546Sopenharmony_ci pCreateInfo->pInitialData); 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci *pPipelineCache = v3dv_pipeline_cache_to_handle(cache); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci return VK_SUCCESS; 740bf215546Sopenharmony_ci} 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_civoid 743bf215546Sopenharmony_civ3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache) 744bf215546Sopenharmony_ci{ 745bf215546Sopenharmony_ci mtx_destroy(&cache->mutex); 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci if (dump_stats_on_destroy) 748bf215546Sopenharmony_ci cache_dump_stats(cache); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci if (cache->nir_cache) { 751bf215546Sopenharmony_ci hash_table_foreach(cache->nir_cache, entry) 752bf215546Sopenharmony_ci ralloc_free(entry->data); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci _mesa_hash_table_destroy(cache->nir_cache, NULL); 755bf215546Sopenharmony_ci } 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci if (cache->cache) { 758bf215546Sopenharmony_ci hash_table_foreach(cache->cache, entry) { 759bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *cache_entry = entry->data; 760bf215546Sopenharmony_ci if (cache_entry) 761bf215546Sopenharmony_ci v3dv_pipeline_shared_data_unref(cache->device, cache_entry); 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci _mesa_hash_table_destroy(cache->cache, NULL); 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci} 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 769bf215546Sopenharmony_civ3dv_DestroyPipelineCache(VkDevice _device, 770bf215546Sopenharmony_ci VkPipelineCache _cache, 771bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 772bf215546Sopenharmony_ci{ 773bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 774bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci if (!cache) 777bf215546Sopenharmony_ci return; 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci v3dv_pipeline_cache_finish(cache); 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_ci vk_object_free(&device->vk, pAllocator, cache); 782bf215546Sopenharmony_ci} 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 785bf215546Sopenharmony_civ3dv_MergePipelineCaches(VkDevice device, 786bf215546Sopenharmony_ci VkPipelineCache dstCache, 787bf215546Sopenharmony_ci uint32_t srcCacheCount, 788bf215546Sopenharmony_ci const VkPipelineCache *pSrcCaches) 789bf215546Sopenharmony_ci{ 790bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache); 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci if (!dst->cache || !dst->nir_cache) 793bf215546Sopenharmony_ci return VK_SUCCESS; 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci for (uint32_t i = 0; i < srcCacheCount; i++) { 796bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]); 797bf215546Sopenharmony_ci if (!src->cache || !src->nir_cache) 798bf215546Sopenharmony_ci continue; 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci hash_table_foreach(src->nir_cache, entry) { 801bf215546Sopenharmony_ci struct serialized_nir *src_snir = entry->data; 802bf215546Sopenharmony_ci assert(src_snir); 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key)) 805bf215546Sopenharmony_ci continue; 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci /* FIXME: we are using serialized nir shaders because they are 808bf215546Sopenharmony_ci * convenient to create and store on the cache, but requires to do a 809bf215546Sopenharmony_ci * copy here (and some other places) of the serialized NIR. Perhaps 810bf215546Sopenharmony_ci * it would make sense to move to handle the NIR shaders with shared 811bf215546Sopenharmony_ci * structures with ref counts, as the variants. 812bf215546Sopenharmony_ci */ 813bf215546Sopenharmony_ci struct serialized_nir *snir_dst = 814bf215546Sopenharmony_ci ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size); 815bf215546Sopenharmony_ci memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20); 816bf215546Sopenharmony_ci snir_dst->size = src_snir->size; 817bf215546Sopenharmony_ci memcpy(snir_dst->data, src_snir->data, src_snir->size); 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst); 820bf215546Sopenharmony_ci dst->nir_stats.count++; 821bf215546Sopenharmony_ci if (debug_cache) { 822bf215546Sopenharmony_ci char sha1buf[41]; 823bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, snir_dst->sha1_key); 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, added nir entry %s " 826bf215546Sopenharmony_ci "from pipeline cache %p\n", 827bf215546Sopenharmony_ci dst, sha1buf, src); 828bf215546Sopenharmony_ci if (dump_stats) 829bf215546Sopenharmony_ci cache_dump_stats(dst); 830bf215546Sopenharmony_ci } 831bf215546Sopenharmony_ci } 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci hash_table_foreach(src->cache, entry) { 834bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *cache_entry = entry->data; 835bf215546Sopenharmony_ci assert(cache_entry); 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key)) 838bf215546Sopenharmony_ci continue; 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci v3dv_pipeline_shared_data_ref(cache_entry); 841bf215546Sopenharmony_ci _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci dst->stats.count++; 844bf215546Sopenharmony_ci if (debug_cache) { 845bf215546Sopenharmony_ci char sha1buf[41]; 846bf215546Sopenharmony_ci _mesa_sha1_format(sha1buf, cache_entry->sha1_key); 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci fprintf(stderr, "pipeline cache %p, added entry %s " 849bf215546Sopenharmony_ci "from pipeline cache %p\n", 850bf215546Sopenharmony_ci dst, sha1buf, src); 851bf215546Sopenharmony_ci if (dump_stats) 852bf215546Sopenharmony_ci cache_dump_stats(dst); 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci } 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci return VK_SUCCESS; 858bf215546Sopenharmony_ci} 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_cistatic bool 861bf215546Sopenharmony_cishader_variant_write_to_blob(const struct v3dv_shader_variant *variant, 862bf215546Sopenharmony_ci struct blob *blob) 863bf215546Sopenharmony_ci{ 864bf215546Sopenharmony_ci blob_write_uint32(blob, variant->stage); 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci blob_write_uint32(blob, variant->prog_data_size); 867bf215546Sopenharmony_ci blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size); 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms; 870bf215546Sopenharmony_ci blob_write_uint32(blob, ulist->count); 871bf215546Sopenharmony_ci blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count); 872bf215546Sopenharmony_ci blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci blob_write_uint32(blob, variant->assembly_offset); 875bf215546Sopenharmony_ci blob_write_uint32(blob, variant->qpu_insts_size); 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci return !blob->out_of_memory; 878bf215546Sopenharmony_ci} 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_cistatic bool 881bf215546Sopenharmony_civ3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry, 882bf215546Sopenharmony_ci struct blob *blob) 883bf215546Sopenharmony_ci{ 884bf215546Sopenharmony_ci blob_write_bytes(blob, cache_entry->sha1_key, 20); 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci uint8_t descriptor_maps_count = 0; 887bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 888bf215546Sopenharmony_ci if (broadcom_shader_stage_is_binning(stage)) 889bf215546Sopenharmony_ci continue; 890bf215546Sopenharmony_ci if (cache_entry->maps[stage] == NULL) 891bf215546Sopenharmony_ci continue; 892bf215546Sopenharmony_ci descriptor_maps_count++; 893bf215546Sopenharmony_ci } 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci /* Compute pipelines only have one descriptor map, 896bf215546Sopenharmony_ci * graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning 897bf215546Sopenharmony_ci * stages take the descriptor map from the render stage. 898bf215546Sopenharmony_ci */ 899bf215546Sopenharmony_ci assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) || 900bf215546Sopenharmony_ci (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE])); 901bf215546Sopenharmony_ci blob_write_uint8(blob, descriptor_maps_count); 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 904bf215546Sopenharmony_ci if (cache_entry->maps[stage] == NULL) 905bf215546Sopenharmony_ci continue; 906bf215546Sopenharmony_ci if (broadcom_shader_stage_is_binning(stage)) 907bf215546Sopenharmony_ci continue; 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci blob_write_uint8(blob, stage); 910bf215546Sopenharmony_ci blob_write_bytes(blob, cache_entry->maps[stage], 911bf215546Sopenharmony_ci sizeof(struct v3dv_descriptor_maps)); 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci uint8_t variant_count = 0; 915bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 916bf215546Sopenharmony_ci if (cache_entry->variants[stage] == NULL) 917bf215546Sopenharmony_ci continue; 918bf215546Sopenharmony_ci variant_count++; 919bf215546Sopenharmony_ci } 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and 922bf215546Sopenharmony_ci * compute pipelines only have 1. 923bf215546Sopenharmony_ci */ 924bf215546Sopenharmony_ci assert((variant_count == 5 || variant_count == 3) || 925bf215546Sopenharmony_ci (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE])); 926bf215546Sopenharmony_ci blob_write_uint8(blob, variant_count); 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci uint32_t total_assembly_size = 0; 929bf215546Sopenharmony_ci for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { 930bf215546Sopenharmony_ci if (cache_entry->variants[stage] == NULL) 931bf215546Sopenharmony_ci continue; 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci blob_write_uint8(blob, stage); 934bf215546Sopenharmony_ci if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob)) 935bf215546Sopenharmony_ci return false; 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci total_assembly_size += cache_entry->variants[stage]->qpu_insts_size; 938bf215546Sopenharmony_ci } 939bf215546Sopenharmony_ci blob_write_uint32(blob, total_assembly_size); 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci assert(cache_entry->assembly_bo->map); 942bf215546Sopenharmony_ci assert(cache_entry->assembly_bo->size >= total_assembly_size); 943bf215546Sopenharmony_ci blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size); 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci return !blob->out_of_memory; 946bf215546Sopenharmony_ci} 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 950bf215546Sopenharmony_civ3dv_GetPipelineCacheData(VkDevice _device, 951bf215546Sopenharmony_ci VkPipelineCache _cache, 952bf215546Sopenharmony_ci size_t *pDataSize, 953bf215546Sopenharmony_ci void *pData) 954bf215546Sopenharmony_ci{ 955bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 956bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci struct blob blob; 959bf215546Sopenharmony_ci if (pData) { 960bf215546Sopenharmony_ci blob_init_fixed(&blob, pData, *pDataSize); 961bf215546Sopenharmony_ci } else { 962bf215546Sopenharmony_ci blob_init_fixed(&blob, NULL, SIZE_MAX); 963bf215546Sopenharmony_ci } 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; 966bf215546Sopenharmony_ci VkResult result = VK_INCOMPLETE; 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci pipeline_cache_lock(cache); 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci struct vk_pipeline_cache_header header = { 971bf215546Sopenharmony_ci .header_size = sizeof(struct vk_pipeline_cache_header), 972bf215546Sopenharmony_ci .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, 973bf215546Sopenharmony_ci .vendor_id = v3dv_physical_device_vendor_id(pdevice), 974bf215546Sopenharmony_ci .device_id = v3dv_physical_device_device_id(pdevice), 975bf215546Sopenharmony_ci }; 976bf215546Sopenharmony_ci memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE); 977bf215546Sopenharmony_ci blob_write_bytes(&blob, &header, sizeof(header)); 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci uint32_t nir_count = 0; 980bf215546Sopenharmony_ci intptr_t nir_count_offset = blob_reserve_uint32(&blob); 981bf215546Sopenharmony_ci if (nir_count_offset < 0) { 982bf215546Sopenharmony_ci *pDataSize = 0; 983bf215546Sopenharmony_ci goto done; 984bf215546Sopenharmony_ci } 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci if (cache->nir_cache) { 987bf215546Sopenharmony_ci hash_table_foreach(cache->nir_cache, entry) { 988bf215546Sopenharmony_ci const struct serialized_nir *snir = entry->data; 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci size_t save_size = blob.size; 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci blob_write_bytes(&blob, snir->sha1_key, 20); 993bf215546Sopenharmony_ci blob_write_uint32(&blob, snir->size); 994bf215546Sopenharmony_ci blob_write_bytes(&blob, snir->data, snir->size); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci if (blob.out_of_memory) { 997bf215546Sopenharmony_ci blob.size = save_size; 998bf215546Sopenharmony_ci goto done; 999bf215546Sopenharmony_ci } 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci nir_count++; 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci } 1004bf215546Sopenharmony_ci blob_overwrite_uint32(&blob, nir_count_offset, nir_count); 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci uint32_t count = 0; 1007bf215546Sopenharmony_ci intptr_t count_offset = blob_reserve_uint32(&blob); 1008bf215546Sopenharmony_ci if (count_offset < 0) { 1009bf215546Sopenharmony_ci *pDataSize = 0; 1010bf215546Sopenharmony_ci goto done; 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci if (cache->cache) { 1014bf215546Sopenharmony_ci hash_table_foreach(cache->cache, entry) { 1015bf215546Sopenharmony_ci struct v3dv_pipeline_shared_data *cache_entry = entry->data; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci size_t save_size = blob.size; 1018bf215546Sopenharmony_ci if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) { 1019bf215546Sopenharmony_ci /* If it fails reset to the previous size and bail */ 1020bf215546Sopenharmony_ci blob.size = save_size; 1021bf215546Sopenharmony_ci goto done; 1022bf215546Sopenharmony_ci } 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci count++; 1025bf215546Sopenharmony_ci } 1026bf215546Sopenharmony_ci } 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci blob_overwrite_uint32(&blob, count_offset, count); 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci *pDataSize = blob.size; 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci result = VK_SUCCESS; 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci if (debug_cache) { 1035bf215546Sopenharmony_ci assert(count <= cache->stats.count); 1036bf215546Sopenharmony_ci fprintf(stderr, "GetPipelineCacheData: serializing cache %p, " 1037bf215546Sopenharmony_ci "%i nir shader entries " 1038bf215546Sopenharmony_ci "%i entries, %u DataSize\n", 1039bf215546Sopenharmony_ci cache, nir_count, count, (uint32_t) *pDataSize); 1040bf215546Sopenharmony_ci } 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci done: 1043bf215546Sopenharmony_ci blob_finish(&blob); 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci pipeline_cache_unlock(cache); 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci return result; 1048bf215546Sopenharmony_ci} 1049