/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;
/* Maximum number of entries, shared by the NIR and variant caches */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

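/* Both hash tables are keyed by raw 20-byte SHA-1 digests, so the callbacks
 * below hash and compare fixed-size keys rather than NUL-terminated strings.
 */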
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

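/* NIR shaders are kept in the cache in serialized form: the 20-byte SHA-1
 * key, the size of the serialized NIR, and the data itself, allocated as a
 * single ralloc'ed chunk owned by the nir_cache hash table.
 */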
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
   fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
   fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);

   fprintf(stderr, "  cache entries:      %d\n", cache->stats.count);
   fprintf(stderr, "  cache miss count:   %d\n", cache->stats.miss);
   fprintf(stderr, "  cache hit  count:   %d\n", cache->stats.hit);

   fprintf(stderr, "  on-disk cache hit  count:   %d\n", cache->stats.on_disk_hit);
}

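/* Caches created with VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT
 * are synchronized by the application, so for them we can skip taking the
 * mutex.
 */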
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_unlock(&cache->mutex);
}

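/* Serializes the given NIR shader and adds it to the cache, keyed by
 * sha1_key. If an entry with the same key is already present, nothing is
 * uploaded.
 */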
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock.  We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   if (unlikely(!snir)) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

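/* Looks up the NIR shader cached under sha1_key and, on a hit, returns a
 * freshly deserialized copy that the caller owns. Returns NULL on a miss or
 * if the cached data is corrupt.
 */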
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use a NULL context because we want the p_stage to keep the
       * reference to the NIR, as we keep open the possibility of providing
       * a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches the cache for pipeline data and returns a
 * v3dv_pipeline_shared_data with it, or NULL if it is not cached. On a hit
 * the ref_count is increased, so the caller is responsible for unreffing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk cache can be independently disabled, while
    * keeping the pipeline cache working, by using the environment variable
    * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
    * will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
         char sha1buf[41];
         _mesa_sha1_format(sha1buf, cache_key);
         fprintf(stderr, "[v3dv on-disk cache] %s %s\n",
                 buffer ? "hit" : "miss",
                 sha1buf);
      }

      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            /* Technically we could increase on_disk_hit as soon as we have a
             * buffer, but we are more interested in hits that produced a
             * valid shared_data.
             */
            cache->stats.on_disk_hit++;
            pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      }
   }
#endif

   return NULL;
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

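/* Allocates a new v3dv_pipeline_shared_data (with a reference count of 1)
 * from the given descriptor maps and variants, and uploads total_assembly
 * into a newly allocated and mapped BO.
 */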
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * ref'd and unref'd by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will still be available
    * on the last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map shader assembly buffer\n");
      goto fail;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}

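/* Adds shared_data to the in-memory cache (taking a reference) and, unless
 * we got here from a disk cache hit, also stores it in the on-disk cache
 * when that is enabled.
 */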
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from a disk cache hit, we already know that
    * the entry is not in the hash table.
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk cache hit, we can skip writing
    * to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline. */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

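/* Reads one serialized NIR entry (sha1 key, size, data) from the blob, in
 * the same layout written by v3dv_GetPipelineCacheData.
 */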
static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   if (unlikely(!snir))
      return NULL;
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

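/* Reads one shader variant from the blob, in the layout written by
 * shader_variant_write_to_blob, rebuilding the v3d_prog_data and its
 * uniform list.
 */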
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* v3dv_shader_variant_create expects a newly created prog_data of its
    * own, as that is what the v3d compiler returns. So we allocate one here
    * (including the uniform list) and fill it with the data we read from
    * the blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         goto fail;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         goto fail;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      goto fail;

   struct v3dv_pipeline_shared_data *data =
      v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                    total_assembly, total_assembly_size);

   if (!data)
      goto fail;

   return data;

fail:
   for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
      /* Binning stages share the descriptor map with their render stage,
       * so skip them here to avoid a double free.
       */
      if (maps[i] && !broadcom_shader_stage_is_binning(i))
         vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
      if (variants[i])
         v3dv_shader_variant_destroy(cache->device, variants[i]);
   }
   return NULL;
}

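/* Loads a full pipeline cache blob, as produced by v3dv_GetPipelineCacheData:
 * a vk_pipeline_cache_header, followed by the serialized NIR entries and the
 * pipeline entries. Data with a mismatched or invalid header is silently
 * ignored.
 */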
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   if (size < sizeof(header))
      return;
   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we use serialized NIR shaders because they are convenient
          * to create and store in the cache, but that requires doing a copy
          * here (and in some other places) of the serialized NIR. Perhaps
          * it would make sense to handle the NIR shaders with shared,
          * ref-counted structures, like the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

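/* Serializes one shader variant: stage, prog_data (with its uniform list
 * flattened out), the offset of its assembly within the shared BO, and the
 * assembly size. shader_variant_create_from_blob reads the same layout.
 */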
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, while graphics
    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages
    * take the descriptor map from their render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS have 5, and
    * compute pipelines only have 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

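/* Implements the standard Vulkan two-call idiom: when pData is NULL we only
 * report the required size in pDataSize; otherwise we serialize as much as
 * fits in the application's buffer and return VK_INCOMPLETE if we had to
 * stop early.
 */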
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}