1/*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <string.h>
25#include <stdlib.h>
26#include <assert.h>
27
28#include <vulkan/vulkan.h>
29#include <vulkan/vk_layer.h>
30
31#include "git_sha1.h"
32
33#include "imgui.h"
34
35#include "overlay_params.h"
36
37#include "util/debug.h"
38#include "util/hash_table.h"
39#include "util/list.h"
40#include "util/ralloc.h"
41#include "util/os_time.h"
42#include "util/os_socket.h"
43#include "util/simple_mtx.h"
44#include "util/u_math.h"
45
46#include "vk_enum_to_str.h"
47#include "vk_dispatch_table.h"
48#include "vk_util.h"
49
/* Mapped from VkInstance/VkPhysicalDevice */
struct instance_data {
   /* Dispatch tables used to call down the layer chain. */
   struct vk_instance_dispatch_table vtable;
   struct vk_physical_device_dispatch_table pd_vtable;
   VkInstance instance;

   struct overlay_params params;
   bool pipeline_statistics_enabled;

   /* Set once the column header line has been written to the output file. */
   bool first_line_printed;

   /* Socket of the connected control client, or -1 when nobody is connected. */
   int control_client;

   /* Dumping of frame stats to a file has been enabled. */
   bool capture_enabled;

   /* Dumping of frame stats to a file has been enabled and started. */
   bool capture_started;
};
69
/* One counter per overlay parameter, indexed by enum overlay_param_enabled. */
struct frame_stat {
   uint64_t stats[OVERLAY_PARAM_ENABLED_MAX];
};
73
/* Mapped from VkDevice */
struct queue_data;
struct device_data {
   /* Instance this device was created from. */
   struct instance_data *instance;

   /* Loader callback invoked on queues and command buffers the layer
    * retrieves or allocates itself.
    */
   PFN_vkSetDeviceLoaderData set_device_loader_data;

   struct vk_device_dispatch_table vtable;
   VkPhysicalDevice physical_device;
   VkDevice device;

   VkPhysicalDeviceProperties properties;

   /* Most recently registered queue whose family has VK_QUEUE_GRAPHICS_BIT. */
   struct queue_data *graphic_queue;

   /* Array of n_queues entries, one per queue requested at device creation. */
   struct queue_data **queues;
   uint32_t n_queues;

   /* For a single frame */
   struct frame_stat frame_stats;
};
95
/* Mapped from VkCommandBuffer */
struct command_buffer_data {
   /* Device the command buffer belongs to. */
   struct device_data *device;

   VkCommandBufferLevel level;

   VkCommandBuffer cmd_buffer;
   /* Query pools and the slot index within them used by this command buffer. */
   VkQueryPool pipeline_query_pool;
   VkQueryPool timestamp_query_pool;
   uint32_t query_index;

   struct frame_stat stats;

   struct list_head link; /* link into queue_data::running_command_buffer */
};
111
/* Mapped from VkQueue */
struct queue_data {
   /* Device the queue was retrieved from. */
   struct device_data *device;

   VkQueue queue;
   /* queueFlags of the queue family this queue belongs to. */
   VkQueueFlags flags;
   uint32_t family_index;
   /* Mask derived from the family's timestampValidBits. */
   uint64_t timestamp_mask;

   /* Fence synchronizing access to queries on that queue. */
   VkFence queries_fence;

   /* List of command_buffer_data, chained through command_buffer_data::link. */
   struct list_head running_command_buffer;
};
125
/* Per-draw resources of the overlay; instances are chained into
 * swapchain_data::draws and recycled once their fence has signaled.
 */
struct overlay_draw {
   struct list_head link;

   VkCommandBuffer command_buffer;

   VkSemaphore cross_engine_semaphore;

   VkSemaphore semaphore;
   VkFence fence;

   /* Vertex buffer, its backing memory and size. */
   VkBuffer vertex_buffer;
   VkDeviceMemory vertex_buffer_mem;
   VkDeviceSize vertex_buffer_size;

   /* Index buffer, its backing memory and size. */
   VkBuffer index_buffer;
   VkDeviceMemory index_buffer_mem;
   VkDeviceSize index_buffer_size;
};
144
/* Mapped from VkSwapchainKHR */
struct swapchain_data {
   /* Device the swapchain was created on. */
   struct device_data *device;

   VkSwapchainKHR swapchain;
   unsigned width, height;
   VkFormat format;

   /* Per-image arrays. NOTE(review): presumably n_images entries each — the
    * allocation site is outside this chunk.
    */
   uint32_t n_images;
   VkImage *images;
   VkImageView *image_views;
   VkFramebuffer *framebuffers;

   VkRenderPass render_pass;

   VkDescriptorPool descriptor_pool;
   VkDescriptorSetLayout descriptor_layout;
   VkDescriptorSet descriptor_set;

   VkSampler font_sampler;

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;

   VkCommandPool command_pool;

   struct list_head draws; /* List of struct overlay_draw */

   /* Font atlas resources. */
   bool font_uploaded;
   VkImage font_image;
   VkImageView font_image_view;
   VkDeviceMemory font_mem;
   VkBuffer upload_font_buffer;
   VkDeviceMemory upload_font_buffer_mem;

   /**/
   ImGuiContext* imgui_context;
   /* Overlay window size, initialized from the width/height parameters. */
   ImVec2 window_size;

   /**/
   /* Number of frames snapshotted so far and time of the last one (us). */
   uint64_t n_frames;
   uint64_t last_present_time;

   /* Frame count and timestamp (us) since fps was last recomputed. */
   unsigned n_frames_since_update;
   uint64_t last_fps_update;
   double fps;

   enum overlay_param_enabled stat_selector;
   double time_dividor;
   struct frame_stat stats_min, stats_max;
   /* Ring of per-frame stats; the slot written is n_frames modulo its size. */
   struct frame_stat frames_stats[200];

   /* Over a single frame */
   struct frame_stat frame_stats;

   /* Over fps_sampling_period */
   struct frame_stat accumulated_stats;
};
203
/* Pipeline statistics counters the overlay asks for. */
static const VkQueryPipelineStatisticFlags overlay_query_flags =
   VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
/* Number of bits set in overlay_query_flags; keep the two in sync. */
#define OVERLAY_QUERY_COUNT (11)
217
/* Global map from Vulkan handles (cast to uint64_t) to the layer's per-object
 * data. Created lazily; every access goes through vk_object_to_data_mutex.
 */
static struct hash_table_u64 *vk_object_to_data = NULL;
static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP;

/* NOTE(review): presumably the per-thread current ImGui context consumed by
 * the ImGui build — not referenced in this part of the file, confirm.
 */
thread_local ImGuiContext* __MesaImGui;
222
223static inline void ensure_vk_object_map(void)
224{
225   if (!vk_object_to_data)
226      vk_object_to_data = _mesa_hash_table_u64_create(NULL);
227}
228
/* HKEY turns a Vulkan handle into the 64-bit key used by the handle map. */
#define HKEY(obj) ((uint64_t)(obj))
/* FIND looks up the data mapped to obj and casts it to a `type *`. */
#define FIND(type, obj) ((type *)find_object_data(HKEY(obj)))
231
232static void *find_object_data(uint64_t obj)
233{
234   simple_mtx_lock(&vk_object_to_data_mutex);
235   ensure_vk_object_map();
236   void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj);
237   simple_mtx_unlock(&vk_object_to_data_mutex);
238   return data;
239}
240
241static void map_object(uint64_t obj, void *data)
242{
243   simple_mtx_lock(&vk_object_to_data_mutex);
244   ensure_vk_object_map();
245   _mesa_hash_table_u64_insert(vk_object_to_data, obj, data);
246   simple_mtx_unlock(&vk_object_to_data_mutex);
247}
248
249static void unmap_object(uint64_t obj)
250{
251   simple_mtx_lock(&vk_object_to_data_mutex);
252   _mesa_hash_table_u64_remove(vk_object_to_data, obj);
253   simple_mtx_unlock(&vk_object_to_data_mutex);
254}
255
256/**/
257
/* Evaluate a Vulkan call once and log to stderr when it does not return
 * VK_SUCCESS. The failure is only reported: execution continues. The
 * expression text is stringified into the message, so renaming variables in
 * an argument changes what gets logged.
 */
#define VK_CHECK(expr) \
   do { \
      VkResult __result = (expr); \
      if (__result != VK_SUCCESS) { \
         fprintf(stderr, "'%s' line %i failed with %s\n", \
                 #expr, __LINE__, vk_Result_to_str(__result)); \
      } \
   } while (0)
266
267/**/
268
269static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo,
270                                                          VkLayerFunction func)
271{
272   vk_foreach_struct_const(item, pCreateInfo->pNext) {
273      if (item->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO &&
274          ((VkLayerInstanceCreateInfo *) item)->function == func)
275         return (VkLayerInstanceCreateInfo *) item;
276   }
277   unreachable("instance chain info not found");
278   return NULL;
279}
280
281static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo,
282                                                      VkLayerFunction func)
283{
284   vk_foreach_struct_const(item, pCreateInfo->pNext) {
285      if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO &&
286          ((VkLayerDeviceCreateInfo *) item)->function == func)
287         return (VkLayerDeviceCreateInfo *)item;
288   }
289   unreachable("device chain info not found");
290   return NULL;
291}
292
293static struct VkBaseOutStructure *
294clone_chain(const struct VkBaseInStructure *chain)
295{
296   struct VkBaseOutStructure *head = NULL, *tail = NULL;
297
298   vk_foreach_struct_const(item, chain) {
299      size_t item_size = vk_structure_type_size(item);
300      struct VkBaseOutStructure *new_item =
301         (struct VkBaseOutStructure *)malloc(item_size);;
302
303      memcpy(new_item, item, item_size);
304
305      if (!head)
306         head = new_item;
307      if (tail)
308         tail->pNext = new_item;
309      tail = new_item;
310   }
311
312   return head;
313}
314
315static void
316free_chain(struct VkBaseOutStructure *chain)
317{
318   while (chain) {
319      void *node = chain;
320      chain = chain->pNext;
321      free(node);
322   }
323}
324
325/**/
326
327static struct instance_data *new_instance_data(VkInstance instance)
328{
329   struct instance_data *data = rzalloc(NULL, struct instance_data);
330   data->instance = instance;
331   data->control_client = -1;
332   map_object(HKEY(data->instance), data);
333   return data;
334}
335
336static void destroy_instance_data(struct instance_data *data)
337{
338   if (data->params.output_file)
339      fclose(data->params.output_file);
340   if (data->params.control >= 0)
341      os_socket_close(data->params.control);
342   unmap_object(HKEY(data->instance));
343   ralloc_free(data);
344}
345
346static void instance_data_map_physical_devices(struct instance_data *instance_data,
347                                               bool map)
348{
349   uint32_t physicalDeviceCount = 0;
350   instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance,
351                                                  &physicalDeviceCount,
352                                                  NULL);
353
354   VkPhysicalDevice *physicalDevices = (VkPhysicalDevice *) malloc(sizeof(VkPhysicalDevice) * physicalDeviceCount);
355   instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance,
356                                                  &physicalDeviceCount,
357                                                  physicalDevices);
358
359   for (uint32_t i = 0; i < physicalDeviceCount; i++) {
360      if (map)
361         map_object(HKEY(physicalDevices[i]), instance_data);
362      else
363         unmap_object(HKEY(physicalDevices[i]));
364   }
365
366   free(physicalDevices);
367}
368
369/**/
370static struct device_data *new_device_data(VkDevice device, struct instance_data *instance)
371{
372   struct device_data *data = rzalloc(NULL, struct device_data);
373   data->instance = instance;
374   data->device = device;
375   map_object(HKEY(data->device), data);
376   return data;
377}
378
379static struct queue_data *new_queue_data(VkQueue queue,
380                                         const VkQueueFamilyProperties *family_props,
381                                         uint32_t family_index,
382                                         struct device_data *device_data)
383{
384   struct queue_data *data = rzalloc(device_data, struct queue_data);
385   data->device = device_data;
386   data->queue = queue;
387   data->flags = family_props->queueFlags;
388   data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1;
389   data->family_index = family_index;
390   list_inithead(&data->running_command_buffer);
391   map_object(HKEY(data->queue), data);
392
393   /* Fence synchronizing access to queries on that queue. */
394   VkFenceCreateInfo fence_info = {};
395   fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
396   fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
397   VK_CHECK(device_data->vtable.CreateFence(device_data->device,
398                                            &fence_info,
399                                            NULL,
400                                            &data->queries_fence));
401
402   if (data->flags & VK_QUEUE_GRAPHICS_BIT)
403      device_data->graphic_queue = data;
404
405   return data;
406}
407
408static void destroy_queue(struct queue_data *data)
409{
410   struct device_data *device_data = data->device;
411   device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL);
412   unmap_object(HKEY(data->queue));
413   ralloc_free(data);
414}
415
416static void device_map_queues(struct device_data *data,
417                              const VkDeviceCreateInfo *pCreateInfo)
418{
419   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
420      data->n_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
421   data->queues = ralloc_array(data, struct queue_data *, data->n_queues);
422
423   struct instance_data *instance_data = data->instance;
424   uint32_t n_family_props;
425   instance_data->pd_vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device,
426                                                                   &n_family_props,
427                                                                   NULL);
428   VkQueueFamilyProperties *family_props =
429      (VkQueueFamilyProperties *)malloc(sizeof(VkQueueFamilyProperties) * n_family_props);
430   instance_data->pd_vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device,
431                                                                   &n_family_props,
432                                                                   family_props);
433
434   uint32_t queue_index = 0;
435   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
436      for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
437         VkQueue queue;
438         data->vtable.GetDeviceQueue(data->device,
439                                     pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex,
440                                     j, &queue);
441
442         VK_CHECK(data->set_device_loader_data(data->device, queue));
443
444         data->queues[queue_index++] =
445            new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex],
446                           pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data);
447      }
448   }
449
450   free(family_props);
451}
452
453static void device_unmap_queues(struct device_data *data)
454{
455   for (uint32_t i = 0; i < data->n_queues; i++)
456      destroy_queue(data->queues[i]);
457}
458
459static void destroy_device_data(struct device_data *data)
460{
461   unmap_object(HKEY(data->device));
462   ralloc_free(data);
463}
464
465/**/
466static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
467                                                           VkCommandBufferLevel level,
468                                                           VkQueryPool pipeline_query_pool,
469                                                           VkQueryPool timestamp_query_pool,
470                                                           uint32_t query_index,
471                                                           struct device_data *device_data)
472{
473   struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data);
474   data->device = device_data;
475   data->cmd_buffer = cmd_buffer;
476   data->level = level;
477   data->pipeline_query_pool = pipeline_query_pool;
478   data->timestamp_query_pool = timestamp_query_pool;
479   data->query_index = query_index;
480   list_inithead(&data->link);
481   map_object(HKEY(data->cmd_buffer), data);
482   return data;
483}
484
485static void destroy_command_buffer_data(struct command_buffer_data *data)
486{
487   unmap_object(HKEY(data->cmd_buffer));
488   list_delinit(&data->link);
489   ralloc_free(data);
490}
491
492/**/
493static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
494                                                 struct device_data *device_data)
495{
496   struct instance_data *instance_data = device_data->instance;
497   struct swapchain_data *data = rzalloc(NULL, struct swapchain_data);
498   data->device = device_data;
499   data->swapchain = swapchain;
500   data->window_size = ImVec2(instance_data->params.width, instance_data->params.height);
501   list_inithead(&data->draws);
502   map_object(HKEY(data->swapchain), data);
503   return data;
504}
505
506static void destroy_swapchain_data(struct swapchain_data *data)
507{
508   unmap_object(HKEY(data->swapchain));
509   ralloc_free(data);
510}
511
512struct overlay_draw *get_overlay_draw(struct swapchain_data *data)
513{
514   struct device_data *device_data = data->device;
515   struct overlay_draw *draw = list_is_empty(&data->draws) ?
516      NULL : list_first_entry(&data->draws, struct overlay_draw, link);
517
518   VkSemaphoreCreateInfo sem_info = {};
519   sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
520
521   if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) {
522      list_del(&draw->link);
523      VK_CHECK(device_data->vtable.ResetFences(device_data->device,
524                                               1, &draw->fence));
525      list_addtail(&draw->link, &data->draws);
526      return draw;
527   }
528
529   draw = rzalloc(data, struct overlay_draw);
530
531   VkCommandBufferAllocateInfo cmd_buffer_info = {};
532   cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
533   cmd_buffer_info.commandPool = data->command_pool;
534   cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
535   cmd_buffer_info.commandBufferCount = 1;
536   VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device,
537                                                       &cmd_buffer_info,
538                                                       &draw->command_buffer));
539   VK_CHECK(device_data->set_device_loader_data(device_data->device,
540                                                draw->command_buffer));
541
542
543   VkFenceCreateInfo fence_info = {};
544   fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
545   VK_CHECK(device_data->vtable.CreateFence(device_data->device,
546                                            &fence_info,
547                                            NULL,
548                                            &draw->fence));
549
550   VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info,
551                                                NULL, &draw->semaphore));
552   VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info,
553                                                NULL, &draw->cross_engine_semaphore));
554
555   list_addtail(&draw->link, &data->draws);
556
557   return draw;
558}
559
560static const char *param_unit(enum overlay_param_enabled param)
561{
562   switch (param) {
563   case OVERLAY_PARAM_ENABLED_frame_timing:
564   case OVERLAY_PARAM_ENABLED_acquire_timing:
565   case OVERLAY_PARAM_ENABLED_present_timing:
566      return "(us)";
567   case OVERLAY_PARAM_ENABLED_gpu_timing:
568      return "(ns)";
569   default:
570      return "";
571   }
572}
573
574static void parse_command(struct instance_data *instance_data,
575                          const char *cmd, unsigned cmdlen,
576                          const char *param, unsigned paramlen)
577{
578   if (!strncmp(cmd, "capture", cmdlen)) {
579      int value = atoi(param);
580      bool enabled = value > 0;
581
582      if (enabled) {
583         instance_data->capture_enabled = true;
584      } else {
585         instance_data->capture_enabled = false;
586         instance_data->capture_started = false;
587      }
588   }
589}
590
591#define BUFSIZE 4096
592
593/**
594 * This function will process commands through the control file.
595 *
596 * A command starts with a colon, followed by the command, and followed by an
597 * option '=' and a parameter.  It has to end with a semi-colon. A full command
598 * + parameter looks like:
599 *
600 *    :cmd=param;
601 */
602static void process_char(struct instance_data *instance_data, char c)
603{
604   static char cmd[BUFSIZE];
605   static char param[BUFSIZE];
606
607   static unsigned cmdpos = 0;
608   static unsigned parampos = 0;
609   static bool reading_cmd = false;
610   static bool reading_param = false;
611
612   switch (c) {
613   case ':':
614      cmdpos = 0;
615      parampos = 0;
616      reading_cmd = true;
617      reading_param = false;
618      break;
619   case ';':
620      if (!reading_cmd)
621         break;
622      cmd[cmdpos++] = '\0';
623      param[parampos++] = '\0';
624      parse_command(instance_data, cmd, cmdpos, param, parampos);
625      reading_cmd = false;
626      reading_param = false;
627      break;
628   case '=':
629      if (!reading_cmd)
630         break;
631      reading_param = true;
632      break;
633   default:
634      if (!reading_cmd)
635         break;
636
637      if (reading_param) {
638         /* overflow means an invalid parameter */
639         if (parampos >= BUFSIZE - 1) {
640            reading_cmd = false;
641            reading_param = false;
642            break;
643         }
644
645         param[parampos++] = c;
646      } else {
647         /* overflow means an invalid command */
648         if (cmdpos >= BUFSIZE - 1) {
649            reading_cmd = false;
650            break;
651         }
652
653         cmd[cmdpos++] = c;
654      }
655   }
656}
657
658static void control_send(struct instance_data *instance_data,
659                         const char *cmd, unsigned cmdlen,
660                         const char *param, unsigned paramlen)
661{
662   unsigned msglen = 0;
663   char buffer[BUFSIZE];
664
665   assert(cmdlen + paramlen + 3 < BUFSIZE);
666
667   buffer[msglen++] = ':';
668
669   memcpy(&buffer[msglen], cmd, cmdlen);
670   msglen += cmdlen;
671
672   if (paramlen > 0) {
673      buffer[msglen++] = '=';
674      memcpy(&buffer[msglen], param, paramlen);
675      msglen += paramlen;
676      buffer[msglen++] = ';';
677   }
678
679   os_socket_send(instance_data->control_client, buffer, msglen, 0);
680}
681
682static void control_send_connection_string(struct device_data *device_data)
683{
684   struct instance_data *instance_data = device_data->instance;
685
686   const char *controlVersionCmd = "MesaOverlayControlVersion";
687   const char *controlVersionString = "1";
688
689   control_send(instance_data, controlVersionCmd, strlen(controlVersionCmd),
690                controlVersionString, strlen(controlVersionString));
691
692   const char *deviceCmd = "DeviceName";
693   const char *deviceName = device_data->properties.deviceName;
694
695   control_send(instance_data, deviceCmd, strlen(deviceCmd),
696                deviceName, strlen(deviceName));
697
698   const char *mesaVersionCmd = "MesaVersion";
699   const char *mesaVersionString = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1;
700
701   control_send(instance_data, mesaVersionCmd, strlen(mesaVersionCmd),
702                mesaVersionString, strlen(mesaVersionString));
703}
704
705static void control_client_check(struct device_data *device_data)
706{
707   struct instance_data *instance_data = device_data->instance;
708
709   /* Already connected, just return. */
710   if (instance_data->control_client >= 0)
711      return;
712
713   int socket = os_socket_accept(instance_data->params.control);
714   if (socket == -1) {
715      if (errno != EAGAIN && errno != EWOULDBLOCK && errno != ECONNABORTED)
716         fprintf(stderr, "ERROR on socket: %s\n", strerror(errno));
717      return;
718   }
719
720   if (socket >= 0) {
721      os_socket_block(socket, false);
722      instance_data->control_client = socket;
723      control_send_connection_string(device_data);
724   }
725}
726
727static void control_client_disconnected(struct instance_data *instance_data)
728{
729   os_socket_close(instance_data->control_client);
730   instance_data->control_client = -1;
731}
732
733static void process_control_socket(struct instance_data *instance_data)
734{
735   const int client = instance_data->control_client;
736   if (client >= 0) {
737      char buf[BUFSIZE];
738
739      while (true) {
740         ssize_t n = os_socket_recv(client, buf, BUFSIZE, 0);
741
742         if (n == -1) {
743            if (errno == EAGAIN || errno == EWOULDBLOCK) {
744               /* nothing to read, try again later */
745               break;
746            }
747
748            if (errno != ECONNRESET)
749               fprintf(stderr, "ERROR on connection: %s\n", strerror(errno));
750
751            control_client_disconnected(instance_data);
752         } else if (n == 0) {
753            /* recv() returns 0 when the client disconnects */
754            control_client_disconnected(instance_data);
755         }
756
757         for (ssize_t i = 0; i < n; i++) {
758            process_char(instance_data, buf[i]);
759         }
760
761         /* If we try to read BUFSIZE and receive BUFSIZE bytes from the
762          * socket, there's a good chance that there's still more data to be
763          * read, so we will try again. Otherwise, simply be done for this
764          * iteration and try again on the next frame.
765          */
766         if (n < BUFSIZE)
767            break;
768      }
769   }
770}
771
772static void snapshot_swapchain_frame(struct swapchain_data *data)
773{
774   struct device_data *device_data = data->device;
775   struct instance_data *instance_data = device_data->instance;
776   uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats);
777   uint64_t now = os_time_get(); /* us */
778
779   if (instance_data->params.control >= 0) {
780      control_client_check(device_data);
781      process_control_socket(instance_data);
782   }
783
784   if (data->last_present_time) {
785      data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] =
786         now - data->last_present_time;
787   }
788
789   memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx]));
790   for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
791      data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
792      data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
793   }
794
795   /* If capture has been enabled but it hasn't started yet, it means we are on
796    * the first snapshot after it has been enabled. At this point we want to
797    * use the stats captured so far to update the display, but we don't want
798    * this data to cause noise to the stats that we want to capture from now
799    * on.
800    *
801    * capture_begin == true will trigger an update of the fps on display, and a
802    * flush of the data, but no stats will be written to the output file. This
803    * way, we will have only stats from after the capture has been enabled
804    * written to the output_file.
805    */
806   const bool capture_begin =
807      instance_data->capture_enabled && !instance_data->capture_started;
808
809   if (data->last_fps_update) {
810      double elapsed = (double)(now - data->last_fps_update); /* us */
811      if (capture_begin ||
812          elapsed >= instance_data->params.fps_sampling_period) {
813         data->fps = 1000000.0f * data->n_frames_since_update / elapsed;
814         if (instance_data->capture_started) {
815            if (!instance_data->first_line_printed) {
816               bool first_column = true;
817
818               instance_data->first_line_printed = true;
819
820#define OVERLAY_PARAM_BOOL(name) \
821               if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \
822                  fprintf(instance_data->params.output_file, \
823                          "%s%s%s", first_column ? "" : ", ", #name, \
824                          param_unit(OVERLAY_PARAM_ENABLED_##name)); \
825                  first_column = false; \
826               }
827#define OVERLAY_PARAM_CUSTOM(name)
828               OVERLAY_PARAMS
829#undef OVERLAY_PARAM_BOOL
830#undef OVERLAY_PARAM_CUSTOM
831               fprintf(instance_data->params.output_file, "\n");
832            }
833
834            for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
835               if (!instance_data->params.enabled[s])
836                  continue;
837               if (s == OVERLAY_PARAM_ENABLED_fps) {
838                  fprintf(instance_data->params.output_file,
839                          "%s%.2f", s == 0 ? "" : ", ", data->fps);
840               } else {
841                  fprintf(instance_data->params.output_file,
842                          "%s%" PRIu64, s == 0 ? "" : ", ",
843                          data->accumulated_stats.stats[s]);
844               }
845            }
846            fprintf(instance_data->params.output_file, "\n");
847            fflush(instance_data->params.output_file);
848         }
849
850         memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats));
851         data->n_frames_since_update = 0;
852         data->last_fps_update = now;
853
854         if (capture_begin)
855            instance_data->capture_started = true;
856      }
857   } else {
858      data->last_fps_update = now;
859   }
860
861   memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats));
862   memset(&data->frame_stats, 0, sizeof(device_data->frame_stats));
863
864   data->last_present_time = now;
865   data->n_frames++;
866   data->n_frames_since_update++;
867}
868
869static float get_time_stat(void *_data, int _idx)
870{
871   struct swapchain_data *data = (struct swapchain_data *) _data;
872   if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
873      return 0.0f;
874   int idx = ARRAY_SIZE(data->frames_stats) +
875      data->n_frames < ARRAY_SIZE(data->frames_stats) ?
876      _idx - data->n_frames :
877      _idx + data->n_frames;
878   idx %= ARRAY_SIZE(data->frames_stats);
879   /* Time stats are in us. */
880   return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor;
881}
882
883static float get_stat(void *_data, int _idx)
884{
885   struct swapchain_data *data = (struct swapchain_data *) _data;
886   if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
887      return 0.0f;
888   int idx = ARRAY_SIZE(data->frames_stats) +
889      data->n_frames < ARRAY_SIZE(data->frames_stats) ?
890      _idx - data->n_frames :
891      _idx + data->n_frames;
892   idx %= ARRAY_SIZE(data->frames_stats);
893   return data->frames_stats[idx].stats[data->stat_selector];
894}
895
896static void position_layer(struct swapchain_data *data)
897
898{
899   struct device_data *device_data = data->device;
900   struct instance_data *instance_data = device_data->instance;
901   const float margin = 10.0f;
902
903   ImGui::SetNextWindowBgAlpha(0.5);
904   ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always);
905   switch (instance_data->params.position) {
906   case LAYER_POSITION_TOP_LEFT:
907      ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always);
908      break;
909   case LAYER_POSITION_TOP_RIGHT:
910      ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin),
911                              ImGuiCond_Always);
912      break;
913   case LAYER_POSITION_BOTTOM_LEFT:
914      ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin),
915                              ImGuiCond_Always);
916      break;
917   case LAYER_POSITION_BOTTOM_RIGHT:
918      ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin,
919                                     data->height - data->window_size.y - margin),
920                              ImGuiCond_Always);
921      break;
922   }
923}
924
925static void compute_swapchain_display(struct swapchain_data *data)
926{
927   struct device_data *device_data = data->device;
928   struct instance_data *instance_data = device_data->instance;
929
930   ImGui::SetCurrentContext(data->imgui_context);
931   ImGui::NewFrame();
932   position_layer(data);
933   ImGui::Begin("Mesa overlay");
934   if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_device])
935      ImGui::Text("Device: %s", device_data->properties.deviceName);
936
937   if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_format]) {
938      const char *format_name = vk_Format_to_str(data->format);
939      format_name = format_name ? (format_name + strlen("VK_FORMAT_")) : "unknown";
940      ImGui::Text("Swapchain format: %s", format_name);
941   }
942   if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_frame])
943      ImGui::Text("Frames: %" PRIu64, data->n_frames);
944   if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps])
945      ImGui::Text("FPS: %.2f" , data->fps);
946
947   /* Recompute min/max */
948   for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
949      data->stats_min.stats[s] = UINT64_MAX;
950      data->stats_max.stats[s] = 0;
951   }
952   for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) {
953      for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
954         data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s],
955                                         data->stats_min.stats[s]);
956         data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s],
957                                         data->stats_max.stats[s]);
958      }
959   }
960   for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
961      assert(data->stats_min.stats[s] != UINT64_MAX);
962   }
963
964   for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
965      if (!instance_data->params.enabled[s] ||
966          s == OVERLAY_PARAM_ENABLED_device ||
967          s == OVERLAY_PARAM_ENABLED_format ||
968          s == OVERLAY_PARAM_ENABLED_fps ||
969          s == OVERLAY_PARAM_ENABLED_frame)
970         continue;
971
972      char hash[40];
973      snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
974      data->stat_selector = (enum overlay_param_enabled) s;
975      data->time_dividor = 1000.0f;
976      if (s == OVERLAY_PARAM_ENABLED_gpu_timing)
977         data->time_dividor = 1000000.0f;
978
979      if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
980          s == OVERLAY_PARAM_ENABLED_acquire_timing ||
981          s == OVERLAY_PARAM_ENABLED_present_timing ||
982          s == OVERLAY_PARAM_ENABLED_gpu_timing) {
983         double min_time = data->stats_min.stats[s] / data->time_dividor;
984         double max_time = data->stats_max.stats[s] / data->time_dividor;
985         ImGui::PlotHistogram(hash, get_time_stat, data,
986                              ARRAY_SIZE(data->frames_stats), 0,
987                              NULL, min_time, max_time,
988                              ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
989         ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s],
990                     get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
991                     min_time, max_time);
992      } else {
993         ImGui::PlotHistogram(hash, get_stat, data,
994                              ARRAY_SIZE(data->frames_stats), 0,
995                              NULL,
996                              data->stats_min.stats[s],
997                              data->stats_max.stats[s],
998                              ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
999         ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s],
1000                     get_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
1001                     data->stats_min.stats[s], data->stats_max.stats[s]);
1002      }
1003   }
1004   data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f);
1005   ImGui::End();
1006   ImGui::EndFrame();
1007   ImGui::Render();
1008}
1009
1010static uint32_t vk_memory_type(struct device_data *data,
1011                               VkMemoryPropertyFlags properties,
1012                               uint32_t type_bits)
1013{
1014    VkPhysicalDeviceMemoryProperties prop;
1015    data->instance->pd_vtable.GetPhysicalDeviceMemoryProperties(data->physical_device, &prop);
1016    for (uint32_t i = 0; i < prop.memoryTypeCount; i++)
1017        if ((prop.memoryTypes[i].propertyFlags & properties) == properties && type_bits & (1<<i))
1018            return i;
1019    return 0xFFFFFFFF; // Unable to find memoryType
1020}
1021
1022static void ensure_swapchain_fonts(struct swapchain_data *data,
1023                                   VkCommandBuffer command_buffer)
1024{
1025   if (data->font_uploaded)
1026      return;
1027
1028   data->font_uploaded = true;
1029
1030   struct device_data *device_data = data->device;
1031   ImGuiIO& io = ImGui::GetIO();
1032   unsigned char* pixels;
1033   int width, height;
1034   io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);
1035   size_t upload_size = width * height * 4 * sizeof(char);
1036
1037   /* Upload buffer */
1038   VkBufferCreateInfo buffer_info = {};
1039   buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
1040   buffer_info.size = upload_size;
1041   buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1042   buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
1043   VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info,
1044                                             NULL, &data->upload_font_buffer));
1045   VkMemoryRequirements upload_buffer_req;
1046   device_data->vtable.GetBufferMemoryRequirements(device_data->device,
1047                                                   data->upload_font_buffer,
1048                                                   &upload_buffer_req);
1049   VkMemoryAllocateInfo upload_alloc_info = {};
1050   upload_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
1051   upload_alloc_info.allocationSize = upload_buffer_req.size;
1052   upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data,
1053                                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
1054                                                      upload_buffer_req.memoryTypeBits);
1055   VK_CHECK(device_data->vtable.AllocateMemory(device_data->device,
1056                                               &upload_alloc_info,
1057                                               NULL,
1058                                               &data->upload_font_buffer_mem));
1059   VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device,
1060                                                 data->upload_font_buffer,
1061                                                 data->upload_font_buffer_mem, 0));
1062
1063   /* Upload to Buffer */
1064   char* map = NULL;
1065   VK_CHECK(device_data->vtable.MapMemory(device_data->device,
1066                                          data->upload_font_buffer_mem,
1067                                          0, upload_size, 0, (void**)(&map)));
1068   memcpy(map, pixels, upload_size);
1069   VkMappedMemoryRange range[1] = {};
1070   range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1071   range[0].memory = data->upload_font_buffer_mem;
1072   range[0].size = upload_size;
1073   VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range));
1074   device_data->vtable.UnmapMemory(device_data->device,
1075                                   data->upload_font_buffer_mem);
1076
1077   /* Copy buffer to image */
1078   VkImageMemoryBarrier copy_barrier[1] = {};
1079   copy_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1080   copy_barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1081   copy_barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1082   copy_barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1083   copy_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1084   copy_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1085   copy_barrier[0].image = data->font_image;
1086   copy_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1087   copy_barrier[0].subresourceRange.levelCount = 1;
1088   copy_barrier[0].subresourceRange.layerCount = 1;
1089   device_data->vtable.CmdPipelineBarrier(command_buffer,
1090                                          VK_PIPELINE_STAGE_HOST_BIT,
1091                                          VK_PIPELINE_STAGE_TRANSFER_BIT,
1092                                          0, 0, NULL, 0, NULL,
1093                                          1, copy_barrier);
1094
1095   VkBufferImageCopy region = {};
1096   region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1097   region.imageSubresource.layerCount = 1;
1098   region.imageExtent.width = width;
1099   region.imageExtent.height = height;
1100   region.imageExtent.depth = 1;
1101   device_data->vtable.CmdCopyBufferToImage(command_buffer,
1102                                            data->upload_font_buffer,
1103                                            data->font_image,
1104                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1105                                            1, &region);
1106
1107   VkImageMemoryBarrier use_barrier[1] = {};
1108   use_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1109   use_barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1110   use_barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
1111   use_barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1112   use_barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1113   use_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1114   use_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1115   use_barrier[0].image = data->font_image;
1116   use_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1117   use_barrier[0].subresourceRange.levelCount = 1;
1118   use_barrier[0].subresourceRange.layerCount = 1;
1119   device_data->vtable.CmdPipelineBarrier(command_buffer,
1120                                          VK_PIPELINE_STAGE_TRANSFER_BIT,
1121                                          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
1122                                          0,
1123                                          0, NULL,
1124                                          0, NULL,
1125                                          1, use_barrier);
1126
1127   /* Store our identifier */
1128   io.Fonts->TexID = (ImTextureID)(intptr_t)data->font_image;
1129}
1130
1131static void CreateOrResizeBuffer(struct device_data *data,
1132                                 VkBuffer *buffer,
1133                                 VkDeviceMemory *buffer_memory,
1134                                 VkDeviceSize *buffer_size,
1135                                 size_t new_size, VkBufferUsageFlagBits usage)
1136{
1137    if (*buffer != VK_NULL_HANDLE)
1138        data->vtable.DestroyBuffer(data->device, *buffer, NULL);
1139    if (*buffer_memory)
1140        data->vtable.FreeMemory(data->device, *buffer_memory, NULL);
1141
1142    VkBufferCreateInfo buffer_info = {};
1143    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
1144    buffer_info.size = new_size;
1145    buffer_info.usage = usage;
1146    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
1147    VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer));
1148
1149    VkMemoryRequirements req;
1150    data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req);
1151    VkMemoryAllocateInfo alloc_info = {};
1152    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
1153    alloc_info.allocationSize = req.size;
1154    alloc_info.memoryTypeIndex =
1155       vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits);
1156    VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory));
1157
1158    VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0));
1159    *buffer_size = new_size;
1160}
1161
1162static struct overlay_draw *render_swapchain_display(struct swapchain_data *data,
1163                                                     struct queue_data *present_queue,
1164                                                     const VkSemaphore *wait_semaphores,
1165                                                     unsigned n_wait_semaphores,
1166                                                     unsigned image_index)
1167{
1168   ImDrawData* draw_data = ImGui::GetDrawData();
1169   if (draw_data->TotalVtxCount == 0)
1170      return NULL;
1171
1172   struct device_data *device_data = data->device;
1173   struct overlay_draw *draw = get_overlay_draw(data);
1174
1175   device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0);
1176
1177   VkRenderPassBeginInfo render_pass_info = {};
1178   render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
1179   render_pass_info.renderPass = data->render_pass;
1180   render_pass_info.framebuffer = data->framebuffers[image_index];
1181   render_pass_info.renderArea.extent.width = data->width;
1182   render_pass_info.renderArea.extent.height = data->height;
1183
1184   VkCommandBufferBeginInfo buffer_begin_info = {};
1185   buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
1186
1187   device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info);
1188
1189   ensure_swapchain_fonts(data, draw->command_buffer);
1190
1191   /* Bounce the image to display back to color attachment layout for
1192    * rendering on top of it.
1193    */
1194   VkImageMemoryBarrier imb;
1195   imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1196   imb.pNext = nullptr;
1197   imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1198   imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1199   imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
1200   imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1201   imb.image = data->images[image_index];
1202   imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1203   imb.subresourceRange.baseMipLevel = 0;
1204   imb.subresourceRange.levelCount = 1;
1205   imb.subresourceRange.baseArrayLayer = 0;
1206   imb.subresourceRange.layerCount = 1;
1207   imb.srcQueueFamilyIndex = present_queue->family_index;
1208   imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index;
1209   device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
1210                                          VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
1211                                          VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
1212                                          0,          /* dependency flags */
1213                                          0, nullptr, /* memory barriers */
1214                                          0, nullptr, /* buffer memory barriers */
1215                                          1, &imb);   /* image memory barriers */
1216
1217   device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info,
1218                                          VK_SUBPASS_CONTENTS_INLINE);
1219
1220   /* Create/Resize vertex & index buffers */
1221   size_t vertex_size = ALIGN(draw_data->TotalVtxCount * sizeof(ImDrawVert), device_data->properties.limits.nonCoherentAtomSize);
1222   size_t index_size = ALIGN(draw_data->TotalIdxCount * sizeof(ImDrawIdx), device_data->properties.limits.nonCoherentAtomSize);
1223   if (draw->vertex_buffer_size < vertex_size) {
1224      CreateOrResizeBuffer(device_data,
1225                           &draw->vertex_buffer,
1226                           &draw->vertex_buffer_mem,
1227                           &draw->vertex_buffer_size,
1228                           vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1229   }
1230   if (draw->index_buffer_size < index_size) {
1231      CreateOrResizeBuffer(device_data,
1232                           &draw->index_buffer,
1233                           &draw->index_buffer_mem,
1234                           &draw->index_buffer_size,
1235                           index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
1236   }
1237
1238    /* Upload vertex & index data */
1239    ImDrawVert* vtx_dst = NULL;
1240    ImDrawIdx* idx_dst = NULL;
1241    VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem,
1242                                           0, vertex_size, 0, (void**)(&vtx_dst)));
1243    VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem,
1244                                           0, index_size, 0, (void**)(&idx_dst)));
1245    for (int n = 0; n < draw_data->CmdListsCount; n++)
1246        {
1247           const ImDrawList* cmd_list = draw_data->CmdLists[n];
1248           memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert));
1249           memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));
1250           vtx_dst += cmd_list->VtxBuffer.Size;
1251           idx_dst += cmd_list->IdxBuffer.Size;
1252        }
1253    VkMappedMemoryRange range[2] = {};
1254    range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1255    range[0].memory = draw->vertex_buffer_mem;
1256    range[0].size = VK_WHOLE_SIZE;
1257    range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1258    range[1].memory = draw->index_buffer_mem;
1259    range[1].size = VK_WHOLE_SIZE;
1260    VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range));
1261    device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem);
1262    device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem);
1263
1264    /* Bind pipeline and descriptor sets */
1265    device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline);
1266    VkDescriptorSet desc_set[1] = { data->descriptor_set };
1267    device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
1268                                              data->pipeline_layout, 0, 1, desc_set, 0, NULL);
1269
1270    /* Bind vertex & index buffers */
1271    VkBuffer vertex_buffers[1] = { draw->vertex_buffer };
1272    VkDeviceSize vertex_offset[1] = { 0 };
1273    device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset);
1274    device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16);
1275
1276    /* Setup viewport */
1277    VkViewport viewport;
1278    viewport.x = 0;
1279    viewport.y = 0;
1280    viewport.width = draw_data->DisplaySize.x;
1281    viewport.height = draw_data->DisplaySize.y;
1282    viewport.minDepth = 0.0f;
1283    viewport.maxDepth = 1.0f;
1284    device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport);
1285
1286
1287    /* Setup scale and translation through push constants :
1288     *
1289     * Our visible imgui space lies from draw_data->DisplayPos (top left) to
1290     * draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin
1291     * is typically (0,0) for single viewport apps.
1292     */
1293    float scale[2];
1294    scale[0] = 2.0f / draw_data->DisplaySize.x;
1295    scale[1] = 2.0f / draw_data->DisplaySize.y;
1296    float translate[2];
1297    translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0];
1298    translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1];
1299    device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
1300                                         VK_SHADER_STAGE_VERTEX_BIT,
1301                                         sizeof(float) * 0, sizeof(float) * 2, scale);
1302    device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
1303                                         VK_SHADER_STAGE_VERTEX_BIT,
1304                                         sizeof(float) * 2, sizeof(float) * 2, translate);
1305
1306    // Render the command lists:
1307    int vtx_offset = 0;
1308    int idx_offset = 0;
1309    ImVec2 display_pos = draw_data->DisplayPos;
1310    for (int n = 0; n < draw_data->CmdListsCount; n++)
1311    {
1312        const ImDrawList* cmd_list = draw_data->CmdLists[n];
1313        for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
1314        {
1315            const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
1316            // Apply scissor/clipping rectangle
1317            // FIXME: We could clamp width/height based on clamped min/max values.
1318            VkRect2D scissor;
1319            scissor.offset.x = (int32_t)(pcmd->ClipRect.x - display_pos.x) > 0 ? (int32_t)(pcmd->ClipRect.x - display_pos.x) : 0;
1320            scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0;
1321            scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x);
1322            scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here?
1323            device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor);
1324
1325            // Draw
1326            device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0);
1327
1328            idx_offset += pcmd->ElemCount;
1329        }
1330        vtx_offset += cmd_list->VtxBuffer.Size;
1331    }
1332
1333   device_data->vtable.CmdEndRenderPass(draw->command_buffer);
1334
1335   if (device_data->graphic_queue->family_index != present_queue->family_index)
1336   {
1337      /* Transfer the image back to the present queue family
1338       * image layout was already changed to present by the render pass
1339       */
1340      imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1341      imb.pNext = nullptr;
1342      imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1343      imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1344      imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
1345      imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
1346      imb.image = data->images[image_index];
1347      imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1348      imb.subresourceRange.baseMipLevel = 0;
1349      imb.subresourceRange.levelCount = 1;
1350      imb.subresourceRange.baseArrayLayer = 0;
1351      imb.subresourceRange.layerCount = 1;
1352      imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index;
1353      imb.dstQueueFamilyIndex = present_queue->family_index;
1354      device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
1355                                             VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
1356                                             VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
1357                                             0,          /* dependency flags */
1358                                             0, nullptr, /* memory barriers */
1359                                             0, nullptr, /* buffer memory barriers */
1360                                             1, &imb);   /* image memory barriers */
1361   }
1362
1363   device_data->vtable.EndCommandBuffer(draw->command_buffer);
1364
1365   /* When presenting on a different queue than where we're drawing the
1366    * overlay *AND* when the application does not provide a semaphore to
1367    * vkQueuePresent, insert our own cross engine synchronization
1368    * semaphore.
1369    */
1370   if (n_wait_semaphores == 0 && device_data->graphic_queue->queue != present_queue->queue) {
1371      VkPipelineStageFlags stages_wait = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
1372      VkSubmitInfo submit_info = {};
1373      submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1374      submit_info.commandBufferCount = 0;
1375      submit_info.pWaitDstStageMask = &stages_wait;
1376      submit_info.waitSemaphoreCount = 0;
1377      submit_info.signalSemaphoreCount = 1;
1378      submit_info.pSignalSemaphores = &draw->cross_engine_semaphore;
1379
1380      device_data->vtable.QueueSubmit(present_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
1381
1382      submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1383      submit_info.commandBufferCount = 1;
1384      submit_info.pWaitDstStageMask = &stages_wait;
1385      submit_info.pCommandBuffers = &draw->command_buffer;
1386      submit_info.waitSemaphoreCount = 1;
1387      submit_info.pWaitSemaphores = &draw->cross_engine_semaphore;
1388      submit_info.signalSemaphoreCount = 1;
1389      submit_info.pSignalSemaphores = &draw->semaphore;
1390
1391      device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence);
1392   } else {
1393      VkPipelineStageFlags *stages_wait = (VkPipelineStageFlags*) malloc(sizeof(VkPipelineStageFlags) * n_wait_semaphores);
1394      for (unsigned i = 0; i < n_wait_semaphores; i++)
1395      {
1396         // wait in the fragment stage until the swapchain image is ready
1397         stages_wait[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
1398      }
1399
1400      VkSubmitInfo submit_info = {};
1401      submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1402      submit_info.commandBufferCount = 1;
1403      submit_info.pCommandBuffers = &draw->command_buffer;
1404      submit_info.pWaitDstStageMask = stages_wait;
1405      submit_info.waitSemaphoreCount = n_wait_semaphores;
1406      submit_info.pWaitSemaphores = wait_semaphores;
1407      submit_info.signalSemaphoreCount = 1;
1408      submit_info.pSignalSemaphores = &draw->semaphore;
1409
1410      device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence);
1411
1412      free(stages_wait);
1413   }
1414
1415   return draw;
1416}
1417
/* Overlay vertex shader as an array of 32-bit words; the contents come from
 * the included overlay.vert.spv.h header (presumably produced by the build).
 * Used as the code of the vertex VkShaderModule.
 */
static const uint32_t overlay_vert_spv[] = {
#include "overlay.vert.spv.h"
};
/* Overlay fragment shader as an array of 32-bit words; the contents come
 * from the included overlay.frag.spv.h header. Used as the code of the
 * fragment VkShaderModule.
 */
static const uint32_t overlay_frag_spv[] = {
#include "overlay.frag.spv.h"
};
1424
1425static void setup_swapchain_data_pipeline(struct swapchain_data *data)
1426{
1427   struct device_data *device_data = data->device;
1428   VkShaderModule vert_module, frag_module;
1429
1430   /* Create shader modules */
1431   VkShaderModuleCreateInfo vert_info = {};
1432   vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1433   vert_info.codeSize = sizeof(overlay_vert_spv);
1434   vert_info.pCode = overlay_vert_spv;
1435   VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device,
1436                                                   &vert_info, NULL, &vert_module));
1437   VkShaderModuleCreateInfo frag_info = {};
1438   frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
1439   frag_info.codeSize = sizeof(overlay_frag_spv);
1440   frag_info.pCode = (uint32_t*)overlay_frag_spv;
1441   VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device,
1442                                                   &frag_info, NULL, &frag_module));
1443
1444   /* Font sampler */
1445   VkSamplerCreateInfo sampler_info = {};
1446   sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
1447   sampler_info.magFilter = VK_FILTER_LINEAR;
1448   sampler_info.minFilter = VK_FILTER_LINEAR;
1449   sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
1450   sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
1451   sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
1452   sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
1453   sampler_info.minLod = -1000;
1454   sampler_info.maxLod = 1000;
1455   sampler_info.maxAnisotropy = 1.0f;
1456   VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info,
1457                                              NULL, &data->font_sampler));
1458
1459   /* Descriptor pool */
1460   VkDescriptorPoolSize sampler_pool_size = {};
1461   sampler_pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
1462   sampler_pool_size.descriptorCount = 1;
1463   VkDescriptorPoolCreateInfo desc_pool_info = {};
1464   desc_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
1465   desc_pool_info.maxSets = 1;
1466   desc_pool_info.poolSizeCount = 1;
1467   desc_pool_info.pPoolSizes = &sampler_pool_size;
1468   VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device,
1469                                                     &desc_pool_info,
1470                                                     NULL, &data->descriptor_pool));
1471
1472   /* Descriptor layout */
1473   VkSampler sampler[1] = { data->font_sampler };
1474   VkDescriptorSetLayoutBinding binding[1] = {};
1475   binding[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
1476   binding[0].descriptorCount = 1;
1477   binding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
1478   binding[0].pImmutableSamplers = sampler;
1479   VkDescriptorSetLayoutCreateInfo set_layout_info = {};
1480   set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
1481   set_layout_info.bindingCount = 1;
1482   set_layout_info.pBindings = binding;
1483   VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device,
1484                                                          &set_layout_info,
1485                                                          NULL, &data->descriptor_layout));
1486
1487   /* Descriptor set */
1488   VkDescriptorSetAllocateInfo alloc_info = {};
1489   alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
1490   alloc_info.descriptorPool = data->descriptor_pool;
1491   alloc_info.descriptorSetCount = 1;
1492   alloc_info.pSetLayouts = &data->descriptor_layout;
1493   VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device,
1494                                                       &alloc_info,
1495                                                       &data->descriptor_set));
1496
1497   /* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full
1498    * 3d projection matrix
1499    */
1500   VkPushConstantRange push_constants[1] = {};
1501   push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
1502   push_constants[0].offset = sizeof(float) * 0;
1503   push_constants[0].size = sizeof(float) * 4;
1504   VkPipelineLayoutCreateInfo layout_info = {};
1505   layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
1506   layout_info.setLayoutCount = 1;
1507   layout_info.pSetLayouts = &data->descriptor_layout;
1508   layout_info.pushConstantRangeCount = 1;
1509   layout_info.pPushConstantRanges = push_constants;
1510   VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device,
1511                                                     &layout_info,
1512                                                     NULL, &data->pipeline_layout));
1513
1514   VkPipelineShaderStageCreateInfo stage[2] = {};
1515   stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
1516   stage[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
1517   stage[0].module = vert_module;
1518   stage[0].pName = "main";
1519   stage[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
1520   stage[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
1521   stage[1].module = frag_module;
1522   stage[1].pName = "main";
1523
1524   VkVertexInputBindingDescription binding_desc[1] = {};
1525   binding_desc[0].stride = sizeof(ImDrawVert);
1526   binding_desc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
1527
1528   VkVertexInputAttributeDescription attribute_desc[3] = {};
1529   attribute_desc[0].location = 0;
1530   attribute_desc[0].binding = binding_desc[0].binding;
1531   attribute_desc[0].format = VK_FORMAT_R32G32_SFLOAT;
1532   attribute_desc[0].offset = IM_OFFSETOF(ImDrawVert, pos);
1533   attribute_desc[1].location = 1;
1534   attribute_desc[1].binding = binding_desc[0].binding;
1535   attribute_desc[1].format = VK_FORMAT_R32G32_SFLOAT;
1536   attribute_desc[1].offset = IM_OFFSETOF(ImDrawVert, uv);
1537   attribute_desc[2].location = 2;
1538   attribute_desc[2].binding = binding_desc[0].binding;
1539   attribute_desc[2].format = VK_FORMAT_R8G8B8A8_UNORM;
1540   attribute_desc[2].offset = IM_OFFSETOF(ImDrawVert, col);
1541
1542   VkPipelineVertexInputStateCreateInfo vertex_info = {};
1543   vertex_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
1544   vertex_info.vertexBindingDescriptionCount = 1;
1545   vertex_info.pVertexBindingDescriptions = binding_desc;
1546   vertex_info.vertexAttributeDescriptionCount = 3;
1547   vertex_info.pVertexAttributeDescriptions = attribute_desc;
1548
1549   VkPipelineInputAssemblyStateCreateInfo ia_info = {};
1550   ia_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
1551   ia_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1552
1553   VkPipelineViewportStateCreateInfo viewport_info = {};
1554   viewport_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
1555   viewport_info.viewportCount = 1;
1556   viewport_info.scissorCount = 1;
1557
1558   VkPipelineRasterizationStateCreateInfo raster_info = {};
1559   raster_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
1560   raster_info.polygonMode = VK_POLYGON_MODE_FILL;
1561   raster_info.cullMode = VK_CULL_MODE_NONE;
1562   raster_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
1563   raster_info.lineWidth = 1.0f;
1564
1565   VkPipelineMultisampleStateCreateInfo ms_info = {};
1566   ms_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
1567   ms_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
1568
1569   VkPipelineColorBlendAttachmentState color_attachment[1] = {};
1570   color_attachment[0].blendEnable = VK_TRUE;
1571   color_attachment[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
1572   color_attachment[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
1573   color_attachment[0].colorBlendOp = VK_BLEND_OP_ADD;
1574   color_attachment[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
1575   color_attachment[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
1576   color_attachment[0].alphaBlendOp = VK_BLEND_OP_ADD;
1577   color_attachment[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
1578      VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
1579
1580   VkPipelineDepthStencilStateCreateInfo depth_info = {};
1581   depth_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
1582
1583   VkPipelineColorBlendStateCreateInfo blend_info = {};
1584   blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
1585   blend_info.attachmentCount = 1;
1586   blend_info.pAttachments = color_attachment;
1587
1588   VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR };
1589   VkPipelineDynamicStateCreateInfo dynamic_state = {};
1590   dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
1591   dynamic_state.dynamicStateCount = (uint32_t)IM_ARRAYSIZE(dynamic_states);
1592   dynamic_state.pDynamicStates = dynamic_states;
1593
1594   VkGraphicsPipelineCreateInfo info = {};
1595   info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
1596   info.flags = 0;
1597   info.stageCount = 2;
1598   info.pStages = stage;
1599   info.pVertexInputState = &vertex_info;
1600   info.pInputAssemblyState = &ia_info;
1601   info.pViewportState = &viewport_info;
1602   info.pRasterizationState = &raster_info;
1603   info.pMultisampleState = &ms_info;
1604   info.pDepthStencilState = &depth_info;
1605   info.pColorBlendState = &blend_info;
1606   info.pDynamicState = &dynamic_state;
1607   info.layout = data->pipeline_layout;
1608   info.renderPass = data->render_pass;
1609   VK_CHECK(
1610      device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE,
1611                                                  1, &info,
1612                                                  NULL, &data->pipeline));
1613
1614   device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL);
1615   device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL);
1616
1617   ImGuiIO& io = ImGui::GetIO();
1618   unsigned char* pixels;
1619   int width, height;
1620   io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height);
1621
1622   /* Font image */
1623   VkImageCreateInfo image_info = {};
1624   image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
1625   image_info.imageType = VK_IMAGE_TYPE_2D;
1626   image_info.format = VK_FORMAT_R8G8B8A8_UNORM;
1627   image_info.extent.width = width;
1628   image_info.extent.height = height;
1629   image_info.extent.depth = 1;
1630   image_info.mipLevels = 1;
1631   image_info.arrayLayers = 1;
1632   image_info.samples = VK_SAMPLE_COUNT_1_BIT;
1633   image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
1634   image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
1635   image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
1636   image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1637   VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info,
1638                                            NULL, &data->font_image));
1639   VkMemoryRequirements font_image_req;
1640   device_data->vtable.GetImageMemoryRequirements(device_data->device,
1641                                                  data->font_image, &font_image_req);
1642   VkMemoryAllocateInfo image_alloc_info = {};
1643   image_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
1644   image_alloc_info.allocationSize = font_image_req.size;
1645   image_alloc_info.memoryTypeIndex = vk_memory_type(device_data,
1646                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1647                                                     font_image_req.memoryTypeBits);
1648   VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info,
1649                                               NULL, &data->font_mem));
1650   VK_CHECK(device_data->vtable.BindImageMemory(device_data->device,
1651                                                data->font_image,
1652                                                data->font_mem, 0));
1653
1654   /* Font image view */
1655   VkImageViewCreateInfo view_info = {};
1656   view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
1657   view_info.image = data->font_image;
1658   view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
1659   view_info.format = VK_FORMAT_R8G8B8A8_UNORM;
1660   view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1661   view_info.subresourceRange.levelCount = 1;
1662   view_info.subresourceRange.layerCount = 1;
1663   VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info,
1664                                                NULL, &data->font_image_view));
1665
1666   /* Descriptor set */
1667   VkDescriptorImageInfo desc_image[1] = {};
1668   desc_image[0].sampler = data->font_sampler;
1669   desc_image[0].imageView = data->font_image_view;
1670   desc_image[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1671   VkWriteDescriptorSet write_desc[1] = {};
1672   write_desc[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
1673   write_desc[0].dstSet = data->descriptor_set;
1674   write_desc[0].descriptorCount = 1;
1675   write_desc[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
1676   write_desc[0].pImageInfo = desc_image;
1677   device_data->vtable.UpdateDescriptorSets(device_data->device, 1, write_desc, 0, NULL);
1678}
1679
1680static void setup_swapchain_data(struct swapchain_data *data,
1681                                 const VkSwapchainCreateInfoKHR *pCreateInfo)
1682{
1683   data->width = pCreateInfo->imageExtent.width;
1684   data->height = pCreateInfo->imageExtent.height;
1685   data->format = pCreateInfo->imageFormat;
1686
1687   data->imgui_context = ImGui::CreateContext();
1688   ImGui::SetCurrentContext(data->imgui_context);
1689
1690   ImGui::GetIO().IniFilename = NULL;
1691   ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height);
1692
1693   struct device_data *device_data = data->device;
1694
1695   /* Render pass */
1696   VkAttachmentDescription attachment_desc = {};
1697   attachment_desc.format = pCreateInfo->imageFormat;
1698   attachment_desc.samples = VK_SAMPLE_COUNT_1_BIT;
1699   attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
1700   attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
1701   attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
1702   attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
1703   attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1704   attachment_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
1705   VkAttachmentReference color_attachment = {};
1706   color_attachment.attachment = 0;
1707   color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1708   VkSubpassDescription subpass = {};
1709   subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
1710   subpass.colorAttachmentCount = 1;
1711   subpass.pColorAttachments = &color_attachment;
1712   VkSubpassDependency dependency = {};
1713   dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
1714   dependency.dstSubpass = 0;
1715   dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
1716   dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
1717   dependency.srcAccessMask = 0;
1718   dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1719   VkRenderPassCreateInfo render_pass_info = {};
1720   render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
1721   render_pass_info.attachmentCount = 1;
1722   render_pass_info.pAttachments = &attachment_desc;
1723   render_pass_info.subpassCount = 1;
1724   render_pass_info.pSubpasses = &subpass;
1725   render_pass_info.dependencyCount = 1;
1726   render_pass_info.pDependencies = &dependency;
1727   VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device,
1728                                                 &render_pass_info,
1729                                                 NULL, &data->render_pass));
1730
1731   setup_swapchain_data_pipeline(data);
1732
1733   VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device,
1734                                                      data->swapchain,
1735                                                      &data->n_images,
1736                                                      NULL));
1737
1738   data->images = ralloc_array(data, VkImage, data->n_images);
1739   data->image_views = ralloc_array(data, VkImageView, data->n_images);
1740   data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images);
1741
1742   VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device,
1743                                                      data->swapchain,
1744                                                      &data->n_images,
1745                                                      data->images));
1746
1747   /* Image views */
1748   VkImageViewCreateInfo view_info = {};
1749   view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
1750   view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
1751   view_info.format = pCreateInfo->imageFormat;
1752   view_info.components.r = VK_COMPONENT_SWIZZLE_R;
1753   view_info.components.g = VK_COMPONENT_SWIZZLE_G;
1754   view_info.components.b = VK_COMPONENT_SWIZZLE_B;
1755   view_info.components.a = VK_COMPONENT_SWIZZLE_A;
1756   view_info.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
1757   for (uint32_t i = 0; i < data->n_images; i++) {
1758      view_info.image = data->images[i];
1759      VK_CHECK(device_data->vtable.CreateImageView(device_data->device,
1760                                                   &view_info, NULL,
1761                                                   &data->image_views[i]));
1762   }
1763
1764   /* Framebuffers */
1765   VkImageView attachment[1];
1766   VkFramebufferCreateInfo fb_info = {};
1767   fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
1768   fb_info.renderPass = data->render_pass;
1769   fb_info.attachmentCount = 1;
1770   fb_info.pAttachments = attachment;
1771   fb_info.width = data->width;
1772   fb_info.height = data->height;
1773   fb_info.layers = 1;
1774   for (uint32_t i = 0; i < data->n_images; i++) {
1775      attachment[0] = data->image_views[i];
1776      VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info,
1777                                                     NULL, &data->framebuffers[i]));
1778   }
1779
1780   /* Command buffer pool */
1781   VkCommandPoolCreateInfo cmd_buffer_pool_info = {};
1782   cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
1783   cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
1784   cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index;
1785   VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device,
1786                                                  &cmd_buffer_pool_info,
1787                                                  NULL, &data->command_pool));
1788}
1789
1790static void shutdown_swapchain_data(struct swapchain_data *data)
1791{
1792   struct device_data *device_data = data->device;
1793
1794   list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) {
1795      device_data->vtable.DestroySemaphore(device_data->device, draw->cross_engine_semaphore, NULL);
1796      device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL);
1797      device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL);
1798      device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL);
1799      device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL);
1800      device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL);
1801      device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL);
1802   }
1803
1804   for (uint32_t i = 0; i < data->n_images; i++) {
1805      device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL);
1806      device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL);
1807   }
1808
1809   device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL);
1810
1811   device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL);
1812
1813   device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL);
1814   device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL);
1815
1816   device_data->vtable.DestroyDescriptorPool(device_data->device,
1817                                             data->descriptor_pool, NULL);
1818   device_data->vtable.DestroyDescriptorSetLayout(device_data->device,
1819                                                  data->descriptor_layout, NULL);
1820
1821   device_data->vtable.DestroySampler(device_data->device, data->font_sampler, NULL);
1822   device_data->vtable.DestroyImageView(device_data->device, data->font_image_view, NULL);
1823   device_data->vtable.DestroyImage(device_data->device, data->font_image, NULL);
1824   device_data->vtable.FreeMemory(device_data->device, data->font_mem, NULL);
1825
1826   device_data->vtable.DestroyBuffer(device_data->device, data->upload_font_buffer, NULL);
1827   device_data->vtable.FreeMemory(device_data->device, data->upload_font_buffer_mem, NULL);
1828
1829   ImGui::DestroyContext(data->imgui_context);
1830}
1831
1832static struct overlay_draw *before_present(struct swapchain_data *swapchain_data,
1833                                           struct queue_data *present_queue,
1834                                           const VkSemaphore *wait_semaphores,
1835                                           unsigned n_wait_semaphores,
1836                                           unsigned imageIndex)
1837{
1838   struct instance_data *instance_data = swapchain_data->device->instance;
1839   struct overlay_draw *draw = NULL;
1840
1841   snapshot_swapchain_frame(swapchain_data);
1842
1843   if (!instance_data->params.no_display && swapchain_data->n_frames > 0) {
1844      compute_swapchain_display(swapchain_data);
1845      draw = render_swapchain_display(swapchain_data, present_queue,
1846                                      wait_semaphores, n_wait_semaphores,
1847                                      imageIndex);
1848   }
1849
1850   return draw;
1851}
1852
1853static VkResult overlay_CreateSwapchainKHR(
1854    VkDevice                                    device,
1855    const VkSwapchainCreateInfoKHR*             pCreateInfo,
1856    const VkAllocationCallbacks*                pAllocator,
1857    VkSwapchainKHR*                             pSwapchain)
1858{
1859   struct device_data *device_data = FIND(struct device_data, device);
1860   VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain);
1861   if (result != VK_SUCCESS) return result;
1862
1863   struct swapchain_data *swapchain_data = new_swapchain_data(*pSwapchain, device_data);
1864   setup_swapchain_data(swapchain_data, pCreateInfo);
1865   return result;
1866}
1867
1868static void overlay_DestroySwapchainKHR(
1869    VkDevice                                    device,
1870    VkSwapchainKHR                              swapchain,
1871    const VkAllocationCallbacks*                pAllocator)
1872{
1873   if (swapchain == VK_NULL_HANDLE) {
1874      struct device_data *device_data = FIND(struct device_data, device);
1875      device_data->vtable.DestroySwapchainKHR(device, swapchain, pAllocator);
1876      return;
1877   }
1878
1879   struct swapchain_data *swapchain_data =
1880      FIND(struct swapchain_data, swapchain);
1881
1882   shutdown_swapchain_data(swapchain_data);
1883   swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator);
1884   destroy_swapchain_data(swapchain_data);
1885}
1886
1887static VkResult overlay_QueuePresentKHR(
1888    VkQueue                                     queue,
1889    const VkPresentInfoKHR*                     pPresentInfo)
1890{
1891   struct queue_data *queue_data = FIND(struct queue_data, queue);
1892   struct device_data *device_data = queue_data->device;
1893   struct instance_data *instance_data = device_data->instance;
1894   uint32_t query_results[OVERLAY_QUERY_COUNT];
1895
1896   device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++;
1897
1898   if (list_length(&queue_data->running_command_buffer) > 0) {
1899      /* Before getting the query results, make sure the operations have
1900       * completed.
1901       */
1902      VK_CHECK(device_data->vtable.ResetFences(device_data->device,
1903                                               1, &queue_data->queries_fence));
1904      VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence));
1905      VK_CHECK(device_data->vtable.WaitForFences(device_data->device,
1906                                                 1, &queue_data->queries_fence,
1907                                                 VK_FALSE, UINT64_MAX));
1908
1909      /* Now get the results. */
1910      list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data,
1911                               &queue_data->running_command_buffer, link) {
1912         list_delinit(&cmd_buffer_data->link);
1913
1914         if (cmd_buffer_data->pipeline_query_pool) {
1915            memset(query_results, 0, sizeof(query_results));
1916            VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
1917                                                             cmd_buffer_data->pipeline_query_pool,
1918                                                             cmd_buffer_data->query_index, 1,
1919                                                             sizeof(uint32_t) * OVERLAY_QUERY_COUNT,
1920                                                             query_results, 0, VK_QUERY_RESULT_WAIT_BIT));
1921
1922            for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices;
1923                 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
1924               device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices];
1925            }
1926         }
1927         if (cmd_buffer_data->timestamp_query_pool) {
1928            uint64_t gpu_timestamps[2] = { 0 };
1929            VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device,
1930                                                             cmd_buffer_data->timestamp_query_pool,
1931                                                             cmd_buffer_data->query_index * 2, 2,
1932                                                             2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t),
1933                                                             VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT));
1934
1935            gpu_timestamps[0] &= queue_data->timestamp_mask;
1936            gpu_timestamps[1] &= queue_data->timestamp_mask;
1937            device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] +=
1938               (gpu_timestamps[1] - gpu_timestamps[0]) *
1939               device_data->properties.limits.timestampPeriod;
1940         }
1941      }
1942   }
1943
1944   /* Otherwise we need to add our overlay drawing semaphore to the list of
1945    * semaphores to wait on. If we don't do that the presented picture might
1946    * be have incomplete overlay drawings.
1947    */
1948   VkResult result = VK_SUCCESS;
1949   if (instance_data->params.no_display) {
1950      for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
1951         VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
1952         struct swapchain_data *swapchain_data =
1953            FIND(struct swapchain_data, swapchain);
1954
1955         uint32_t image_index = pPresentInfo->pImageIndices[i];
1956
1957         before_present(swapchain_data,
1958                        queue_data,
1959                        pPresentInfo->pWaitSemaphores,
1960                        pPresentInfo->waitSemaphoreCount,
1961                        image_index);
1962
1963         VkPresentInfoKHR present_info = *pPresentInfo;
1964         present_info.swapchainCount = 1;
1965         present_info.pSwapchains = &swapchain;
1966         present_info.pImageIndices = &image_index;
1967
1968         uint64_t ts0 = os_time_get();
1969         result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
1970         uint64_t ts1 = os_time_get();
1971         swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
1972      }
1973   } else {
1974      for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
1975         VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
1976         struct swapchain_data *swapchain_data =
1977            FIND(struct swapchain_data, swapchain);
1978
1979         uint32_t image_index = pPresentInfo->pImageIndices[i];
1980
1981         VkPresentInfoKHR present_info = *pPresentInfo;
1982         present_info.swapchainCount = 1;
1983         present_info.pSwapchains = &swapchain;
1984         present_info.pImageIndices = &image_index;
1985
1986         struct overlay_draw *draw = before_present(swapchain_data,
1987                                                    queue_data,
1988                                                    pPresentInfo->pWaitSemaphores,
1989                                                    pPresentInfo->waitSemaphoreCount,
1990                                                    image_index);
1991
1992         /* Because the submission of the overlay draw waits on the semaphores
1993          * handed for present, we don't need to have this present operation
1994          * wait on them as well, we can just wait on the overlay submission
1995          * semaphore.
1996          */
1997         present_info.pWaitSemaphores = &draw->semaphore;
1998         present_info.waitSemaphoreCount = 1;
1999
2000         uint64_t ts0 = os_time_get();
2001         VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
2002         uint64_t ts1 = os_time_get();
2003         swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0;
2004         if (pPresentInfo->pResults)
2005            pPresentInfo->pResults[i] = chain_result;
2006         if (chain_result != VK_SUCCESS && result == VK_SUCCESS)
2007            result = chain_result;
2008      }
2009   }
2010   return result;
2011}
2012
2013static VkResult overlay_AcquireNextImageKHR(
2014    VkDevice                                    device,
2015    VkSwapchainKHR                              swapchain,
2016    uint64_t                                    timeout,
2017    VkSemaphore                                 semaphore,
2018    VkFence                                     fence,
2019    uint32_t*                                   pImageIndex)
2020{
2021   struct swapchain_data *swapchain_data =
2022      FIND(struct swapchain_data, swapchain);
2023   struct device_data *device_data = swapchain_data->device;
2024
2025   uint64_t ts0 = os_time_get();
2026   VkResult result = device_data->vtable.AcquireNextImageKHR(device, swapchain, timeout,
2027                                                             semaphore, fence, pImageIndex);
2028   uint64_t ts1 = os_time_get();
2029
2030   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
2031   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
2032
2033   return result;
2034}
2035
2036static VkResult overlay_AcquireNextImage2KHR(
2037    VkDevice                                    device,
2038    const VkAcquireNextImageInfoKHR*            pAcquireInfo,
2039    uint32_t*                                   pImageIndex)
2040{
2041   struct swapchain_data *swapchain_data =
2042      FIND(struct swapchain_data, pAcquireInfo->swapchain);
2043   struct device_data *device_data = swapchain_data->device;
2044
2045   uint64_t ts0 = os_time_get();
2046   VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex);
2047   uint64_t ts1 = os_time_get();
2048
2049   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
2050   swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
2051
2052   return result;
2053}
2054
2055static void overlay_CmdDraw(
2056    VkCommandBuffer                             commandBuffer,
2057    uint32_t                                    vertexCount,
2058    uint32_t                                    instanceCount,
2059    uint32_t                                    firstVertex,
2060    uint32_t                                    firstInstance)
2061{
2062   struct command_buffer_data *cmd_buffer_data =
2063      FIND(struct command_buffer_data, commandBuffer);
2064   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++;
2065   struct device_data *device_data = cmd_buffer_data->device;
2066   device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount,
2067                               firstVertex, firstInstance);
2068}
2069
2070static void overlay_CmdDrawIndexed(
2071    VkCommandBuffer                             commandBuffer,
2072    uint32_t                                    indexCount,
2073    uint32_t                                    instanceCount,
2074    uint32_t                                    firstIndex,
2075    int32_t                                     vertexOffset,
2076    uint32_t                                    firstInstance)
2077{
2078   struct command_buffer_data *cmd_buffer_data =
2079      FIND(struct command_buffer_data, commandBuffer);
2080   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++;
2081   struct device_data *device_data = cmd_buffer_data->device;
2082   device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount,
2083                                      firstIndex, vertexOffset, firstInstance);
2084}
2085
2086static void overlay_CmdDrawIndirect(
2087    VkCommandBuffer                             commandBuffer,
2088    VkBuffer                                    buffer,
2089    VkDeviceSize                                offset,
2090    uint32_t                                    drawCount,
2091    uint32_t                                    stride)
2092{
2093   struct command_buffer_data *cmd_buffer_data =
2094      FIND(struct command_buffer_data, commandBuffer);
2095   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++;
2096   struct device_data *device_data = cmd_buffer_data->device;
2097   device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride);
2098}
2099
2100static void overlay_CmdDrawIndexedIndirect(
2101    VkCommandBuffer                             commandBuffer,
2102    VkBuffer                                    buffer,
2103    VkDeviceSize                                offset,
2104    uint32_t                                    drawCount,
2105    uint32_t                                    stride)
2106{
2107   struct command_buffer_data *cmd_buffer_data =
2108      FIND(struct command_buffer_data, commandBuffer);
2109   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++;
2110   struct device_data *device_data = cmd_buffer_data->device;
2111   device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride);
2112}
2113
2114static void overlay_CmdDrawIndirectCount(
2115    VkCommandBuffer                             commandBuffer,
2116    VkBuffer                                    buffer,
2117    VkDeviceSize                                offset,
2118    VkBuffer                                    countBuffer,
2119    VkDeviceSize                                countBufferOffset,
2120    uint32_t                                    maxDrawCount,
2121    uint32_t                                    stride)
2122{
2123   struct command_buffer_data *cmd_buffer_data =
2124      FIND(struct command_buffer_data, commandBuffer);
2125   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++;
2126   struct device_data *device_data = cmd_buffer_data->device;
2127   device_data->vtable.CmdDrawIndirectCount(commandBuffer, buffer, offset,
2128                                            countBuffer, countBufferOffset,
2129                                            maxDrawCount, stride);
2130}
2131
2132static void overlay_CmdDrawIndexedIndirectCount(
2133    VkCommandBuffer                             commandBuffer,
2134    VkBuffer                                    buffer,
2135    VkDeviceSize                                offset,
2136    VkBuffer                                    countBuffer,
2137    VkDeviceSize                                countBufferOffset,
2138    uint32_t                                    maxDrawCount,
2139    uint32_t                                    stride)
2140{
2141   struct command_buffer_data *cmd_buffer_data =
2142      FIND(struct command_buffer_data, commandBuffer);
2143   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++;
2144   struct device_data *device_data = cmd_buffer_data->device;
2145   device_data->vtable.CmdDrawIndexedIndirectCount(commandBuffer, buffer, offset,
2146                                                   countBuffer, countBufferOffset,
2147                                                   maxDrawCount, stride);
2148}
2149
2150static void overlay_CmdDispatch(
2151    VkCommandBuffer                             commandBuffer,
2152    uint32_t                                    groupCountX,
2153    uint32_t                                    groupCountY,
2154    uint32_t                                    groupCountZ)
2155{
2156   struct command_buffer_data *cmd_buffer_data =
2157      FIND(struct command_buffer_data, commandBuffer);
2158   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++;
2159   struct device_data *device_data = cmd_buffer_data->device;
2160   device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
2161}
2162
2163static void overlay_CmdDispatchIndirect(
2164    VkCommandBuffer                             commandBuffer,
2165    VkBuffer                                    buffer,
2166    VkDeviceSize                                offset)
2167{
2168   struct command_buffer_data *cmd_buffer_data =
2169      FIND(struct command_buffer_data, commandBuffer);
2170   cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
2171   struct device_data *device_data = cmd_buffer_data->device;
2172   device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset);
2173}
2174
2175static void overlay_CmdBindPipeline(
2176    VkCommandBuffer                             commandBuffer,
2177    VkPipelineBindPoint                         pipelineBindPoint,
2178    VkPipeline                                  pipeline)
2179{
2180   struct command_buffer_data *cmd_buffer_data =
2181      FIND(struct command_buffer_data, commandBuffer);
2182   switch (pipelineBindPoint) {
2183   case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
2184   case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
2185   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
2186   default: break;
2187   }
2188   struct device_data *device_data = cmd_buffer_data->device;
2189   device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
2190}
2191
2192static VkResult overlay_BeginCommandBuffer(
2193    VkCommandBuffer                             commandBuffer,
2194    const VkCommandBufferBeginInfo*             pBeginInfo)
2195{
2196   struct command_buffer_data *cmd_buffer_data =
2197      FIND(struct command_buffer_data, commandBuffer);
2198   struct device_data *device_data = cmd_buffer_data->device;
2199
2200   memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
2201
2202   /* We don't record any query in secondary command buffers, just make sure
2203    * we have the right inheritance.
2204    */
2205   if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
2206      VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *)
2207         clone_chain((const struct VkBaseInStructure *)pBeginInfo);
2208      VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *)
2209         vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO);
2210      VkCommandBufferInheritanceInfo inhe_info = {
2211         VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
2212         NULL,
2213         VK_NULL_HANDLE,
2214         0,
2215         VK_NULL_HANDLE,
2216         VK_FALSE,
2217         0,
2218         overlay_query_flags,
2219      };
2220
2221      if (parent_inhe_info)
2222         parent_inhe_info->pipelineStatistics = overlay_query_flags;
2223      else {
2224         inhe_info.pNext = begin_info->pNext;
2225         begin_info->pNext = &inhe_info;
2226      }
2227
2228      VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
2229
2230      if (!parent_inhe_info)
2231         begin_info->pNext = inhe_info.pNext;
2232
2233      free_chain((struct VkBaseOutStructure *)begin_info);
2234
2235      return result;
2236   }
2237
2238   /* Otherwise record a begin query as first command. */
2239   VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
2240
2241   if (result == VK_SUCCESS) {
2242      if (cmd_buffer_data->pipeline_query_pool) {
2243         device_data->vtable.CmdResetQueryPool(commandBuffer,
2244                                               cmd_buffer_data->pipeline_query_pool,
2245                                               cmd_buffer_data->query_index, 1);
2246      }
2247      if (cmd_buffer_data->timestamp_query_pool) {
2248         device_data->vtable.CmdResetQueryPool(commandBuffer,
2249                                               cmd_buffer_data->timestamp_query_pool,
2250                                               cmd_buffer_data->query_index * 2, 2);
2251      }
2252      if (cmd_buffer_data->pipeline_query_pool) {
2253         device_data->vtable.CmdBeginQuery(commandBuffer,
2254                                           cmd_buffer_data->pipeline_query_pool,
2255                                           cmd_buffer_data->query_index, 0);
2256      }
2257      if (cmd_buffer_data->timestamp_query_pool) {
2258         device_data->vtable.CmdWriteTimestamp(commandBuffer,
2259                                               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
2260                                               cmd_buffer_data->timestamp_query_pool,
2261                                               cmd_buffer_data->query_index * 2);
2262      }
2263   }
2264
2265   return result;
2266}
2267
2268static VkResult overlay_EndCommandBuffer(
2269    VkCommandBuffer                             commandBuffer)
2270{
2271   struct command_buffer_data *cmd_buffer_data =
2272      FIND(struct command_buffer_data, commandBuffer);
2273   struct device_data *device_data = cmd_buffer_data->device;
2274
2275   if (cmd_buffer_data->timestamp_query_pool) {
2276      device_data->vtable.CmdWriteTimestamp(commandBuffer,
2277                                            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
2278                                            cmd_buffer_data->timestamp_query_pool,
2279                                            cmd_buffer_data->query_index * 2 + 1);
2280   }
2281   if (cmd_buffer_data->pipeline_query_pool) {
2282      device_data->vtable.CmdEndQuery(commandBuffer,
2283                                      cmd_buffer_data->pipeline_query_pool,
2284                                      cmd_buffer_data->query_index);
2285   }
2286
2287   return device_data->vtable.EndCommandBuffer(commandBuffer);
2288}
2289
2290static VkResult overlay_ResetCommandBuffer(
2291    VkCommandBuffer                             commandBuffer,
2292    VkCommandBufferResetFlags                   flags)
2293{
2294   struct command_buffer_data *cmd_buffer_data =
2295      FIND(struct command_buffer_data, commandBuffer);
2296   struct device_data *device_data = cmd_buffer_data->device;
2297
2298   memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
2299
2300   return device_data->vtable.ResetCommandBuffer(commandBuffer, flags);
2301}
2302
2303static void overlay_CmdExecuteCommands(
2304    VkCommandBuffer                             commandBuffer,
2305    uint32_t                                    commandBufferCount,
2306    const VkCommandBuffer*                      pCommandBuffers)
2307{
2308   struct command_buffer_data *cmd_buffer_data =
2309      FIND(struct command_buffer_data, commandBuffer);
2310   struct device_data *device_data = cmd_buffer_data->device;
2311
2312   /* Add the stats of the executed command buffers to the primary one. */
2313   for (uint32_t c = 0; c < commandBufferCount; c++) {
2314      struct command_buffer_data *sec_cmd_buffer_data =
2315         FIND(struct command_buffer_data, pCommandBuffers[c]);
2316
2317      for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++)
2318         cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s];
2319   }
2320
2321   device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
2322}
2323
2324static VkResult overlay_AllocateCommandBuffers(
2325   VkDevice                           device,
2326   const VkCommandBufferAllocateInfo* pAllocateInfo,
2327   VkCommandBuffer*                   pCommandBuffers)
2328{
2329   struct device_data *device_data = FIND(struct device_data, device);
2330   VkResult result =
2331      device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
2332   if (result != VK_SUCCESS)
2333      return result;
2334
2335   VkQueryPool pipeline_query_pool = VK_NULL_HANDLE;
2336   VkQueryPool timestamp_query_pool = VK_NULL_HANDLE;
2337   if (device_data->instance->pipeline_statistics_enabled &&
2338       pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
2339      VkQueryPoolCreateInfo pool_info = {
2340         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
2341         NULL,
2342         0,
2343         VK_QUERY_TYPE_PIPELINE_STATISTICS,
2344         pAllocateInfo->commandBufferCount,
2345         overlay_query_flags,
2346      };
2347      VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
2348                                                   NULL, &pipeline_query_pool));
2349   }
2350   if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) {
2351      VkQueryPoolCreateInfo pool_info = {
2352         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
2353         NULL,
2354         0,
2355         VK_QUERY_TYPE_TIMESTAMP,
2356         pAllocateInfo->commandBufferCount * 2,
2357         0,
2358      };
2359      VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
2360                                                   NULL, &timestamp_query_pool));
2361   }
2362
2363   for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) {
2364      new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level,
2365                              pipeline_query_pool, timestamp_query_pool,
2366                              i, device_data);
2367   }
2368
2369   if (pipeline_query_pool)
2370      map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
2371   if (timestamp_query_pool)
2372      map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
2373
2374   return result;
2375}
2376
2377static void overlay_FreeCommandBuffers(
2378   VkDevice               device,
2379   VkCommandPool          commandPool,
2380   uint32_t               commandBufferCount,
2381   const VkCommandBuffer* pCommandBuffers)
2382{
2383   struct device_data *device_data = FIND(struct device_data, device);
2384   for (uint32_t i = 0; i < commandBufferCount; i++) {
2385      struct command_buffer_data *cmd_buffer_data =
2386         FIND(struct command_buffer_data, pCommandBuffers[i]);
2387
2388      /* It is legal to free a NULL command buffer*/
2389      if (!cmd_buffer_data)
2390         continue;
2391
2392      uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool));
2393      if (count == 1) {
2394         unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool));
2395         device_data->vtable.DestroyQueryPool(device_data->device,
2396                                              cmd_buffer_data->pipeline_query_pool, NULL);
2397      } else if (count != 0) {
2398         map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1));
2399      }
2400      count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool));
2401      if (count == 1) {
2402         unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool));
2403         device_data->vtable.DestroyQueryPool(device_data->device,
2404                                              cmd_buffer_data->timestamp_query_pool, NULL);
2405      } else if (count != 0) {
2406         map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1));
2407      }
2408      destroy_command_buffer_data(cmd_buffer_data);
2409   }
2410
2411   device_data->vtable.FreeCommandBuffers(device, commandPool,
2412                                          commandBufferCount, pCommandBuffers);
2413}
2414
2415static VkResult overlay_QueueSubmit(
2416    VkQueue                                     queue,
2417    uint32_t                                    submitCount,
2418    const VkSubmitInfo*                         pSubmits,
2419    VkFence                                     fence)
2420{
2421   struct queue_data *queue_data = FIND(struct queue_data, queue);
2422   struct device_data *device_data = queue_data->device;
2423
2424   device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
2425
2426   for (uint32_t s = 0; s < submitCount; s++) {
2427      for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) {
2428         struct command_buffer_data *cmd_buffer_data =
2429            FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]);
2430
2431         /* Merge the submitted command buffer stats into the device. */
2432         for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
2433            device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
2434
2435         /* Attach the command buffer to the queue so we remember to read its
2436          * pipeline statistics & timestamps at QueuePresent().
2437          */
2438         if (!cmd_buffer_data->pipeline_query_pool &&
2439             !cmd_buffer_data->timestamp_query_pool)
2440            continue;
2441
2442         if (list_is_empty(&cmd_buffer_data->link)) {
2443            list_addtail(&cmd_buffer_data->link,
2444                         &queue_data->running_command_buffer);
2445         } else {
2446            fprintf(stderr, "Command buffer submitted multiple times before present.\n"
2447                    "This could lead to invalid data.\n");
2448         }
2449      }
2450   }
2451
2452   return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
2453}
2454
2455static VkResult overlay_QueueSubmit2KHR(
2456    VkQueue                                     queue,
2457    uint32_t                                    submitCount,
2458    const VkSubmitInfo2*                        pSubmits,
2459    VkFence                                     fence)
2460{
2461   struct queue_data *queue_data = FIND(struct queue_data, queue);
2462   struct device_data *device_data = queue_data->device;
2463
2464   device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
2465
2466   for (uint32_t s = 0; s < submitCount; s++) {
2467      for (uint32_t c = 0; c < pSubmits[s].commandBufferInfoCount; c++) {
2468         struct command_buffer_data *cmd_buffer_data =
2469            FIND(struct command_buffer_data, pSubmits[s].pCommandBufferInfos[c].commandBuffer);
2470
2471         /* Merge the submitted command buffer stats into the device. */
2472         for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
2473            device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
2474
2475         /* Attach the command buffer to the queue so we remember to read its
2476         * pipeline statistics & timestamps at QueuePresent().
2477         */
2478         if (!cmd_buffer_data->pipeline_query_pool &&
2479            !cmd_buffer_data->timestamp_query_pool)
2480            continue;
2481
2482         if (list_is_empty(&cmd_buffer_data->link)) {
2483            list_addtail(&cmd_buffer_data->link,
2484                        &queue_data->running_command_buffer);
2485         } else {
2486            fprintf(stderr, "Command buffer submitted multiple times before present.\n"
2487                  "This could lead to invalid data.\n");
2488         }
2489      }
2490   }
2491
2492   return device_data->vtable.QueueSubmit2KHR(queue, submitCount, pSubmits, fence);
2493}
2494
2495static VkResult overlay_CreateDevice(
2496    VkPhysicalDevice                            physicalDevice,
2497    const VkDeviceCreateInfo*                   pCreateInfo,
2498    const VkAllocationCallbacks*                pAllocator,
2499    VkDevice*                                   pDevice)
2500{
2501   struct instance_data *instance_data =
2502      FIND(struct instance_data, physicalDevice);
2503   VkLayerDeviceCreateInfo *chain_info =
2504      get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
2505
2506   assert(chain_info->u.pLayerInfo);
2507   PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
2508   PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr;
2509   PFN_vkCreateDevice fpCreateDevice = (PFN_vkCreateDevice)fpGetInstanceProcAddr(NULL, "vkCreateDevice");
2510   if (fpCreateDevice == NULL) {
2511      return VK_ERROR_INITIALIZATION_FAILED;
2512   }
2513
2514   // Advance the link info for the next element on the chain
2515   chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
2516
2517   VkPhysicalDeviceFeatures device_features = {};
2518   VkPhysicalDeviceFeatures *device_features_ptr = NULL;
2519
2520   VkDeviceCreateInfo *device_info = (VkDeviceCreateInfo *)
2521      clone_chain((const struct VkBaseInStructure *)pCreateInfo);
2522
2523   VkPhysicalDeviceFeatures2 *device_features2 = (VkPhysicalDeviceFeatures2 *)
2524      vk_find_struct(device_info, PHYSICAL_DEVICE_FEATURES_2);
2525   if (device_features2) {
2526      /* Can't use device_info->pEnabledFeatures when VkPhysicalDeviceFeatures2 is present */
2527      device_features_ptr = &device_features2->features;
2528   } else {
2529      if (device_info->pEnabledFeatures)
2530         device_features = *(device_info->pEnabledFeatures);
2531      device_features_ptr = &device_features;
2532      device_info->pEnabledFeatures = &device_features;
2533   }
2534
2535   if (instance_data->pipeline_statistics_enabled) {
2536      device_features_ptr->inheritedQueries = true;
2537      device_features_ptr->pipelineStatisticsQuery = true;
2538   }
2539
2540
2541   VkResult result = fpCreateDevice(physicalDevice, device_info, pAllocator, pDevice);
2542   free_chain((struct VkBaseOutStructure *)device_info);
2543   if (result != VK_SUCCESS) return result;
2544
2545   struct device_data *device_data = new_device_data(*pDevice, instance_data);
2546   device_data->physical_device = physicalDevice;
2547   vk_device_dispatch_table_load(&device_data->vtable,
2548                                 fpGetDeviceProcAddr, *pDevice);
2549
2550   instance_data->pd_vtable.GetPhysicalDeviceProperties(device_data->physical_device,
2551                                                        &device_data->properties);
2552
2553   VkLayerDeviceCreateInfo *load_data_info =
2554      get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK);
2555   device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData;
2556
2557   device_map_queues(device_data, pCreateInfo);
2558
2559   return result;
2560}
2561
2562static void overlay_DestroyDevice(
2563    VkDevice                                    device,
2564    const VkAllocationCallbacks*                pAllocator)
2565{
2566   struct device_data *device_data = FIND(struct device_data, device);
2567   device_unmap_queues(device_data);
2568   device_data->vtable.DestroyDevice(device, pAllocator);
2569   destroy_device_data(device_data);
2570}
2571
2572static VkResult overlay_CreateInstance(
2573    const VkInstanceCreateInfo*                 pCreateInfo,
2574    const VkAllocationCallbacks*                pAllocator,
2575    VkInstance*                                 pInstance)
2576{
2577   VkLayerInstanceCreateInfo *chain_info =
2578      get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
2579
2580   assert(chain_info->u.pLayerInfo);
2581   PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr =
2582      chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
2583   PFN_vkCreateInstance fpCreateInstance =
2584      (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance");
2585   if (fpCreateInstance == NULL) {
2586      return VK_ERROR_INITIALIZATION_FAILED;
2587   }
2588
2589   // Advance the link info for the next element on the chain
2590   chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
2591
2592   VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance);
2593   if (result != VK_SUCCESS) return result;
2594
2595   struct instance_data *instance_data = new_instance_data(*pInstance);
2596   vk_instance_dispatch_table_load(&instance_data->vtable,
2597                                   fpGetInstanceProcAddr,
2598                                   instance_data->instance);
2599   vk_physical_device_dispatch_table_load(&instance_data->pd_vtable,
2600                                          fpGetInstanceProcAddr,
2601                                          instance_data->instance);
2602   instance_data_map_physical_devices(instance_data, true);
2603
2604   parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG"));
2605
2606   /* If there's no control file, and an output_file was specified, start
2607    * capturing fps data right away.
2608    */
2609   instance_data->capture_enabled =
2610      instance_data->params.output_file && instance_data->params.control < 0;
2611   instance_data->capture_started = instance_data->capture_enabled;
2612
2613   for (int i = OVERLAY_PARAM_ENABLED_vertices;
2614        i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
2615      if (instance_data->params.enabled[i]) {
2616         instance_data->pipeline_statistics_enabled = true;
2617         break;
2618      }
2619   }
2620
2621   return result;
2622}
2623
2624static void overlay_DestroyInstance(
2625    VkInstance                                  instance,
2626    const VkAllocationCallbacks*                pAllocator)
2627{
2628   struct instance_data *instance_data = FIND(struct instance_data, instance);
2629   instance_data_map_physical_devices(instance_data, false);
2630   instance_data->vtable.DestroyInstance(instance, pAllocator);
2631   destroy_instance_data(instance_data);
2632}
2633
2634static const struct {
2635   const char *name;
2636   void *ptr;
2637} name_to_funcptr_map[] = {
2638   { "vkGetInstanceProcAddr", (void *) vkGetInstanceProcAddr },
2639   { "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr },
2640#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn }
2641#define ADD_ALIAS_HOOK(alias, fn) { "vk" # alias, (void *) overlay_ ## fn }
2642   ADD_HOOK(AllocateCommandBuffers),
2643   ADD_HOOK(FreeCommandBuffers),
2644   ADD_HOOK(ResetCommandBuffer),
2645   ADD_HOOK(BeginCommandBuffer),
2646   ADD_HOOK(EndCommandBuffer),
2647   ADD_HOOK(CmdExecuteCommands),
2648
2649   ADD_HOOK(CmdDraw),
2650   ADD_HOOK(CmdDrawIndexed),
2651   ADD_HOOK(CmdDrawIndirect),
2652   ADD_HOOK(CmdDrawIndexedIndirect),
2653   ADD_HOOK(CmdDispatch),
2654   ADD_HOOK(CmdDispatchIndirect),
2655   ADD_HOOK(CmdDrawIndirectCount),
2656   ADD_ALIAS_HOOK(CmdDrawIndirectCountKHR, CmdDrawIndirectCount),
2657   ADD_HOOK(CmdDrawIndexedIndirectCount),
2658   ADD_ALIAS_HOOK(CmdDrawIndexedIndirectCountKHR, CmdDrawIndexedIndirectCount),
2659
2660   ADD_HOOK(CmdBindPipeline),
2661
2662   ADD_HOOK(CreateSwapchainKHR),
2663   ADD_HOOK(QueuePresentKHR),
2664   ADD_HOOK(DestroySwapchainKHR),
2665   ADD_HOOK(AcquireNextImageKHR),
2666   ADD_HOOK(AcquireNextImage2KHR),
2667
2668   ADD_HOOK(QueueSubmit),
2669   ADD_HOOK(QueueSubmit2KHR),
2670
2671   ADD_HOOK(CreateDevice),
2672   ADD_HOOK(DestroyDevice),
2673
2674   ADD_HOOK(CreateInstance),
2675   ADD_HOOK(DestroyInstance),
2676#undef ADD_HOOK
2677#undef ADD_ALIAS_HOOK
2678};
2679
2680static void *find_ptr(const char *name)
2681{
2682   for (uint32_t i = 0; i < ARRAY_SIZE(name_to_funcptr_map); i++) {
2683      if (strcmp(name, name_to_funcptr_map[i].name) == 0)
2684         return name_to_funcptr_map[i].ptr;
2685   }
2686
2687   return NULL;
2688}
2689
2690VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice dev,
2691                                                                             const char *funcName)
2692{
2693   void *ptr = find_ptr(funcName);
2694   if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr);
2695
2696   if (dev == NULL) return NULL;
2697
2698   struct device_data *device_data = FIND(struct device_data, dev);
2699   if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL;
2700   return device_data->vtable.GetDeviceProcAddr(dev, funcName);
2701}
2702
2703VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
2704                                                                               const char *funcName)
2705{
2706   void *ptr = find_ptr(funcName);
2707   if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr);
2708
2709   if (instance == NULL) return NULL;
2710
2711   struct instance_data *instance_data = FIND(struct instance_data, instance);
2712   if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL;
2713   return instance_data->vtable.GetInstanceProcAddr(instance, funcName);
2714}
2715