/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include <stdlib.h>
#include <assert.h>

#include <vulkan/vulkan.h>
#include <vulkan/vk_layer.h>

#include "git_sha1.h"

#include "imgui.h"

#include "overlay_params.h"

#include "util/debug.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/ralloc.h"
#include "util/os_time.h"
#include "util/os_socket.h"
#include "util/simple_mtx.h"
#include "util/u_math.h"

#include "vk_enum_to_str.h"
#include "vk_dispatch_table.h"
#include "vk_util.h"

/* Per-instance layer state.
 *
 * Mapped from VkInstance/VkPhysicalDevice: the same object is registered in
 * the handle table for the instance handle and for each of its physical
 * devices (see instance_data_map_physical_devices()).
 */
struct instance_data {
   /* Instance-level and physical-device-level entry points. */
   struct vk_instance_dispatch_table vtable;
   struct vk_physical_device_dispatch_table pd_vtable;
   VkInstance instance;

   /* Overlay configuration (enabled stats, output file, control socket...). */
   struct overlay_params params;
   bool pipeline_statistics_enabled;

   /* Set once the header row has been written to params.output_file. */
   bool first_line_printed;

   /* Socket of the connected control client, -1 while nobody is connected. */
   int control_client;

   /* Dumping of frame stats to a file has been enabled. */
   bool capture_enabled;

   /* Dumping of frame stats to a file has been enabled and started. */
   bool capture_started;
};

/* One counter per OVERLAY_PARAM_ENABLED_* index. */
struct frame_stat {
   uint64_t stats[OVERLAY_PARAM_ENABLED_MAX];
};

/* Mapped from VkDevice */
struct queue_data;
struct device_data {
   struct instance_data *instance;

   /* Used on handles the layer obtains itself (queues, command buffers). */
   PFN_vkSetDeviceLoaderData set_device_loader_data;

   struct vk_device_dispatch_table vtable;
   VkPhysicalDevice physical_device;
   VkDevice device;

   VkPhysicalDeviceProperties properties;

   /* Last queue created from a family exposing VK_QUEUE_GRAPHICS_BIT. */
   struct queue_data *graphic_queue;

   /* n_queues entries, filled by device_map_queues(). */
   struct queue_data **queues;
   uint32_t n_queues;

   /* For a single frame */
   struct frame_stat frame_stats;
};

/* Mapped from VkCommandBuffer */
struct command_buffer_data {
   struct device_data *device;

   VkCommandBufferLevel level;

   VkCommandBuffer cmd_buffer;
   VkQueryPool pipeline_query_pool;
   VkQueryPool timestamp_query_pool;
   uint32_t query_index;

   struct frame_stat stats;

   struct list_head link; /* link into queue_data::running_command_buffer */
};

/* Mapped from VkQueue */
struct queue_data {
   struct device_data *device;

   VkQueue queue;
   VkQueueFlags flags;
   uint32_t family_index;
   /* Computed in new_queue_data() from the family's timestampValidBits. */
   uint64_t timestamp_mask;

   /* Created in the signaled state by new_queue_data(). */
   VkFence queries_fence;

   struct list_head running_command_buffer;
};

/* Per-submission resources of one overlay draw; see get_overlay_draw(). */
struct overlay_draw {
   struct list_head link;

   VkCommandBuffer command_buffer;

   VkSemaphore cross_engine_semaphore;

   VkSemaphore semaphore;
   VkFence fence;

   VkBuffer vertex_buffer;
   VkDeviceMemory vertex_buffer_mem;
   VkDeviceSize vertex_buffer_size;

   VkBuffer index_buffer;
   VkDeviceMemory index_buffer_mem;
   VkDeviceSize index_buffer_size;
};

/* Mapped from VkSwapchainKHR */
struct swapchain_data {
   struct device_data *device;

   VkSwapchainKHR swapchain;
   unsigned width, height;
   VkFormat format;

   uint32_t n_images;
   VkImage *images;
   VkImageView *image_views;
   VkFramebuffer *framebuffers;

   VkRenderPass render_pass;

   VkDescriptorPool descriptor_pool;
   VkDescriptorSetLayout descriptor_layout;
   VkDescriptorSet descriptor_set;

   VkSampler font_sampler;

   VkPipelineLayout pipeline_layout;
   VkPipeline pipeline;

   VkCommandPool command_pool;

   struct list_head draws; /* List of struct overlay_draw */

   /* Font atlas resources; font_uploaded is set by ensure_swapchain_fonts(). */
   bool font_uploaded;
   VkImage font_image;
   VkImageView font_image_view;
   VkDeviceMemory font_mem;
   VkBuffer upload_font_buffer;
   VkDeviceMemory upload_font_buffer_mem;

   /**/
   ImGuiContext* imgui_context;
   ImVec2 window_size;

   /**/
   /* Frames snapshotted so far, and the os_time_get() value (us) of the last. */
   uint64_t n_frames;
   uint64_t last_present_time;

   unsigned n_frames_since_update;
   uint64_t last_fps_update;
   double fps;

   /* Scratch state read by the get_stat()/get_time_stat() plot callbacks. */
   enum overlay_param_enabled stat_selector;
   double time_dividor;
   struct frame_stat stats_min, stats_max;
   /* Ring of per-frame stats, written at n_frames % ARRAY_SIZE(frames_stats). */
   struct frame_stat frames_stats[200];

   /* Over a single frame */
   struct frame_stat frame_stats;

   /* Over fps_sampling_period */
   struct frame_stat accumulated_stats;
};

/* Pipeline statistics requested by the overlay; OVERLAY_QUERY_COUNT is the
 * number of bits set below.
 */
static const VkQueryPipelineStatisticFlags overlay_query_flags =
   VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
   VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
   VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
#define OVERLAY_QUERY_COUNT (11)

/* Global table mapping Vulkan handles (as uint64_t keys) to the layer's
 * per-object data, guarded by vk_object_to_data_mutex.
 */
static struct hash_table_u64 *vk_object_to_data = NULL;
static simple_mtx_t vk_object_to_data_mutex =
_SIMPLE_MTX_INITIALIZER_NP; 220 221thread_local ImGuiContext* __MesaImGui; 222 223static inline void ensure_vk_object_map(void) 224{ 225 if (!vk_object_to_data) 226 vk_object_to_data = _mesa_hash_table_u64_create(NULL); 227} 228 229#define HKEY(obj) ((uint64_t)(obj)) 230#define FIND(type, obj) ((type *)find_object_data(HKEY(obj))) 231 232static void *find_object_data(uint64_t obj) 233{ 234 simple_mtx_lock(&vk_object_to_data_mutex); 235 ensure_vk_object_map(); 236 void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj); 237 simple_mtx_unlock(&vk_object_to_data_mutex); 238 return data; 239} 240 241static void map_object(uint64_t obj, void *data) 242{ 243 simple_mtx_lock(&vk_object_to_data_mutex); 244 ensure_vk_object_map(); 245 _mesa_hash_table_u64_insert(vk_object_to_data, obj, data); 246 simple_mtx_unlock(&vk_object_to_data_mutex); 247} 248 249static void unmap_object(uint64_t obj) 250{ 251 simple_mtx_lock(&vk_object_to_data_mutex); 252 _mesa_hash_table_u64_remove(vk_object_to_data, obj); 253 simple_mtx_unlock(&vk_object_to_data_mutex); 254} 255 256/**/ 257 258#define VK_CHECK(expr) \ 259 do { \ 260 VkResult __result = (expr); \ 261 if (__result != VK_SUCCESS) { \ 262 fprintf(stderr, "'%s' line %i failed with %s\n", \ 263 #expr, __LINE__, vk_Result_to_str(__result)); \ 264 } \ 265 } while (0) 266 267/**/ 268 269static VkLayerInstanceCreateInfo *get_instance_chain_info(const VkInstanceCreateInfo *pCreateInfo, 270 VkLayerFunction func) 271{ 272 vk_foreach_struct_const(item, pCreateInfo->pNext) { 273 if (item->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && 274 ((VkLayerInstanceCreateInfo *) item)->function == func) 275 return (VkLayerInstanceCreateInfo *) item; 276 } 277 unreachable("instance chain info not found"); 278 return NULL; 279} 280 281static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, 282 VkLayerFunction func) 283{ 284 vk_foreach_struct_const(item, pCreateInfo->pNext) { 285 if (item->sType == 
VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && 286 ((VkLayerDeviceCreateInfo *) item)->function == func) 287 return (VkLayerDeviceCreateInfo *)item; 288 } 289 unreachable("device chain info not found"); 290 return NULL; 291} 292 293static struct VkBaseOutStructure * 294clone_chain(const struct VkBaseInStructure *chain) 295{ 296 struct VkBaseOutStructure *head = NULL, *tail = NULL; 297 298 vk_foreach_struct_const(item, chain) { 299 size_t item_size = vk_structure_type_size(item); 300 struct VkBaseOutStructure *new_item = 301 (struct VkBaseOutStructure *)malloc(item_size);; 302 303 memcpy(new_item, item, item_size); 304 305 if (!head) 306 head = new_item; 307 if (tail) 308 tail->pNext = new_item; 309 tail = new_item; 310 } 311 312 return head; 313} 314 315static void 316free_chain(struct VkBaseOutStructure *chain) 317{ 318 while (chain) { 319 void *node = chain; 320 chain = chain->pNext; 321 free(node); 322 } 323} 324 325/**/ 326 327static struct instance_data *new_instance_data(VkInstance instance) 328{ 329 struct instance_data *data = rzalloc(NULL, struct instance_data); 330 data->instance = instance; 331 data->control_client = -1; 332 map_object(HKEY(data->instance), data); 333 return data; 334} 335 336static void destroy_instance_data(struct instance_data *data) 337{ 338 if (data->params.output_file) 339 fclose(data->params.output_file); 340 if (data->params.control >= 0) 341 os_socket_close(data->params.control); 342 unmap_object(HKEY(data->instance)); 343 ralloc_free(data); 344} 345 346static void instance_data_map_physical_devices(struct instance_data *instance_data, 347 bool map) 348{ 349 uint32_t physicalDeviceCount = 0; 350 instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance, 351 &physicalDeviceCount, 352 NULL); 353 354 VkPhysicalDevice *physicalDevices = (VkPhysicalDevice *) malloc(sizeof(VkPhysicalDevice) * physicalDeviceCount); 355 instance_data->vtable.EnumeratePhysicalDevices(instance_data->instance, 356 &physicalDeviceCount, 357 
physicalDevices); 358 359 for (uint32_t i = 0; i < physicalDeviceCount; i++) { 360 if (map) 361 map_object(HKEY(physicalDevices[i]), instance_data); 362 else 363 unmap_object(HKEY(physicalDevices[i])); 364 } 365 366 free(physicalDevices); 367} 368 369/**/ 370static struct device_data *new_device_data(VkDevice device, struct instance_data *instance) 371{ 372 struct device_data *data = rzalloc(NULL, struct device_data); 373 data->instance = instance; 374 data->device = device; 375 map_object(HKEY(data->device), data); 376 return data; 377} 378 379static struct queue_data *new_queue_data(VkQueue queue, 380 const VkQueueFamilyProperties *family_props, 381 uint32_t family_index, 382 struct device_data *device_data) 383{ 384 struct queue_data *data = rzalloc(device_data, struct queue_data); 385 data->device = device_data; 386 data->queue = queue; 387 data->flags = family_props->queueFlags; 388 data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1; 389 data->family_index = family_index; 390 list_inithead(&data->running_command_buffer); 391 map_object(HKEY(data->queue), data); 392 393 /* Fence synchronizing access to queries on that queue. 
*/ 394 VkFenceCreateInfo fence_info = {}; 395 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 396 fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; 397 VK_CHECK(device_data->vtable.CreateFence(device_data->device, 398 &fence_info, 399 NULL, 400 &data->queries_fence)); 401 402 if (data->flags & VK_QUEUE_GRAPHICS_BIT) 403 device_data->graphic_queue = data; 404 405 return data; 406} 407 408static void destroy_queue(struct queue_data *data) 409{ 410 struct device_data *device_data = data->device; 411 device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); 412 unmap_object(HKEY(data->queue)); 413 ralloc_free(data); 414} 415 416static void device_map_queues(struct device_data *data, 417 const VkDeviceCreateInfo *pCreateInfo) 418{ 419 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) 420 data->n_queues += pCreateInfo->pQueueCreateInfos[i].queueCount; 421 data->queues = ralloc_array(data, struct queue_data *, data->n_queues); 422 423 struct instance_data *instance_data = data->instance; 424 uint32_t n_family_props; 425 instance_data->pd_vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, 426 &n_family_props, 427 NULL); 428 VkQueueFamilyProperties *family_props = 429 (VkQueueFamilyProperties *)malloc(sizeof(VkQueueFamilyProperties) * n_family_props); 430 instance_data->pd_vtable.GetPhysicalDeviceQueueFamilyProperties(data->physical_device, 431 &n_family_props, 432 family_props); 433 434 uint32_t queue_index = 0; 435 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 436 for (uint32_t j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) { 437 VkQueue queue; 438 data->vtable.GetDeviceQueue(data->device, 439 pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, 440 j, &queue); 441 442 VK_CHECK(data->set_device_loader_data(data->device, queue)); 443 444 data->queues[queue_index++] = 445 new_queue_data(queue, &family_props[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex], 446 
pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex, data); 447 } 448 } 449 450 free(family_props); 451} 452 453static void device_unmap_queues(struct device_data *data) 454{ 455 for (uint32_t i = 0; i < data->n_queues; i++) 456 destroy_queue(data->queues[i]); 457} 458 459static void destroy_device_data(struct device_data *data) 460{ 461 unmap_object(HKEY(data->device)); 462 ralloc_free(data); 463} 464 465/**/ 466static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, 467 VkCommandBufferLevel level, 468 VkQueryPool pipeline_query_pool, 469 VkQueryPool timestamp_query_pool, 470 uint32_t query_index, 471 struct device_data *device_data) 472{ 473 struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data); 474 data->device = device_data; 475 data->cmd_buffer = cmd_buffer; 476 data->level = level; 477 data->pipeline_query_pool = pipeline_query_pool; 478 data->timestamp_query_pool = timestamp_query_pool; 479 data->query_index = query_index; 480 list_inithead(&data->link); 481 map_object(HKEY(data->cmd_buffer), data); 482 return data; 483} 484 485static void destroy_command_buffer_data(struct command_buffer_data *data) 486{ 487 unmap_object(HKEY(data->cmd_buffer)); 488 list_delinit(&data->link); 489 ralloc_free(data); 490} 491 492/**/ 493static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain, 494 struct device_data *device_data) 495{ 496 struct instance_data *instance_data = device_data->instance; 497 struct swapchain_data *data = rzalloc(NULL, struct swapchain_data); 498 data->device = device_data; 499 data->swapchain = swapchain; 500 data->window_size = ImVec2(instance_data->params.width, instance_data->params.height); 501 list_inithead(&data->draws); 502 map_object(HKEY(data->swapchain), data); 503 return data; 504} 505 506static void destroy_swapchain_data(struct swapchain_data *data) 507{ 508 unmap_object(HKEY(data->swapchain)); 509 ralloc_free(data); 510} 511 512struct overlay_draw 
*get_overlay_draw(struct swapchain_data *data) 513{ 514 struct device_data *device_data = data->device; 515 struct overlay_draw *draw = list_is_empty(&data->draws) ? 516 NULL : list_first_entry(&data->draws, struct overlay_draw, link); 517 518 VkSemaphoreCreateInfo sem_info = {}; 519 sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; 520 521 if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) { 522 list_del(&draw->link); 523 VK_CHECK(device_data->vtable.ResetFences(device_data->device, 524 1, &draw->fence)); 525 list_addtail(&draw->link, &data->draws); 526 return draw; 527 } 528 529 draw = rzalloc(data, struct overlay_draw); 530 531 VkCommandBufferAllocateInfo cmd_buffer_info = {}; 532 cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; 533 cmd_buffer_info.commandPool = data->command_pool; 534 cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; 535 cmd_buffer_info.commandBufferCount = 1; 536 VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device, 537 &cmd_buffer_info, 538 &draw->command_buffer)); 539 VK_CHECK(device_data->set_device_loader_data(device_data->device, 540 draw->command_buffer)); 541 542 543 VkFenceCreateInfo fence_info = {}; 544 fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 545 VK_CHECK(device_data->vtable.CreateFence(device_data->device, 546 &fence_info, 547 NULL, 548 &draw->fence)); 549 550 VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, 551 NULL, &draw->semaphore)); 552 VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info, 553 NULL, &draw->cross_engine_semaphore)); 554 555 list_addtail(&draw->link, &data->draws); 556 557 return draw; 558} 559 560static const char *param_unit(enum overlay_param_enabled param) 561{ 562 switch (param) { 563 case OVERLAY_PARAM_ENABLED_frame_timing: 564 case OVERLAY_PARAM_ENABLED_acquire_timing: 565 case OVERLAY_PARAM_ENABLED_present_timing: 566 return "(us)"; 567 
case OVERLAY_PARAM_ENABLED_gpu_timing: 568 return "(ns)"; 569 default: 570 return ""; 571 } 572} 573 574static void parse_command(struct instance_data *instance_data, 575 const char *cmd, unsigned cmdlen, 576 const char *param, unsigned paramlen) 577{ 578 if (!strncmp(cmd, "capture", cmdlen)) { 579 int value = atoi(param); 580 bool enabled = value > 0; 581 582 if (enabled) { 583 instance_data->capture_enabled = true; 584 } else { 585 instance_data->capture_enabled = false; 586 instance_data->capture_started = false; 587 } 588 } 589} 590 591#define BUFSIZE 4096 592 593/** 594 * This function will process commands through the control file. 595 * 596 * A command starts with a colon, followed by the command, and followed by an 597 * option '=' and a parameter. It has to end with a semi-colon. A full command 598 * + parameter looks like: 599 * 600 * :cmd=param; 601 */ 602static void process_char(struct instance_data *instance_data, char c) 603{ 604 static char cmd[BUFSIZE]; 605 static char param[BUFSIZE]; 606 607 static unsigned cmdpos = 0; 608 static unsigned parampos = 0; 609 static bool reading_cmd = false; 610 static bool reading_param = false; 611 612 switch (c) { 613 case ':': 614 cmdpos = 0; 615 parampos = 0; 616 reading_cmd = true; 617 reading_param = false; 618 break; 619 case ';': 620 if (!reading_cmd) 621 break; 622 cmd[cmdpos++] = '\0'; 623 param[parampos++] = '\0'; 624 parse_command(instance_data, cmd, cmdpos, param, parampos); 625 reading_cmd = false; 626 reading_param = false; 627 break; 628 case '=': 629 if (!reading_cmd) 630 break; 631 reading_param = true; 632 break; 633 default: 634 if (!reading_cmd) 635 break; 636 637 if (reading_param) { 638 /* overflow means an invalid parameter */ 639 if (parampos >= BUFSIZE - 1) { 640 reading_cmd = false; 641 reading_param = false; 642 break; 643 } 644 645 param[parampos++] = c; 646 } else { 647 /* overflow means an invalid command */ 648 if (cmdpos >= BUFSIZE - 1) { 649 reading_cmd = false; 650 break; 651 } 652 
653 cmd[cmdpos++] = c; 654 } 655 } 656} 657 658static void control_send(struct instance_data *instance_data, 659 const char *cmd, unsigned cmdlen, 660 const char *param, unsigned paramlen) 661{ 662 unsigned msglen = 0; 663 char buffer[BUFSIZE]; 664 665 assert(cmdlen + paramlen + 3 < BUFSIZE); 666 667 buffer[msglen++] = ':'; 668 669 memcpy(&buffer[msglen], cmd, cmdlen); 670 msglen += cmdlen; 671 672 if (paramlen > 0) { 673 buffer[msglen++] = '='; 674 memcpy(&buffer[msglen], param, paramlen); 675 msglen += paramlen; 676 buffer[msglen++] = ';'; 677 } 678 679 os_socket_send(instance_data->control_client, buffer, msglen, 0); 680} 681 682static void control_send_connection_string(struct device_data *device_data) 683{ 684 struct instance_data *instance_data = device_data->instance; 685 686 const char *controlVersionCmd = "MesaOverlayControlVersion"; 687 const char *controlVersionString = "1"; 688 689 control_send(instance_data, controlVersionCmd, strlen(controlVersionCmd), 690 controlVersionString, strlen(controlVersionString)); 691 692 const char *deviceCmd = "DeviceName"; 693 const char *deviceName = device_data->properties.deviceName; 694 695 control_send(instance_data, deviceCmd, strlen(deviceCmd), 696 deviceName, strlen(deviceName)); 697 698 const char *mesaVersionCmd = "MesaVersion"; 699 const char *mesaVersionString = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1; 700 701 control_send(instance_data, mesaVersionCmd, strlen(mesaVersionCmd), 702 mesaVersionString, strlen(mesaVersionString)); 703} 704 705static void control_client_check(struct device_data *device_data) 706{ 707 struct instance_data *instance_data = device_data->instance; 708 709 /* Already connected, just return. 
*/ 710 if (instance_data->control_client >= 0) 711 return; 712 713 int socket = os_socket_accept(instance_data->params.control); 714 if (socket == -1) { 715 if (errno != EAGAIN && errno != EWOULDBLOCK && errno != ECONNABORTED) 716 fprintf(stderr, "ERROR on socket: %s\n", strerror(errno)); 717 return; 718 } 719 720 if (socket >= 0) { 721 os_socket_block(socket, false); 722 instance_data->control_client = socket; 723 control_send_connection_string(device_data); 724 } 725} 726 727static void control_client_disconnected(struct instance_data *instance_data) 728{ 729 os_socket_close(instance_data->control_client); 730 instance_data->control_client = -1; 731} 732 733static void process_control_socket(struct instance_data *instance_data) 734{ 735 const int client = instance_data->control_client; 736 if (client >= 0) { 737 char buf[BUFSIZE]; 738 739 while (true) { 740 ssize_t n = os_socket_recv(client, buf, BUFSIZE, 0); 741 742 if (n == -1) { 743 if (errno == EAGAIN || errno == EWOULDBLOCK) { 744 /* nothing to read, try again later */ 745 break; 746 } 747 748 if (errno != ECONNRESET) 749 fprintf(stderr, "ERROR on connection: %s\n", strerror(errno)); 750 751 control_client_disconnected(instance_data); 752 } else if (n == 0) { 753 /* recv() returns 0 when the client disconnects */ 754 control_client_disconnected(instance_data); 755 } 756 757 for (ssize_t i = 0; i < n; i++) { 758 process_char(instance_data, buf[i]); 759 } 760 761 /* If we try to read BUFSIZE and receive BUFSIZE bytes from the 762 * socket, there's a good chance that there's still more data to be 763 * read, so we will try again. Otherwise, simply be done for this 764 * iteration and try again on the next frame. 
765 */ 766 if (n < BUFSIZE) 767 break; 768 } 769 } 770} 771 772static void snapshot_swapchain_frame(struct swapchain_data *data) 773{ 774 struct device_data *device_data = data->device; 775 struct instance_data *instance_data = device_data->instance; 776 uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats); 777 uint64_t now = os_time_get(); /* us */ 778 779 if (instance_data->params.control >= 0) { 780 control_client_check(device_data); 781 process_control_socket(instance_data); 782 } 783 784 if (data->last_present_time) { 785 data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] = 786 now - data->last_present_time; 787 } 788 789 memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx])); 790 for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 791 data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; 792 data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; 793 } 794 795 /* If capture has been enabled but it hasn't started yet, it means we are on 796 * the first snapshot after it has been enabled. At this point we want to 797 * use the stats captured so far to update the display, but we don't want 798 * this data to cause noise to the stats that we want to capture from now 799 * on. 800 * 801 * capture_begin == true will trigger an update of the fps on display, and a 802 * flush of the data, but no stats will be written to the output file. This 803 * way, we will have only stats from after the capture has been enabled 804 * written to the output_file. 
805 */ 806 const bool capture_begin = 807 instance_data->capture_enabled && !instance_data->capture_started; 808 809 if (data->last_fps_update) { 810 double elapsed = (double)(now - data->last_fps_update); /* us */ 811 if (capture_begin || 812 elapsed >= instance_data->params.fps_sampling_period) { 813 data->fps = 1000000.0f * data->n_frames_since_update / elapsed; 814 if (instance_data->capture_started) { 815 if (!instance_data->first_line_printed) { 816 bool first_column = true; 817 818 instance_data->first_line_printed = true; 819 820#define OVERLAY_PARAM_BOOL(name) \ 821 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_##name]) { \ 822 fprintf(instance_data->params.output_file, \ 823 "%s%s%s", first_column ? "" : ", ", #name, \ 824 param_unit(OVERLAY_PARAM_ENABLED_##name)); \ 825 first_column = false; \ 826 } 827#define OVERLAY_PARAM_CUSTOM(name) 828 OVERLAY_PARAMS 829#undef OVERLAY_PARAM_BOOL 830#undef OVERLAY_PARAM_CUSTOM 831 fprintf(instance_data->params.output_file, "\n"); 832 } 833 834 for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 835 if (!instance_data->params.enabled[s]) 836 continue; 837 if (s == OVERLAY_PARAM_ENABLED_fps) { 838 fprintf(instance_data->params.output_file, 839 "%s%.2f", s == 0 ? "" : ", ", data->fps); 840 } else { 841 fprintf(instance_data->params.output_file, 842 "%s%" PRIu64, s == 0 ? 
"" : ", ", 843 data->accumulated_stats.stats[s]); 844 } 845 } 846 fprintf(instance_data->params.output_file, "\n"); 847 fflush(instance_data->params.output_file); 848 } 849 850 memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats)); 851 data->n_frames_since_update = 0; 852 data->last_fps_update = now; 853 854 if (capture_begin) 855 instance_data->capture_started = true; 856 } 857 } else { 858 data->last_fps_update = now; 859 } 860 861 memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats)); 862 memset(&data->frame_stats, 0, sizeof(device_data->frame_stats)); 863 864 data->last_present_time = now; 865 data->n_frames++; 866 data->n_frames_since_update++; 867} 868 869static float get_time_stat(void *_data, int _idx) 870{ 871 struct swapchain_data *data = (struct swapchain_data *) _data; 872 if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) 873 return 0.0f; 874 int idx = ARRAY_SIZE(data->frames_stats) + 875 data->n_frames < ARRAY_SIZE(data->frames_stats) ? 876 _idx - data->n_frames : 877 _idx + data->n_frames; 878 idx %= ARRAY_SIZE(data->frames_stats); 879 /* Time stats are in us. */ 880 return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor; 881} 882 883static float get_stat(void *_data, int _idx) 884{ 885 struct swapchain_data *data = (struct swapchain_data *) _data; 886 if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames) 887 return 0.0f; 888 int idx = ARRAY_SIZE(data->frames_stats) + 889 data->n_frames < ARRAY_SIZE(data->frames_stats) ? 
890 _idx - data->n_frames : 891 _idx + data->n_frames; 892 idx %= ARRAY_SIZE(data->frames_stats); 893 return data->frames_stats[idx].stats[data->stat_selector]; 894} 895 896static void position_layer(struct swapchain_data *data) 897 898{ 899 struct device_data *device_data = data->device; 900 struct instance_data *instance_data = device_data->instance; 901 const float margin = 10.0f; 902 903 ImGui::SetNextWindowBgAlpha(0.5); 904 ImGui::SetNextWindowSize(data->window_size, ImGuiCond_Always); 905 switch (instance_data->params.position) { 906 case LAYER_POSITION_TOP_LEFT: 907 ImGui::SetNextWindowPos(ImVec2(margin, margin), ImGuiCond_Always); 908 break; 909 case LAYER_POSITION_TOP_RIGHT: 910 ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, margin), 911 ImGuiCond_Always); 912 break; 913 case LAYER_POSITION_BOTTOM_LEFT: 914 ImGui::SetNextWindowPos(ImVec2(margin, data->height - data->window_size.y - margin), 915 ImGuiCond_Always); 916 break; 917 case LAYER_POSITION_BOTTOM_RIGHT: 918 ImGui::SetNextWindowPos(ImVec2(data->width - data->window_size.x - margin, 919 data->height - data->window_size.y - margin), 920 ImGuiCond_Always); 921 break; 922 } 923} 924 925static void compute_swapchain_display(struct swapchain_data *data) 926{ 927 struct device_data *device_data = data->device; 928 struct instance_data *instance_data = device_data->instance; 929 930 ImGui::SetCurrentContext(data->imgui_context); 931 ImGui::NewFrame(); 932 position_layer(data); 933 ImGui::Begin("Mesa overlay"); 934 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_device]) 935 ImGui::Text("Device: %s", device_data->properties.deviceName); 936 937 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_format]) { 938 const char *format_name = vk_Format_to_str(data->format); 939 format_name = format_name ? 
(format_name + strlen("VK_FORMAT_")) : "unknown"; 940 ImGui::Text("Swapchain format: %s", format_name); 941 } 942 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_frame]) 943 ImGui::Text("Frames: %" PRIu64, data->n_frames); 944 if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps]) 945 ImGui::Text("FPS: %.2f" , data->fps); 946 947 /* Recompute min/max */ 948 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 949 data->stats_min.stats[s] = UINT64_MAX; 950 data->stats_max.stats[s] = 0; 951 } 952 for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) { 953 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 954 data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s], 955 data->stats_min.stats[s]); 956 data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s], 957 data->stats_max.stats[s]); 958 } 959 } 960 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 961 assert(data->stats_min.stats[s] != UINT64_MAX); 962 } 963 964 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { 965 if (!instance_data->params.enabled[s] || 966 s == OVERLAY_PARAM_ENABLED_device || 967 s == OVERLAY_PARAM_ENABLED_format || 968 s == OVERLAY_PARAM_ENABLED_fps || 969 s == OVERLAY_PARAM_ENABLED_frame) 970 continue; 971 972 char hash[40]; 973 snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); 974 data->stat_selector = (enum overlay_param_enabled) s; 975 data->time_dividor = 1000.0f; 976 if (s == OVERLAY_PARAM_ENABLED_gpu_timing) 977 data->time_dividor = 1000000.0f; 978 979 if (s == OVERLAY_PARAM_ENABLED_frame_timing || 980 s == OVERLAY_PARAM_ENABLED_acquire_timing || 981 s == OVERLAY_PARAM_ENABLED_present_timing || 982 s == OVERLAY_PARAM_ENABLED_gpu_timing) { 983 double min_time = data->stats_min.stats[s] / data->time_dividor; 984 double max_time = data->stats_max.stats[s] / data->time_dividor; 985 ImGui::PlotHistogram(hash, get_time_stat, data, 986 ARRAY_SIZE(data->frames_stats), 0, 987 NULL, min_time, 
max_time, 988 ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); 989 ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s], 990 get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1), 991 min_time, max_time); 992 } else { 993 ImGui::PlotHistogram(hash, get_stat, data, 994 ARRAY_SIZE(data->frames_stats), 0, 995 NULL, 996 data->stats_min.stats[s], 997 data->stats_max.stats[s], 998 ImVec2(ImGui::GetContentRegionAvailWidth(), 30)); 999 ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s], 1000 get_stat(data, ARRAY_SIZE(data->frames_stats) - 1), 1001 data->stats_min.stats[s], data->stats_max.stats[s]); 1002 } 1003 } 1004 data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f); 1005 ImGui::End(); 1006 ImGui::EndFrame(); 1007 ImGui::Render(); 1008} 1009 1010static uint32_t vk_memory_type(struct device_data *data, 1011 VkMemoryPropertyFlags properties, 1012 uint32_t type_bits) 1013{ 1014 VkPhysicalDeviceMemoryProperties prop; 1015 data->instance->pd_vtable.GetPhysicalDeviceMemoryProperties(data->physical_device, &prop); 1016 for (uint32_t i = 0; i < prop.memoryTypeCount; i++) 1017 if ((prop.memoryTypes[i].propertyFlags & properties) == properties && type_bits & (1<<i)) 1018 return i; 1019 return 0xFFFFFFFF; // Unable to find memoryType 1020} 1021 1022static void ensure_swapchain_fonts(struct swapchain_data *data, 1023 VkCommandBuffer command_buffer) 1024{ 1025 if (data->font_uploaded) 1026 return; 1027 1028 data->font_uploaded = true; 1029 1030 struct device_data *device_data = data->device; 1031 ImGuiIO& io = ImGui::GetIO(); 1032 unsigned char* pixels; 1033 int width, height; 1034 io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); 1035 size_t upload_size = width * height * 4 * sizeof(char); 1036 1037 /* Upload buffer */ 1038 VkBufferCreateInfo buffer_info = {}; 1039 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 1040 buffer_info.size = upload_size; 1041 buffer_info.usage = 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 1042 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 1043 VK_CHECK(device_data->vtable.CreateBuffer(device_data->device, &buffer_info, 1044 NULL, &data->upload_font_buffer)); 1045 VkMemoryRequirements upload_buffer_req; 1046 device_data->vtable.GetBufferMemoryRequirements(device_data->device, 1047 data->upload_font_buffer, 1048 &upload_buffer_req); 1049 VkMemoryAllocateInfo upload_alloc_info = {}; 1050 upload_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 1051 upload_alloc_info.allocationSize = upload_buffer_req.size; 1052 upload_alloc_info.memoryTypeIndex = vk_memory_type(device_data, 1053 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 1054 upload_buffer_req.memoryTypeBits); 1055 VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, 1056 &upload_alloc_info, 1057 NULL, 1058 &data->upload_font_buffer_mem)); 1059 VK_CHECK(device_data->vtable.BindBufferMemory(device_data->device, 1060 data->upload_font_buffer, 1061 data->upload_font_buffer_mem, 0)); 1062 1063 /* Upload to Buffer */ 1064 char* map = NULL; 1065 VK_CHECK(device_data->vtable.MapMemory(device_data->device, 1066 data->upload_font_buffer_mem, 1067 0, upload_size, 0, (void**)(&map))); 1068 memcpy(map, pixels, upload_size); 1069 VkMappedMemoryRange range[1] = {}; 1070 range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 1071 range[0].memory = data->upload_font_buffer_mem; 1072 range[0].size = upload_size; 1073 VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 1, range)); 1074 device_data->vtable.UnmapMemory(device_data->device, 1075 data->upload_font_buffer_mem); 1076 1077 /* Copy buffer to image */ 1078 VkImageMemoryBarrier copy_barrier[1] = {}; 1079 copy_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1080 copy_barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 1081 copy_barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; 1082 copy_barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 1083 
copy_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 1084 copy_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 1085 copy_barrier[0].image = data->font_image; 1086 copy_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1087 copy_barrier[0].subresourceRange.levelCount = 1; 1088 copy_barrier[0].subresourceRange.layerCount = 1; 1089 device_data->vtable.CmdPipelineBarrier(command_buffer, 1090 VK_PIPELINE_STAGE_HOST_BIT, 1091 VK_PIPELINE_STAGE_TRANSFER_BIT, 1092 0, 0, NULL, 0, NULL, 1093 1, copy_barrier); 1094 1095 VkBufferImageCopy region = {}; 1096 region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1097 region.imageSubresource.layerCount = 1; 1098 region.imageExtent.width = width; 1099 region.imageExtent.height = height; 1100 region.imageExtent.depth = 1; 1101 device_data->vtable.CmdCopyBufferToImage(command_buffer, 1102 data->upload_font_buffer, 1103 data->font_image, 1104 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1105 1, ®ion); 1106 1107 VkImageMemoryBarrier use_barrier[1] = {}; 1108 use_barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1109 use_barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 1110 use_barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 1111 use_barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 1112 use_barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1113 use_barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 1114 use_barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 1115 use_barrier[0].image = data->font_image; 1116 use_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1117 use_barrier[0].subresourceRange.levelCount = 1; 1118 use_barrier[0].subresourceRange.layerCount = 1; 1119 device_data->vtable.CmdPipelineBarrier(command_buffer, 1120 VK_PIPELINE_STAGE_TRANSFER_BIT, 1121 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 1122 0, 1123 0, NULL, 1124 0, NULL, 1125 1, use_barrier); 1126 1127 /* Store our identifier */ 1128 io.Fonts->TexID 
= (ImTextureID)(intptr_t)data->font_image; 1129} 1130 1131static void CreateOrResizeBuffer(struct device_data *data, 1132 VkBuffer *buffer, 1133 VkDeviceMemory *buffer_memory, 1134 VkDeviceSize *buffer_size, 1135 size_t new_size, VkBufferUsageFlagBits usage) 1136{ 1137 if (*buffer != VK_NULL_HANDLE) 1138 data->vtable.DestroyBuffer(data->device, *buffer, NULL); 1139 if (*buffer_memory) 1140 data->vtable.FreeMemory(data->device, *buffer_memory, NULL); 1141 1142 VkBufferCreateInfo buffer_info = {}; 1143 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 1144 buffer_info.size = new_size; 1145 buffer_info.usage = usage; 1146 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 1147 VK_CHECK(data->vtable.CreateBuffer(data->device, &buffer_info, NULL, buffer)); 1148 1149 VkMemoryRequirements req; 1150 data->vtable.GetBufferMemoryRequirements(data->device, *buffer, &req); 1151 VkMemoryAllocateInfo alloc_info = {}; 1152 alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 1153 alloc_info.allocationSize = req.size; 1154 alloc_info.memoryTypeIndex = 1155 vk_memory_type(data, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, req.memoryTypeBits); 1156 VK_CHECK(data->vtable.AllocateMemory(data->device, &alloc_info, NULL, buffer_memory)); 1157 1158 VK_CHECK(data->vtable.BindBufferMemory(data->device, *buffer, *buffer_memory, 0)); 1159 *buffer_size = new_size; 1160} 1161 1162static struct overlay_draw *render_swapchain_display(struct swapchain_data *data, 1163 struct queue_data *present_queue, 1164 const VkSemaphore *wait_semaphores, 1165 unsigned n_wait_semaphores, 1166 unsigned image_index) 1167{ 1168 ImDrawData* draw_data = ImGui::GetDrawData(); 1169 if (draw_data->TotalVtxCount == 0) 1170 return NULL; 1171 1172 struct device_data *device_data = data->device; 1173 struct overlay_draw *draw = get_overlay_draw(data); 1174 1175 device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0); 1176 1177 VkRenderPassBeginInfo render_pass_info = {}; 1178 render_pass_info.sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; 1179 render_pass_info.renderPass = data->render_pass; 1180 render_pass_info.framebuffer = data->framebuffers[image_index]; 1181 render_pass_info.renderArea.extent.width = data->width; 1182 render_pass_info.renderArea.extent.height = data->height; 1183 1184 VkCommandBufferBeginInfo buffer_begin_info = {}; 1185 buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 1186 1187 device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info); 1188 1189 ensure_swapchain_fonts(data, draw->command_buffer); 1190 1191 /* Bounce the image to display back to color attachment layout for 1192 * rendering on top of it. 1193 */ 1194 VkImageMemoryBarrier imb; 1195 imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1196 imb.pNext = nullptr; 1197 imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1198 imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1199 imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1200 imb.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1201 imb.image = data->images[image_index]; 1202 imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1203 imb.subresourceRange.baseMipLevel = 0; 1204 imb.subresourceRange.levelCount = 1; 1205 imb.subresourceRange.baseArrayLayer = 0; 1206 imb.subresourceRange.layerCount = 1; 1207 imb.srcQueueFamilyIndex = present_queue->family_index; 1208 imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index; 1209 device_data->vtable.CmdPipelineBarrier(draw->command_buffer, 1210 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1211 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1212 0, /* dependency flags */ 1213 0, nullptr, /* memory barriers */ 1214 0, nullptr, /* buffer memory barriers */ 1215 1, &imb); /* image memory barriers */ 1216 1217 device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info, 1218 VK_SUBPASS_CONTENTS_INLINE); 1219 1220 /* Create/Resize vertex & index buffers */ 1221 size_t vertex_size = 
ALIGN(draw_data->TotalVtxCount * sizeof(ImDrawVert), device_data->properties.limits.nonCoherentAtomSize); 1222 size_t index_size = ALIGN(draw_data->TotalIdxCount * sizeof(ImDrawIdx), device_data->properties.limits.nonCoherentAtomSize); 1223 if (draw->vertex_buffer_size < vertex_size) { 1224 CreateOrResizeBuffer(device_data, 1225 &draw->vertex_buffer, 1226 &draw->vertex_buffer_mem, 1227 &draw->vertex_buffer_size, 1228 vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); 1229 } 1230 if (draw->index_buffer_size < index_size) { 1231 CreateOrResizeBuffer(device_data, 1232 &draw->index_buffer, 1233 &draw->index_buffer_mem, 1234 &draw->index_buffer_size, 1235 index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); 1236 } 1237 1238 /* Upload vertex & index data */ 1239 ImDrawVert* vtx_dst = NULL; 1240 ImDrawIdx* idx_dst = NULL; 1241 VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem, 1242 0, vertex_size, 0, (void**)(&vtx_dst))); 1243 VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem, 1244 0, index_size, 0, (void**)(&idx_dst))); 1245 for (int n = 0; n < draw_data->CmdListsCount; n++) 1246 { 1247 const ImDrawList* cmd_list = draw_data->CmdLists[n]; 1248 memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); 1249 memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); 1250 vtx_dst += cmd_list->VtxBuffer.Size; 1251 idx_dst += cmd_list->IdxBuffer.Size; 1252 } 1253 VkMappedMemoryRange range[2] = {}; 1254 range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 1255 range[0].memory = draw->vertex_buffer_mem; 1256 range[0].size = VK_WHOLE_SIZE; 1257 range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 1258 range[1].memory = draw->index_buffer_mem; 1259 range[1].size = VK_WHOLE_SIZE; 1260 VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range)); 1261 device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem); 1262 
device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem); 1263 1264 /* Bind pipeline and descriptor sets */ 1265 device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline); 1266 VkDescriptorSet desc_set[1] = { data->descriptor_set }; 1267 device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, 1268 data->pipeline_layout, 0, 1, desc_set, 0, NULL); 1269 1270 /* Bind vertex & index buffers */ 1271 VkBuffer vertex_buffers[1] = { draw->vertex_buffer }; 1272 VkDeviceSize vertex_offset[1] = { 0 }; 1273 device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset); 1274 device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16); 1275 1276 /* Setup viewport */ 1277 VkViewport viewport; 1278 viewport.x = 0; 1279 viewport.y = 0; 1280 viewport.width = draw_data->DisplaySize.x; 1281 viewport.height = draw_data->DisplaySize.y; 1282 viewport.minDepth = 0.0f; 1283 viewport.maxDepth = 1.0f; 1284 device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport); 1285 1286 1287 /* Setup scale and translation through push constants : 1288 * 1289 * Our visible imgui space lies from draw_data->DisplayPos (top left) to 1290 * draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayMin 1291 * is typically (0,0) for single viewport apps. 
1292 */ 1293 float scale[2]; 1294 scale[0] = 2.0f / draw_data->DisplaySize.x; 1295 scale[1] = 2.0f / draw_data->DisplaySize.y; 1296 float translate[2]; 1297 translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; 1298 translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; 1299 device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, 1300 VK_SHADER_STAGE_VERTEX_BIT, 1301 sizeof(float) * 0, sizeof(float) * 2, scale); 1302 device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout, 1303 VK_SHADER_STAGE_VERTEX_BIT, 1304 sizeof(float) * 2, sizeof(float) * 2, translate); 1305 1306 // Render the command lists: 1307 int vtx_offset = 0; 1308 int idx_offset = 0; 1309 ImVec2 display_pos = draw_data->DisplayPos; 1310 for (int n = 0; n < draw_data->CmdListsCount; n++) 1311 { 1312 const ImDrawList* cmd_list = draw_data->CmdLists[n]; 1313 for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) 1314 { 1315 const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; 1316 // Apply scissor/clipping rectangle 1317 // FIXME: We could clamp width/height based on clamped min/max values. 1318 VkRect2D scissor; 1319 scissor.offset.x = (int32_t)(pcmd->ClipRect.x - display_pos.x) > 0 ? (int32_t)(pcmd->ClipRect.x - display_pos.x) : 0; 1320 scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0; 1321 scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x); 1322 scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here? 
1323 device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor); 1324 1325 // Draw 1326 device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0); 1327 1328 idx_offset += pcmd->ElemCount; 1329 } 1330 vtx_offset += cmd_list->VtxBuffer.Size; 1331 } 1332 1333 device_data->vtable.CmdEndRenderPass(draw->command_buffer); 1334 1335 if (device_data->graphic_queue->family_index != present_queue->family_index) 1336 { 1337 /* Transfer the image back to the present queue family 1338 * image layout was already changed to present by the render pass 1339 */ 1340 imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1341 imb.pNext = nullptr; 1342 imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1343 imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1344 imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1345 imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1346 imb.image = data->images[image_index]; 1347 imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1348 imb.subresourceRange.baseMipLevel = 0; 1349 imb.subresourceRange.levelCount = 1; 1350 imb.subresourceRange.baseArrayLayer = 0; 1351 imb.subresourceRange.layerCount = 1; 1352 imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index; 1353 imb.dstQueueFamilyIndex = present_queue->family_index; 1354 device_data->vtable.CmdPipelineBarrier(draw->command_buffer, 1355 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1356 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 1357 0, /* dependency flags */ 1358 0, nullptr, /* memory barriers */ 1359 0, nullptr, /* buffer memory barriers */ 1360 1, &imb); /* image memory barriers */ 1361 } 1362 1363 device_data->vtable.EndCommandBuffer(draw->command_buffer); 1364 1365 /* When presenting on a different queue than where we're drawing the 1366 * overlay *AND* when the application does not provide a semaphore to 1367 * vkQueuePresent, insert our own cross engine synchronization 1368 * semaphore. 
1369 */ 1370 if (n_wait_semaphores == 0 && device_data->graphic_queue->queue != present_queue->queue) { 1371 VkPipelineStageFlags stages_wait = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 1372 VkSubmitInfo submit_info = {}; 1373 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 1374 submit_info.commandBufferCount = 0; 1375 submit_info.pWaitDstStageMask = &stages_wait; 1376 submit_info.waitSemaphoreCount = 0; 1377 submit_info.signalSemaphoreCount = 1; 1378 submit_info.pSignalSemaphores = &draw->cross_engine_semaphore; 1379 1380 device_data->vtable.QueueSubmit(present_queue->queue, 1, &submit_info, VK_NULL_HANDLE); 1381 1382 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 1383 submit_info.commandBufferCount = 1; 1384 submit_info.pWaitDstStageMask = &stages_wait; 1385 submit_info.pCommandBuffers = &draw->command_buffer; 1386 submit_info.waitSemaphoreCount = 1; 1387 submit_info.pWaitSemaphores = &draw->cross_engine_semaphore; 1388 submit_info.signalSemaphoreCount = 1; 1389 submit_info.pSignalSemaphores = &draw->semaphore; 1390 1391 device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); 1392 } else { 1393 VkPipelineStageFlags *stages_wait = (VkPipelineStageFlags*) malloc(sizeof(VkPipelineStageFlags) * n_wait_semaphores); 1394 for (unsigned i = 0; i < n_wait_semaphores; i++) 1395 { 1396 // wait in the fragment stage until the swapchain image is ready 1397 stages_wait[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; 1398 } 1399 1400 VkSubmitInfo submit_info = {}; 1401 submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 1402 submit_info.commandBufferCount = 1; 1403 submit_info.pCommandBuffers = &draw->command_buffer; 1404 submit_info.pWaitDstStageMask = stages_wait; 1405 submit_info.waitSemaphoreCount = n_wait_semaphores; 1406 submit_info.pWaitSemaphores = wait_semaphores; 1407 submit_info.signalSemaphoreCount = 1; 1408 submit_info.pSignalSemaphores = &draw->semaphore; 1409 1410 
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence); 1411 1412 free(stages_wait); 1413 } 1414 1415 return draw; 1416} 1417 1418static const uint32_t overlay_vert_spv[] = { 1419#include "overlay.vert.spv.h" 1420}; 1421static const uint32_t overlay_frag_spv[] = { 1422#include "overlay.frag.spv.h" 1423}; 1424 1425static void setup_swapchain_data_pipeline(struct swapchain_data *data) 1426{ 1427 struct device_data *device_data = data->device; 1428 VkShaderModule vert_module, frag_module; 1429 1430 /* Create shader modules */ 1431 VkShaderModuleCreateInfo vert_info = {}; 1432 vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 1433 vert_info.codeSize = sizeof(overlay_vert_spv); 1434 vert_info.pCode = overlay_vert_spv; 1435 VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, 1436 &vert_info, NULL, &vert_module)); 1437 VkShaderModuleCreateInfo frag_info = {}; 1438 frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 1439 frag_info.codeSize = sizeof(overlay_frag_spv); 1440 frag_info.pCode = (uint32_t*)overlay_frag_spv; 1441 VK_CHECK(device_data->vtable.CreateShaderModule(device_data->device, 1442 &frag_info, NULL, &frag_module)); 1443 1444 /* Font sampler */ 1445 VkSamplerCreateInfo sampler_info = {}; 1446 sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; 1447 sampler_info.magFilter = VK_FILTER_LINEAR; 1448 sampler_info.minFilter = VK_FILTER_LINEAR; 1449 sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; 1450 sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1451 sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1452 sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; 1453 sampler_info.minLod = -1000; 1454 sampler_info.maxLod = 1000; 1455 sampler_info.maxAnisotropy = 1.0f; 1456 VK_CHECK(device_data->vtable.CreateSampler(device_data->device, &sampler_info, 1457 NULL, &data->font_sampler)); 1458 1459 /* Descriptor pool */ 1460 VkDescriptorPoolSize 
sampler_pool_size = {}; 1461 sampler_pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1462 sampler_pool_size.descriptorCount = 1; 1463 VkDescriptorPoolCreateInfo desc_pool_info = {}; 1464 desc_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; 1465 desc_pool_info.maxSets = 1; 1466 desc_pool_info.poolSizeCount = 1; 1467 desc_pool_info.pPoolSizes = &sampler_pool_size; 1468 VK_CHECK(device_data->vtable.CreateDescriptorPool(device_data->device, 1469 &desc_pool_info, 1470 NULL, &data->descriptor_pool)); 1471 1472 /* Descriptor layout */ 1473 VkSampler sampler[1] = { data->font_sampler }; 1474 VkDescriptorSetLayoutBinding binding[1] = {}; 1475 binding[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1476 binding[0].descriptorCount = 1; 1477 binding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; 1478 binding[0].pImmutableSamplers = sampler; 1479 VkDescriptorSetLayoutCreateInfo set_layout_info = {}; 1480 set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; 1481 set_layout_info.bindingCount = 1; 1482 set_layout_info.pBindings = binding; 1483 VK_CHECK(device_data->vtable.CreateDescriptorSetLayout(device_data->device, 1484 &set_layout_info, 1485 NULL, &data->descriptor_layout)); 1486 1487 /* Descriptor set */ 1488 VkDescriptorSetAllocateInfo alloc_info = {}; 1489 alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; 1490 alloc_info.descriptorPool = data->descriptor_pool; 1491 alloc_info.descriptorSetCount = 1; 1492 alloc_info.pSetLayouts = &data->descriptor_layout; 1493 VK_CHECK(device_data->vtable.AllocateDescriptorSets(device_data->device, 1494 &alloc_info, 1495 &data->descriptor_set)); 1496 1497 /* Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full 1498 * 3d projection matrix 1499 */ 1500 VkPushConstantRange push_constants[1] = {}; 1501 push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; 1502 push_constants[0].offset = sizeof(float) * 0; 1503 push_constants[0].size = 
sizeof(float) * 4; 1504 VkPipelineLayoutCreateInfo layout_info = {}; 1505 layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; 1506 layout_info.setLayoutCount = 1; 1507 layout_info.pSetLayouts = &data->descriptor_layout; 1508 layout_info.pushConstantRangeCount = 1; 1509 layout_info.pPushConstantRanges = push_constants; 1510 VK_CHECK(device_data->vtable.CreatePipelineLayout(device_data->device, 1511 &layout_info, 1512 NULL, &data->pipeline_layout)); 1513 1514 VkPipelineShaderStageCreateInfo stage[2] = {}; 1515 stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 1516 stage[0].stage = VK_SHADER_STAGE_VERTEX_BIT; 1517 stage[0].module = vert_module; 1518 stage[0].pName = "main"; 1519 stage[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 1520 stage[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; 1521 stage[1].module = frag_module; 1522 stage[1].pName = "main"; 1523 1524 VkVertexInputBindingDescription binding_desc[1] = {}; 1525 binding_desc[0].stride = sizeof(ImDrawVert); 1526 binding_desc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; 1527 1528 VkVertexInputAttributeDescription attribute_desc[3] = {}; 1529 attribute_desc[0].location = 0; 1530 attribute_desc[0].binding = binding_desc[0].binding; 1531 attribute_desc[0].format = VK_FORMAT_R32G32_SFLOAT; 1532 attribute_desc[0].offset = IM_OFFSETOF(ImDrawVert, pos); 1533 attribute_desc[1].location = 1; 1534 attribute_desc[1].binding = binding_desc[0].binding; 1535 attribute_desc[1].format = VK_FORMAT_R32G32_SFLOAT; 1536 attribute_desc[1].offset = IM_OFFSETOF(ImDrawVert, uv); 1537 attribute_desc[2].location = 2; 1538 attribute_desc[2].binding = binding_desc[0].binding; 1539 attribute_desc[2].format = VK_FORMAT_R8G8B8A8_UNORM; 1540 attribute_desc[2].offset = IM_OFFSETOF(ImDrawVert, col); 1541 1542 VkPipelineVertexInputStateCreateInfo vertex_info = {}; 1543 vertex_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; 1544 vertex_info.vertexBindingDescriptionCount = 1; 1545 
vertex_info.pVertexBindingDescriptions = binding_desc; 1546 vertex_info.vertexAttributeDescriptionCount = 3; 1547 vertex_info.pVertexAttributeDescriptions = attribute_desc; 1548 1549 VkPipelineInputAssemblyStateCreateInfo ia_info = {}; 1550 ia_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; 1551 ia_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 1552 1553 VkPipelineViewportStateCreateInfo viewport_info = {}; 1554 viewport_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; 1555 viewport_info.viewportCount = 1; 1556 viewport_info.scissorCount = 1; 1557 1558 VkPipelineRasterizationStateCreateInfo raster_info = {}; 1559 raster_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; 1560 raster_info.polygonMode = VK_POLYGON_MODE_FILL; 1561 raster_info.cullMode = VK_CULL_MODE_NONE; 1562 raster_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; 1563 raster_info.lineWidth = 1.0f; 1564 1565 VkPipelineMultisampleStateCreateInfo ms_info = {}; 1566 ms_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; 1567 ms_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; 1568 1569 VkPipelineColorBlendAttachmentState color_attachment[1] = {}; 1570 color_attachment[0].blendEnable = VK_TRUE; 1571 color_attachment[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; 1572 color_attachment[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; 1573 color_attachment[0].colorBlendOp = VK_BLEND_OP_ADD; 1574 color_attachment[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; 1575 color_attachment[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; 1576 color_attachment[0].alphaBlendOp = VK_BLEND_OP_ADD; 1577 color_attachment[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | 1578 VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; 1579 1580 VkPipelineDepthStencilStateCreateInfo depth_info = {}; 1581 depth_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; 
1582 1583 VkPipelineColorBlendStateCreateInfo blend_info = {}; 1584 blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; 1585 blend_info.attachmentCount = 1; 1586 blend_info.pAttachments = color_attachment; 1587 1588 VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; 1589 VkPipelineDynamicStateCreateInfo dynamic_state = {}; 1590 dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; 1591 dynamic_state.dynamicStateCount = (uint32_t)IM_ARRAYSIZE(dynamic_states); 1592 dynamic_state.pDynamicStates = dynamic_states; 1593 1594 VkGraphicsPipelineCreateInfo info = {}; 1595 info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; 1596 info.flags = 0; 1597 info.stageCount = 2; 1598 info.pStages = stage; 1599 info.pVertexInputState = &vertex_info; 1600 info.pInputAssemblyState = &ia_info; 1601 info.pViewportState = &viewport_info; 1602 info.pRasterizationState = &raster_info; 1603 info.pMultisampleState = &ms_info; 1604 info.pDepthStencilState = &depth_info; 1605 info.pColorBlendState = &blend_info; 1606 info.pDynamicState = &dynamic_state; 1607 info.layout = data->pipeline_layout; 1608 info.renderPass = data->render_pass; 1609 VK_CHECK( 1610 device_data->vtable.CreateGraphicsPipelines(device_data->device, VK_NULL_HANDLE, 1611 1, &info, 1612 NULL, &data->pipeline)); 1613 1614 device_data->vtable.DestroyShaderModule(device_data->device, vert_module, NULL); 1615 device_data->vtable.DestroyShaderModule(device_data->device, frag_module, NULL); 1616 1617 ImGuiIO& io = ImGui::GetIO(); 1618 unsigned char* pixels; 1619 int width, height; 1620 io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); 1621 1622 /* Font image */ 1623 VkImageCreateInfo image_info = {}; 1624 image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; 1625 image_info.imageType = VK_IMAGE_TYPE_2D; 1626 image_info.format = VK_FORMAT_R8G8B8A8_UNORM; 1627 image_info.extent.width = width; 1628 image_info.extent.height = height; 
1629 image_info.extent.depth = 1; 1630 image_info.mipLevels = 1; 1631 image_info.arrayLayers = 1; 1632 image_info.samples = VK_SAMPLE_COUNT_1_BIT; 1633 image_info.tiling = VK_IMAGE_TILING_OPTIMAL; 1634 image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; 1635 image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 1636 image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; 1637 VK_CHECK(device_data->vtable.CreateImage(device_data->device, &image_info, 1638 NULL, &data->font_image)); 1639 VkMemoryRequirements font_image_req; 1640 device_data->vtable.GetImageMemoryRequirements(device_data->device, 1641 data->font_image, &font_image_req); 1642 VkMemoryAllocateInfo image_alloc_info = {}; 1643 image_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 1644 image_alloc_info.allocationSize = font_image_req.size; 1645 image_alloc_info.memoryTypeIndex = vk_memory_type(device_data, 1646 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 1647 font_image_req.memoryTypeBits); 1648 VK_CHECK(device_data->vtable.AllocateMemory(device_data->device, &image_alloc_info, 1649 NULL, &data->font_mem)); 1650 VK_CHECK(device_data->vtable.BindImageMemory(device_data->device, 1651 data->font_image, 1652 data->font_mem, 0)); 1653 1654 /* Font image view */ 1655 VkImageViewCreateInfo view_info = {}; 1656 view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 1657 view_info.image = data->font_image; 1658 view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; 1659 view_info.format = VK_FORMAT_R8G8B8A8_UNORM; 1660 view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1661 view_info.subresourceRange.levelCount = 1; 1662 view_info.subresourceRange.layerCount = 1; 1663 VK_CHECK(device_data->vtable.CreateImageView(device_data->device, &view_info, 1664 NULL, &data->font_image_view)); 1665 1666 /* Descriptor set */ 1667 VkDescriptorImageInfo desc_image[1] = {}; 1668 desc_image[0].sampler = data->font_sampler; 1669 desc_image[0].imageView = data->font_image_view; 1670 
desc_image[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1671 VkWriteDescriptorSet write_desc[1] = {}; 1672 write_desc[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; 1673 write_desc[0].dstSet = data->descriptor_set; 1674 write_desc[0].descriptorCount = 1; 1675 write_desc[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 1676 write_desc[0].pImageInfo = desc_image; 1677 device_data->vtable.UpdateDescriptorSets(device_data->device, 1, write_desc, 0, NULL); 1678} 1679 1680static void setup_swapchain_data(struct swapchain_data *data, 1681 const VkSwapchainCreateInfoKHR *pCreateInfo) 1682{ 1683 data->width = pCreateInfo->imageExtent.width; 1684 data->height = pCreateInfo->imageExtent.height; 1685 data->format = pCreateInfo->imageFormat; 1686 1687 data->imgui_context = ImGui::CreateContext(); 1688 ImGui::SetCurrentContext(data->imgui_context); 1689 1690 ImGui::GetIO().IniFilename = NULL; 1691 ImGui::GetIO().DisplaySize = ImVec2((float)data->width, (float)data->height); 1692 1693 struct device_data *device_data = data->device; 1694 1695 /* Render pass */ 1696 VkAttachmentDescription attachment_desc = {}; 1697 attachment_desc.format = pCreateInfo->imageFormat; 1698 attachment_desc.samples = VK_SAMPLE_COUNT_1_BIT; 1699 attachment_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 1700 attachment_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 1701 attachment_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; 1702 attachment_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; 1703 attachment_desc.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1704 attachment_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; 1705 VkAttachmentReference color_attachment = {}; 1706 color_attachment.attachment = 0; 1707 color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1708 VkSubpassDescription subpass = {}; 1709 subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; 1710 subpass.colorAttachmentCount = 1; 1711 subpass.pColorAttachments 
= &color_attachment; 1712 VkSubpassDependency dependency = {}; 1713 dependency.srcSubpass = VK_SUBPASS_EXTERNAL; 1714 dependency.dstSubpass = 0; 1715 dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1716 dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1717 dependency.srcAccessMask = 0; 1718 dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1719 VkRenderPassCreateInfo render_pass_info = {}; 1720 render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; 1721 render_pass_info.attachmentCount = 1; 1722 render_pass_info.pAttachments = &attachment_desc; 1723 render_pass_info.subpassCount = 1; 1724 render_pass_info.pSubpasses = &subpass; 1725 render_pass_info.dependencyCount = 1; 1726 render_pass_info.pDependencies = &dependency; 1727 VK_CHECK(device_data->vtable.CreateRenderPass(device_data->device, 1728 &render_pass_info, 1729 NULL, &data->render_pass)); 1730 1731 setup_swapchain_data_pipeline(data); 1732 1733 VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, 1734 data->swapchain, 1735 &data->n_images, 1736 NULL)); 1737 1738 data->images = ralloc_array(data, VkImage, data->n_images); 1739 data->image_views = ralloc_array(data, VkImageView, data->n_images); 1740 data->framebuffers = ralloc_array(data, VkFramebuffer, data->n_images); 1741 1742 VK_CHECK(device_data->vtable.GetSwapchainImagesKHR(device_data->device, 1743 data->swapchain, 1744 &data->n_images, 1745 data->images)); 1746 1747 /* Image views */ 1748 VkImageViewCreateInfo view_info = {}; 1749 view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 1750 view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; 1751 view_info.format = pCreateInfo->imageFormat; 1752 view_info.components.r = VK_COMPONENT_SWIZZLE_R; 1753 view_info.components.g = VK_COMPONENT_SWIZZLE_G; 1754 view_info.components.b = VK_COMPONENT_SWIZZLE_B; 1755 view_info.components.a = VK_COMPONENT_SWIZZLE_A; 1756 view_info.subresourceRange = { 
VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; 1757 for (uint32_t i = 0; i < data->n_images; i++) { 1758 view_info.image = data->images[i]; 1759 VK_CHECK(device_data->vtable.CreateImageView(device_data->device, 1760 &view_info, NULL, 1761 &data->image_views[i])); 1762 } 1763 1764 /* Framebuffers */ 1765 VkImageView attachment[1]; 1766 VkFramebufferCreateInfo fb_info = {}; 1767 fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; 1768 fb_info.renderPass = data->render_pass; 1769 fb_info.attachmentCount = 1; 1770 fb_info.pAttachments = attachment; 1771 fb_info.width = data->width; 1772 fb_info.height = data->height; 1773 fb_info.layers = 1; 1774 for (uint32_t i = 0; i < data->n_images; i++) { 1775 attachment[0] = data->image_views[i]; 1776 VK_CHECK(device_data->vtable.CreateFramebuffer(device_data->device, &fb_info, 1777 NULL, &data->framebuffers[i])); 1778 } 1779 1780 /* Command buffer pool */ 1781 VkCommandPoolCreateInfo cmd_buffer_pool_info = {}; 1782 cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; 1783 cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; 1784 cmd_buffer_pool_info.queueFamilyIndex = device_data->graphic_queue->family_index; 1785 VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device, 1786 &cmd_buffer_pool_info, 1787 NULL, &data->command_pool)); 1788} 1789 1790static void shutdown_swapchain_data(struct swapchain_data *data) 1791{ 1792 struct device_data *device_data = data->device; 1793 1794 list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) { 1795 device_data->vtable.DestroySemaphore(device_data->device, draw->cross_engine_semaphore, NULL); 1796 device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL); 1797 device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL); 1798 device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL); 1799 device_data->vtable.DestroyBuffer(device_data->device, 
draw->index_buffer, NULL); 1800 device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL); 1801 device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL); 1802 } 1803 1804 for (uint32_t i = 0; i < data->n_images; i++) { 1805 device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL); 1806 device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL); 1807 } 1808 1809 device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL); 1810 1811 device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL); 1812 1813 device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL); 1814 device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL); 1815 1816 device_data->vtable.DestroyDescriptorPool(device_data->device, 1817 data->descriptor_pool, NULL); 1818 device_data->vtable.DestroyDescriptorSetLayout(device_data->device, 1819 data->descriptor_layout, NULL); 1820 1821 device_data->vtable.DestroySampler(device_data->device, data->font_sampler, NULL); 1822 device_data->vtable.DestroyImageView(device_data->device, data->font_image_view, NULL); 1823 device_data->vtable.DestroyImage(device_data->device, data->font_image, NULL); 1824 device_data->vtable.FreeMemory(device_data->device, data->font_mem, NULL); 1825 1826 device_data->vtable.DestroyBuffer(device_data->device, data->upload_font_buffer, NULL); 1827 device_data->vtable.FreeMemory(device_data->device, data->upload_font_buffer_mem, NULL); 1828 1829 ImGui::DestroyContext(data->imgui_context); 1830} 1831 1832static struct overlay_draw *before_present(struct swapchain_data *swapchain_data, 1833 struct queue_data *present_queue, 1834 const VkSemaphore *wait_semaphores, 1835 unsigned n_wait_semaphores, 1836 unsigned imageIndex) 1837{ 1838 struct instance_data *instance_data = swapchain_data->device->instance; 1839 struct 
overlay_draw *draw = NULL; 1840 1841 snapshot_swapchain_frame(swapchain_data); 1842 1843 if (!instance_data->params.no_display && swapchain_data->n_frames > 0) { 1844 compute_swapchain_display(swapchain_data); 1845 draw = render_swapchain_display(swapchain_data, present_queue, 1846 wait_semaphores, n_wait_semaphores, 1847 imageIndex); 1848 } 1849 1850 return draw; 1851} 1852 1853static VkResult overlay_CreateSwapchainKHR( 1854 VkDevice device, 1855 const VkSwapchainCreateInfoKHR* pCreateInfo, 1856 const VkAllocationCallbacks* pAllocator, 1857 VkSwapchainKHR* pSwapchain) 1858{ 1859 struct device_data *device_data = FIND(struct device_data, device); 1860 VkResult result = device_data->vtable.CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); 1861 if (result != VK_SUCCESS) return result; 1862 1863 struct swapchain_data *swapchain_data = new_swapchain_data(*pSwapchain, device_data); 1864 setup_swapchain_data(swapchain_data, pCreateInfo); 1865 return result; 1866} 1867 1868static void overlay_DestroySwapchainKHR( 1869 VkDevice device, 1870 VkSwapchainKHR swapchain, 1871 const VkAllocationCallbacks* pAllocator) 1872{ 1873 if (swapchain == VK_NULL_HANDLE) { 1874 struct device_data *device_data = FIND(struct device_data, device); 1875 device_data->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); 1876 return; 1877 } 1878 1879 struct swapchain_data *swapchain_data = 1880 FIND(struct swapchain_data, swapchain); 1881 1882 shutdown_swapchain_data(swapchain_data); 1883 swapchain_data->device->vtable.DestroySwapchainKHR(device, swapchain, pAllocator); 1884 destroy_swapchain_data(swapchain_data); 1885} 1886 1887static VkResult overlay_QueuePresentKHR( 1888 VkQueue queue, 1889 const VkPresentInfoKHR* pPresentInfo) 1890{ 1891 struct queue_data *queue_data = FIND(struct queue_data, queue); 1892 struct device_data *device_data = queue_data->device; 1893 struct instance_data *instance_data = device_data->instance; 1894 uint32_t 
query_results[OVERLAY_QUERY_COUNT]; 1895 1896 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++; 1897 1898 if (list_length(&queue_data->running_command_buffer) > 0) { 1899 /* Before getting the query results, make sure the operations have 1900 * completed. 1901 */ 1902 VK_CHECK(device_data->vtable.ResetFences(device_data->device, 1903 1, &queue_data->queries_fence)); 1904 VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence)); 1905 VK_CHECK(device_data->vtable.WaitForFences(device_data->device, 1906 1, &queue_data->queries_fence, 1907 VK_FALSE, UINT64_MAX)); 1908 1909 /* Now get the results. */ 1910 list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data, 1911 &queue_data->running_command_buffer, link) { 1912 list_delinit(&cmd_buffer_data->link); 1913 1914 if (cmd_buffer_data->pipeline_query_pool) { 1915 memset(query_results, 0, sizeof(query_results)); 1916 VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, 1917 cmd_buffer_data->pipeline_query_pool, 1918 cmd_buffer_data->query_index, 1, 1919 sizeof(uint32_t) * OVERLAY_QUERY_COUNT, 1920 query_results, 0, VK_QUERY_RESULT_WAIT_BIT)); 1921 1922 for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices; 1923 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { 1924 device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices]; 1925 } 1926 } 1927 if (cmd_buffer_data->timestamp_query_pool) { 1928 uint64_t gpu_timestamps[2] = { 0 }; 1929 VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, 1930 cmd_buffer_data->timestamp_query_pool, 1931 cmd_buffer_data->query_index * 2, 2, 1932 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t), 1933 VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT)); 1934 1935 gpu_timestamps[0] &= queue_data->timestamp_mask; 1936 gpu_timestamps[1] &= queue_data->timestamp_mask; 1937 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] += 1938 (gpu_timestamps[1] - 
gpu_timestamps[0]) * 1939 device_data->properties.limits.timestampPeriod; 1940 } 1941 } 1942 } 1943 1944 /* Otherwise we need to add our overlay drawing semaphore to the list of 1945 * semaphores to wait on. If we don't do that the presented picture might 1946 * be have incomplete overlay drawings. 1947 */ 1948 VkResult result = VK_SUCCESS; 1949 if (instance_data->params.no_display) { 1950 for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { 1951 VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; 1952 struct swapchain_data *swapchain_data = 1953 FIND(struct swapchain_data, swapchain); 1954 1955 uint32_t image_index = pPresentInfo->pImageIndices[i]; 1956 1957 before_present(swapchain_data, 1958 queue_data, 1959 pPresentInfo->pWaitSemaphores, 1960 pPresentInfo->waitSemaphoreCount, 1961 image_index); 1962 1963 VkPresentInfoKHR present_info = *pPresentInfo; 1964 present_info.swapchainCount = 1; 1965 present_info.pSwapchains = &swapchain; 1966 present_info.pImageIndices = &image_index; 1967 1968 uint64_t ts0 = os_time_get(); 1969 result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); 1970 uint64_t ts1 = os_time_get(); 1971 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; 1972 } 1973 } else { 1974 for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { 1975 VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i]; 1976 struct swapchain_data *swapchain_data = 1977 FIND(struct swapchain_data, swapchain); 1978 1979 uint32_t image_index = pPresentInfo->pImageIndices[i]; 1980 1981 VkPresentInfoKHR present_info = *pPresentInfo; 1982 present_info.swapchainCount = 1; 1983 present_info.pSwapchains = &swapchain; 1984 present_info.pImageIndices = &image_index; 1985 1986 struct overlay_draw *draw = before_present(swapchain_data, 1987 queue_data, 1988 pPresentInfo->pWaitSemaphores, 1989 pPresentInfo->waitSemaphoreCount, 1990 image_index); 1991 1992 /* Because the submission of the overlay draw waits on the 
semaphores 1993 * handed for present, we don't need to have this present operation 1994 * wait on them as well, we can just wait on the overlay submission 1995 * semaphore. 1996 */ 1997 present_info.pWaitSemaphores = &draw->semaphore; 1998 present_info.waitSemaphoreCount = 1; 1999 2000 uint64_t ts0 = os_time_get(); 2001 VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); 2002 uint64_t ts1 = os_time_get(); 2003 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; 2004 if (pPresentInfo->pResults) 2005 pPresentInfo->pResults[i] = chain_result; 2006 if (chain_result != VK_SUCCESS && result == VK_SUCCESS) 2007 result = chain_result; 2008 } 2009 } 2010 return result; 2011} 2012 2013static VkResult overlay_AcquireNextImageKHR( 2014 VkDevice device, 2015 VkSwapchainKHR swapchain, 2016 uint64_t timeout, 2017 VkSemaphore semaphore, 2018 VkFence fence, 2019 uint32_t* pImageIndex) 2020{ 2021 struct swapchain_data *swapchain_data = 2022 FIND(struct swapchain_data, swapchain); 2023 struct device_data *device_data = swapchain_data->device; 2024 2025 uint64_t ts0 = os_time_get(); 2026 VkResult result = device_data->vtable.AcquireNextImageKHR(device, swapchain, timeout, 2027 semaphore, fence, pImageIndex); 2028 uint64_t ts1 = os_time_get(); 2029 2030 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; 2031 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; 2032 2033 return result; 2034} 2035 2036static VkResult overlay_AcquireNextImage2KHR( 2037 VkDevice device, 2038 const VkAcquireNextImageInfoKHR* pAcquireInfo, 2039 uint32_t* pImageIndex) 2040{ 2041 struct swapchain_data *swapchain_data = 2042 FIND(struct swapchain_data, pAcquireInfo->swapchain); 2043 struct device_data *device_data = swapchain_data->device; 2044 2045 uint64_t ts0 = os_time_get(); 2046 VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex); 2047 uint64_t 
ts1 = os_time_get(); 2048 2049 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0; 2050 swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++; 2051 2052 return result; 2053} 2054 2055static void overlay_CmdDraw( 2056 VkCommandBuffer commandBuffer, 2057 uint32_t vertexCount, 2058 uint32_t instanceCount, 2059 uint32_t firstVertex, 2060 uint32_t firstInstance) 2061{ 2062 struct command_buffer_data *cmd_buffer_data = 2063 FIND(struct command_buffer_data, commandBuffer); 2064 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++; 2065 struct device_data *device_data = cmd_buffer_data->device; 2066 device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount, 2067 firstVertex, firstInstance); 2068} 2069 2070static void overlay_CmdDrawIndexed( 2071 VkCommandBuffer commandBuffer, 2072 uint32_t indexCount, 2073 uint32_t instanceCount, 2074 uint32_t firstIndex, 2075 int32_t vertexOffset, 2076 uint32_t firstInstance) 2077{ 2078 struct command_buffer_data *cmd_buffer_data = 2079 FIND(struct command_buffer_data, commandBuffer); 2080 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++; 2081 struct device_data *device_data = cmd_buffer_data->device; 2082 device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount, 2083 firstIndex, vertexOffset, firstInstance); 2084} 2085 2086static void overlay_CmdDrawIndirect( 2087 VkCommandBuffer commandBuffer, 2088 VkBuffer buffer, 2089 VkDeviceSize offset, 2090 uint32_t drawCount, 2091 uint32_t stride) 2092{ 2093 struct command_buffer_data *cmd_buffer_data = 2094 FIND(struct command_buffer_data, commandBuffer); 2095 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++; 2096 struct device_data *device_data = cmd_buffer_data->device; 2097 device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); 2098} 2099 2100static void overlay_CmdDrawIndexedIndirect( 2101 VkCommandBuffer commandBuffer, 2102 VkBuffer buffer, 
2103 VkDeviceSize offset, 2104 uint32_t drawCount, 2105 uint32_t stride) 2106{ 2107 struct command_buffer_data *cmd_buffer_data = 2108 FIND(struct command_buffer_data, commandBuffer); 2109 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++; 2110 struct device_data *device_data = cmd_buffer_data->device; 2111 device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride); 2112} 2113 2114static void overlay_CmdDrawIndirectCount( 2115 VkCommandBuffer commandBuffer, 2116 VkBuffer buffer, 2117 VkDeviceSize offset, 2118 VkBuffer countBuffer, 2119 VkDeviceSize countBufferOffset, 2120 uint32_t maxDrawCount, 2121 uint32_t stride) 2122{ 2123 struct command_buffer_data *cmd_buffer_data = 2124 FIND(struct command_buffer_data, commandBuffer); 2125 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++; 2126 struct device_data *device_data = cmd_buffer_data->device; 2127 device_data->vtable.CmdDrawIndirectCount(commandBuffer, buffer, offset, 2128 countBuffer, countBufferOffset, 2129 maxDrawCount, stride); 2130} 2131 2132static void overlay_CmdDrawIndexedIndirectCount( 2133 VkCommandBuffer commandBuffer, 2134 VkBuffer buffer, 2135 VkDeviceSize offset, 2136 VkBuffer countBuffer, 2137 VkDeviceSize countBufferOffset, 2138 uint32_t maxDrawCount, 2139 uint32_t stride) 2140{ 2141 struct command_buffer_data *cmd_buffer_data = 2142 FIND(struct command_buffer_data, commandBuffer); 2143 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++; 2144 struct device_data *device_data = cmd_buffer_data->device; 2145 device_data->vtable.CmdDrawIndexedIndirectCount(commandBuffer, buffer, offset, 2146 countBuffer, countBufferOffset, 2147 maxDrawCount, stride); 2148} 2149 2150static void overlay_CmdDispatch( 2151 VkCommandBuffer commandBuffer, 2152 uint32_t groupCountX, 2153 uint32_t groupCountY, 2154 uint32_t groupCountZ) 2155{ 2156 struct command_buffer_data *cmd_buffer_data = 2157 FIND(struct 
command_buffer_data, commandBuffer); 2158 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++; 2159 struct device_data *device_data = cmd_buffer_data->device; 2160 device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); 2161} 2162 2163static void overlay_CmdDispatchIndirect( 2164 VkCommandBuffer commandBuffer, 2165 VkBuffer buffer, 2166 VkDeviceSize offset) 2167{ 2168 struct command_buffer_data *cmd_buffer_data = 2169 FIND(struct command_buffer_data, commandBuffer); 2170 cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++; 2171 struct device_data *device_data = cmd_buffer_data->device; 2172 device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset); 2173} 2174 2175static void overlay_CmdBindPipeline( 2176 VkCommandBuffer commandBuffer, 2177 VkPipelineBindPoint pipelineBindPoint, 2178 VkPipeline pipeline) 2179{ 2180 struct command_buffer_data *cmd_buffer_data = 2181 FIND(struct command_buffer_data, commandBuffer); 2182 switch (pipelineBindPoint) { 2183 case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break; 2184 case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break; 2185 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break; 2186 default: break; 2187 } 2188 struct device_data *device_data = cmd_buffer_data->device; 2189 device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); 2190} 2191 2192static VkResult overlay_BeginCommandBuffer( 2193 VkCommandBuffer commandBuffer, 2194 const VkCommandBufferBeginInfo* pBeginInfo) 2195{ 2196 struct command_buffer_data *cmd_buffer_data = 2197 FIND(struct command_buffer_data, commandBuffer); 2198 struct device_data *device_data = cmd_buffer_data->device; 2199 2200 memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); 2201 2202 /* 
We don't record any query in secondary command buffers, just make sure 2203 * we have the right inheritance. 2204 */ 2205 if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { 2206 VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *) 2207 clone_chain((const struct VkBaseInStructure *)pBeginInfo); 2208 VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *) 2209 vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO); 2210 VkCommandBufferInheritanceInfo inhe_info = { 2211 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, 2212 NULL, 2213 VK_NULL_HANDLE, 2214 0, 2215 VK_NULL_HANDLE, 2216 VK_FALSE, 2217 0, 2218 overlay_query_flags, 2219 }; 2220 2221 if (parent_inhe_info) 2222 parent_inhe_info->pipelineStatistics = overlay_query_flags; 2223 else { 2224 inhe_info.pNext = begin_info->pNext; 2225 begin_info->pNext = &inhe_info; 2226 } 2227 2228 VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); 2229 2230 if (!parent_inhe_info) 2231 begin_info->pNext = inhe_info.pNext; 2232 2233 free_chain((struct VkBaseOutStructure *)begin_info); 2234 2235 return result; 2236 } 2237 2238 /* Otherwise record a begin query as first command. 
*/ 2239 VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); 2240 2241 if (result == VK_SUCCESS) { 2242 if (cmd_buffer_data->pipeline_query_pool) { 2243 device_data->vtable.CmdResetQueryPool(commandBuffer, 2244 cmd_buffer_data->pipeline_query_pool, 2245 cmd_buffer_data->query_index, 1); 2246 } 2247 if (cmd_buffer_data->timestamp_query_pool) { 2248 device_data->vtable.CmdResetQueryPool(commandBuffer, 2249 cmd_buffer_data->timestamp_query_pool, 2250 cmd_buffer_data->query_index * 2, 2); 2251 } 2252 if (cmd_buffer_data->pipeline_query_pool) { 2253 device_data->vtable.CmdBeginQuery(commandBuffer, 2254 cmd_buffer_data->pipeline_query_pool, 2255 cmd_buffer_data->query_index, 0); 2256 } 2257 if (cmd_buffer_data->timestamp_query_pool) { 2258 device_data->vtable.CmdWriteTimestamp(commandBuffer, 2259 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 2260 cmd_buffer_data->timestamp_query_pool, 2261 cmd_buffer_data->query_index * 2); 2262 } 2263 } 2264 2265 return result; 2266} 2267 2268static VkResult overlay_EndCommandBuffer( 2269 VkCommandBuffer commandBuffer) 2270{ 2271 struct command_buffer_data *cmd_buffer_data = 2272 FIND(struct command_buffer_data, commandBuffer); 2273 struct device_data *device_data = cmd_buffer_data->device; 2274 2275 if (cmd_buffer_data->timestamp_query_pool) { 2276 device_data->vtable.CmdWriteTimestamp(commandBuffer, 2277 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 2278 cmd_buffer_data->timestamp_query_pool, 2279 cmd_buffer_data->query_index * 2 + 1); 2280 } 2281 if (cmd_buffer_data->pipeline_query_pool) { 2282 device_data->vtable.CmdEndQuery(commandBuffer, 2283 cmd_buffer_data->pipeline_query_pool, 2284 cmd_buffer_data->query_index); 2285 } 2286 2287 return device_data->vtable.EndCommandBuffer(commandBuffer); 2288} 2289 2290static VkResult overlay_ResetCommandBuffer( 2291 VkCommandBuffer commandBuffer, 2292 VkCommandBufferResetFlags flags) 2293{ 2294 struct command_buffer_data *cmd_buffer_data = 2295 FIND(struct command_buffer_data, 
commandBuffer); 2296 struct device_data *device_data = cmd_buffer_data->device; 2297 2298 memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); 2299 2300 return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); 2301} 2302 2303static void overlay_CmdExecuteCommands( 2304 VkCommandBuffer commandBuffer, 2305 uint32_t commandBufferCount, 2306 const VkCommandBuffer* pCommandBuffers) 2307{ 2308 struct command_buffer_data *cmd_buffer_data = 2309 FIND(struct command_buffer_data, commandBuffer); 2310 struct device_data *device_data = cmd_buffer_data->device; 2311 2312 /* Add the stats of the executed command buffers to the primary one. */ 2313 for (uint32_t c = 0; c < commandBufferCount; c++) { 2314 struct command_buffer_data *sec_cmd_buffer_data = 2315 FIND(struct command_buffer_data, pCommandBuffers[c]); 2316 2317 for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) 2318 cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s]; 2319 } 2320 2321 device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); 2322} 2323 2324static VkResult overlay_AllocateCommandBuffers( 2325 VkDevice device, 2326 const VkCommandBufferAllocateInfo* pAllocateInfo, 2327 VkCommandBuffer* pCommandBuffers) 2328{ 2329 struct device_data *device_data = FIND(struct device_data, device); 2330 VkResult result = 2331 device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers); 2332 if (result != VK_SUCCESS) 2333 return result; 2334 2335 VkQueryPool pipeline_query_pool = VK_NULL_HANDLE; 2336 VkQueryPool timestamp_query_pool = VK_NULL_HANDLE; 2337 if (device_data->instance->pipeline_statistics_enabled && 2338 pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 2339 VkQueryPoolCreateInfo pool_info = { 2340 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 2341 NULL, 2342 0, 2343 VK_QUERY_TYPE_PIPELINE_STATISTICS, 2344 pAllocateInfo->commandBufferCount, 2345 overlay_query_flags, 2346 }; 2347 
VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, 2348 NULL, &pipeline_query_pool)); 2349 } 2350 if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) { 2351 VkQueryPoolCreateInfo pool_info = { 2352 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 2353 NULL, 2354 0, 2355 VK_QUERY_TYPE_TIMESTAMP, 2356 pAllocateInfo->commandBufferCount * 2, 2357 0, 2358 }; 2359 VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, 2360 NULL, ×tamp_query_pool)); 2361 } 2362 2363 for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { 2364 new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, 2365 pipeline_query_pool, timestamp_query_pool, 2366 i, device_data); 2367 } 2368 2369 if (pipeline_query_pool) 2370 map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); 2371 if (timestamp_query_pool) 2372 map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); 2373 2374 return result; 2375} 2376 2377static void overlay_FreeCommandBuffers( 2378 VkDevice device, 2379 VkCommandPool commandPool, 2380 uint32_t commandBufferCount, 2381 const VkCommandBuffer* pCommandBuffers) 2382{ 2383 struct device_data *device_data = FIND(struct device_data, device); 2384 for (uint32_t i = 0; i < commandBufferCount; i++) { 2385 struct command_buffer_data *cmd_buffer_data = 2386 FIND(struct command_buffer_data, pCommandBuffers[i]); 2387 2388 /* It is legal to free a NULL command buffer*/ 2389 if (!cmd_buffer_data) 2390 continue; 2391 2392 uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool)); 2393 if (count == 1) { 2394 unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool)); 2395 device_data->vtable.DestroyQueryPool(device_data->device, 2396 cmd_buffer_data->pipeline_query_pool, NULL); 2397 } else if (count != 0) { 2398 map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1)); 2399 } 
2400 count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); 2401 if (count == 1) { 2402 unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool)); 2403 device_data->vtable.DestroyQueryPool(device_data->device, 2404 cmd_buffer_data->timestamp_query_pool, NULL); 2405 } else if (count != 0) { 2406 map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1)); 2407 } 2408 destroy_command_buffer_data(cmd_buffer_data); 2409 } 2410 2411 device_data->vtable.FreeCommandBuffers(device, commandPool, 2412 commandBufferCount, pCommandBuffers); 2413} 2414 2415static VkResult overlay_QueueSubmit( 2416 VkQueue queue, 2417 uint32_t submitCount, 2418 const VkSubmitInfo* pSubmits, 2419 VkFence fence) 2420{ 2421 struct queue_data *queue_data = FIND(struct queue_data, queue); 2422 struct device_data *device_data = queue_data->device; 2423 2424 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; 2425 2426 for (uint32_t s = 0; s < submitCount; s++) { 2427 for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { 2428 struct command_buffer_data *cmd_buffer_data = 2429 FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]); 2430 2431 /* Merge the submitted command buffer stats into the device. */ 2432 for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) 2433 device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; 2434 2435 /* Attach the command buffer to the queue so we remember to read its 2436 * pipeline statistics & timestamps at QueuePresent(). 
2437 */ 2438 if (!cmd_buffer_data->pipeline_query_pool && 2439 !cmd_buffer_data->timestamp_query_pool) 2440 continue; 2441 2442 if (list_is_empty(&cmd_buffer_data->link)) { 2443 list_addtail(&cmd_buffer_data->link, 2444 &queue_data->running_command_buffer); 2445 } else { 2446 fprintf(stderr, "Command buffer submitted multiple times before present.\n" 2447 "This could lead to invalid data.\n"); 2448 } 2449 } 2450 } 2451 2452 return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); 2453} 2454 2455static VkResult overlay_QueueSubmit2KHR( 2456 VkQueue queue, 2457 uint32_t submitCount, 2458 const VkSubmitInfo2* pSubmits, 2459 VkFence fence) 2460{ 2461 struct queue_data *queue_data = FIND(struct queue_data, queue); 2462 struct device_data *device_data = queue_data->device; 2463 2464 device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++; 2465 2466 for (uint32_t s = 0; s < submitCount; s++) { 2467 for (uint32_t c = 0; c < pSubmits[s].commandBufferInfoCount; c++) { 2468 struct command_buffer_data *cmd_buffer_data = 2469 FIND(struct command_buffer_data, pSubmits[s].pCommandBufferInfos[c].commandBuffer); 2470 2471 /* Merge the submitted command buffer stats into the device. */ 2472 for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) 2473 device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; 2474 2475 /* Attach the command buffer to the queue so we remember to read its 2476 * pipeline statistics & timestamps at QueuePresent(). 
2477 */ 2478 if (!cmd_buffer_data->pipeline_query_pool && 2479 !cmd_buffer_data->timestamp_query_pool) 2480 continue; 2481 2482 if (list_is_empty(&cmd_buffer_data->link)) { 2483 list_addtail(&cmd_buffer_data->link, 2484 &queue_data->running_command_buffer); 2485 } else { 2486 fprintf(stderr, "Command buffer submitted multiple times before present.\n" 2487 "This could lead to invalid data.\n"); 2488 } 2489 } 2490 } 2491 2492 return device_data->vtable.QueueSubmit2KHR(queue, submitCount, pSubmits, fence); 2493} 2494 2495static VkResult overlay_CreateDevice( 2496 VkPhysicalDevice physicalDevice, 2497 const VkDeviceCreateInfo* pCreateInfo, 2498 const VkAllocationCallbacks* pAllocator, 2499 VkDevice* pDevice) 2500{ 2501 struct instance_data *instance_data = 2502 FIND(struct instance_data, physicalDevice); 2503 VkLayerDeviceCreateInfo *chain_info = 2504 get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); 2505 2506 assert(chain_info->u.pLayerInfo); 2507 PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; 2508 PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; 2509 PFN_vkCreateDevice fpCreateDevice = (PFN_vkCreateDevice)fpGetInstanceProcAddr(NULL, "vkCreateDevice"); 2510 if (fpCreateDevice == NULL) { 2511 return VK_ERROR_INITIALIZATION_FAILED; 2512 } 2513 2514 // Advance the link info for the next element on the chain 2515 chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; 2516 2517 VkPhysicalDeviceFeatures device_features = {}; 2518 VkPhysicalDeviceFeatures *device_features_ptr = NULL; 2519 2520 VkDeviceCreateInfo *device_info = (VkDeviceCreateInfo *) 2521 clone_chain((const struct VkBaseInStructure *)pCreateInfo); 2522 2523 VkPhysicalDeviceFeatures2 *device_features2 = (VkPhysicalDeviceFeatures2 *) 2524 vk_find_struct(device_info, PHYSICAL_DEVICE_FEATURES_2); 2525 if (device_features2) { 2526 /* Can't use device_info->pEnabledFeatures when 
VkPhysicalDeviceFeatures2 is present */ 2527 device_features_ptr = &device_features2->features; 2528 } else { 2529 if (device_info->pEnabledFeatures) 2530 device_features = *(device_info->pEnabledFeatures); 2531 device_features_ptr = &device_features; 2532 device_info->pEnabledFeatures = &device_features; 2533 } 2534 2535 if (instance_data->pipeline_statistics_enabled) { 2536 device_features_ptr->inheritedQueries = true; 2537 device_features_ptr->pipelineStatisticsQuery = true; 2538 } 2539 2540 2541 VkResult result = fpCreateDevice(physicalDevice, device_info, pAllocator, pDevice); 2542 free_chain((struct VkBaseOutStructure *)device_info); 2543 if (result != VK_SUCCESS) return result; 2544 2545 struct device_data *device_data = new_device_data(*pDevice, instance_data); 2546 device_data->physical_device = physicalDevice; 2547 vk_device_dispatch_table_load(&device_data->vtable, 2548 fpGetDeviceProcAddr, *pDevice); 2549 2550 instance_data->pd_vtable.GetPhysicalDeviceProperties(device_data->physical_device, 2551 &device_data->properties); 2552 2553 VkLayerDeviceCreateInfo *load_data_info = 2554 get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK); 2555 device_data->set_device_loader_data = load_data_info->u.pfnSetDeviceLoaderData; 2556 2557 device_map_queues(device_data, pCreateInfo); 2558 2559 return result; 2560} 2561 2562static void overlay_DestroyDevice( 2563 VkDevice device, 2564 const VkAllocationCallbacks* pAllocator) 2565{ 2566 struct device_data *device_data = FIND(struct device_data, device); 2567 device_unmap_queues(device_data); 2568 device_data->vtable.DestroyDevice(device, pAllocator); 2569 destroy_device_data(device_data); 2570} 2571 2572static VkResult overlay_CreateInstance( 2573 const VkInstanceCreateInfo* pCreateInfo, 2574 const VkAllocationCallbacks* pAllocator, 2575 VkInstance* pInstance) 2576{ 2577 VkLayerInstanceCreateInfo *chain_info = 2578 get_instance_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); 2579 2580 
assert(chain_info->u.pLayerInfo); 2581 PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = 2582 chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; 2583 PFN_vkCreateInstance fpCreateInstance = 2584 (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance"); 2585 if (fpCreateInstance == NULL) { 2586 return VK_ERROR_INITIALIZATION_FAILED; 2587 } 2588 2589 // Advance the link info for the next element on the chain 2590 chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; 2591 2592 VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance); 2593 if (result != VK_SUCCESS) return result; 2594 2595 struct instance_data *instance_data = new_instance_data(*pInstance); 2596 vk_instance_dispatch_table_load(&instance_data->vtable, 2597 fpGetInstanceProcAddr, 2598 instance_data->instance); 2599 vk_physical_device_dispatch_table_load(&instance_data->pd_vtable, 2600 fpGetInstanceProcAddr, 2601 instance_data->instance); 2602 instance_data_map_physical_devices(instance_data, true); 2603 2604 parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG")); 2605 2606 /* If there's no control file, and an output_file was specified, start 2607 * capturing fps data right away. 
2608 */ 2609 instance_data->capture_enabled = 2610 instance_data->params.output_file && instance_data->params.control < 0; 2611 instance_data->capture_started = instance_data->capture_enabled; 2612 2613 for (int i = OVERLAY_PARAM_ENABLED_vertices; 2614 i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) { 2615 if (instance_data->params.enabled[i]) { 2616 instance_data->pipeline_statistics_enabled = true; 2617 break; 2618 } 2619 } 2620 2621 return result; 2622} 2623 2624static void overlay_DestroyInstance( 2625 VkInstance instance, 2626 const VkAllocationCallbacks* pAllocator) 2627{ 2628 struct instance_data *instance_data = FIND(struct instance_data, instance); 2629 instance_data_map_physical_devices(instance_data, false); 2630 instance_data->vtable.DestroyInstance(instance, pAllocator); 2631 destroy_instance_data(instance_data); 2632} 2633 2634static const struct { 2635 const char *name; 2636 void *ptr; 2637} name_to_funcptr_map[] = { 2638 { "vkGetInstanceProcAddr", (void *) vkGetInstanceProcAddr }, 2639 { "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr }, 2640#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn } 2641#define ADD_ALIAS_HOOK(alias, fn) { "vk" # alias, (void *) overlay_ ## fn } 2642 ADD_HOOK(AllocateCommandBuffers), 2643 ADD_HOOK(FreeCommandBuffers), 2644 ADD_HOOK(ResetCommandBuffer), 2645 ADD_HOOK(BeginCommandBuffer), 2646 ADD_HOOK(EndCommandBuffer), 2647 ADD_HOOK(CmdExecuteCommands), 2648 2649 ADD_HOOK(CmdDraw), 2650 ADD_HOOK(CmdDrawIndexed), 2651 ADD_HOOK(CmdDrawIndirect), 2652 ADD_HOOK(CmdDrawIndexedIndirect), 2653 ADD_HOOK(CmdDispatch), 2654 ADD_HOOK(CmdDispatchIndirect), 2655 ADD_HOOK(CmdDrawIndirectCount), 2656 ADD_ALIAS_HOOK(CmdDrawIndirectCountKHR, CmdDrawIndirectCount), 2657 ADD_HOOK(CmdDrawIndexedIndirectCount), 2658 ADD_ALIAS_HOOK(CmdDrawIndexedIndirectCountKHR, CmdDrawIndexedIndirectCount), 2659 2660 ADD_HOOK(CmdBindPipeline), 2661 2662 ADD_HOOK(CreateSwapchainKHR), 2663 ADD_HOOK(QueuePresentKHR), 2664 
ADD_HOOK(DestroySwapchainKHR), 2665 ADD_HOOK(AcquireNextImageKHR), 2666 ADD_HOOK(AcquireNextImage2KHR), 2667 2668 ADD_HOOK(QueueSubmit), 2669 ADD_HOOK(QueueSubmit2KHR), 2670 2671 ADD_HOOK(CreateDevice), 2672 ADD_HOOK(DestroyDevice), 2673 2674 ADD_HOOK(CreateInstance), 2675 ADD_HOOK(DestroyInstance), 2676#undef ADD_HOOK 2677#undef ADD_ALIAS_HOOK 2678}; 2679 2680static void *find_ptr(const char *name) 2681{ 2682 for (uint32_t i = 0; i < ARRAY_SIZE(name_to_funcptr_map); i++) { 2683 if (strcmp(name, name_to_funcptr_map[i].name) == 0) 2684 return name_to_funcptr_map[i].ptr; 2685 } 2686 2687 return NULL; 2688} 2689 2690VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(VkDevice dev, 2691 const char *funcName) 2692{ 2693 void *ptr = find_ptr(funcName); 2694 if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr); 2695 2696 if (dev == NULL) return NULL; 2697 2698 struct device_data *device_data = FIND(struct device_data, dev); 2699 if (device_data->vtable.GetDeviceProcAddr == NULL) return NULL; 2700 return device_data->vtable.GetDeviceProcAddr(dev, funcName); 2701} 2702 2703VK_LAYER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, 2704 const char *funcName) 2705{ 2706 void *ptr = find_ptr(funcName); 2707 if (ptr) return reinterpret_cast<PFN_vkVoidFunction>(ptr); 2708 2709 if (instance == NULL) return NULL; 2710 2711 struct instance_data *instance_data = FIND(struct instance_data, instance); 2712 if (instance_data->vtable.GetInstanceProcAddr == NULL) return NULL; 2713 return instance_data->vtable.GetInstanceProcAddr(instance, funcName); 2714} 2715