/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "device_vk.h"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/containers/vector.h>
#include <base/math/mathf.h>
#include <core/engine_info.h>
#include <render/intf_render_context.h>
#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_program_util.h"
#include "device/gpu_resource_manager.h"
#include "device/shader_manager.h"
#include "device/shader_module.h"
#include "platform_vk.h"
#include "util/log.h"
#include "vulkan/create_functions_vk.h"
#include "vulkan/gpu_buffer_vk.h"
#include "vulkan/gpu_image_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/gpu_program_vk.h"
#include "vulkan/gpu_query_vk.h"
#include "vulkan/gpu_sampler_vk.h"
#include "vulkan/gpu_semaphore_vk.h"
#include "vulkan/node_context_descriptor_set_manager_vk.h"
#include "vulkan/node_context_pool_manager_vk.h"
#include "vulkan/pipeline_state_object_vk.h"
#include "vulkan/render_backend_vk.h"
#include "vulkan/render_frame_sync_vk.h"
#include "vulkan/shader_module_vk.h"
#include "vulkan/swapchain_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
static constexpr string_view DEVICE_EXTENSION_SWAPCHAIN { VK_KHR_SWAPCHAIN_EXTENSION_NAME };

// promoted to 1.2, requires VK_KHR_create_renderpass2
static constexpr string_view DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE { VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_CREATE_RENDERPASS2 { VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME };

static constexpr string_view DEVICE_EXTENSION_EXTERNAL_MEMORY { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2 {
    VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION {
    VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME
};
static constexpr string_view DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN { VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_MULTIVIEW { VK_KHR_MULTIVIEW_EXTENSION_NAME };
static constexpr string_view DEVICE_EXTENSION_MAINTENANCE4 = VK_KHR_MAINTENANCE_4_EXTENSION_NAME;
static constexpr string_view DEVICE_EXTENSION_DESCRIPTOR_INDEXING = VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME;

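// Helper pattern used by the Get*Structs functions below: ChainWrapper holds write cursors
// (pointers to the current tail pNext members) of the feature and property chains. Each helper
// appends its structs by writing through the cursor and then advancing the cursor to the new
// tail, so the chains can be extended in any order.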
struct ChainWrapper {
    void** ppNextFeatures { nullptr };
    void** ppNextProperties { nullptr };
};

struct PhysicalDeviceYcbcrStructsVk {
    VkPhysicalDeviceSamplerYcbcrConversionFeatures ycbcrConversionFeatures {};
};

#if (RENDER_VULKAN_FSR_ENABLED == 1)
struct PhysicalDeviceFragmentShadingRateStructsVk {
    VkPhysicalDeviceFragmentShadingRateFeaturesKHR physicalDeviceFragmentShadingRateFeatures;
    VkPhysicalDeviceFragmentShadingRatePropertiesKHR physicalDeviceFragmentShadingRateProperties;
};
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
struct PhysicalDeviceRayTracingStructsVk {
    VkPhysicalDeviceBufferDeviceAddressFeatures physicalDeviceBufferDeviceAddressFeatures;
    VkPhysicalDeviceRayTracingPipelineFeaturesKHR physicalDeviceRayTracingPipelineFeatures;
    VkPhysicalDeviceAccelerationStructureFeaturesKHR physicalDeviceAccelerationStructureFeatures;
    VkPhysicalDeviceRayQueryFeaturesKHR physicalDeviceRayQueryFeatures;
};
#endif

struct PhysicalDeviceMultiviewStructsVk {
    VkPhysicalDeviceMultiviewFeaturesKHR physicalDeviceMultiviewFeatures;
    VkPhysicalDeviceMultiviewPropertiesKHR physicalDeviceMultiviewProperties;
};
struct PhysicalDeviceDescriptorIndexingStructsVk {
    VkPhysicalDeviceDescriptorIndexingFeatures physicalDeviceDescriptorIndexingFeatures;
    VkPhysicalDeviceDescriptorIndexingProperties physicalDeviceDescriptorIndexingProperties;
};

struct PhysicalDeviceMaintenance4Vk {
    VkPhysicalDeviceMaintenance4Features maintenance4Features {};
};

struct ChainObjects {
    unique_ptr<PhysicalDeviceYcbcrStructsVk> ycbcr;
#if (RENDER_VULKAN_RT_ENABLED == 1)
    unique_ptr<PhysicalDeviceRayTracingStructsVk> rt;
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    unique_ptr<PhysicalDeviceFragmentShadingRateStructsVk> fsr;
#endif
    unique_ptr<PhysicalDeviceMultiviewStructsVk> mv;
    unique_ptr<PhysicalDeviceDescriptorIndexingStructsVk> di;
    unique_ptr<PhysicalDeviceMaintenance4Vk> maintenance4;
};

// fragment shading rate
#if (RENDER_VULKAN_FSR_ENABLED == 1)
// VK_KHR_fragment_shading_rate, requires VK_KHR_create_renderpass2, requires VK_KHR_get_physical_device_properties2
static constexpr string_view DEVICE_EXTENSION_FRAGMENT_SHADING_RATE { VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME };

void GetPhysicalDeviceFragmentShadingRateStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.fsr = make_unique<PhysicalDeviceFragmentShadingRateStructsVk>();
    auto& fsr = co.fsr;
    fsr->physicalDeviceFragmentShadingRateFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, // sType
        nullptr, // pNext
        VK_FALSE, // pipelineFragmentShadingRate
        VK_FALSE, // primitiveFragmentShadingRate
        VK_FALSE, // attachmentFragmentShadingRate
    };
    *cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures;
    cw.ppNextFeatures = &fsr->physicalDeviceFragmentShadingRateFeatures.pNext;

    fsr->physicalDeviceFragmentShadingRateProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, // sType
        nullptr, // pNext
    };
    *cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties;
    cw.ppNextProperties = &fsr->physicalDeviceFragmentShadingRateProperties.pNext;
}
#endif

void GetPhysicalDeviceMultiviewFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.mv = make_unique<PhysicalDeviceMultiviewStructsVk>();
    auto& mv = co.mv;
    mv->physicalDeviceMultiviewFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR, // sType
        nullptr, // pNext
        VK_FALSE, // multiview
        VK_FALSE, // multiviewGeometryShader
        VK_FALSE, // multiviewTessellationShader
    };
    *cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures;
    cw.ppNextFeatures = &mv->physicalDeviceMultiviewFeatures.pNext;

    mv->physicalDeviceMultiviewProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR, // sType
        nullptr, // pNext
        0, // maxMultiviewViewCount
        0, // maxMultiviewInstanceIndex
    };
    *cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties;
    cw.ppNextProperties = &mv->physicalDeviceMultiviewProperties.pNext;
}

void GetPhysicalDeviceDescriptorIndexingFeaturesStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.di = make_unique<PhysicalDeviceDescriptorIndexingStructsVk>();
    auto& di = co.di;
    di->physicalDeviceDescriptorIndexingFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, // sType
        nullptr, // pNext
        VK_FALSE, // shaderInputAttachmentArrayDynamicIndexing
        VK_FALSE, // shaderUniformTexelBufferArrayDynamicIndexing
        VK_FALSE, // shaderStorageTexelBufferArrayDynamicIndexing
        VK_FALSE, // shaderUniformBufferArrayNonUniformIndexing
        VK_FALSE, // shaderSampledImageArrayNonUniformIndexing
        VK_FALSE, // shaderStorageBufferArrayNonUniformIndexing
        VK_FALSE, // shaderStorageImageArrayNonUniformIndexing
        VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexing
        VK_FALSE, // shaderUniformTexelBufferArrayNonUniformIndexing
        VK_FALSE, // shaderStorageTexelBufferArrayNonUniformIndexing
        VK_FALSE, // descriptorBindingUniformBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingSampledImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageImageUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUniformTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingStorageTexelBufferUpdateAfterBind
        VK_FALSE, // descriptorBindingUpdateUnusedWhilePending
        VK_FALSE, // descriptorBindingPartiallyBound
        VK_FALSE, // descriptorBindingVariableDescriptorCount
        VK_FALSE, // runtimeDescriptorArray
    };
    *cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures;
    cw.ppNextFeatures = &di->physicalDeviceDescriptorIndexingFeatures.pNext;

    di->physicalDeviceDescriptorIndexingProperties = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, // sType
        nullptr, // pNext
        0U, // maxUpdateAfterBindDescriptorsInAllPools
        VK_FALSE, // shaderUniformBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderSampledImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageBufferArrayNonUniformIndexingNative
        VK_FALSE, // shaderStorageImageArrayNonUniformIndexingNative
        VK_FALSE, // shaderInputAttachmentArrayNonUniformIndexingNative
        VK_FALSE, // robustBufferAccessUpdateAfterBind
        VK_FALSE, // quadDivergentImplicitLod
        0U, // maxPerStageDescriptorUpdateAfterBindSamplers
        0U, // maxPerStageDescriptorUpdateAfterBindUniformBuffers
        0U, // maxPerStageDescriptorUpdateAfterBindStorageBuffers
        0U, // maxPerStageDescriptorUpdateAfterBindSampledImages
        0U, // maxPerStageDescriptorUpdateAfterBindStorageImages
        0U, // maxPerStageDescriptorUpdateAfterBindInputAttachments
        0U, // maxPerStageUpdateAfterBindResources
        0U, // maxDescriptorSetUpdateAfterBindSamplers
        0U, // maxDescriptorSetUpdateAfterBindUniformBuffers
        0U, // maxDescriptorSetUpdateAfterBindUniformBuffersDynamic
        0U, // maxDescriptorSetUpdateAfterBindStorageBuffers
        0U, // maxDescriptorSetUpdateAfterBindStorageBuffersDynamic
        0U, // maxDescriptorSetUpdateAfterBindSampledImages
        0U, // maxDescriptorSetUpdateAfterBindStorageImages
        0U, // maxDescriptorSetUpdateAfterBindInputAttachments
    };
    *cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties;
    cw.ppNextProperties = &di->physicalDeviceDescriptorIndexingProperties.pNext;
}

// ray-tracing
#if (RENDER_VULKAN_RT_ENABLED == 1)
static constexpr string_view DEVICE_EXTENSION_ACCELERATION_STRUCTURE { "VK_KHR_acceleration_structure" };
static constexpr string_view DEVICE_EXTENSION_RAY_QUERY { "VK_KHR_ray_query" };
static constexpr string_view DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS { "VK_KHR_deferred_host_operations" };
static constexpr string_view DEVICE_EXTENSION_RAY_TRACING_PIPELINE { "VK_KHR_ray_tracing_pipeline" };
static constexpr string_view DEVICE_EXTENSION_PIPELINE_LIBRARY { "VK_KHR_pipeline_library" };

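// The ray tracing feature structs are pre-linked into one sub-chain
// (rayQuery -> accelerationStructure -> rayTracingPipeline -> bufferDeviceAddress) before the
// head is spliced into the main feature chain; the cursor is then advanced to the tail
// (bufferDeviceAddressFeatures.pNext) so later helpers append after the whole sub-chain.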
void GetPhysicalDeviceRayTracingStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.rt = make_unique<PhysicalDeviceRayTracingStructsVk>();
    auto& rt = co.rt;
    rt->physicalDeviceBufferDeviceAddressFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, // sType
        nullptr, // pNext
        VK_FALSE, // bufferDeviceAddress
        VK_FALSE, // bufferDeviceAddressCaptureReplay
        VK_FALSE, // bufferDeviceAddressMultiDevice
    };
    rt->physicalDeviceRayTracingPipelineFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, // sType
        &rt->physicalDeviceBufferDeviceAddressFeatures, // pNext
        VK_FALSE, // rayTracingPipeline
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplay
        VK_FALSE, // rayTracingPipelineShaderGroupHandleCaptureReplayMixed
        VK_FALSE, // rayTracingPipelineTraceRaysIndirect
        VK_FALSE, // rayTraversalPrimitiveCulling
    };
    rt->physicalDeviceAccelerationStructureFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, // sType
        &rt->physicalDeviceRayTracingPipelineFeatures, // pNext
        VK_FALSE, // accelerationStructure
        VK_FALSE, // accelerationStructureCaptureReplay
        VK_FALSE, // accelerationStructureIndirectBuild
        VK_FALSE, // accelerationStructureHostCommands
        VK_FALSE, // descriptorBindingAccelerationStructureUpdateAfterBind
    };
    rt->physicalDeviceRayQueryFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, // sType
        &rt->physicalDeviceAccelerationStructureFeatures, // pNext
        VK_TRUE, // rayQuery
    };

    *cw.ppNextFeatures = &rt->physicalDeviceRayQueryFeatures;
    cw.ppNextFeatures = &rt->physicalDeviceBufferDeviceAddressFeatures.pNext;
}
#endif

void GetPhysicalDeviceYcbcrStructs(ChainObjects& co, ChainWrapper& cw)
{
    co.ycbcr = make_unique<PhysicalDeviceYcbcrStructsVk>();
    auto& ycbcr = co.ycbcr;
    ycbcr->ycbcrConversionFeatures = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, // sType
        nullptr, // pNext
        VK_FALSE, // samplerYcbcrConversion
    };

    *cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures;
    cw.ppNextFeatures = &ycbcr->ycbcrConversionFeatures.pNext;
}

void GetYcbcrExtFunctions(const VkInstance instance, DeviceVk::ExtFunctions& extFunctions)
{
    extFunctions.vkCreateSamplerYcbcrConversion =
        (PFN_vkCreateSamplerYcbcrConversion)(void*)vkGetInstanceProcAddr(instance, "vkCreateSamplerYcbcrConversion");
    if (!extFunctions.vkCreateSamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateSamplerYcbcrConversion");
    }
    extFunctions.vkDestroySamplerYcbcrConversion =
        (PFN_vkDestroySamplerYcbcrConversion)vkGetInstanceProcAddr(instance, "vkDestroySamplerYcbcrConversion");
    if (!extFunctions.vkDestroySamplerYcbcrConversion) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroySamplerYcbcrConversion");
    }
}

void GetPhysicalDeviceMaintenance4Structs(ChainObjects& co, ChainWrapper& cw)
{
    co.maintenance4 = make_unique<PhysicalDeviceMaintenance4Vk>();
    auto& m4 = co.maintenance4;
    m4->maintenance4Features = {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, // sType
        nullptr, // pNext
        VK_TRUE, // maintenance4
    };

    *cw.ppNextFeatures = &m4->maintenance4Features;
    cw.ppNextFeatures = &m4->maintenance4Features.pNext;
}

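// Client-provided allocation block sizes are clamped to this [4 MB, 1 GB] range before being
// handed to the memory allocator.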
constexpr uint32_t MIN_ALLOCATION_BLOCK_SIZE { 4u * 1024u * 1024u };
constexpr uint32_t MAX_ALLOCATION_BLOCK_SIZE { 1024u * 1024u * 1024u };
static constexpr const QueueProperties DEFAULT_QUEUE {
    VK_QUEUE_GRAPHICS_BIT, // requiredFlags
    1, // count
    1.0f, // priority
    false, // explicitFlags
    true, // canPresent
};

PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo GetAllocatorCreateInfo(const BackendExtraVk* backendExtra)
{
    // create default pools
    PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo createInfo;
    uint32_t dynamicUboByteSize = 16u * 1024u * 1024u;
    if (backendExtra) {
        const auto& sizes = backendExtra->gpuMemoryAllocatorSizes;
        if (sizes.defaultAllocationBlockSize != ~0u) {
            createInfo.preferredLargeHeapBlockSize = Math::min(
                MAX_ALLOCATION_BLOCK_SIZE, Math::max(sizes.defaultAllocationBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
        if (sizes.customAllocationDynamicUboBlockSize != ~0u) {
            dynamicUboByteSize = Math::min(MAX_ALLOCATION_BLOCK_SIZE,
                Math::max(sizes.customAllocationDynamicUboBlockSize, MIN_ALLOCATION_BLOCK_SIZE));
        }
    }

    // staging
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_SINGLE_SHOT_STAGING;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_TRANSFER_SRC_BIT;
        createInfo.customPools.push_back({
            "STAGING_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            0u,
            // if a linear allocator is used, memory can easily be wasted depending on the client's usage pattern
            false,
            { move(desc) },
        });
    }
    // dynamic uniform ring buffers
    {
        GpuBufferDesc desc;
        desc.engineCreationFlags = EngineBufferCreationFlagBits::CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER;
        desc.memoryPropertyFlags = MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                   MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        desc.usageFlags = BufferUsageFlagBits::CORE_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        createInfo.customPools.push_back({
            "DYNAMIC_UNIFORM_GPU_BUFFER",
            PlatformGpuMemoryAllocator::MemoryAllocatorResourceType::GPU_BUFFER,
            dynamicUboByteSize,
            false,
            { move(desc) },
        });
    }

    return createInfo;
}

VkBool32 VKAPI_PTR DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
    void* pUserData)
{
    if (pCallbackData && pCallbackData->pMessage) {
        if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
            PLUGIN_LOG_E("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
            PLUGIN_LOG_W("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
            PLUGIN_LOG_I("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        } else if ((VkDebugUtilsMessageSeverityFlagsEXT)messageSeverity &
                   VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
            PLUGIN_LOG_V("%s: %s", pCallbackData->pMessageIdName, pCallbackData->pMessage);
        }
    }

    // The application should always return VK_FALSE.
    return VK_FALSE;
}

VkBool32 VKAPI_PTR DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT, uint64_t, size_t,
    int32_t, const char*, const char* pMessage, void*)
{
    if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
        PLUGIN_LOG_E("%s", pMessage);
    } else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) {
        PLUGIN_LOG_W("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
        PLUGIN_LOG_I("%s", pMessage);
    } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
        PLUGIN_LOG_D("%s", pMessage);
    }
    return VK_FALSE;
}

void EmplaceDeviceQueue(
    const VkDevice device, const LowLevelQueueInfo& aQueueInfo, vector<LowLevelGpuQueueVk>& aLowLevelQueues)
{
    for (uint32_t idx = 0; idx < aQueueInfo.queueCount; ++idx) {
        VkQueue queue = VK_NULL_HANDLE;
        vkGetDeviceQueue(device, // device
            aQueueInfo.queueFamilyIndex, // queueFamilyIndex
            idx, // queueIndex
            &queue); // pQueue
        aLowLevelQueues.push_back(LowLevelGpuQueueVk { queue, aQueueInfo });
    }
}

void CheckValidDepthFormats(const DevicePlatformDataVk& devicePlat, DevicePlatformInternalDataVk& dataInternal)
{
    constexpr uint32_t DEPTH_FORMAT_COUNT { 4 };
    constexpr Format DEPTH_FORMATS[DEPTH_FORMAT_COUNT] = { BASE_FORMAT_D24_UNORM_S8_UINT, BASE_FORMAT_D32_SFLOAT,
        BASE_FORMAT_D16_UNORM, BASE_FORMAT_X8_D24_UNORM_PACK32 };
    for (uint32_t idx = 0; idx < DEPTH_FORMAT_COUNT; ++idx) {
        VkFormatProperties formatProperties;
        Format format = DEPTH_FORMATS[idx];
        vkGetPhysicalDeviceFormatProperties(devicePlat.physicalDevice, // physicalDevice
            (VkFormat)format, // format
            &formatProperties); // pFormatProperties
        const VkFormatFeatureFlags optimalTilingFeatureFlags = formatProperties.optimalTilingFeatures;
        if (optimalTilingFeatureFlags & VkFormatFeatureFlagBits::VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
            dataInternal.supportedDepthFormats.push_back(format);
        }
    }
}

vector<string_view> GetPreferredDeviceExtensions(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    vector<string_view> extensions { DEVICE_EXTENSION_SWAPCHAIN };
    extensions.push_back(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.push_back(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE);
    extensions.push_back(DEVICE_EXTENSION_MAINTENANCE4);
    GetPlatformDeviceExtensions(extensions);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif
#if (RENDER_VULKAN_RT_ENABLED == 1)
    extensions.push_back(DEVICE_EXTENSION_ACCELERATION_STRUCTURE);
    extensions.push_back(DEVICE_EXTENSION_RAY_TRACING_PIPELINE);
    extensions.push_back(DEVICE_EXTENSION_RAY_QUERY);
    extensions.push_back(DEVICE_EXTENSION_PIPELINE_LIBRARY);
    extensions.push_back(DEVICE_EXTENSION_DEFERRED_HOST_OPERATIONS);
#endif
    if (plat.deviceApiMinor >= 1) { // enable only for 1.1+
        extensions.push_back(DEVICE_EXTENSION_MULTIVIEW);
    }
    if (plat.deviceApiMinor >= 2) { // enable only for 1.2+
        extensions.push_back(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
    }
    if (backendExtra) {
        for (const auto str : backendExtra->extensions.extensionNames) {
            extensions.push_back(str);
        }
    }
    return extensions;
}

DeviceVk::CommonDeviceExtensions GetEnabledCommonDeviceExtensions(
    const unordered_map<string, uint32_t>& enabledDeviceExtensions)
{
    DeviceVk::CommonDeviceExtensions extensions;
    extensions.swapchain = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SWAPCHAIN);
    // render pass 2 is core in 1.2; we only use it when depth stencil resolve is needed
    extensions.renderPass2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DEPTH_STENCIL_RESOLVE) &&
                             enabledDeviceExtensions.contains(DEVICE_EXTENSION_CREATE_RENDERPASS2);
    extensions.externalMemory = enabledDeviceExtensions.contains(DEVICE_EXTENSION_EXTERNAL_MEMORY);
    extensions.getMemoryRequirements2 = enabledDeviceExtensions.contains(DEVICE_EXTENSION_GET_MEMORY_REQUIREMENTS2);
    extensions.queueFamilyForeign = enabledDeviceExtensions.contains(DEVICE_EXTENSION_QUEUE_FAMILY_FOREIGN);
    extensions.samplerYcbcrConversion = enabledDeviceExtensions.contains(DEVICE_EXTENSION_SAMPLER_YCBCR_CONVERSION);
    extensions.multiView = enabledDeviceExtensions.contains(DEVICE_EXTENSION_MULTIVIEW);
    extensions.descriptorIndexing = enabledDeviceExtensions.contains(DEVICE_EXTENSION_DESCRIPTOR_INDEXING);
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    extensions.fragmentShadingRate = enabledDeviceExtensions.contains(DEVICE_EXTENSION_FRAGMENT_SHADING_RATE);
#endif

    return extensions;
}

CommonDeviceProperties GetCommonDevicePropertiesFunc(const ChainObjects& co)
{
    CommonDeviceProperties cdp;
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (co.fsr) {
        const auto& fsrVk = co.fsr->physicalDeviceFragmentShadingRateProperties;
        cdp.fragmentShadingRateProperties.minFragmentShadingRateAttachmentTexelSize = {
            fsrVk.minFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.minFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize = {
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.width,
            fsrVk.maxFragmentShadingRateAttachmentTexelSize.height
        };
        cdp.fragmentShadingRateProperties.maxFragmentSize = { fsrVk.maxFragmentSize.width,
            fsrVk.maxFragmentSize.height };
    }
#endif
    return cdp;
}

void PreparePhysicalDeviceFeaturesForEnabling(const BackendExtraVk* backendExtra, DevicePlatformDataVk& plat)
{
    // enable all by default and then disable a few
    plat.enabledPhysicalDeviceFeatures = plat.physicalDeviceProperties.physicalDeviceFeatures;
    // prepare feature disable for core engine
    plat.enabledPhysicalDeviceFeatures.geometryShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.tessellationShader = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.sampleRateShading = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.occlusionQueryPrecise = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.pipelineStatisticsQuery = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.shaderTessellationAndGeometryPointSize = VK_FALSE;
    plat.enabledPhysicalDeviceFeatures.inheritedQueries = VK_FALSE;
    if (backendExtra) {
        // check for support and prepare enabling
        if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
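            // VkPhysicalDeviceFeatures consists solely of VkBool32 members, so it is viewed as
            // an array of VkBool32 to compare the supported and requested features generically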
            const size_t valueCount = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
            const array_view<const VkBool32> supported(
                reinterpret_cast<VkBool32*>(&plat.physicalDeviceProperties.physicalDeviceFeatures), valueCount);
            VkPhysicalDeviceFeatures* wantedFeatures =
                (&backendExtra->extensions.physicalDeviceFeaturesToEnable->features);
            const array_view<const VkBool32> wanted(reinterpret_cast<VkBool32*>(wantedFeatures), valueCount);

            array_view<VkBool32> enabledPhysicalDeviceFeatures(
                reinterpret_cast<VkBool32*>(&plat.enabledPhysicalDeviceFeatures), valueCount);
            for (size_t idx = 0; idx < valueCount; ++idx) {
                if (supported[idx] && wanted[idx]) {
                    enabledPhysicalDeviceFeatures[idx] = VK_TRUE;
                } else if (wanted[idx]) {
                    PLUGIN_LOG_W(
                        "physical device feature not supported/enabled from idx: %u", static_cast<uint32_t>(idx));
                }
            }
        }
    }
}

FormatProperties FillDeviceFormatSupport(VkPhysicalDevice physicalDevice, const Format format)
{
    VkFormatProperties formatProperties;
    vkGetPhysicalDeviceFormatProperties(physicalDevice, // physicalDevice
        (VkFormat)format, // format
        &formatProperties); // pFormatProperties
    return FormatProperties {
        (FormatFeatureFlags)formatProperties.linearTilingFeatures,
        (FormatFeatureFlags)formatProperties.optimalTilingFeatures,
        (FormatFeatureFlags)formatProperties.bufferFeatures,
        GpuProgramUtil::FormatByteSize(format),
    };
}

void FillFormatSupport(VkPhysicalDevice physicalDevice, vector<FormatProperties>& formats)
{
    const uint32_t fullSize = DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT +
                              DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT;
    formats.resize(fullSize);
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::LINEAR_FORMAT_MAX_COUNT; ++idx) {
        formats[idx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(idx));
    }
    // pre-build additional formats
    for (uint32_t idx = 0; idx < DeviceFormatSupportConstants::ADDITIONAL_FORMAT_MAX_COUNT; ++idx) {
        const uint32_t currIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_ASSERT(currIdx < static_cast<uint32_t>(formats.size()));
        const uint32_t formatIdx = idx + DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER;
        formats[currIdx] = FillDeviceFormatSupport(physicalDevice, static_cast<Format>(formatIdx));
    }
}
} // namespace

DeviceVk::DeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo) : Device(renderContext, createInfo)
{
    // assume instance and device will be created internally
    ownInstanceAndDevice_ = true;

    const BackendExtraVk* backendExtra = static_cast<const BackendExtraVk*>(createInfo.backendConfiguration);
    // update internal state based on the optional backend configuration given by the client. the size of
    // queueProperties will depend on the enableMultiQueue setting.
    const auto queueProperties = CheckExternalConfig(backendExtra);

    // these check internally ownInstanceAndDevice_ and skip creation if provided by user
    CreateInstance();
    CreatePhysicalDevice();

    const auto availableQueues = CreateFunctionsVk::GetAvailableQueues(plat_.physicalDevice, queueProperties);

    // own device creation does a lot of work for figuring out what to create, but for an external device
    // CheckExternalConfig stored the enabled extensions and features, and we just need to check what is available
    if (ownInstanceAndDevice_) {
        CreateDevice(backendExtra, availableQueues);
    } else {
        commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
        platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
        // filling commonDeviceProperties_ isn't done, but at the moment it only contains fragment shading rate.
        // should walk through BackendExtraVk::extensions::physicalDeviceFeaturesToEnable::pNext and see what's
        // available.
    }

    CreateDebugFunctions();
    CreateExtFunctions();
    CreatePlatformExtFunctions();
    SortAvailableQueues(availableQueues);

    CheckValidDepthFormats(plat_, platInternal_);
    FillFormatSupport(plat_.physicalDevice, formatProperties_);

    PLUGIN_ASSERT_MSG(!lowLevelGpuQueues_.graphicsQueues.empty(), "default queue not initialized");
    if (!lowLevelGpuQueues_.graphicsQueues.empty()) {
        lowLevelGpuQueues_.defaultQueue = lowLevelGpuQueues_.graphicsQueues[0];
    } else {
        PLUGIN_LOG_E("default vulkan queue not initialized");
    }

    gpuQueueCount_ =
        static_cast<uint32_t>(lowLevelGpuQueues_.computeQueues.size() + lowLevelGpuQueues_.graphicsQueues.size() +
                              lowLevelGpuQueues_.transferQueues.size());

    const PlatformGpuMemoryAllocator::GpuMemoryAllocatorCreateInfo allocatorCreateInfo =
        GetAllocatorCreateInfo(backendExtra);
    platformGpuMemoryAllocator_ = make_unique<PlatformGpuMemoryAllocator>(
        plat_.instance, plat_.physicalDevice, plat_.device, allocatorCreateInfo);

    if (queueProperties.size() > 1) {
        PLUGIN_LOG_I("gpu queue count: %u", gpuQueueCount_);
    }

    SetDeviceStatus(true);

    const GpuResourceManager::CreateInfo grmCreateInfo {
        GpuResourceManager::GPU_RESOURCE_MANAGER_OPTIMIZE_STAGING_MEMORY,
    };
    gpuResourceMgr_ = make_unique<GpuResourceManager>(*this, grmCreateInfo);
    shaderMgr_ = make_unique<ShaderManager>(*this);

    lowLevelDevice_ = make_unique<LowLevelDeviceVk>(*this);
}

DeviceVk::~DeviceVk()
{
    WaitForIdle();

    // must release handles before taking down gpu resource manager.
    swapchains_.clear();

    gpuResourceMgr_.reset();
    shaderMgr_.reset();

    platformGpuMemoryAllocator_.reset();

    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }

    if (ownInstanceAndDevice_) {
        CreateFunctionsVk::DestroyDevice(plat_.device);
        CreateFunctionsVk::DestroyDebugMessenger(plat_.instance, debugFunctionUtilities_.debugMessenger);
        CreateFunctionsVk::DestroyDebugCallback(plat_.instance, debugFunctionUtilities_.debugCallback);
        CreateFunctionsVk::DestroyInstance(plat_.instance);
    }
}

void DeviceVk::CreateInstance()
{
    const auto instanceWrapper = (plat_.instance == VK_NULL_HANDLE) ?
        CreateFunctionsVk::CreateInstance(VersionInfo { "core_renderer", 0, 1, 0 },
            VersionInfo { "core_renderer_app", 0, 1, 0 }) :
        CreateFunctionsVk::GetWrapper(plat_.instance);

    plat_.instance = instanceWrapper.instance;
    // update with physical device creation
    plat_.deviceApiMajor = instanceWrapper.apiMajor;
    plat_.deviceApiMinor = instanceWrapper.apiMinor;
    if (instanceWrapper.debugUtilsSupported) {
        debugFunctionUtilities_.debugMessenger =
            CreateFunctionsVk::CreateDebugMessenger(plat_.instance, DebugMessengerCallback);
    }
    if (!debugFunctionUtilities_.debugMessenger && instanceWrapper.debugReportSupported) {
        debugFunctionUtilities_.debugCallback =
            CreateFunctionsVk::CreateDebugCallback(plat_.instance, DebugReportCallback);
    }

    extFunctions_.vkAcquireNextImageKHR =
        (PFN_vkAcquireNextImageKHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkAcquireNextImageKHR");
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        extFunctions_.vkGetPhysicalDeviceFeatures2 =
            (PFN_vkGetPhysicalDeviceFeatures2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceFeatures2");
        extFunctions_.vkGetPhysicalDeviceProperties2 =
            (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(plat_.instance, "vkGetPhysicalDeviceProperties2");
    }
}

void DeviceVk::CreatePhysicalDevice()
{
    auto physicalDeviceWrapper = (plat_.physicalDevice == VK_NULL_HANDLE) ?
        CreateFunctionsVk::CreatePhysicalDevice(plat_.instance, DEFAULT_QUEUE) :
        CreateFunctionsVk::GetWrapper(plat_.physicalDevice);
    const uint32_t physicalDeviceApiMajor =
        VK_VERSION_MAJOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    const uint32_t physicalDeviceApiMinor =
        VK_VERSION_MINOR(physicalDeviceWrapper.physicalDeviceProperties.physicalDeviceProperties.apiVersion);
    plat_.deviceApiMajor = Math::min(plat_.deviceApiMajor, physicalDeviceApiMajor);
    plat_.deviceApiMinor = Math::min(plat_.deviceApiMinor, physicalDeviceApiMinor);
    PLUGIN_LOG_D("device api version %u.%u", plat_.deviceApiMajor, plat_.deviceApiMinor);

    plat_.physicalDevice = physicalDeviceWrapper.physicalDevice;
    plat_.physicalDeviceProperties = move(physicalDeviceWrapper.physicalDeviceProperties);
    plat_.physicalDeviceExtensions = move(physicalDeviceWrapper.physicalDeviceExtensions);
    const auto& memoryProperties = plat_.physicalDeviceProperties.physicalDeviceMemoryProperties;
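    // deviceSharedMemoryPropertyFlags_ becomes the intersection of the property flags of the
    // device's memory types (lazily allocated and protected types are skipped), i.e. the set of
    // properties every regular allocation can rely on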
    deviceSharedMemoryPropertyFlags_ =
        (memoryProperties.memoryTypeCount > 0) ? (MemoryPropertyFlags)memoryProperties.memoryTypes[0].propertyFlags : 0;
    for (uint32_t idx = 1; idx < memoryProperties.memoryTypeCount; ++idx) {
        const MemoryPropertyFlags memoryPropertyFlags =
            (MemoryPropertyFlags)memoryProperties.memoryTypes[idx].propertyFlags;
        // do not compare lazily allocated or protected memory blocks
        if ((memoryPropertyFlags &
                (CORE_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | CORE_MEMORY_PROPERTY_PROTECTED_BIT)) == 0) {
            deviceSharedMemoryPropertyFlags_ &= memoryPropertyFlags;
        }
    }
}

void DeviceVk::CreateDevice(const BackendExtraVk* backendExtra, const vector<LowLevelQueueInfo>& availableQueues)
{
    vector<string_view> preferredExtensions = GetPreferredDeviceExtensions(backendExtra, plat_);
    PreparePhysicalDeviceFeaturesForEnabling(backendExtra, plat_);

    ChainWrapper chainWrapper;
    ChainObjects chainObjects;

    VkPhysicalDeviceFeatures2* physicalDeviceFeatures2Ptr = nullptr;
    VkPhysicalDeviceFeatures2 physicalDeviceFeatures2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, // sType
        nullptr, // pNext
        {}, // features
    };
    chainWrapper.ppNextFeatures = &physicalDeviceFeatures2.pNext;

    VkPhysicalDeviceProperties2 physicalDeviceProperties2 {
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, // sType
        nullptr, // pNext
        {}, // properties
    };
    chainWrapper.ppNextProperties = &physicalDeviceProperties2.pNext;

    GetPhysicalDeviceYcbcrStructs(chainObjects, chainWrapper);
#if (RENDER_VULKAN_RT_ENABLED == 1)
    GetPhysicalDeviceRayTracingStructs(chainObjects, chainWrapper);
#endif
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_FRAGMENT_SHADING_RATE)) {
        GetPhysicalDeviceFragmentShadingRateStructs(chainObjects, chainWrapper);
    }
#endif
    if (plat_.deviceApiMinor >= 1) { // enable only for 1.1+ for now
        GetPhysicalDeviceMultiviewFeaturesStructs(chainObjects, chainWrapper);
    }
    if (plat_.deviceApiMinor >= 2) { // enable only for 1.2+ for now
        GetPhysicalDeviceDescriptorIndexingFeaturesStructs(chainObjects, chainWrapper);
    }
    if (CreateFunctionsVk::HasExtension(plat_.physicalDeviceExtensions, DEVICE_EXTENSION_MAINTENANCE4)) {
        GetPhysicalDeviceMaintenance4Structs(chainObjects, chainWrapper);
    }
    if ((plat_.deviceApiMajor >= 1) && (plat_.deviceApiMinor >= 1)) {
        // pipe user extension physical device features
        if (backendExtra) {
            if (backendExtra->extensions.physicalDeviceFeaturesToEnable) {
                *chainWrapper.ppNextFeatures = backendExtra->extensions.physicalDeviceFeaturesToEnable->pNext;
            }
        }
        if (extFunctions_.vkGetPhysicalDeviceFeatures2) {
            extFunctions_.vkGetPhysicalDeviceFeatures2(plat_.physicalDevice, &physicalDeviceFeatures2);
        }
        if (extFunctions_.vkGetPhysicalDeviceProperties2) {
            extFunctions_.vkGetPhysicalDeviceProperties2(plat_.physicalDevice, &physicalDeviceProperties2);
        }

        // vkGetPhysicalDeviceFeatures has already filled this and PreparePhysicalDeviceFeaturesForEnabling
        // disabled/enabled some features.
        physicalDeviceFeatures2.features = plat_.enabledPhysicalDeviceFeatures;
        physicalDeviceFeatures2Ptr = &physicalDeviceFeatures2;
    }
    const DeviceWrapper deviceWrapper =
        CreateFunctionsVk::CreateDevice(plat_.instance, plat_.physicalDevice, plat_.physicalDeviceExtensions,
            plat_.enabledPhysicalDeviceFeatures, physicalDeviceFeatures2Ptr, availableQueues, preferredExtensions);
    plat_.device = deviceWrapper.device;
    for (const auto& ref : deviceWrapper.extensions) {
        extensions_[ref] = 1u;
    }
    commonDeviceExtensions_ = GetEnabledCommonDeviceExtensions(extensions_);
    platformDeviceExtensions_ = GetEnabledPlatformDeviceExtensions(extensions_);
    commonDeviceProperties_ = GetCommonDevicePropertiesFunc(chainObjects);
}

vector<QueueProperties> DeviceVk::CheckExternalConfig(const BackendExtraVk* backendConfiguration)
{
    vector<QueueProperties> queueProperties;
    queueProperties.push_back(DEFAULT_QUEUE);

    if (!backendConfiguration) {
        return queueProperties;
    }

    const auto& extra = *backendConfiguration;
    if (extra.enableMultiQueue) {
        queueProperties.push_back(QueueProperties {
            VK_QUEUE_COMPUTE_BIT, // requiredFlags
            1, // count
            1.0f, // priority
            true, // explicitFlags
            false, // canPresent
        });
        PLUGIN_LOG_I("trying to enable gpu multi-queue, with queue count: %u", (uint32_t)queueProperties.size());
    }

    if (extra.instance != VK_NULL_HANDLE) {
        PLUGIN_LOG_D("trying to use application given vulkan instance, device, and physical device");
        PLUGIN_ASSERT((extra.instance && extra.physicalDevice && extra.device));
        plat_.instance = extra.instance;
        plat_.physicalDevice = extra.physicalDevice;
        plat_.device = extra.device;
        if (extra.extensions.physicalDeviceFeaturesToEnable) {
            plat_.enabledPhysicalDeviceFeatures = extra.extensions.physicalDeviceFeaturesToEnable->features;
        }
        ownInstanceAndDevice_ = false; // everything given from the application

        const auto myDevice = plat_.physicalDevice;
        auto& myProperties = plat_.physicalDeviceProperties;
        vkGetPhysicalDeviceProperties(myDevice, &myProperties.physicalDeviceProperties);
        vkGetPhysicalDeviceFeatures(myDevice, &myProperties.physicalDeviceFeatures);
        vkGetPhysicalDeviceMemoryProperties(myDevice, &myProperties.physicalDeviceMemoryProperties);

        for (const auto& extension : extra.extensions.extensionNames) {
            extensions_[extension] = 1u;
        }
    }
    return queueProperties;
}

void DeviceVk::SortAvailableQueues(const vector<LowLevelQueueInfo>& availableQueues)
{
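    // queues are bucketed by exact flag match, so queue families advertising combined flags
    // (e.g. graphics | compute) are intentionally left out of these typed buckets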
    for (const auto& ref : availableQueues) {
        if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_COMPUTE_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.computeQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_GRAPHICS_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.graphicsQueues);
        } else if (ref.queueFlags == VkQueueFlagBits::VK_QUEUE_TRANSFER_BIT) {
            EmplaceDeviceQueue(plat_.device, ref, lowLevelGpuQueues_.transferQueues);
        }
    }
}

DeviceBackendType DeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformData& DeviceVk::GetPlatformData() const
{
    return plat_;
}

const DevicePlatformDataVk& DeviceVk::GetPlatformDataVk() const
{
    return plat_;
}

const DevicePlatformInternalDataVk& DeviceVk::GetPlatformInternalDataVk() const
{
    return platInternal_;
}

ILowLevelDevice& DeviceVk::GetLowLevelDevice() const
{
    return *lowLevelDevice_;
}

FormatProperties DeviceVk::GetFormatProperties(const Format format) const
{
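    // formatProperties_ stores the contiguous low-numbered formats first and the additional
    // high-numbered formats (ADDITIONAL_FORMAT_START_NUMBER..END_NUMBER) packed right after
    // them starting at ADDITIONAL_FORMAT_BASE_IDX (see FillFormatSupport)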
    const uint32_t formatSupportSize = static_cast<uint32_t>(formatProperties_.size());
    const uint32_t formatIdx = static_cast<uint32_t>(format);
    if (formatIdx < formatSupportSize) {
        return formatProperties_[formatIdx];
    } else if ((formatIdx >= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER) &&
               (formatIdx <= DeviceFormatSupportConstants::ADDITIONAL_FORMAT_END_NUMBER)) {
        const uint32_t currIdx = formatIdx - DeviceFormatSupportConstants::ADDITIONAL_FORMAT_START_NUMBER +
                                 DeviceFormatSupportConstants::ADDITIONAL_FORMAT_BASE_IDX;
        PLUGIN_UNUSED(currIdx);
        PLUGIN_ASSERT(currIdx < formatSupportSize);
        return formatProperties_[currIdx];
    }
    return {};
}

AccelerationStructureBuildSizes DeviceVk::GetAccelerationStructureBuildSizes(
    const AccelerationStructureBuildGeometryInfo& geometry,
    BASE_NS::array_view<const AccelerationStructureGeometryTrianglesInfo> triangles,
    BASE_NS::array_view<const AccelerationStructureGeometryAabbsInfo> aabbs,
    BASE_NS::array_view<const AccelerationStructureGeometryInstancesInfo> instances) const
{
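    // translate the engine geometry descriptions into VkAccelerationStructureGeometryKHR entries
    // plus per-geometry max primitive counts, then query the device-build sizes from the driver
    // with vkGetAccelerationStructureBuildSizesKHR (RT-enabled builds only)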
#if (RENDER_VULKAN_RT_ENABLED == 1)
    const VkDevice device = plat_.device;

    const size_t arraySize = triangles.size() + aabbs.size() + instances.size();
    vector<VkAccelerationStructureGeometryKHR> geometryData(arraySize);
    vector<uint32_t> maxPrimitiveCounts(arraySize);
    uint32_t arrayIndex = 0;
    for (const auto& trianglesRef : triangles) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_TRIANGLES_KHR, // geometryType
            {}, // geometry
            VkGeometryFlagsKHR(trianglesRef.geometryFlags), // flags
        };
        geometryData[arrayIndex].geometry.triangles = VkAccelerationStructureGeometryTrianglesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, // sType
            nullptr, // pNext
            VkFormat(trianglesRef.vertexFormat), // vertexFormat
            {}, // vertexData
            VkDeviceSize(trianglesRef.vertexStride), // vertexStride
            trianglesRef.maxVertex, // maxVertex
            VkIndexType(trianglesRef.indexType), // indexType
            {}, // indexData
            {}, // transformData
        };
        maxPrimitiveCounts[arrayIndex] = trianglesRef.indexCount / 3u; // triangles
        arrayIndex++;
    }
    for (const auto& aabbsRef : aabbs) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_AABBS_KHR, // geometryType
            {}, // geometry
            0, // flags
        };
        geometryData[arrayIndex].geometry.aabbs = VkAccelerationStructureGeometryAabbsDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR, // sType
            nullptr, // pNext
            {}, // data
            aabbsRef.stride, // stride
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }
    for (const auto& instancesRef : instances) {
        geometryData[arrayIndex] = VkAccelerationStructureGeometryKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, // sType
            nullptr, // pNext
            VkGeometryTypeKHR::VK_GEOMETRY_TYPE_INSTANCES_KHR, // geometryType
            {}, // geometry
            0, // flags
        };
        geometryData[arrayIndex].geometry.instances = VkAccelerationStructureGeometryInstancesDataKHR {
            VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, // sType
            nullptr, // pNext
            instancesRef.arrayOfPointers, // arrayOfPointers
            {}, // data
        };
        maxPrimitiveCounts[arrayIndex] = 1u;
        arrayIndex++;
    }

    const VkAccelerationStructureBuildGeometryInfoKHR geometryInfoVk {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, // sType
        nullptr, // pNext
        VkAccelerationStructureTypeKHR(geometry.type), // type
        VkBuildAccelerationStructureFlagsKHR(geometry.flags), // flags
        VkBuildAccelerationStructureModeKHR(geometry.mode), // mode
        VK_NULL_HANDLE, // srcAccelerationStructure
        VK_NULL_HANDLE, // dstAccelerationStructure
        arrayIndex, // geometryCount
        geometryData.data(), // pGeometries
        nullptr, // ppGeometries
        {}, // scratchData
    };

    VkAccelerationStructureBuildSizesInfoKHR buildSizesInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, // sType
        nullptr, // pNext
        0, // accelerationStructureSize
        0, // updateScratchSize
        0, // buildScratchSize
    };
    if ((arrayIndex > 0) && extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        extFunctions_.vkGetAccelerationStructureBuildSizesKHR(device, // device
            VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, // buildType
            &geometryInfoVk, // pBuildInfo
            maxPrimitiveCounts.data(), // pMaxPrimitiveCounts
            &buildSizesInfo); // pSizeInfo
    }

    return AccelerationStructureBuildSizes {
        static_cast<uint32_t>(buildSizesInfo.accelerationStructureSize),
        static_cast<uint32_t>(buildSizesInfo.updateScratchSize),
        static_cast<uint32_t>(buildSizesInfo.buildScratchSize),
    };
#else
    return AccelerationStructureBuildSizes { 0, 0, 0 };
#endif
}

unique_ptr<Swapchain> DeviceVk::CreateDeviceSwapchain(const SwapchainCreateInfo& swapchainCreateInfo)
{
    return make_unique<SwapchainVk>(*this, swapchainCreateInfo);
}

void DeviceVk::DestroyDeviceSwapchain() {}

PlatformGpuMemoryAllocator* DeviceVk::GetPlatformGpuMemoryAllocator()
{
    return platformGpuMemoryAllocator_.get();
}

GpuQueue DeviceVk::GetValidGpuQueue(const GpuQueue& gpuQueue) const
{
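    // same fallback order as GetGpuQueue: requested index -> index 0 of the same type -> default graphics queue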
    const auto getSpecificQueue = [](const uint32_t queueIndex, const GpuQueue::QueueType queueType,
                                      const vector<LowLevelGpuQueueVk>& specificQueues, const GpuQueue& defaultQueue) {
        const uint32_t queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return GpuQueue { queueType, queueIndex };
        } else if (queueCount > 0) {
            return GpuQueue { queueType, 0 };
        }
        return defaultQueue;
    };

    GpuQueue defaultQueue { GpuQueue::QueueType::GRAPHICS, 0 };
    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::COMPUTE, lowLevelGpuQueues_.computeQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::GRAPHICS, lowLevelGpuQueues_.graphicsQueues, defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(
            gpuQueue.index, GpuQueue::QueueType::TRANSFER, lowLevelGpuQueues_.transferQueues, defaultQueue);
    } else {
        return defaultQueue;
    }
}

uint32_t DeviceVk::GetGpuQueueCount() const
{
    return gpuQueueCount_;
}

void DeviceVk::InitializePipelineCache(array_view<const uint8_t> initialData)
{
    if (plat_.pipelineCache) {
        CreateFunctionsVk::DestroyPipelineCache(plat_.device, plat_.pipelineCache);
    }
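    // layout of the Vulkan pipeline cache header (VkPipelineCacheHeaderVersionOne); the cached
    // blob is only reused when the version, vendor/device id, and pipelineCacheUUID all match
    // the current physical device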
    struct CacheHeader {
        uint32_t bytes;
        uint32_t version;
        uint32_t vendorId;
        uint32_t deviceId;
        uint8_t pipelineCacheUUID[VK_UUID_SIZE];
    };
    if (initialData.size() > sizeof(CacheHeader)) {
        CacheHeader header;
        CloneData(&header, sizeof(header), initialData.data(), sizeof(header));
        const auto& props = plat_.physicalDeviceProperties.physicalDeviceProperties;
        if (header.version != VkPipelineCacheHeaderVersion::VK_PIPELINE_CACHE_HEADER_VERSION_ONE ||
            header.vendorId != props.vendorID || header.deviceId != props.deviceID ||
            memcmp(header.pipelineCacheUUID, props.pipelineCacheUUID, VK_UUID_SIZE)) {
            initialData = {};
        }
    }

    plat_.pipelineCache = CreateFunctionsVk::CreatePipelineCache(plat_.device, initialData);
}

vector<uint8_t> DeviceVk::GetPipelineCache() const
{
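    // standard Vulkan two-call pattern: query the required size first, then fetch the data;
    // the second call updates dataSize with the number of bytes actually written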
    vector<uint8_t> deviceData;
    if (plat_.pipelineCache) {
        size_t dataSize = 0u;
        if (auto result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, nullptr);
            result == VK_SUCCESS && dataSize) {
            deviceData.resize(dataSize);
            dataSize = deviceData.size();
            result = vkGetPipelineCacheData(plat_.device, plat_.pipelineCache, &dataSize, deviceData.data());
            if (result == VK_SUCCESS) {
                deviceData.resize(dataSize);
            } else {
                deviceData.clear();
            }
        }
    }
    return deviceData;
}

LowLevelGpuQueueVk DeviceVk::GetGpuQueue(const GpuQueue& gpuQueue) const
{
    // 1. tries to return the typed queue with given index
    // 2. tries to return the typed queue with an index 0
    // 3. returns the default queue
    const auto getSpecificQueue = [](const uint32_t queueIndex, const vector<LowLevelGpuQueueVk>& specificQueues,
                                      const LowLevelGpuQueueVk& defaultQueue) {
        const uint32_t queueCount = (uint32_t)specificQueues.size();
        if (queueIndex < queueCount) {
            return specificQueues[queueIndex];
        } else if (queueCount > 0) {
            return specificQueues[0];
        }
        return defaultQueue;
    };

    if (gpuQueue.type == GpuQueue::QueueType::COMPUTE) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.computeQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::GRAPHICS) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.graphicsQueues, lowLevelGpuQueues_.defaultQueue);
    } else if (gpuQueue.type == GpuQueue::QueueType::TRANSFER) {
        return getSpecificQueue(gpuQueue.index, lowLevelGpuQueues_.transferQueues, lowLevelGpuQueues_.defaultQueue);
    } else {
        return lowLevelGpuQueues_.defaultQueue;
    }
}

LowLevelGpuQueueVk DeviceVk::GetPresentationGpuQueue() const
{
    // NOTE: graphics queue 0 is expected to be the presentation-capable queue
    return GetGpuQueue(GpuQueue { GpuQueue::QueueType::GRAPHICS, 0 });
}

vector<LowLevelGpuQueueVk> DeviceVk::GetLowLevelGpuQueues() const
{
    vector<LowLevelGpuQueueVk> gpuQueues;
    gpuQueues.reserve(gpuQueueCount_);
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.computeQueues.begin(), lowLevelGpuQueues_.computeQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.graphicsQueues.begin(), lowLevelGpuQueues_.graphicsQueues.end());
    gpuQueues.insert(
        gpuQueues.end(), lowLevelGpuQueues_.transferQueues.begin(), lowLevelGpuQueues_.transferQueues.end());
    return gpuQueues;
}

void DeviceVk::WaitForIdle()
{
    if (plat_.device) {
        if (!isRenderbackendRunning_) {
            PLUGIN_LOG_D("Device - WaitForIdle");
            vkDeviceWaitIdle(plat_.device); // device
        } else {
            PLUGIN_LOG_E("Device WaitForIdle can only be called when the render backend is not running");
        }
    }
}
1180
Activate()1181 void DeviceVk::Activate() {}
1182
Deactivate()1183 void DeviceVk::Deactivate() {}
1184
AllowThreadedProcessing() const1185 bool DeviceVk::AllowThreadedProcessing() const
1186 {
1187 return true;
1188 }
1189
GetFeatureConfigurations() const1190 const DeviceVk::FeatureConfigurations& DeviceVk::GetFeatureConfigurations() const
1191 {
1192 return featureConfigurations_;
1193 }
1194
GetCommonDeviceExtensions() const1195 const DeviceVk::CommonDeviceExtensions& DeviceVk::GetCommonDeviceExtensions() const
1196 {
1197 return commonDeviceExtensions_;
1198 }
1199
GetPlatformDeviceExtensions() const1200 const PlatformDeviceExtensions& DeviceVk::GetPlatformDeviceExtensions() const
1201 {
1202 return platformDeviceExtensions_;
1203 }
1204
HasDeviceExtension(const string_view extensionName) const1205 bool DeviceVk::HasDeviceExtension(const string_view extensionName) const
1206 {
1207 return extensions_.contains(extensionName);
1208 }
1209
CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)1210 unique_ptr<Device> CreateDeviceVk(RenderContext& renderContext, DeviceCreateInfo const& createInfo)
1211 {
1212 return make_unique<DeviceVk>(renderContext, createInfo);
1213 }
1214
CreateGpuBuffer(const GpuBufferDesc& desc)1215 unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuBufferDesc& desc)
1216 {
1217 return make_unique<GpuBufferVk>(*this, desc);
1218 }
1219
CreateGpuBuffer(const GpuAccelerationStructureDesc& descAccel)1220 unique_ptr<GpuBuffer> DeviceVk::CreateGpuBuffer(const GpuAccelerationStructureDesc& descAccel)
1221 {
1222 return make_unique<GpuBufferVk>(*this, descAccel);
1223 }
1224
CreateGpuImage(const GpuImageDesc& desc)1225 unique_ptr<GpuImage> DeviceVk::CreateGpuImage(const GpuImageDesc& desc)
1226 {
1227 return make_unique<GpuImageVk>(*this, desc);
1228 }
1229
CreateGpuImageView( const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)1230 unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
1231 const GpuImageDesc& desc, const GpuImagePlatformData& platformData, const uintptr_t hwBuffer)
1232 {
1233 return make_unique<GpuImageVk>(*this, desc, platformData, hwBuffer);
1234 }
1235
unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(const GpuImageDesc& desc, const GpuImagePlatformData& platformData)
{
    return CreateGpuImageView(desc, platformData, 0);
}

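// Wraps the swapchain's VkImage/VkImageView pairs into GpuImage objects so the
// rest of the engine can treat swapchain images like regular image resources.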
vector<unique_ptr<GpuImage>> DeviceVk::CreateGpuImageViews(const Swapchain& swapchain)
{
    const GpuImageDesc& desc = swapchain.GetDesc();
    const auto& swapchainPlat = static_cast<const SwapchainVk&>(swapchain).GetPlatformData();

    vector<unique_ptr<GpuImage>> gpuImages(swapchainPlat.swapchainImages.images.size());
    for (size_t idx = 0; idx < gpuImages.size(); ++idx) {
        GpuImagePlatformDataVk gpuImagePlat;
        gpuImagePlat.image = swapchainPlat.swapchainImages.images[idx];
        gpuImagePlat.imageView = swapchainPlat.swapchainImages.imageViews[idx];
        gpuImages[idx] = this->CreateGpuImageView(desc, gpuImagePlat);
    }
    return gpuImages;
}

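// A minimal import sketch for an externally created image (hypothetical
// caller; myVkImage and myVkImageView are assumed to be valid Vulkan handles):
//     ImageDescVk backendDesc;
//     backendDesc.image = myVkImage;
//     backendDesc.imageView = myVkImageView;
//     unique_ptr<GpuImage> imported = deviceVk.CreateGpuImageView(desc, backendDesc);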
unique_ptr<GpuImage> DeviceVk::CreateGpuImageView(
    const GpuImageDesc& desc, const BackendSpecificImageDesc& platformData)
{
    const ImageDescVk& imageDesc = static_cast<const ImageDescVk&>(platformData);
    GpuImagePlatformDataVk platData;
    platData.image = imageDesc.image;
    platData.imageView = imageDesc.imageView;
    return CreateGpuImageView(desc, platData, imageDesc.platformHwBuffer);
}

unique_ptr<GpuSampler> DeviceVk::CreateGpuSampler(const GpuSamplerDesc& desc)
{
    return make_unique<GpuSamplerVk>(*this, desc);
}

unique_ptr<RenderFrameSync> DeviceVk::CreateRenderFrameSync()
{
    return make_unique<RenderFrameSyncVk>(*this);
}

unique_ptr<RenderBackend> DeviceVk::CreateRenderBackend(
    GpuResourceManager& gpuResourceMgr, const CORE_NS::IParallelTaskQueue::Ptr& queue)
{
    return make_unique<RenderBackendVk>(*this, gpuResourceMgr, queue);
}

unique_ptr<ShaderModule> DeviceVk::CreateShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

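// NOTE: graphics and compute shader modules both map to the same ShaderModuleVk
// type; the create info is expected to carry the shader stage distinction.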
unique_ptr<ShaderModule> DeviceVk::CreateComputeShaderModule(const ShaderModuleCreateInfo& data)
{
    return make_unique<ShaderModuleVk>(*this, data);
}

unique_ptr<GpuShaderProgram> DeviceVk::CreateGpuShaderProgram(const GpuShaderProgramCreateData& data)
{
    return make_unique<GpuShaderProgramVk>(*this, data);
}

unique_ptr<GpuComputeProgram> DeviceVk::CreateGpuComputeProgram(const GpuComputeProgramCreateData& data)
{
    return make_unique<GpuComputeProgramVk>(*this, data);
}

unique_ptr<NodeContextDescriptorSetManager> DeviceVk::CreateNodeContextDescriptorSetManager()
{
    return make_unique<NodeContextDescriptorSetManagerVk>(*this);
}

unique_ptr<NodeContextPoolManager> DeviceVk::CreateNodeContextPoolManager(
    GpuResourceManager& gpuResourceMgr, const GpuQueue& gpuQueue)
{
    return make_unique<NodeContextPoolManagerVk>(*this, gpuResourceMgr, gpuQueue);
}

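// NOTE: renderPassData and pipelineLayoutData must be non-null for the Vulkan
// backend; they are only asserted before being dereferenced below.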
unique_ptr<GraphicsPipelineStateObject> DeviceVk::CreateGraphicsPipelineStateObject(const GpuShaderProgram& gpuProgram,
    const GraphicsState& graphicsState, const PipelineLayout& pipelineLayout,
    const VertexInputDeclarationView& vertexInputDeclaration,
    const ShaderSpecializationConstantDataView& specializationConstants,
    const array_view<const DynamicStateEnum> dynamicStates, const RenderPassDesc& renderPassDesc,
    const array_view<const RenderPassSubpassDesc>& renderPassSubpassDescs, const uint32_t subpassIndex,
    const LowLevelRenderPassData* renderPassData, const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(renderPassData);
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<GraphicsPipelineStateObjectVk>(*this, gpuProgram, graphicsState, pipelineLayout,
        vertexInputDeclaration, specializationConstants, dynamicStates, renderPassDesc, renderPassSubpassDescs,
        subpassIndex, *renderPassData, *pipelineLayoutData);
}

unique_ptr<ComputePipelineStateObject> DeviceVk::CreateComputePipelineStateObject(const GpuComputeProgram& gpuProgram,
    const PipelineLayout& pipelineLayout, const ShaderSpecializationConstantDataView& specializationConstants,
    const LowLevelPipelineLayoutData* pipelineLayoutData)
{
    PLUGIN_ASSERT(pipelineLayoutData);
    return make_unique<ComputePipelineStateObjectVk>(
        *this, gpuProgram, pipelineLayout, specializationConstants, *pipelineLayoutData);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphore()
{
    return make_unique<GpuSemaphoreVk>(*this);
}

unique_ptr<GpuSemaphore> DeviceVk::CreateGpuSemaphoreView(const uint64_t handle)
{
    return make_unique<GpuSemaphoreVk>(*this, handle);
}

const DebugFunctionUtilitiesVk& DeviceVk::GetDebugFunctionUtilities() const
{
    return debugFunctionUtilities_;
}

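// The debug utils entry points loaded below allow naming Vulkan objects and
// labeling command buffer regions for tools such as RenderDoc. A usage sketch
// for object naming (hypothetical buffer handle and name):
//     VkDebugUtilsObjectNameInfoEXT info { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT };
//     info.objectType = VK_OBJECT_TYPE_BUFFER;
//     info.objectHandle = (uint64_t)myVkBuffer;
//     info.pObjectName = "MyBuffer";
//     debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT(plat_.device, &info);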
void DeviceVk::CreateDebugFunctions()
{
#if (RENDER_VULKAN_VALIDATION_ENABLED == 1)
    debugFunctionUtilities_.vkSetDebugUtilsObjectNameEXT =
        (PFN_vkSetDebugUtilsObjectNameEXT)(void*)vkGetDeviceProcAddr(plat_.device, "vkSetDebugUtilsObjectNameEXT");
#endif
#if (RENDER_DEBUG_MARKERS_ENABLED == 1) || (RENDER_DEBUG_COMMAND_MARKERS_ENABLED == 1)
    debugFunctionUtilities_.vkCmdBeginDebugUtilsLabelEXT =
        (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdBeginDebugUtilsLabelEXT");
    debugFunctionUtilities_.vkCmdEndDebugUtilsLabelEXT =
        (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetInstanceProcAddr(plat_.instance, "vkCmdEndDebugUtilsLabelEXT");
#endif
}

const DeviceVk::ExtFunctions& DeviceVk::GetExtFunctions() const
{
    return extFunctions_;
}

const PlatformExtFunctions& DeviceVk::GetPlatformExtFunctions() const
{
    return platformExtFunctions_;
}

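// NOTE: device-level entry points are resolved below through
// vkGetInstanceProcAddr, which returns a dispatching trampoline; resolving
// them with vkGetDeviceProcAddr instead would skip one indirection per call.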
void DeviceVk::CreateExtFunctions()
{
    if (commonDeviceExtensions_.renderPass2) {
        extFunctions_.vkCreateRenderPass2KHR =
            (PFN_vkCreateRenderPass2KHR)(void*)vkGetInstanceProcAddr(plat_.instance, "vkCreateRenderPass2KHR");
        if (!extFunctions_.vkCreateRenderPass2KHR) {
            commonDeviceExtensions_.renderPass2 = false;
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateRenderPass2KHR");
        }
    }
    if (commonDeviceExtensions_.getMemoryRequirements2) {
        extFunctions_.vkGetImageMemoryRequirements2 = (PFN_vkGetImageMemoryRequirements2)vkGetInstanceProcAddr(
            plat_.instance, "vkGetImageMemoryRequirements2KHR");
        if (!extFunctions_.vkGetImageMemoryRequirements2) {
            PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetImageMemoryRequirements2KHR");
        }
    }
    if (commonDeviceExtensions_.samplerYcbcrConversion) {
        GetYcbcrExtFunctions(plat_.instance, extFunctions_);
    }
#if (RENDER_VULKAN_FSR_ENABLED == 1)
    if (commonDeviceExtensions_.fragmentShadingRate) {
        extFunctions_.vkCmdSetFragmentShadingRateKHR =
            (PFN_vkCmdSetFragmentShadingRateKHR)vkGetInstanceProcAddr(plat_.instance, "vkCmdSetFragmentShadingRateKHR");
    }
#endif

#if (RENDER_VULKAN_RT_ENABLED == 1)
    extFunctions_.vkGetAccelerationStructureBuildSizesKHR =
        (PFN_vkGetAccelerationStructureBuildSizesKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureBuildSizesKHR");
    if (!extFunctions_.vkGetAccelerationStructureBuildSizesKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureBuildSizesKHR");
    }
    extFunctions_.vkCmdBuildAccelerationStructuresKHR = (PFN_vkCmdBuildAccelerationStructuresKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkCmdBuildAccelerationStructuresKHR");
    if (!extFunctions_.vkCmdBuildAccelerationStructuresKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCmdBuildAccelerationStructuresKHR");
    }
    extFunctions_.vkCreateAccelerationStructureKHR =
        (PFN_vkCreateAccelerationStructureKHR)vkGetInstanceProcAddr(plat_.instance, "vkCreateAccelerationStructureKHR");
    if (!extFunctions_.vkCreateAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkCreateAccelerationStructureKHR");
    }
    extFunctions_.vkDestroyAccelerationStructureKHR = (PFN_vkDestroyAccelerationStructureKHR)vkGetInstanceProcAddr(
        plat_.instance, "vkDestroyAccelerationStructureKHR");
    if (!extFunctions_.vkDestroyAccelerationStructureKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkDestroyAccelerationStructureKHR");
    }
    extFunctions_.vkGetAccelerationStructureDeviceAddressKHR =
        (PFN_vkGetAccelerationStructureDeviceAddressKHR)vkGetInstanceProcAddr(
            plat_.instance, "vkGetAccelerationStructureDeviceAddressKHR");
    if (!extFunctions_.vkGetAccelerationStructureDeviceAddressKHR) {
        PLUGIN_LOG_E("vkGetInstanceProcAddr failed for vkGetAccelerationStructureDeviceAddressKHR");
    }
#endif
}

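// LowLevelDeviceVk exposes the raw Vulkan platform data of engine resources;
// access is gated by DeviceVk::GetLockResourceBackendAccess(), see the getters
// below.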
LowLevelDeviceVk::LowLevelDeviceVk(DeviceVk& deviceVk)
    : deviceVk_(deviceVk), gpuResourceMgr_(static_cast<GpuResourceManager&>(deviceVk_.GetGpuResourceManager()))
{}

DeviceBackendType LowLevelDeviceVk::GetBackendType() const
{
    return DeviceBackendType::VULKAN;
}

const DevicePlatformDataVk& LowLevelDeviceVk::GetPlatformDataVk() const
{
    return deviceVk_.GetPlatformDataVk();
}

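// A usage sketch (hypothetical render handle; only valid while backend
// resource access is unlocked, e.g. inside backend callbacks):
//     const GpuBufferPlatformDataVk platData = lowLevelDevice.GetBuffer(bufferHandle);
//     // platData carries the native Vulkan buffer data for direct use.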
GpuBufferPlatformDataVk LowLevelDeviceVk::GetBuffer(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuBufferVk* buffer = gpuResourceMgr_.GetBuffer<GpuBufferVk>(handle);
        if (buffer) {
            return buffer->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuImagePlatformDataVk LowLevelDeviceVk::GetImage(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuImageVk* image = gpuResourceMgr_.GetImage<GpuImageVk>(handle);
        if (image) {
            return image->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}

GpuSamplerPlatformDataVk LowLevelDeviceVk::GetSampler(RenderHandle handle) const
{
    if (deviceVk_.GetLockResourceBackendAccess()) {
        GpuSamplerVk* sampler = gpuResourceMgr_.GetSampler<GpuSamplerVk>(handle);
        if (sampler) {
            return sampler->GetPlatformData();
        }
    } else {
        PLUGIN_LOG_E("low level device methods can only be used within specific methods");
    }
    return {};
}
RENDER_END_NAMESPACE()