1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "dzn_private.h" 25#include "dzn_abi_helper.h" 26 27#include "vk_alloc.h" 28#include "vk_debug_report.h" 29#include "vk_util.h" 30 31#include "os_time.h" 32 33static D3D12_QUERY_HEAP_TYPE 34dzn_query_pool_get_heap_type(VkQueryType in) 35{ 36 switch (in) { 37 case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION; 38 case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; 39 case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; 40 default: unreachable("Unsupported query type"); 41 } 42} 43 44D3D12_QUERY_TYPE 45dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool, 46 VkQueryControlFlags flags) 47{ 48 switch (qpool->heap_type) { 49 case D3D12_QUERY_HEAP_TYPE_OCCLUSION: 50 return flags & VK_QUERY_CONTROL_PRECISE_BIT ? 51 D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION; 52 case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; 53 case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP; 54 default: unreachable("Unsupported query type"); 55 } 56} 57 58static void 59dzn_query_pool_destroy(struct dzn_query_pool *qpool, 60 const VkAllocationCallbacks *alloc) 61{ 62 if (!qpool) 63 return; 64 65 struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk); 66 67 if (qpool->collect_map) 68 ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL); 69 70 if (qpool->collect_buffer) 71 ID3D12Resource_Release(qpool->collect_buffer); 72 73 if (qpool->resolve_buffer) 74 ID3D12Resource_Release(qpool->resolve_buffer); 75 76 if (qpool->heap) 77 ID3D12QueryHeap_Release(qpool->heap); 78 79 for (uint32_t q = 0; q < qpool->query_count; q++) { 80 if (qpool->queries[q].fence) 81 ID3D12Fence_Release(qpool->queries[q].fence); 82 } 83 84 mtx_destroy(&qpool->queries_lock); 85 vk_object_base_finish(&qpool->base); 86 vk_free2(&device->vk.alloc, alloc, qpool); 87} 88 89static VkResult 90dzn_query_pool_create(struct dzn_device *device, 91 const VkQueryPoolCreateInfo *info, 92 const VkAllocationCallbacks *alloc, 93 VkQueryPool *out) 94{ 95 VK_MULTIALLOC(ma); 96 VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1); 97 VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount); 98 99 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, 100 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) 101 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 102 103 vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL); 104 105 mtx_init(&qpool->queries_lock, mtx_plain); 106 qpool->query_count = info->queryCount; 107 qpool->queries = queries; 108 109 D3D12_QUERY_HEAP_DESC desc = { 0 }; 110 qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType); 111 desc.Count = info->queryCount; 112 desc.NodeMask = 0; 113 114 HRESULT hres = 115 ID3D12Device1_CreateQueryHeap(device->dev, &desc, 116 &IID_ID3D12QueryHeap, 117 (void **)&qpool->heap); 118 if (FAILED(hres)) { 119 dzn_query_pool_destroy(qpool, alloc); 120 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 121 } 122 123 switch (info->queryType) { 124 case VK_QUERY_TYPE_OCCLUSION: 125 case VK_QUERY_TYPE_TIMESTAMP: 126 qpool->query_size = sizeof(uint64_t); 127 break; 128 case VK_QUERY_TYPE_PIPELINE_STATISTICS: 129 qpool->pipeline_statistics = info->pipelineStatistics; 130 qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); 131 break; 132 default: unreachable("Unsupported query type"); 133 } 134 135 D3D12_HEAP_PROPERTIES hprops = 136 dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_DEFAULT); 137 D3D12_RESOURCE_DESC rdesc = { 138 .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, 139 .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, 140 .Width = info->queryCount * qpool->query_size, 141 .Height = 1, 142 .DepthOrArraySize = 1, 143 .MipLevels = 1, 144 .Format = DXGI_FORMAT_UNKNOWN, 145 .SampleDesc = { .Count = 1, .Quality = 0 }, 146 .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, 147 .Flags = D3D12_RESOURCE_FLAG_NONE, 148 }; 149 150 hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, 151 D3D12_HEAP_FLAG_NONE, 152 &rdesc, 153 D3D12_RESOURCE_STATE_COPY_DEST, 154 NULL, 155 &IID_ID3D12Resource, 156 (void **)&qpool->resolve_buffer); 157 if (FAILED(hres)) { 158 dzn_query_pool_destroy(qpool, alloc); 159 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 160 } 161 162 hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, 163 D3D12_HEAP_TYPE_READBACK); 164 rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t)); 165 hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops, 166 D3D12_HEAP_FLAG_NONE, 167 &rdesc, 168 D3D12_RESOURCE_STATE_COPY_DEST, 169 NULL, 170 &IID_ID3D12Resource, 171 (void **)&qpool->collect_buffer); 172 if (FAILED(hres)) { 173 dzn_query_pool_destroy(qpool, alloc); 174 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 175 } 176 177 hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map); 178 if (FAILED(hres)) { 179 dzn_query_pool_destroy(qpool, alloc); 180 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 181 } 182 183 memset(qpool->collect_map, 0, rdesc.Width); 184 185 *out = dzn_query_pool_to_handle(qpool); 186 return VK_SUCCESS; 187} 188 189uint32_t 190dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query) 191{ 192 return query * qpool->query_size; 193} 194 195uint32_t 196dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count) 197{ 198 return query_count * qpool->query_size; 199} 200 201uint32_t 202dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query) 203{ 204 return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query); 205} 206 207VKAPI_ATTR VkResult VKAPI_CALL 208dzn_CreateQueryPool(VkDevice device, 209 const VkQueryPoolCreateInfo *pCreateInfo, 210 const VkAllocationCallbacks *pAllocator, 211 VkQueryPool *pQueryPool) 212{ 213 return dzn_query_pool_create(dzn_device_from_handle(device), 214 pCreateInfo, pAllocator, pQueryPool); 215} 216 217VKAPI_ATTR void VKAPI_CALL 218dzn_DestroyQueryPool(VkDevice device, 219 VkQueryPool queryPool, 220 const VkAllocationCallbacks *pAllocator) 221{ 222 dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator); 223} 224 225VKAPI_ATTR void VKAPI_CALL 226dzn_ResetQueryPool(VkDevice device, 227 VkQueryPool queryPool, 228 uint32_t firstQuery, 229 uint32_t queryCount) 230{ 231 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 232 233 mtx_lock(&qpool->queries_lock); 234 for (uint32_t q = 0; q < queryCount; q++) { 235 struct dzn_query *query = &qpool->queries[firstQuery + q]; 236 237 query->fence_value = 0; 238 if (query->fence) { 239 ID3D12Fence_Release(query->fence); 240 query->fence = NULL; 241 } 242 } 243 mtx_lock(&qpool->queries_lock); 244 245 memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery), 246 0, queryCount * qpool->query_size); 247 memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery), 248 0, queryCount * sizeof(uint64_t)); 249} 250 251VKAPI_ATTR VkResult VKAPI_CALL 252dzn_GetQueryPoolResults(VkDevice device, 253 VkQueryPool queryPool, 254 uint32_t firstQuery, 255 uint32_t queryCount, 256 size_t dataSize, 257 void *pData, 258 VkDeviceSize stride, 259 VkQueryResultFlags flags) 260{ 261 VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); 262 263 uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ? 264 sizeof(uint64_t) : sizeof(uint32_t); 265 VkResult result = VK_SUCCESS; 266 267 for (uint32_t q = 0; q < queryCount; q++) { 268 struct dzn_query *query = &qpool->queries[q + firstQuery]; 269 270 uint8_t *dst_ptr = (uint8_t *)pData + (stride * q); 271 uint8_t *src_ptr = 272 (uint8_t *)qpool->collect_map + 273 dzn_query_pool_get_result_offset(qpool, firstQuery + q); 274 uint64_t available = 0; 275 276 if (flags & VK_QUERY_RESULT_WAIT_BIT) { 277 ID3D12Fence *query_fence = NULL; 278 uint64_t query_fence_val = 0; 279 280 while (true) { 281 mtx_lock(&qpool->queries_lock); 282 if (query->fence) { 283 query_fence = query->fence; 284 ID3D12Fence_AddRef(query_fence); 285 } 286 query_fence_val = query->fence_value; 287 mtx_unlock(&qpool->queries_lock); 288 289 if (query_fence) 290 break; 291 292 /* Check again in 10ms. 293 * FIXME: decrease the polling period if it happens to hurt latency. 294 */ 295 os_time_sleep(10 * 1000); 296 } 297 298 ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL); 299 ID3D12Fence_Release(query_fence); 300 available = UINT64_MAX; 301 } else { 302 ID3D12Fence *query_fence = NULL; 303 mtx_lock(&qpool->queries_lock); 304 if (query->fence) { 305 query_fence = query->fence; 306 ID3D12Fence_AddRef(query_fence); 307 } 308 uint64_t query_fence_val = query->fence_value; 309 mtx_unlock(&qpool->queries_lock); 310 311 if (query_fence) { 312 if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val) 313 available = UINT64_MAX; 314 ID3D12Fence_Release(query_fence); 315 } 316 } 317 318 if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { 319 if (available) 320 memcpy(dst_ptr, src_ptr, step); 321 else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) 322 memset(dst_ptr, 0, step); 323 324 dst_ptr += step; 325 } else { 326 for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { 327 if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) 328 continue; 329 330 if (available) 331 memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step); 332 else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) 333 memset(dst_ptr, 0, step); 334 335 dst_ptr += step; 336 } 337 } 338 339 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) 340 memcpy(dst_ptr, &available, step); 341 342 if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) 343 result = VK_NOT_READY; 344 } 345 346 return result; 347} 348