1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "dzn_private.h"
25#include "dzn_abi_helper.h"
26
27#include "vk_alloc.h"
28#include "vk_debug_report.h"
29#include "vk_util.h"
30
31#include "os_time.h"
32
33static D3D12_QUERY_HEAP_TYPE
34dzn_query_pool_get_heap_type(VkQueryType in)
35{
36   switch (in) {
37   case VK_QUERY_TYPE_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
38   case VK_QUERY_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
39   case VK_QUERY_TYPE_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
40   default: unreachable("Unsupported query type");
41   }
42}
43
44D3D12_QUERY_TYPE
45dzn_query_pool_get_query_type(const struct dzn_query_pool *qpool,
46                              VkQueryControlFlags flags)
47{
48   switch (qpool->heap_type) {
49   case D3D12_QUERY_HEAP_TYPE_OCCLUSION:
50      return flags & VK_QUERY_CONTROL_PRECISE_BIT ?
51             D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION;
52   case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
53   case D3D12_QUERY_HEAP_TYPE_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP;
54   default: unreachable("Unsupported query type");
55   }
56}
57
58static void
59dzn_query_pool_destroy(struct dzn_query_pool *qpool,
60                       const VkAllocationCallbacks *alloc)
61{
62   if (!qpool)
63      return;
64
65   struct dzn_device *device = container_of(qpool->base.device, struct dzn_device, vk);
66
67   if (qpool->collect_map)
68      ID3D12Resource_Unmap(qpool->collect_buffer, 0, NULL);
69
70   if (qpool->collect_buffer)
71      ID3D12Resource_Release(qpool->collect_buffer);
72
73   if (qpool->resolve_buffer)
74      ID3D12Resource_Release(qpool->resolve_buffer);
75
76   if (qpool->heap)
77      ID3D12QueryHeap_Release(qpool->heap);
78
79   for (uint32_t q = 0; q < qpool->query_count; q++) {
80      if (qpool->queries[q].fence)
81         ID3D12Fence_Release(qpool->queries[q].fence);
82   }
83
84   mtx_destroy(&qpool->queries_lock);
85   vk_object_base_finish(&qpool->base);
86   vk_free2(&device->vk.alloc, alloc, qpool);
87}
88
89static VkResult
90dzn_query_pool_create(struct dzn_device *device,
91                      const VkQueryPoolCreateInfo *info,
92                      const VkAllocationCallbacks *alloc,
93                      VkQueryPool *out)
94{
95   VK_MULTIALLOC(ma);
96   VK_MULTIALLOC_DECL(&ma, struct dzn_query_pool, qpool, 1);
97   VK_MULTIALLOC_DECL(&ma, struct dzn_query, queries, info->queryCount);
98
99   if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc,
100                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
101      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
102
103   vk_object_base_init(&device->vk, &qpool->base, VK_OBJECT_TYPE_QUERY_POOL);
104
105   mtx_init(&qpool->queries_lock, mtx_plain);
106   qpool->query_count = info->queryCount;
107   qpool->queries = queries;
108
109   D3D12_QUERY_HEAP_DESC desc = { 0 };
110   qpool->heap_type = desc.Type = dzn_query_pool_get_heap_type(info->queryType);
111   desc.Count = info->queryCount;
112   desc.NodeMask = 0;
113
114   HRESULT hres =
115      ID3D12Device1_CreateQueryHeap(device->dev, &desc,
116                                    &IID_ID3D12QueryHeap,
117                                    (void **)&qpool->heap);
118   if (FAILED(hres)) {
119      dzn_query_pool_destroy(qpool, alloc);
120      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
121   }
122
123   switch (info->queryType) {
124   case VK_QUERY_TYPE_OCCLUSION:
125   case VK_QUERY_TYPE_TIMESTAMP:
126      qpool->query_size = sizeof(uint64_t);
127      break;
128   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
129      qpool->pipeline_statistics = info->pipelineStatistics;
130      qpool->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
131      break;
132   default: unreachable("Unsupported query type");
133   }
134
135   D3D12_HEAP_PROPERTIES hprops =
136      dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_DEFAULT);
137   D3D12_RESOURCE_DESC rdesc = {
138      .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
139      .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
140      .Width = info->queryCount * qpool->query_size,
141      .Height = 1,
142      .DepthOrArraySize = 1,
143      .MipLevels = 1,
144      .Format = DXGI_FORMAT_UNKNOWN,
145      .SampleDesc = { .Count = 1, .Quality = 0 },
146      .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
147      .Flags = D3D12_RESOURCE_FLAG_NONE,
148   };
149
150   hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
151                                                D3D12_HEAP_FLAG_NONE,
152                                                &rdesc,
153                                                D3D12_RESOURCE_STATE_COPY_DEST,
154                                                NULL,
155                                                &IID_ID3D12Resource,
156                                                (void **)&qpool->resolve_buffer);
157   if (FAILED(hres)) {
158      dzn_query_pool_destroy(qpool, alloc);
159      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
160   }
161
162   hprops = dzn_ID3D12Device2_GetCustomHeapProperties(device->dev, 0,
163                                                      D3D12_HEAP_TYPE_READBACK);
164   rdesc.Width = info->queryCount * (qpool->query_size + sizeof(uint64_t));
165   hres = ID3D12Device1_CreateCommittedResource(device->dev, &hprops,
166                                                D3D12_HEAP_FLAG_NONE,
167                                                &rdesc,
168                                                D3D12_RESOURCE_STATE_COPY_DEST,
169                                                NULL,
170                                                &IID_ID3D12Resource,
171                                                (void **)&qpool->collect_buffer);
172   if (FAILED(hres)) {
173      dzn_query_pool_destroy(qpool, alloc);
174      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
175   }
176
177   hres = ID3D12Resource_Map(qpool->collect_buffer, 0, NULL, (void **)&qpool->collect_map);
178   if (FAILED(hres)) {
179      dzn_query_pool_destroy(qpool, alloc);
180      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181   }
182
183   memset(qpool->collect_map, 0, rdesc.Width);
184
185   *out = dzn_query_pool_to_handle(qpool);
186   return VK_SUCCESS;
187}
188
189uint32_t
190dzn_query_pool_get_result_offset(const struct dzn_query_pool *qpool, uint32_t query)
191{
192   return query * qpool->query_size;
193}
194
195uint32_t
196dzn_query_pool_get_result_size(const struct dzn_query_pool *qpool, uint32_t query_count)
197{
198   return query_count * qpool->query_size;
199}
200
201uint32_t
202dzn_query_pool_get_availability_offset(const struct dzn_query_pool *qpool, uint32_t query)
203{
204   return (qpool->query_count * qpool->query_size) + (sizeof(uint64_t) * query);
205}
206
207VKAPI_ATTR VkResult VKAPI_CALL
208dzn_CreateQueryPool(VkDevice device,
209                    const VkQueryPoolCreateInfo *pCreateInfo,
210                    const VkAllocationCallbacks *pAllocator,
211                    VkQueryPool *pQueryPool)
212{
213   return dzn_query_pool_create(dzn_device_from_handle(device),
214                                pCreateInfo, pAllocator, pQueryPool);
215}
216
217VKAPI_ATTR void VKAPI_CALL
218dzn_DestroyQueryPool(VkDevice device,
219                     VkQueryPool queryPool,
220                     const VkAllocationCallbacks *pAllocator)
221{
222   dzn_query_pool_destroy(dzn_query_pool_from_handle(queryPool), pAllocator);
223}
224
225VKAPI_ATTR void VKAPI_CALL
226dzn_ResetQueryPool(VkDevice device,
227                   VkQueryPool queryPool,
228                   uint32_t firstQuery,
229                   uint32_t queryCount)
230{
231   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
232
233   mtx_lock(&qpool->queries_lock);
234   for (uint32_t q = 0; q < queryCount; q++) {
235      struct dzn_query *query = &qpool->queries[firstQuery + q];
236
237      query->fence_value = 0;
238      if (query->fence) {
239         ID3D12Fence_Release(query->fence);
240         query->fence = NULL;
241      }
242   }
243   mtx_lock(&qpool->queries_lock);
244
245   memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_result_offset(qpool, firstQuery),
246          0, queryCount * qpool->query_size);
247   memset((uint8_t *)qpool->collect_map + dzn_query_pool_get_availability_offset(qpool, firstQuery),
248          0, queryCount * sizeof(uint64_t));
249}
250
251VKAPI_ATTR VkResult VKAPI_CALL
252dzn_GetQueryPoolResults(VkDevice device,
253                        VkQueryPool queryPool,
254                        uint32_t firstQuery,
255                        uint32_t queryCount,
256                        size_t dataSize,
257                        void *pData,
258                        VkDeviceSize stride,
259                        VkQueryResultFlags flags)
260{
261   VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool);
262
263   uint32_t step = (flags & VK_QUERY_RESULT_64_BIT) ?
264                   sizeof(uint64_t) : sizeof(uint32_t);
265   VkResult result = VK_SUCCESS;
266
267   for (uint32_t q = 0; q < queryCount; q++) {
268      struct dzn_query *query = &qpool->queries[q + firstQuery];
269
270      uint8_t *dst_ptr = (uint8_t *)pData + (stride * q);
271      uint8_t *src_ptr =
272         (uint8_t *)qpool->collect_map +
273         dzn_query_pool_get_result_offset(qpool, firstQuery + q);
274      uint64_t available = 0;
275
276      if (flags & VK_QUERY_RESULT_WAIT_BIT) {
277         ID3D12Fence *query_fence = NULL;
278         uint64_t query_fence_val = 0;
279
280         while (true) {
281            mtx_lock(&qpool->queries_lock);
282            if (query->fence) {
283               query_fence = query->fence;
284               ID3D12Fence_AddRef(query_fence);
285            }
286            query_fence_val = query->fence_value;
287            mtx_unlock(&qpool->queries_lock);
288
289            if (query_fence)
290               break;
291
292            /* Check again in 10ms.
293             * FIXME: decrease the polling period if it happens to hurt latency.
294             */
295            os_time_sleep(10 * 1000);
296         }
297
298         ID3D12Fence_SetEventOnCompletion(query_fence, query_fence_val, NULL);
299         ID3D12Fence_Release(query_fence);
300         available = UINT64_MAX;
301      } else {
302         ID3D12Fence *query_fence = NULL;
303         mtx_lock(&qpool->queries_lock);
304         if (query->fence) {
305            query_fence = query->fence;
306            ID3D12Fence_AddRef(query_fence);
307         }
308         uint64_t query_fence_val = query->fence_value;
309         mtx_unlock(&qpool->queries_lock);
310
311         if (query_fence) {
312            if (ID3D12Fence_GetCompletedValue(query_fence) >= query_fence_val)
313               available = UINT64_MAX;
314            ID3D12Fence_Release(query_fence);
315         }
316      }
317
318      if (qpool->heap_type != D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) {
319         if (available)
320            memcpy(dst_ptr, src_ptr, step);
321         else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
322            memset(dst_ptr, 0, step);
323
324         dst_ptr += step;
325      } else {
326         for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) {
327            if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics))
328               continue;
329
330            if (available)
331               memcpy(dst_ptr, src_ptr + (c * sizeof(uint64_t)), step);
332            else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
333               memset(dst_ptr, 0, step);
334
335            dst_ptr += step;
336         }
337      }
338
339      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
340         memcpy(dst_ptr, &available, step);
341
342      if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
343         result = VK_NOT_READY;
344   }
345
346   return result;
347}
348