/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "si_query.h"
28bf215546Sopenharmony_ci#include "si_build_pm4.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "amd/common/sid.h"
31bf215546Sopenharmony_ci#include "si_pipe.h"
32bf215546Sopenharmony_ci#include "util/os_time.h"
33bf215546Sopenharmony_ci#include "util/u_memory.h"
34bf215546Sopenharmony_ci#include "util/u_suballoc.h"
35bf215546Sopenharmony_ci#include "util/u_upload_mgr.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_cistatic const struct si_query_ops query_hw_ops;
38bf215546Sopenharmony_ci
/* Parameter bundle for hardware queries: three offsets (start, end, fence)
 * plus a stride and a count for result pairs.
 *
 * NOTE(review): the users of this struct are outside the code shown here, so
 * units and exact semantics are unverified — presumably byte offsets within
 * one query result slot; confirm against the consumers. */
struct si_hw_query_params {
   unsigned start_offset;
   unsigned end_offset;
   unsigned fence_offset;
   unsigned pair_stride;
   unsigned pair_count;
};
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci/* Queries without buffer handling or suspend/resume. */
48bf215546Sopenharmony_cistruct si_query_sw {
49bf215546Sopenharmony_ci   struct si_query b;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   uint64_t begin_result;
52bf215546Sopenharmony_ci   uint64_t end_result;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   uint64_t begin_time;
55bf215546Sopenharmony_ci   uint64_t end_time;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   /* Fence for GPU_FINISHED. */
58bf215546Sopenharmony_ci   struct pipe_fence_handle *fence;
59bf215546Sopenharmony_ci};
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_cistatic void si_query_sw_destroy(struct si_context *sctx, struct si_query *squery)
62bf215546Sopenharmony_ci{
63bf215546Sopenharmony_ci   struct si_query_sw *query = (struct si_query_sw *)squery;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   sctx->b.screen->fence_reference(sctx->b.screen, &query->fence, NULL);
66bf215546Sopenharmony_ci   FREE(query);
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_cistatic enum radeon_value_id winsys_id_from_type(unsigned type)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci   switch (type) {
72bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_VRAM:
73bf215546Sopenharmony_ci      return RADEON_REQUESTED_VRAM_MEMORY;
74bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_GTT:
75bf215546Sopenharmony_ci      return RADEON_REQUESTED_GTT_MEMORY;
76bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_VRAM:
77bf215546Sopenharmony_ci      return RADEON_MAPPED_VRAM;
78bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_GTT:
79bf215546Sopenharmony_ci      return RADEON_MAPPED_GTT;
80bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_VRAM:
81bf215546Sopenharmony_ci      return RADEON_SLAB_WASTED_VRAM;
82bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_GTT:
83bf215546Sopenharmony_ci      return RADEON_SLAB_WASTED_GTT;
84bf215546Sopenharmony_ci   case SI_QUERY_BUFFER_WAIT_TIME:
85bf215546Sopenharmony_ci      return RADEON_BUFFER_WAIT_TIME_NS;
86bf215546Sopenharmony_ci   case SI_QUERY_NUM_MAPPED_BUFFERS:
87bf215546Sopenharmony_ci      return RADEON_NUM_MAPPED_BUFFERS;
88bf215546Sopenharmony_ci   case SI_QUERY_NUM_GFX_IBS:
89bf215546Sopenharmony_ci      return RADEON_NUM_GFX_IBS;
90bf215546Sopenharmony_ci   case SI_QUERY_GFX_BO_LIST_SIZE:
91bf215546Sopenharmony_ci      return RADEON_GFX_BO_LIST_COUNTER;
92bf215546Sopenharmony_ci   case SI_QUERY_GFX_IB_SIZE:
93bf215546Sopenharmony_ci      return RADEON_GFX_IB_SIZE_COUNTER;
94bf215546Sopenharmony_ci   case SI_QUERY_NUM_BYTES_MOVED:
95bf215546Sopenharmony_ci      return RADEON_NUM_BYTES_MOVED;
96bf215546Sopenharmony_ci   case SI_QUERY_NUM_EVICTIONS:
97bf215546Sopenharmony_ci      return RADEON_NUM_EVICTIONS;
98bf215546Sopenharmony_ci   case SI_QUERY_NUM_VRAM_CPU_PAGE_FAULTS:
99bf215546Sopenharmony_ci      return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
100bf215546Sopenharmony_ci   case SI_QUERY_VRAM_USAGE:
101bf215546Sopenharmony_ci      return RADEON_VRAM_USAGE;
102bf215546Sopenharmony_ci   case SI_QUERY_VRAM_VIS_USAGE:
103bf215546Sopenharmony_ci      return RADEON_VRAM_VIS_USAGE;
104bf215546Sopenharmony_ci   case SI_QUERY_GTT_USAGE:
105bf215546Sopenharmony_ci      return RADEON_GTT_USAGE;
106bf215546Sopenharmony_ci   case SI_QUERY_GPU_TEMPERATURE:
107bf215546Sopenharmony_ci      return RADEON_GPU_TEMPERATURE;
108bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_SCLK:
109bf215546Sopenharmony_ci      return RADEON_CURRENT_SCLK;
110bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_MCLK:
111bf215546Sopenharmony_ci      return RADEON_CURRENT_MCLK;
112bf215546Sopenharmony_ci   case SI_QUERY_CS_THREAD_BUSY:
113bf215546Sopenharmony_ci      return RADEON_CS_THREAD_TIME;
114bf215546Sopenharmony_ci   default:
115bf215546Sopenharmony_ci      unreachable("query type does not correspond to winsys id");
116bf215546Sopenharmony_ci   }
117bf215546Sopenharmony_ci}
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_cistatic bool si_query_sw_begin(struct si_context *sctx, struct si_query *squery)
120bf215546Sopenharmony_ci{
121bf215546Sopenharmony_ci   struct si_query_sw *query = (struct si_query_sw *)squery;
122bf215546Sopenharmony_ci   enum radeon_value_id ws_id;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   switch (query->b.type) {
125bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP_DISJOINT:
126bf215546Sopenharmony_ci   case PIPE_QUERY_GPU_FINISHED:
127bf215546Sopenharmony_ci      break;
128bf215546Sopenharmony_ci   case SI_QUERY_DRAW_CALLS:
129bf215546Sopenharmony_ci      query->begin_result = sctx->num_draw_calls;
130bf215546Sopenharmony_ci      break;
131bf215546Sopenharmony_ci   case SI_QUERY_DECOMPRESS_CALLS:
132bf215546Sopenharmony_ci      query->begin_result = sctx->num_decompress_calls;
133bf215546Sopenharmony_ci      break;
134bf215546Sopenharmony_ci   case SI_QUERY_PRIM_RESTART_CALLS:
135bf215546Sopenharmony_ci      query->begin_result = sctx->num_prim_restart_calls;
136bf215546Sopenharmony_ci      break;
137bf215546Sopenharmony_ci   case SI_QUERY_COMPUTE_CALLS:
138bf215546Sopenharmony_ci      query->begin_result = sctx->num_compute_calls;
139bf215546Sopenharmony_ci      break;
140bf215546Sopenharmony_ci   case SI_QUERY_CP_DMA_CALLS:
141bf215546Sopenharmony_ci      query->begin_result = sctx->num_cp_dma_calls;
142bf215546Sopenharmony_ci      break;
143bf215546Sopenharmony_ci   case SI_QUERY_NUM_VS_FLUSHES:
144bf215546Sopenharmony_ci      query->begin_result = sctx->num_vs_flushes;
145bf215546Sopenharmony_ci      break;
146bf215546Sopenharmony_ci   case SI_QUERY_NUM_PS_FLUSHES:
147bf215546Sopenharmony_ci      query->begin_result = sctx->num_ps_flushes;
148bf215546Sopenharmony_ci      break;
149bf215546Sopenharmony_ci   case SI_QUERY_NUM_CS_FLUSHES:
150bf215546Sopenharmony_ci      query->begin_result = sctx->num_cs_flushes;
151bf215546Sopenharmony_ci      break;
152bf215546Sopenharmony_ci   case SI_QUERY_NUM_CB_CACHE_FLUSHES:
153bf215546Sopenharmony_ci      query->begin_result = sctx->num_cb_cache_flushes;
154bf215546Sopenharmony_ci      break;
155bf215546Sopenharmony_ci   case SI_QUERY_NUM_DB_CACHE_FLUSHES:
156bf215546Sopenharmony_ci      query->begin_result = sctx->num_db_cache_flushes;
157bf215546Sopenharmony_ci      break;
158bf215546Sopenharmony_ci   case SI_QUERY_NUM_L2_INVALIDATES:
159bf215546Sopenharmony_ci      query->begin_result = sctx->num_L2_invalidates;
160bf215546Sopenharmony_ci      break;
161bf215546Sopenharmony_ci   case SI_QUERY_NUM_L2_WRITEBACKS:
162bf215546Sopenharmony_ci      query->begin_result = sctx->num_L2_writebacks;
163bf215546Sopenharmony_ci      break;
164bf215546Sopenharmony_ci   case SI_QUERY_NUM_RESIDENT_HANDLES:
165bf215546Sopenharmony_ci      query->begin_result = sctx->num_resident_handles;
166bf215546Sopenharmony_ci      break;
167bf215546Sopenharmony_ci   case SI_QUERY_TC_OFFLOADED_SLOTS:
168bf215546Sopenharmony_ci      query->begin_result = sctx->tc ? sctx->tc->num_offloaded_slots : 0;
169bf215546Sopenharmony_ci      break;
170bf215546Sopenharmony_ci   case SI_QUERY_TC_DIRECT_SLOTS:
171bf215546Sopenharmony_ci      query->begin_result = sctx->tc ? sctx->tc->num_direct_slots : 0;
172bf215546Sopenharmony_ci      break;
173bf215546Sopenharmony_ci   case SI_QUERY_TC_NUM_SYNCS:
174bf215546Sopenharmony_ci      query->begin_result = sctx->tc ? sctx->tc->num_syncs : 0;
175bf215546Sopenharmony_ci      break;
176bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_VRAM:
177bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_GTT:
178bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_VRAM:
179bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_GTT:
180bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_VRAM:
181bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_GTT:
182bf215546Sopenharmony_ci   case SI_QUERY_VRAM_USAGE:
183bf215546Sopenharmony_ci   case SI_QUERY_VRAM_VIS_USAGE:
184bf215546Sopenharmony_ci   case SI_QUERY_GTT_USAGE:
185bf215546Sopenharmony_ci   case SI_QUERY_GPU_TEMPERATURE:
186bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_SCLK:
187bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_MCLK:
188bf215546Sopenharmony_ci   case SI_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
189bf215546Sopenharmony_ci   case SI_QUERY_NUM_MAPPED_BUFFERS:
190bf215546Sopenharmony_ci      query->begin_result = 0;
191bf215546Sopenharmony_ci      break;
192bf215546Sopenharmony_ci   case SI_QUERY_BUFFER_WAIT_TIME:
193bf215546Sopenharmony_ci   case SI_QUERY_GFX_IB_SIZE:
194bf215546Sopenharmony_ci   case SI_QUERY_NUM_GFX_IBS:
195bf215546Sopenharmony_ci   case SI_QUERY_NUM_BYTES_MOVED:
196bf215546Sopenharmony_ci   case SI_QUERY_NUM_EVICTIONS:
197bf215546Sopenharmony_ci   case SI_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
198bf215546Sopenharmony_ci      enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
199bf215546Sopenharmony_ci      query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
200bf215546Sopenharmony_ci      break;
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci   case SI_QUERY_GFX_BO_LIST_SIZE:
203bf215546Sopenharmony_ci      ws_id = winsys_id_from_type(query->b.type);
204bf215546Sopenharmony_ci      query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
205bf215546Sopenharmony_ci      query->begin_time = sctx->ws->query_value(sctx->ws, RADEON_NUM_GFX_IBS);
206bf215546Sopenharmony_ci      break;
207bf215546Sopenharmony_ci   case SI_QUERY_CS_THREAD_BUSY:
208bf215546Sopenharmony_ci      ws_id = winsys_id_from_type(query->b.type);
209bf215546Sopenharmony_ci      query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
210bf215546Sopenharmony_ci      query->begin_time = os_time_get_nano();
211bf215546Sopenharmony_ci      break;
212bf215546Sopenharmony_ci   case SI_QUERY_GALLIUM_THREAD_BUSY:
213bf215546Sopenharmony_ci      query->begin_result = sctx->tc ? util_queue_get_thread_time_nano(&sctx->tc->queue, 0) : 0;
214bf215546Sopenharmony_ci      query->begin_time = os_time_get_nano();
215bf215546Sopenharmony_ci      break;
216bf215546Sopenharmony_ci   case SI_QUERY_GPU_LOAD:
217bf215546Sopenharmony_ci   case SI_QUERY_GPU_SHADERS_BUSY:
218bf215546Sopenharmony_ci   case SI_QUERY_GPU_TA_BUSY:
219bf215546Sopenharmony_ci   case SI_QUERY_GPU_GDS_BUSY:
220bf215546Sopenharmony_ci   case SI_QUERY_GPU_VGT_BUSY:
221bf215546Sopenharmony_ci   case SI_QUERY_GPU_IA_BUSY:
222bf215546Sopenharmony_ci   case SI_QUERY_GPU_SX_BUSY:
223bf215546Sopenharmony_ci   case SI_QUERY_GPU_WD_BUSY:
224bf215546Sopenharmony_ci   case SI_QUERY_GPU_BCI_BUSY:
225bf215546Sopenharmony_ci   case SI_QUERY_GPU_SC_BUSY:
226bf215546Sopenharmony_ci   case SI_QUERY_GPU_PA_BUSY:
227bf215546Sopenharmony_ci   case SI_QUERY_GPU_DB_BUSY:
228bf215546Sopenharmony_ci   case SI_QUERY_GPU_CP_BUSY:
229bf215546Sopenharmony_ci   case SI_QUERY_GPU_CB_BUSY:
230bf215546Sopenharmony_ci   case SI_QUERY_GPU_SDMA_BUSY:
231bf215546Sopenharmony_ci   case SI_QUERY_GPU_PFP_BUSY:
232bf215546Sopenharmony_ci   case SI_QUERY_GPU_MEQ_BUSY:
233bf215546Sopenharmony_ci   case SI_QUERY_GPU_ME_BUSY:
234bf215546Sopenharmony_ci   case SI_QUERY_GPU_SURF_SYNC_BUSY:
235bf215546Sopenharmony_ci   case SI_QUERY_GPU_CP_DMA_BUSY:
236bf215546Sopenharmony_ci   case SI_QUERY_GPU_SCRATCH_RAM_BUSY:
237bf215546Sopenharmony_ci      query->begin_result = si_begin_counter(sctx->screen, query->b.type);
238bf215546Sopenharmony_ci      break;
239bf215546Sopenharmony_ci   case SI_QUERY_NUM_COMPILATIONS:
240bf215546Sopenharmony_ci      query->begin_result = p_atomic_read(&sctx->screen->num_compilations);
241bf215546Sopenharmony_ci      break;
242bf215546Sopenharmony_ci   case SI_QUERY_NUM_SHADERS_CREATED:
243bf215546Sopenharmony_ci      query->begin_result = p_atomic_read(&sctx->screen->num_shaders_created);
244bf215546Sopenharmony_ci      break;
245bf215546Sopenharmony_ci   case SI_QUERY_LIVE_SHADER_CACHE_HITS:
246bf215546Sopenharmony_ci      query->begin_result = sctx->screen->live_shader_cache.hits;
247bf215546Sopenharmony_ci      break;
248bf215546Sopenharmony_ci   case SI_QUERY_LIVE_SHADER_CACHE_MISSES:
249bf215546Sopenharmony_ci      query->begin_result = sctx->screen->live_shader_cache.misses;
250bf215546Sopenharmony_ci      break;
251bf215546Sopenharmony_ci   case SI_QUERY_MEMORY_SHADER_CACHE_HITS:
252bf215546Sopenharmony_ci      query->begin_result = sctx->screen->num_memory_shader_cache_hits;
253bf215546Sopenharmony_ci      break;
254bf215546Sopenharmony_ci   case SI_QUERY_MEMORY_SHADER_CACHE_MISSES:
255bf215546Sopenharmony_ci      query->begin_result = sctx->screen->num_memory_shader_cache_misses;
256bf215546Sopenharmony_ci      break;
257bf215546Sopenharmony_ci   case SI_QUERY_DISK_SHADER_CACHE_HITS:
258bf215546Sopenharmony_ci      query->begin_result = sctx->screen->num_disk_shader_cache_hits;
259bf215546Sopenharmony_ci      break;
260bf215546Sopenharmony_ci   case SI_QUERY_DISK_SHADER_CACHE_MISSES:
261bf215546Sopenharmony_ci      query->begin_result = sctx->screen->num_disk_shader_cache_misses;
262bf215546Sopenharmony_ci      break;
263bf215546Sopenharmony_ci   case SI_QUERY_GPIN_ASIC_ID:
264bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SIMD:
265bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_RB:
266bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SPI:
267bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SE:
268bf215546Sopenharmony_ci      break;
269bf215546Sopenharmony_ci   default:
270bf215546Sopenharmony_ci      unreachable("si_query_sw_begin: bad query type");
271bf215546Sopenharmony_ci   }
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   return true;
274bf215546Sopenharmony_ci}
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_cistatic bool si_query_sw_end(struct si_context *sctx, struct si_query *squery)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   struct si_query_sw *query = (struct si_query_sw *)squery;
279bf215546Sopenharmony_ci   enum radeon_value_id ws_id;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   switch (query->b.type) {
282bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP_DISJOINT:
283bf215546Sopenharmony_ci      break;
284bf215546Sopenharmony_ci   case PIPE_QUERY_GPU_FINISHED:
285bf215546Sopenharmony_ci      sctx->b.flush(&sctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
286bf215546Sopenharmony_ci      break;
287bf215546Sopenharmony_ci   case SI_QUERY_DRAW_CALLS:
288bf215546Sopenharmony_ci      query->end_result = sctx->num_draw_calls;
289bf215546Sopenharmony_ci      break;
290bf215546Sopenharmony_ci   case SI_QUERY_DECOMPRESS_CALLS:
291bf215546Sopenharmony_ci      query->end_result = sctx->num_decompress_calls;
292bf215546Sopenharmony_ci      break;
293bf215546Sopenharmony_ci   case SI_QUERY_PRIM_RESTART_CALLS:
294bf215546Sopenharmony_ci      query->end_result = sctx->num_prim_restart_calls;
295bf215546Sopenharmony_ci      break;
296bf215546Sopenharmony_ci   case SI_QUERY_COMPUTE_CALLS:
297bf215546Sopenharmony_ci      query->end_result = sctx->num_compute_calls;
298bf215546Sopenharmony_ci      break;
299bf215546Sopenharmony_ci   case SI_QUERY_CP_DMA_CALLS:
300bf215546Sopenharmony_ci      query->end_result = sctx->num_cp_dma_calls;
301bf215546Sopenharmony_ci      break;
302bf215546Sopenharmony_ci   case SI_QUERY_NUM_VS_FLUSHES:
303bf215546Sopenharmony_ci      query->end_result = sctx->num_vs_flushes;
304bf215546Sopenharmony_ci      break;
305bf215546Sopenharmony_ci   case SI_QUERY_NUM_PS_FLUSHES:
306bf215546Sopenharmony_ci      query->end_result = sctx->num_ps_flushes;
307bf215546Sopenharmony_ci      break;
308bf215546Sopenharmony_ci   case SI_QUERY_NUM_CS_FLUSHES:
309bf215546Sopenharmony_ci      query->end_result = sctx->num_cs_flushes;
310bf215546Sopenharmony_ci      break;
311bf215546Sopenharmony_ci   case SI_QUERY_NUM_CB_CACHE_FLUSHES:
312bf215546Sopenharmony_ci      query->end_result = sctx->num_cb_cache_flushes;
313bf215546Sopenharmony_ci      break;
314bf215546Sopenharmony_ci   case SI_QUERY_NUM_DB_CACHE_FLUSHES:
315bf215546Sopenharmony_ci      query->end_result = sctx->num_db_cache_flushes;
316bf215546Sopenharmony_ci      break;
317bf215546Sopenharmony_ci   case SI_QUERY_NUM_L2_INVALIDATES:
318bf215546Sopenharmony_ci      query->end_result = sctx->num_L2_invalidates;
319bf215546Sopenharmony_ci      break;
320bf215546Sopenharmony_ci   case SI_QUERY_NUM_L2_WRITEBACKS:
321bf215546Sopenharmony_ci      query->end_result = sctx->num_L2_writebacks;
322bf215546Sopenharmony_ci      break;
323bf215546Sopenharmony_ci   case SI_QUERY_NUM_RESIDENT_HANDLES:
324bf215546Sopenharmony_ci      query->end_result = sctx->num_resident_handles;
325bf215546Sopenharmony_ci      break;
326bf215546Sopenharmony_ci   case SI_QUERY_TC_OFFLOADED_SLOTS:
327bf215546Sopenharmony_ci      query->end_result = sctx->tc ? sctx->tc->num_offloaded_slots : 0;
328bf215546Sopenharmony_ci      break;
329bf215546Sopenharmony_ci   case SI_QUERY_TC_DIRECT_SLOTS:
330bf215546Sopenharmony_ci      query->end_result = sctx->tc ? sctx->tc->num_direct_slots : 0;
331bf215546Sopenharmony_ci      break;
332bf215546Sopenharmony_ci   case SI_QUERY_TC_NUM_SYNCS:
333bf215546Sopenharmony_ci      query->end_result = sctx->tc ? sctx->tc->num_syncs : 0;
334bf215546Sopenharmony_ci      break;
335bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_VRAM:
336bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_GTT:
337bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_VRAM:
338bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_GTT:
339bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_VRAM:
340bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_GTT:
341bf215546Sopenharmony_ci   case SI_QUERY_VRAM_USAGE:
342bf215546Sopenharmony_ci   case SI_QUERY_VRAM_VIS_USAGE:
343bf215546Sopenharmony_ci   case SI_QUERY_GTT_USAGE:
344bf215546Sopenharmony_ci   case SI_QUERY_GPU_TEMPERATURE:
345bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_SCLK:
346bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_MCLK:
347bf215546Sopenharmony_ci   case SI_QUERY_BUFFER_WAIT_TIME:
348bf215546Sopenharmony_ci   case SI_QUERY_GFX_IB_SIZE:
349bf215546Sopenharmony_ci   case SI_QUERY_NUM_MAPPED_BUFFERS:
350bf215546Sopenharmony_ci   case SI_QUERY_NUM_GFX_IBS:
351bf215546Sopenharmony_ci   case SI_QUERY_NUM_BYTES_MOVED:
352bf215546Sopenharmony_ci   case SI_QUERY_NUM_EVICTIONS:
353bf215546Sopenharmony_ci   case SI_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
354bf215546Sopenharmony_ci      enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
355bf215546Sopenharmony_ci      query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
356bf215546Sopenharmony_ci      break;
357bf215546Sopenharmony_ci   }
358bf215546Sopenharmony_ci   case SI_QUERY_GFX_BO_LIST_SIZE:
359bf215546Sopenharmony_ci      ws_id = winsys_id_from_type(query->b.type);
360bf215546Sopenharmony_ci      query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
361bf215546Sopenharmony_ci      query->end_time = sctx->ws->query_value(sctx->ws, RADEON_NUM_GFX_IBS);
362bf215546Sopenharmony_ci      break;
363bf215546Sopenharmony_ci   case SI_QUERY_CS_THREAD_BUSY:
364bf215546Sopenharmony_ci      ws_id = winsys_id_from_type(query->b.type);
365bf215546Sopenharmony_ci      query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
366bf215546Sopenharmony_ci      query->end_time = os_time_get_nano();
367bf215546Sopenharmony_ci      break;
368bf215546Sopenharmony_ci   case SI_QUERY_GALLIUM_THREAD_BUSY:
369bf215546Sopenharmony_ci      query->end_result = sctx->tc ? util_queue_get_thread_time_nano(&sctx->tc->queue, 0) : 0;
370bf215546Sopenharmony_ci      query->end_time = os_time_get_nano();
371bf215546Sopenharmony_ci      break;
372bf215546Sopenharmony_ci   case SI_QUERY_GPU_LOAD:
373bf215546Sopenharmony_ci   case SI_QUERY_GPU_SHADERS_BUSY:
374bf215546Sopenharmony_ci   case SI_QUERY_GPU_TA_BUSY:
375bf215546Sopenharmony_ci   case SI_QUERY_GPU_GDS_BUSY:
376bf215546Sopenharmony_ci   case SI_QUERY_GPU_VGT_BUSY:
377bf215546Sopenharmony_ci   case SI_QUERY_GPU_IA_BUSY:
378bf215546Sopenharmony_ci   case SI_QUERY_GPU_SX_BUSY:
379bf215546Sopenharmony_ci   case SI_QUERY_GPU_WD_BUSY:
380bf215546Sopenharmony_ci   case SI_QUERY_GPU_BCI_BUSY:
381bf215546Sopenharmony_ci   case SI_QUERY_GPU_SC_BUSY:
382bf215546Sopenharmony_ci   case SI_QUERY_GPU_PA_BUSY:
383bf215546Sopenharmony_ci   case SI_QUERY_GPU_DB_BUSY:
384bf215546Sopenharmony_ci   case SI_QUERY_GPU_CP_BUSY:
385bf215546Sopenharmony_ci   case SI_QUERY_GPU_CB_BUSY:
386bf215546Sopenharmony_ci   case SI_QUERY_GPU_SDMA_BUSY:
387bf215546Sopenharmony_ci   case SI_QUERY_GPU_PFP_BUSY:
388bf215546Sopenharmony_ci   case SI_QUERY_GPU_MEQ_BUSY:
389bf215546Sopenharmony_ci   case SI_QUERY_GPU_ME_BUSY:
390bf215546Sopenharmony_ci   case SI_QUERY_GPU_SURF_SYNC_BUSY:
391bf215546Sopenharmony_ci   case SI_QUERY_GPU_CP_DMA_BUSY:
392bf215546Sopenharmony_ci   case SI_QUERY_GPU_SCRATCH_RAM_BUSY:
393bf215546Sopenharmony_ci      query->end_result = si_end_counter(sctx->screen, query->b.type, query->begin_result);
394bf215546Sopenharmony_ci      query->begin_result = 0;
395bf215546Sopenharmony_ci      break;
396bf215546Sopenharmony_ci   case SI_QUERY_NUM_COMPILATIONS:
397bf215546Sopenharmony_ci      query->end_result = p_atomic_read(&sctx->screen->num_compilations);
398bf215546Sopenharmony_ci      break;
399bf215546Sopenharmony_ci   case SI_QUERY_NUM_SHADERS_CREATED:
400bf215546Sopenharmony_ci      query->end_result = p_atomic_read(&sctx->screen->num_shaders_created);
401bf215546Sopenharmony_ci      break;
402bf215546Sopenharmony_ci   case SI_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
403bf215546Sopenharmony_ci      query->end_result = sctx->last_tex_ps_draw_ratio;
404bf215546Sopenharmony_ci      break;
405bf215546Sopenharmony_ci   case SI_QUERY_LIVE_SHADER_CACHE_HITS:
406bf215546Sopenharmony_ci      query->end_result = sctx->screen->live_shader_cache.hits;
407bf215546Sopenharmony_ci      break;
408bf215546Sopenharmony_ci   case SI_QUERY_LIVE_SHADER_CACHE_MISSES:
409bf215546Sopenharmony_ci      query->end_result = sctx->screen->live_shader_cache.misses;
410bf215546Sopenharmony_ci      break;
411bf215546Sopenharmony_ci   case SI_QUERY_MEMORY_SHADER_CACHE_HITS:
412bf215546Sopenharmony_ci      query->end_result = sctx->screen->num_memory_shader_cache_hits;
413bf215546Sopenharmony_ci      break;
414bf215546Sopenharmony_ci   case SI_QUERY_MEMORY_SHADER_CACHE_MISSES:
415bf215546Sopenharmony_ci      query->end_result = sctx->screen->num_memory_shader_cache_misses;
416bf215546Sopenharmony_ci      break;
417bf215546Sopenharmony_ci   case SI_QUERY_DISK_SHADER_CACHE_HITS:
418bf215546Sopenharmony_ci      query->end_result = sctx->screen->num_disk_shader_cache_hits;
419bf215546Sopenharmony_ci      break;
420bf215546Sopenharmony_ci   case SI_QUERY_DISK_SHADER_CACHE_MISSES:
421bf215546Sopenharmony_ci      query->end_result = sctx->screen->num_disk_shader_cache_misses;
422bf215546Sopenharmony_ci      break;
423bf215546Sopenharmony_ci   case SI_QUERY_GPIN_ASIC_ID:
424bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SIMD:
425bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_RB:
426bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SPI:
427bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SE:
428bf215546Sopenharmony_ci      break;
429bf215546Sopenharmony_ci   default:
430bf215546Sopenharmony_ci      unreachable("si_query_sw_end: bad query type");
431bf215546Sopenharmony_ci   }
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci   return true;
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_cistatic bool si_query_sw_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
437bf215546Sopenharmony_ci                                   union pipe_query_result *result)
438bf215546Sopenharmony_ci{
439bf215546Sopenharmony_ci   struct si_query_sw *query = (struct si_query_sw *)squery;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   switch (query->b.type) {
442bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP_DISJOINT:
443bf215546Sopenharmony_ci      /* Convert from cycles per millisecond to cycles per second (Hz). */
444bf215546Sopenharmony_ci      result->timestamp_disjoint.frequency = (uint64_t)sctx->screen->info.clock_crystal_freq * 1000;
445bf215546Sopenharmony_ci      result->timestamp_disjoint.disjoint = false;
446bf215546Sopenharmony_ci      return true;
447bf215546Sopenharmony_ci   case PIPE_QUERY_GPU_FINISHED: {
448bf215546Sopenharmony_ci      struct pipe_screen *screen = sctx->b.screen;
449bf215546Sopenharmony_ci      struct pipe_context *ctx = squery->b.flushed ? NULL : &sctx->b;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci      result->b = screen->fence_finish(screen, ctx, query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0);
452bf215546Sopenharmony_ci      return result->b;
453bf215546Sopenharmony_ci   }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   case SI_QUERY_GFX_BO_LIST_SIZE:
456bf215546Sopenharmony_ci      result->u64 =
457bf215546Sopenharmony_ci         (query->end_result - query->begin_result) / (query->end_time - query->begin_time);
458bf215546Sopenharmony_ci      return true;
459bf215546Sopenharmony_ci   case SI_QUERY_CS_THREAD_BUSY:
460bf215546Sopenharmony_ci   case SI_QUERY_GALLIUM_THREAD_BUSY:
461bf215546Sopenharmony_ci      result->u64 =
462bf215546Sopenharmony_ci         (query->end_result - query->begin_result) * 100 / (query->end_time - query->begin_time);
463bf215546Sopenharmony_ci      return true;
464bf215546Sopenharmony_ci   case SI_QUERY_GPIN_ASIC_ID:
465bf215546Sopenharmony_ci      result->u32 = 0;
466bf215546Sopenharmony_ci      return true;
467bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SIMD:
468bf215546Sopenharmony_ci      result->u32 = sctx->screen->info.num_cu;
469bf215546Sopenharmony_ci      return true;
470bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_RB:
471bf215546Sopenharmony_ci      result->u32 = sctx->screen->info.max_render_backends;
472bf215546Sopenharmony_ci      return true;
473bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SPI:
474bf215546Sopenharmony_ci      result->u32 = 1; /* all supported chips have one SPI per SE */
475bf215546Sopenharmony_ci      return true;
476bf215546Sopenharmony_ci   case SI_QUERY_GPIN_NUM_SE:
477bf215546Sopenharmony_ci      result->u32 = sctx->screen->info.max_se;
478bf215546Sopenharmony_ci      return true;
479bf215546Sopenharmony_ci   }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   result->u64 = query->end_result - query->begin_result;
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci   switch (query->b.type) {
484bf215546Sopenharmony_ci   case SI_QUERY_BUFFER_WAIT_TIME:
485bf215546Sopenharmony_ci   case SI_QUERY_GPU_TEMPERATURE:
486bf215546Sopenharmony_ci      result->u64 /= 1000;
487bf215546Sopenharmony_ci      break;
488bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_SCLK:
489bf215546Sopenharmony_ci   case SI_QUERY_CURRENT_GPU_MCLK:
490bf215546Sopenharmony_ci      result->u64 *= 1000000;
491bf215546Sopenharmony_ci      break;
492bf215546Sopenharmony_ci   }
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   return true;
495bf215546Sopenharmony_ci}
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_cistatic const struct si_query_ops sw_query_ops = {.destroy = si_query_sw_destroy,
498bf215546Sopenharmony_ci                                                 .begin = si_query_sw_begin,
499bf215546Sopenharmony_ci                                                 .end = si_query_sw_end,
500bf215546Sopenharmony_ci                                                 .get_result = si_query_sw_get_result,
501bf215546Sopenharmony_ci                                                 .get_result_resource = NULL};
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_cistatic struct pipe_query *si_query_sw_create(unsigned query_type)
504bf215546Sopenharmony_ci{
505bf215546Sopenharmony_ci   struct si_query_sw *query;
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci   query = CALLOC_STRUCT(si_query_sw);
508bf215546Sopenharmony_ci   if (!query)
509bf215546Sopenharmony_ci      return NULL;
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   query->b.type = query_type;
512bf215546Sopenharmony_ci   query->b.ops = &sw_query_ops;
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   return (struct pipe_query *)query;
515bf215546Sopenharmony_ci}
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_civoid si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer)
518bf215546Sopenharmony_ci{
519bf215546Sopenharmony_ci   struct si_query_buffer *prev = buffer->previous;
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci   /* Release all query buffers. */
522bf215546Sopenharmony_ci   while (prev) {
523bf215546Sopenharmony_ci      struct si_query_buffer *qbuf = prev;
524bf215546Sopenharmony_ci      prev = prev->previous;
525bf215546Sopenharmony_ci      si_resource_reference(&qbuf->buf, NULL);
526bf215546Sopenharmony_ci      FREE(qbuf);
527bf215546Sopenharmony_ci   }
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci   si_resource_reference(&buffer->buf, NULL);
530bf215546Sopenharmony_ci}
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_civoid si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer)
533bf215546Sopenharmony_ci{
534bf215546Sopenharmony_ci   /* Discard all query buffers except for the oldest. */
535bf215546Sopenharmony_ci   while (buffer->previous) {
536bf215546Sopenharmony_ci      struct si_query_buffer *qbuf = buffer->previous;
537bf215546Sopenharmony_ci      buffer->previous = qbuf->previous;
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci      si_resource_reference(&buffer->buf, NULL);
540bf215546Sopenharmony_ci      buffer->buf = qbuf->buf; /* move ownership */
541bf215546Sopenharmony_ci      FREE(qbuf);
542bf215546Sopenharmony_ci   }
543bf215546Sopenharmony_ci   buffer->results_end = 0;
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   if (!buffer->buf)
546bf215546Sopenharmony_ci      return;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci   /* Discard even the oldest buffer if it can't be mapped without a stall. */
549bf215546Sopenharmony_ci   if (si_cs_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
550bf215546Sopenharmony_ci       !sctx->ws->buffer_wait(sctx->ws, buffer->buf->buf, 0, RADEON_USAGE_READWRITE)) {
551bf215546Sopenharmony_ci      si_resource_reference(&buffer->buf, NULL);
552bf215546Sopenharmony_ci   } else {
553bf215546Sopenharmony_ci      buffer->unprepared = true;
554bf215546Sopenharmony_ci   }
555bf215546Sopenharmony_ci}
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_cibool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
558bf215546Sopenharmony_ci                           bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *),
559bf215546Sopenharmony_ci                           unsigned size)
560bf215546Sopenharmony_ci{
561bf215546Sopenharmony_ci   bool unprepared = buffer->unprepared;
562bf215546Sopenharmony_ci   buffer->unprepared = false;
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci   if (!buffer->buf || buffer->results_end + size > buffer->buf->b.b.width0) {
565bf215546Sopenharmony_ci      if (buffer->buf) {
566bf215546Sopenharmony_ci         struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
567bf215546Sopenharmony_ci         memcpy(qbuf, buffer, sizeof(*qbuf));
568bf215546Sopenharmony_ci         buffer->previous = qbuf;
569bf215546Sopenharmony_ci      }
570bf215546Sopenharmony_ci      buffer->results_end = 0;
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci      /* Queries are normally read by the CPU after
573bf215546Sopenharmony_ci       * being written by the gpu, hence staging is probably a good
574bf215546Sopenharmony_ci       * usage pattern.
575bf215546Sopenharmony_ci       */
576bf215546Sopenharmony_ci      struct si_screen *screen = sctx->screen;
577bf215546Sopenharmony_ci      unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
578bf215546Sopenharmony_ci      buffer->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
579bf215546Sopenharmony_ci      if (unlikely(!buffer->buf))
580bf215546Sopenharmony_ci         return false;
581bf215546Sopenharmony_ci      unprepared = true;
582bf215546Sopenharmony_ci   }
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   if (unprepared && prepare_buffer) {
585bf215546Sopenharmony_ci      if (unlikely(!prepare_buffer(sctx, buffer))) {
586bf215546Sopenharmony_ci         si_resource_reference(&buffer->buf, NULL);
587bf215546Sopenharmony_ci         return false;
588bf215546Sopenharmony_ci      }
589bf215546Sopenharmony_ci   }
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   return true;
592bf215546Sopenharmony_ci}
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_civoid si_query_hw_destroy(struct si_context *sctx, struct si_query *squery)
595bf215546Sopenharmony_ci{
596bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)squery;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   si_query_buffer_destroy(sctx->screen, &query->buffer);
599bf215546Sopenharmony_ci   si_resource_reference(&query->workaround_buf, NULL);
600bf215546Sopenharmony_ci   FREE(squery);
601bf215546Sopenharmony_ci}
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_cistatic bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_buffer *qbuf)
604bf215546Sopenharmony_ci{
605bf215546Sopenharmony_ci   struct si_query_hw *query = container_of(qbuf, struct si_query_hw, buffer);
606bf215546Sopenharmony_ci   struct si_screen *screen = sctx->screen;
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   /* The caller ensures that the buffer is currently unused by the GPU. */
609bf215546Sopenharmony_ci   uint32_t *results = screen->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
610bf215546Sopenharmony_ci                                              PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
611bf215546Sopenharmony_ci   if (!results)
612bf215546Sopenharmony_ci      return false;
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   memset(results, 0, qbuf->buf->b.b.width0);
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
617bf215546Sopenharmony_ci       query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
618bf215546Sopenharmony_ci       query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
619bf215546Sopenharmony_ci      unsigned max_rbs = screen->info.max_render_backends;
620bf215546Sopenharmony_ci      unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
621bf215546Sopenharmony_ci      unsigned num_results;
622bf215546Sopenharmony_ci      unsigned i, j;
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci      /* Set top bits for unused backends. */
625bf215546Sopenharmony_ci      num_results = qbuf->buf->b.b.width0 / query->result_size;
626bf215546Sopenharmony_ci      for (j = 0; j < num_results; j++) {
627bf215546Sopenharmony_ci         for (i = 0; i < max_rbs; i++) {
628bf215546Sopenharmony_ci            if (!(enabled_rb_mask & (1 << i))) {
629bf215546Sopenharmony_ci               results[(i * 4) + 1] = 0x80000000;
630bf215546Sopenharmony_ci               results[(i * 4) + 3] = 0x80000000;
631bf215546Sopenharmony_ci            }
632bf215546Sopenharmony_ci         }
633bf215546Sopenharmony_ci         results += 4 * max_rbs;
634bf215546Sopenharmony_ci      }
635bf215546Sopenharmony_ci   }
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci   return true;
638bf215546Sopenharmony_ci}
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_cistatic unsigned si_query_pipestats_num_results(struct si_screen *sscreen)
641bf215546Sopenharmony_ci{
642bf215546Sopenharmony_ci   return sscreen->info.gfx_level >= GFX11 ? 14 : 11;
643bf215546Sopenharmony_ci}
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_cistatic unsigned si_query_pipestat_dw_offset(enum pipe_statistics_query_index index)
646bf215546Sopenharmony_ci{
647bf215546Sopenharmony_ci   switch (index) {
648bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
649bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
650bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
651bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
652bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
653bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
654bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
655bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_IA_VERTICES: return 14;
656bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
657bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
658bf215546Sopenharmony_ci   case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
659bf215546Sopenharmony_ci   /* gfx11: MS_INVOCATIONS */
660bf215546Sopenharmony_ci   /* gfx11: MS_PRIMITIVES */
661bf215546Sopenharmony_ci   /* gfx11: TS_INVOCATIONS */
662bf215546Sopenharmony_ci   default:
663bf215546Sopenharmony_ci      assert(false);
664bf215546Sopenharmony_ci   }
665bf215546Sopenharmony_ci   return ~0;
666bf215546Sopenharmony_ci}
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ciunsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
669bf215546Sopenharmony_ci                                         enum pipe_statistics_query_index index)
670bf215546Sopenharmony_ci{
671bf215546Sopenharmony_ci   return si_query_pipestats_num_results(sscreen) * 2 + si_query_pipestat_dw_offset(index);
672bf215546Sopenharmony_ci}
673bf215546Sopenharmony_ci
/* Forward declarations for hardware-query helpers used before their
 * definitions.
 */

/* Default start/stop packet emitters. */
static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query,
                                      struct si_resource *buffer, uint64_t va);
static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query,
                                     struct si_resource *buffer, uint64_t va);

/* Default result handling. */
static void si_query_hw_clear_result(struct si_query_hw *query, union pipe_query_result *result);
static void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw *query,
                                   void *buffer, union pipe_query_result *result);

static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *squery,
                                            enum pipe_query_flags flags,
                                            enum pipe_query_value_type result_type, int index,
                                            struct pipe_resource *resource, unsigned offset);
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_cistatic struct si_query_hw_ops query_hw_default_hw_ops = {
689bf215546Sopenharmony_ci   .prepare_buffer = si_query_hw_prepare_buffer,
690bf215546Sopenharmony_ci   .emit_start = si_query_hw_do_emit_start,
691bf215546Sopenharmony_ci   .emit_stop = si_query_hw_do_emit_stop,
692bf215546Sopenharmony_ci   .clear_result = si_query_hw_clear_result,
693bf215546Sopenharmony_ci   .add_result = si_query_hw_add_result,
694bf215546Sopenharmony_ci};
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_cistatic struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned query_type,
697bf215546Sopenharmony_ci                                             unsigned index)
698bf215546Sopenharmony_ci{
699bf215546Sopenharmony_ci   struct si_query_hw *query = CALLOC_STRUCT(si_query_hw);
700bf215546Sopenharmony_ci   if (!query)
701bf215546Sopenharmony_ci      return NULL;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   query->b.type = query_type;
704bf215546Sopenharmony_ci   query->b.ops = &query_hw_ops;
705bf215546Sopenharmony_ci   query->ops = &query_hw_default_hw_ops;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci   switch (query_type) {
708bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
709bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
710bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
711bf215546Sopenharmony_ci      query->result_size = 16 * sscreen->info.max_render_backends;
712bf215546Sopenharmony_ci      query->result_size += 16; /* for the fence + alignment */
713bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
714bf215546Sopenharmony_ci      break;
715bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
716bf215546Sopenharmony_ci      query->result_size = 24;
717bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
718bf215546Sopenharmony_ci      break;
719bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP:
720bf215546Sopenharmony_ci      query->result_size = 16;
721bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
722bf215546Sopenharmony_ci      query->flags = SI_QUERY_HW_FLAG_NO_START;
723bf215546Sopenharmony_ci      break;
724bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
725bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
726bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
727bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
728bf215546Sopenharmony_ci      /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
729bf215546Sopenharmony_ci      query->result_size = 32;
730bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 6;
731bf215546Sopenharmony_ci      query->stream = index;
732bf215546Sopenharmony_ci      break;
733bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
734bf215546Sopenharmony_ci      /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
735bf215546Sopenharmony_ci      query->result_size = 32 * SI_MAX_STREAMS;
736bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
737bf215546Sopenharmony_ci      break;
738bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS:
739bf215546Sopenharmony_ci      query->result_size = si_query_pipestats_num_results(sscreen) * 16;
740bf215546Sopenharmony_ci      query->result_size += 8; /* for the fence + alignment */
741bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
742bf215546Sopenharmony_ci      query->index = index;
743bf215546Sopenharmony_ci      if ((index == PIPE_STAT_QUERY_GS_PRIMITIVES || index == PIPE_STAT_QUERY_GS_INVOCATIONS) &&
744bf215546Sopenharmony_ci          sscreen->use_ngg && (sscreen->info.gfx_level >= GFX10 && sscreen->info.gfx_level <= GFX10_3))
745bf215546Sopenharmony_ci         query->flags |= SI_QUERY_EMULATE_GS_COUNTERS;
746bf215546Sopenharmony_ci      break;
747bf215546Sopenharmony_ci   default:
748bf215546Sopenharmony_ci      assert(0);
749bf215546Sopenharmony_ci      FREE(query);
750bf215546Sopenharmony_ci      return NULL;
751bf215546Sopenharmony_ci   }
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   return (struct pipe_query *)query;
754bf215546Sopenharmony_ci}
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_cistatic void si_update_occlusion_query_state(struct si_context *sctx, unsigned type, int diff)
757bf215546Sopenharmony_ci{
758bf215546Sopenharmony_ci   if (type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE ||
759bf215546Sopenharmony_ci       type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
760bf215546Sopenharmony_ci      bool old_enable = sctx->num_occlusion_queries != 0;
761bf215546Sopenharmony_ci      bool old_perfect_enable = sctx->num_perfect_occlusion_queries != 0;
762bf215546Sopenharmony_ci      bool enable, perfect_enable;
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci      sctx->num_occlusion_queries += diff;
765bf215546Sopenharmony_ci      assert(sctx->num_occlusion_queries >= 0);
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci      if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
768bf215546Sopenharmony_ci         sctx->num_perfect_occlusion_queries += diff;
769bf215546Sopenharmony_ci         assert(sctx->num_perfect_occlusion_queries >= 0);
770bf215546Sopenharmony_ci      }
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci      enable = sctx->num_occlusion_queries != 0;
773bf215546Sopenharmony_ci      perfect_enable = sctx->num_perfect_occlusion_queries != 0;
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci      if (enable != old_enable || perfect_enable != old_perfect_enable) {
776bf215546Sopenharmony_ci         si_set_occlusion_query_state(sctx, old_perfect_enable);
777bf215546Sopenharmony_ci      }
778bf215546Sopenharmony_ci   }
779bf215546Sopenharmony_ci}
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_cistatic unsigned event_type_for_stream(unsigned stream)
782bf215546Sopenharmony_ci{
783bf215546Sopenharmony_ci   switch (stream) {
784bf215546Sopenharmony_ci   default:
785bf215546Sopenharmony_ci   case 0:
786bf215546Sopenharmony_ci      return V_028A90_SAMPLE_STREAMOUTSTATS;
787bf215546Sopenharmony_ci   case 1:
788bf215546Sopenharmony_ci      return V_028A90_SAMPLE_STREAMOUTSTATS1;
789bf215546Sopenharmony_ci   case 2:
790bf215546Sopenharmony_ci      return V_028A90_SAMPLE_STREAMOUTSTATS2;
791bf215546Sopenharmony_ci   case 3:
792bf215546Sopenharmony_ci      return V_028A90_SAMPLE_STREAMOUTSTATS3;
793bf215546Sopenharmony_ci   }
794bf215546Sopenharmony_ci}
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_cistatic void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, unsigned stream)
797bf215546Sopenharmony_ci{
798bf215546Sopenharmony_ci   radeon_begin(cs);
799bf215546Sopenharmony_ci   radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
800bf215546Sopenharmony_ci   radeon_emit(EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
801bf215546Sopenharmony_ci   radeon_emit(va);
802bf215546Sopenharmony_ci   radeon_emit(va >> 32);
803bf215546Sopenharmony_ci   radeon_end();
804bf215546Sopenharmony_ci}
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_cistatic void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query,
807bf215546Sopenharmony_ci                                      struct si_resource *buffer, uint64_t va)
808bf215546Sopenharmony_ci{
809bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci   switch (query->b.type) {
812bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
813bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
814bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
815bf215546Sopenharmony_ci      radeon_begin(cs);
816bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX11) {
817bf215546Sopenharmony_ci         uint64_t rb_mask = BITFIELD64_MASK(sctx->screen->info.max_render_backends);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
820bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
821bf215546Sopenharmony_ci         radeon_emit(PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
822bf215546Sopenharmony_ci                     PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
823bf215546Sopenharmony_ci                     PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
824bf215546Sopenharmony_ci         radeon_emit(PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
825bf215546Sopenharmony_ci      }
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
828bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX11)
829bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
830bf215546Sopenharmony_ci      else
831bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
832bf215546Sopenharmony_ci      radeon_emit(va);
833bf215546Sopenharmony_ci      radeon_emit(va >> 32);
834bf215546Sopenharmony_ci      radeon_end();
835bf215546Sopenharmony_ci      break;
836bf215546Sopenharmony_ci   }
837bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
838bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
839bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
840bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
841bf215546Sopenharmony_ci      emit_sample_streamout(cs, va, query->stream);
842bf215546Sopenharmony_ci      break;
843bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
844bf215546Sopenharmony_ci      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
845bf215546Sopenharmony_ci         emit_sample_streamout(cs, va + 32 * stream, stream);
846bf215546Sopenharmony_ci      break;
847bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
848bf215546Sopenharmony_ci      si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
849bf215546Sopenharmony_ci                        EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type);
850bf215546Sopenharmony_ci      break;
851bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS: {
852bf215546Sopenharmony_ci      if (sctx->screen->use_ngg && query->flags & SI_QUERY_EMULATE_GS_COUNTERS) {
853bf215546Sopenharmony_ci         /* The hw GS primitive counter doesn't work when ngg is active.
854bf215546Sopenharmony_ci          * So if use_ngg is true, we don't use the hw version but instead
855bf215546Sopenharmony_ci          * emulate it in the GS shader.
856bf215546Sopenharmony_ci          * The value is written at the same position, so we don't need to
857bf215546Sopenharmony_ci          * change anything else.
858bf215546Sopenharmony_ci          * If ngg is enabled for the draw, the primitive count is written in
859bf215546Sopenharmony_ci          * gfx10_ngg_gs_emit_epilogue. If ngg is disabled, the number of exported
860bf215546Sopenharmony_ci          * vertices is stored in gs_emitted_vertices and the number of prim
861bf215546Sopenharmony_ci          * is computed based on the output prim type in emit_gs_epilogue.
862bf215546Sopenharmony_ci          */
863bf215546Sopenharmony_ci         struct pipe_shader_buffer sbuf;
864bf215546Sopenharmony_ci         sbuf.buffer = &buffer->b.b;
865bf215546Sopenharmony_ci         sbuf.buffer_offset = query->buffer.results_end;
866bf215546Sopenharmony_ci         sbuf.buffer_size = buffer->bo_size;
867bf215546Sopenharmony_ci         si_set_internal_shader_buffer(sctx, SI_GS_QUERY_EMULATED_COUNTERS_BUF, &sbuf);
868bf215546Sopenharmony_ci         SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 1);
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci         const uint32_t zero = 0;
871bf215546Sopenharmony_ci         radeon_begin(cs);
872bf215546Sopenharmony_ci         /* Clear the emulated counter end value. We don't clear start because it's unused. */
873bf215546Sopenharmony_ci         va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
874bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + 1, 0));
875bf215546Sopenharmony_ci         radeon_emit(S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
876bf215546Sopenharmony_ci         radeon_emit(va);
877bf215546Sopenharmony_ci         radeon_emit(va >> 32);
878bf215546Sopenharmony_ci         radeon_emit(zero);
879bf215546Sopenharmony_ci         radeon_end();
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci         sctx->num_pipeline_stat_emulated_queries++;
882bf215546Sopenharmony_ci      } else {
883bf215546Sopenharmony_ci         radeon_begin(cs);
884bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
885bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
886bf215546Sopenharmony_ci         radeon_emit(va);
887bf215546Sopenharmony_ci         radeon_emit(va >> 32);
888bf215546Sopenharmony_ci         radeon_end();
889bf215546Sopenharmony_ci      }
890bf215546Sopenharmony_ci      break;
891bf215546Sopenharmony_ci   }
892bf215546Sopenharmony_ci   default:
893bf215546Sopenharmony_ci      assert(0);
894bf215546Sopenharmony_ci   }
895bf215546Sopenharmony_ci   radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, query->buffer.buf,
896bf215546Sopenharmony_ci                             RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
897bf215546Sopenharmony_ci}
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_cistatic void si_query_hw_emit_start(struct si_context *sctx, struct si_query_hw *query)
900bf215546Sopenharmony_ci{
901bf215546Sopenharmony_ci   uint64_t va;
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci   if (!query->buffer.buf && query->flags & SI_QUERY_EMULATE_GS_COUNTERS)
904bf215546Sopenharmony_ci      si_resource_reference(&query->buffer.buf, sctx->pipeline_stats_query_buf);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   /* Don't realloc pipeline_stats_query_buf */
907bf215546Sopenharmony_ci   if ((!(query->flags & SI_QUERY_EMULATE_GS_COUNTERS) || !sctx->pipeline_stats_query_buf) &&
908bf215546Sopenharmony_ci       !si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, query->result_size))
909bf215546Sopenharmony_ci      return;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci   if (query->flags & SI_QUERY_EMULATE_GS_COUNTERS)
912bf215546Sopenharmony_ci      si_resource_reference(&sctx->pipeline_stats_query_buf, query->buffer.buf);
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   si_update_occlusion_query_state(sctx, query->b.type, 1);
915bf215546Sopenharmony_ci   si_update_prims_generated_query_state(sctx, query->b.type, 1);
916bf215546Sopenharmony_ci
917bf215546Sopenharmony_ci   if (query->b.type == PIPE_QUERY_PIPELINE_STATISTICS)
918bf215546Sopenharmony_ci      sctx->num_pipeline_stat_queries++;
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci   si_need_gfx_cs_space(sctx, 0);
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   va = query->buffer.buf->gpu_address + query->buffer.results_end;
923bf215546Sopenharmony_ci   query->ops->emit_start(sctx, query, query->buffer.buf, va);
924bf215546Sopenharmony_ci}
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_cistatic void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query,
927bf215546Sopenharmony_ci                                     struct si_resource *buffer, uint64_t va)
928bf215546Sopenharmony_ci{
929bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
930bf215546Sopenharmony_ci   uint64_t fence_va = 0;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   switch (query->b.type) {
933bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
934bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
935bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
936bf215546Sopenharmony_ci      va += 8;
937bf215546Sopenharmony_ci      radeon_begin(cs);
938bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
939bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX11)
940bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
941bf215546Sopenharmony_ci      else
942bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
943bf215546Sopenharmony_ci      radeon_emit(va);
944bf215546Sopenharmony_ci      radeon_emit(va >> 32);
945bf215546Sopenharmony_ci      radeon_end();
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci      fence_va = va + sctx->screen->info.max_render_backends * 16 - 8;
948bf215546Sopenharmony_ci      break;
949bf215546Sopenharmony_ci   }
950bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
951bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
952bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
953bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
954bf215546Sopenharmony_ci      va += 16;
955bf215546Sopenharmony_ci      emit_sample_streamout(cs, va, query->stream);
956bf215546Sopenharmony_ci      break;
957bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
958bf215546Sopenharmony_ci      va += 16;
959bf215546Sopenharmony_ci      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
960bf215546Sopenharmony_ci         emit_sample_streamout(cs, va + 32 * stream, stream);
961bf215546Sopenharmony_ci      break;
962bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
963bf215546Sopenharmony_ci      va += 8;
964bf215546Sopenharmony_ci      FALLTHROUGH;
965bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP:
966bf215546Sopenharmony_ci      si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
967bf215546Sopenharmony_ci                        EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type);
968bf215546Sopenharmony_ci      fence_va = va + 8;
969bf215546Sopenharmony_ci      break;
970bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS: {
971bf215546Sopenharmony_ci      unsigned sample_size = (query->result_size - 8) / 2;
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci      va += sample_size;
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci      radeon_begin(cs);
976bf215546Sopenharmony_ci      if (sctx->screen->use_ngg && query->flags & SI_QUERY_EMULATE_GS_COUNTERS) {
977bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
978bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci         if (--sctx->num_pipeline_stat_emulated_queries == 0) {
981bf215546Sopenharmony_ci            si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
982bf215546Sopenharmony_ci            SET_FIELD(sctx->current_gs_state, GS_STATE_PIPELINE_STATS_EMU, 0);
983bf215546Sopenharmony_ci         }
984bf215546Sopenharmony_ci      } else {
985bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
986bf215546Sopenharmony_ci         radeon_emit(EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
987bf215546Sopenharmony_ci         radeon_emit(va);
988bf215546Sopenharmony_ci         radeon_emit(va >> 32);
989bf215546Sopenharmony_ci      }
990bf215546Sopenharmony_ci      radeon_end();
991bf215546Sopenharmony_ci
992bf215546Sopenharmony_ci      fence_va = va + sample_size;
993bf215546Sopenharmony_ci      break;
994bf215546Sopenharmony_ci   }
995bf215546Sopenharmony_ci   default:
996bf215546Sopenharmony_ci      assert(0);
997bf215546Sopenharmony_ci   }
998bf215546Sopenharmony_ci   radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, query->buffer.buf,
999bf215546Sopenharmony_ci                             RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci   if (fence_va) {
1002bf215546Sopenharmony_ci      si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
1003bf215546Sopenharmony_ci                        EOP_DATA_SEL_VALUE_32BIT, query->buffer.buf, fence_va, 0x80000000,
1004bf215546Sopenharmony_ci                        query->b.type);
1005bf215546Sopenharmony_ci   }
1006bf215546Sopenharmony_ci}
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_cistatic void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *query)
1009bf215546Sopenharmony_ci{
1010bf215546Sopenharmony_ci   uint64_t va;
1011bf215546Sopenharmony_ci
1012bf215546Sopenharmony_ci   /* The queries which need begin already called this in begin_query. */
1013bf215546Sopenharmony_ci   if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
1014bf215546Sopenharmony_ci      si_need_gfx_cs_space(sctx, 0);
1015bf215546Sopenharmony_ci      if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
1016bf215546Sopenharmony_ci                                 query->result_size))
1017bf215546Sopenharmony_ci         return;
1018bf215546Sopenharmony_ci   }
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci   if (!query->buffer.buf)
1021bf215546Sopenharmony_ci      return; // previous buffer allocation failure
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci   /* emit end query */
1024bf215546Sopenharmony_ci   va = query->buffer.buf->gpu_address + query->buffer.results_end;
1025bf215546Sopenharmony_ci
1026bf215546Sopenharmony_ci   query->ops->emit_stop(sctx, query, query->buffer.buf, va);
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci   query->buffer.results_end += query->result_size;
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   si_update_occlusion_query_state(sctx, query->b.type, -1);
1031bf215546Sopenharmony_ci   si_update_prims_generated_query_state(sctx, query->b.type, -1);
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   if (query->b.type == PIPE_QUERY_PIPELINE_STATISTICS)
1034bf215546Sopenharmony_ci      sctx->num_pipeline_stat_queries--;
1035bf215546Sopenharmony_ci}
1036bf215546Sopenharmony_ci
1037bf215546Sopenharmony_cistatic void emit_set_predicate(struct si_context *ctx, struct si_resource *buf, uint64_t va,
1038bf215546Sopenharmony_ci                               uint32_t op)
1039bf215546Sopenharmony_ci{
1040bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &ctx->gfx_cs;
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci   radeon_begin(cs);
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_ci   if (ctx->gfx_level >= GFX9) {
1045bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_SET_PREDICATION, 2, 0));
1046bf215546Sopenharmony_ci      radeon_emit(op);
1047bf215546Sopenharmony_ci      radeon_emit(va);
1048bf215546Sopenharmony_ci      radeon_emit(va >> 32);
1049bf215546Sopenharmony_ci   } else {
1050bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_SET_PREDICATION, 1, 0));
1051bf215546Sopenharmony_ci      radeon_emit(va);
1052bf215546Sopenharmony_ci      radeon_emit(op | ((va >> 32) & 0xFF));
1053bf215546Sopenharmony_ci   }
1054bf215546Sopenharmony_ci   radeon_end();
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_ci   radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_READ | RADEON_PRIO_QUERY);
1057bf215546Sopenharmony_ci}
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_cistatic void si_emit_query_predication(struct si_context *ctx)
1060bf215546Sopenharmony_ci{
1061bf215546Sopenharmony_ci   uint32_t op;
1062bf215546Sopenharmony_ci   bool flag_wait, invert;
1063bf215546Sopenharmony_ci
1064bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)ctx->render_cond;
1065bf215546Sopenharmony_ci   if (!query)
1066bf215546Sopenharmony_ci      return;
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci   invert = ctx->render_cond_invert;
1069bf215546Sopenharmony_ci   flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
1070bf215546Sopenharmony_ci               ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
1071bf215546Sopenharmony_ci
1072bf215546Sopenharmony_ci   if (ctx->screen->use_ngg_streamout && (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
1073bf215546Sopenharmony_ci                                          query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
1074bf215546Sopenharmony_ci      struct gfx10_sh_query *gfx10_query = (struct gfx10_sh_query *)query;
1075bf215546Sopenharmony_ci      struct gfx10_sh_query_buffer *qbuf, *first, *last;
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci      op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci      /* if true then invert, see GL_ARB_conditional_render_inverted */
1080bf215546Sopenharmony_ci      if (!invert)
1081bf215546Sopenharmony_ci         op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
1082bf215546Sopenharmony_ci      else
1083bf215546Sopenharmony_ci         op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
1084bf215546Sopenharmony_ci
1085bf215546Sopenharmony_ci      op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci      first = gfx10_query->first;
1088bf215546Sopenharmony_ci      last = gfx10_query->last;
1089bf215546Sopenharmony_ci
1090bf215546Sopenharmony_ci      while (first) {
1091bf215546Sopenharmony_ci         qbuf = first;
1092bf215546Sopenharmony_ci         if (first != last)
1093bf215546Sopenharmony_ci            first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
1094bf215546Sopenharmony_ci         else
1095bf215546Sopenharmony_ci            first = NULL;
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci         unsigned results_base = gfx10_query->first_begin;
1098bf215546Sopenharmony_ci         uint64_t va_base = qbuf->buf->gpu_address;
1099bf215546Sopenharmony_ci         uint64_t va = va_base + results_base;
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci         unsigned begin = qbuf == gfx10_query->first ? gfx10_query->first_begin : 0;
1102bf215546Sopenharmony_ci         unsigned end = qbuf == gfx10_query->last ? gfx10_query->last_end : qbuf->buf->b.b.width0;
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci         unsigned count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
1105bf215546Sopenharmony_ci         do {
1106bf215546Sopenharmony_ci            if (gfx10_query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
1107bf215546Sopenharmony_ci               for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
1108bf215546Sopenharmony_ci                  emit_set_predicate(ctx, qbuf->buf, va + 4 * sizeof(uint64_t) * stream, op);
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci                  /* set CONTINUE bit for all packets except the first */
1111bf215546Sopenharmony_ci                  op |= PREDICATION_CONTINUE;
1112bf215546Sopenharmony_ci               }
1113bf215546Sopenharmony_ci            } else {
1114bf215546Sopenharmony_ci               emit_set_predicate(ctx, qbuf->buf, va + 4 * sizeof(uint64_t) * gfx10_query->stream, op);
1115bf215546Sopenharmony_ci               op |= PREDICATION_CONTINUE;
1116bf215546Sopenharmony_ci            }
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ci            results_base += sizeof(struct gfx10_sh_query_buffer_mem);
1119bf215546Sopenharmony_ci         } while (count--);
1120bf215546Sopenharmony_ci      }
1121bf215546Sopenharmony_ci   } else {
1122bf215546Sopenharmony_ci      struct si_query_buffer *qbuf;
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci      if (query->workaround_buf) {
1125bf215546Sopenharmony_ci         op = PRED_OP(PREDICATION_OP_BOOL64);
1126bf215546Sopenharmony_ci      } else {
1127bf215546Sopenharmony_ci         switch (query->b.type) {
1128bf215546Sopenharmony_ci         case PIPE_QUERY_OCCLUSION_COUNTER:
1129bf215546Sopenharmony_ci         case PIPE_QUERY_OCCLUSION_PREDICATE:
1130bf215546Sopenharmony_ci         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
1131bf215546Sopenharmony_ci            op = PRED_OP(PREDICATION_OP_ZPASS);
1132bf215546Sopenharmony_ci            break;
1133bf215546Sopenharmony_ci         case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1134bf215546Sopenharmony_ci         case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1135bf215546Sopenharmony_ci            op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
1136bf215546Sopenharmony_ci            invert = !invert;
1137bf215546Sopenharmony_ci            break;
1138bf215546Sopenharmony_ci         default:
1139bf215546Sopenharmony_ci            assert(0);
1140bf215546Sopenharmony_ci            return;
1141bf215546Sopenharmony_ci         }
1142bf215546Sopenharmony_ci      }
1143bf215546Sopenharmony_ci
1144bf215546Sopenharmony_ci      /* if true then invert, see GL_ARB_conditional_render_inverted */
1145bf215546Sopenharmony_ci      if (invert)
1146bf215546Sopenharmony_ci         op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
1147bf215546Sopenharmony_ci      else
1148bf215546Sopenharmony_ci         op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci      /* Use the value written by compute shader as a workaround. Note that
1151bf215546Sopenharmony_ci       * the wait flag does not apply in this predication mode.
1152bf215546Sopenharmony_ci       *
1153bf215546Sopenharmony_ci       * The shader outputs the result value to L2. Workarounds only affect GFX8
1154bf215546Sopenharmony_ci       * and later, where the CP reads data from L2, so we don't need an
1155bf215546Sopenharmony_ci       * additional flush.
1156bf215546Sopenharmony_ci       */
1157bf215546Sopenharmony_ci      if (query->workaround_buf) {
1158bf215546Sopenharmony_ci         uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
1159bf215546Sopenharmony_ci         emit_set_predicate(ctx, query->workaround_buf, va, op);
1160bf215546Sopenharmony_ci         return;
1161bf215546Sopenharmony_ci      }
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci      op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
1164bf215546Sopenharmony_ci
1165bf215546Sopenharmony_ci      /* emit predicate packets for all data blocks */
1166bf215546Sopenharmony_ci      for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
1167bf215546Sopenharmony_ci         unsigned results_base = 0;
1168bf215546Sopenharmony_ci         uint64_t va_base = qbuf->buf->gpu_address;
1169bf215546Sopenharmony_ci
1170bf215546Sopenharmony_ci         while (results_base < qbuf->results_end) {
1171bf215546Sopenharmony_ci            uint64_t va = va_base + results_base;
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci            if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
1174bf215546Sopenharmony_ci               for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
1175bf215546Sopenharmony_ci                  emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
1176bf215546Sopenharmony_ci
1177bf215546Sopenharmony_ci                  /* set CONTINUE bit for all packets except the first */
1178bf215546Sopenharmony_ci                  op |= PREDICATION_CONTINUE;
1179bf215546Sopenharmony_ci               }
1180bf215546Sopenharmony_ci            } else {
1181bf215546Sopenharmony_ci               emit_set_predicate(ctx, qbuf->buf, va, op);
1182bf215546Sopenharmony_ci               op |= PREDICATION_CONTINUE;
1183bf215546Sopenharmony_ci            }
1184bf215546Sopenharmony_ci
1185bf215546Sopenharmony_ci            results_base += query->result_size;
1186bf215546Sopenharmony_ci         }
1187bf215546Sopenharmony_ci      }
1188bf215546Sopenharmony_ci   }
1189bf215546Sopenharmony_ci}
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_cistatic struct pipe_query *si_create_query(struct pipe_context *ctx, unsigned query_type,
1192bf215546Sopenharmony_ci                                          unsigned index)
1193bf215546Sopenharmony_ci{
1194bf215546Sopenharmony_ci   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci   if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || query_type == PIPE_QUERY_GPU_FINISHED ||
1197bf215546Sopenharmony_ci       (query_type >= PIPE_QUERY_DRIVER_SPECIFIC))
1198bf215546Sopenharmony_ci      return si_query_sw_create(query_type);
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_ci   if (sscreen->use_ngg_streamout &&
1201bf215546Sopenharmony_ci       (query_type == PIPE_QUERY_PRIMITIVES_EMITTED ||
1202bf215546Sopenharmony_ci        query_type == PIPE_QUERY_PRIMITIVES_GENERATED || query_type == PIPE_QUERY_SO_STATISTICS ||
1203bf215546Sopenharmony_ci        query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
1204bf215546Sopenharmony_ci        query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE))
1205bf215546Sopenharmony_ci      return gfx10_sh_query_create(sscreen, query_type, index);
1206bf215546Sopenharmony_ci
1207bf215546Sopenharmony_ci   return si_query_hw_create(sscreen, query_type, index);
1208bf215546Sopenharmony_ci}
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_cistatic void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
1211bf215546Sopenharmony_ci{
1212bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1213bf215546Sopenharmony_ci   struct si_query *squery = (struct si_query *)query;
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci   squery->ops->destroy(sctx, squery);
1216bf215546Sopenharmony_ci}
1217bf215546Sopenharmony_ci
1218bf215546Sopenharmony_cistatic bool si_begin_query(struct pipe_context *ctx, struct pipe_query *query)
1219bf215546Sopenharmony_ci{
1220bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1221bf215546Sopenharmony_ci   struct si_query *squery = (struct si_query *)query;
1222bf215546Sopenharmony_ci
1223bf215546Sopenharmony_ci   return squery->ops->begin(sctx, squery);
1224bf215546Sopenharmony_ci}
1225bf215546Sopenharmony_ci
1226bf215546Sopenharmony_cibool si_query_hw_begin(struct si_context *sctx, struct si_query *squery)
1227bf215546Sopenharmony_ci{
1228bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)squery;
1229bf215546Sopenharmony_ci
1230bf215546Sopenharmony_ci   if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
1231bf215546Sopenharmony_ci      assert(0);
1232bf215546Sopenharmony_ci      return false;
1233bf215546Sopenharmony_ci   }
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_ci   if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
1236bf215546Sopenharmony_ci      si_query_buffer_reset(sctx, &query->buffer);
1237bf215546Sopenharmony_ci
1238bf215546Sopenharmony_ci   si_resource_reference(&query->workaround_buf, NULL);
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ci   si_query_hw_emit_start(sctx, query);
1241bf215546Sopenharmony_ci   if (!query->buffer.buf)
1242bf215546Sopenharmony_ci      return false;
1243bf215546Sopenharmony_ci
1244bf215546Sopenharmony_ci   list_addtail(&query->b.active_list, &sctx->active_queries);
1245bf215546Sopenharmony_ci   sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
1246bf215546Sopenharmony_ci   return true;
1247bf215546Sopenharmony_ci}
1248bf215546Sopenharmony_ci
1249bf215546Sopenharmony_cistatic bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
1250bf215546Sopenharmony_ci{
1251bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1252bf215546Sopenharmony_ci   struct si_query *squery = (struct si_query *)query;
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci   return squery->ops->end(sctx, squery);
1255bf215546Sopenharmony_ci}
1256bf215546Sopenharmony_ci
1257bf215546Sopenharmony_cibool si_query_hw_end(struct si_context *sctx, struct si_query *squery)
1258bf215546Sopenharmony_ci{
1259bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)squery;
1260bf215546Sopenharmony_ci
1261bf215546Sopenharmony_ci   if (query->flags & SI_QUERY_HW_FLAG_NO_START)
1262bf215546Sopenharmony_ci      si_query_buffer_reset(sctx, &query->buffer);
1263bf215546Sopenharmony_ci
1264bf215546Sopenharmony_ci   si_query_hw_emit_stop(sctx, query);
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_ci   if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
1267bf215546Sopenharmony_ci      list_delinit(&query->b.active_list);
1268bf215546Sopenharmony_ci      sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
1269bf215546Sopenharmony_ci   }
1270bf215546Sopenharmony_ci
1271bf215546Sopenharmony_ci   if (!query->buffer.buf)
1272bf215546Sopenharmony_ci      return false;
1273bf215546Sopenharmony_ci
1274bf215546Sopenharmony_ci   return true;
1275bf215546Sopenharmony_ci}
1276bf215546Sopenharmony_ci
1277bf215546Sopenharmony_cistatic void si_get_hw_query_params(struct si_context *sctx, struct si_query_hw *squery, int index,
1278bf215546Sopenharmony_ci                                   struct si_hw_query_params *params)
1279bf215546Sopenharmony_ci{
1280bf215546Sopenharmony_ci   unsigned max_rbs = sctx->screen->info.max_render_backends;
1281bf215546Sopenharmony_ci
1282bf215546Sopenharmony_ci   params->pair_stride = 0;
1283bf215546Sopenharmony_ci   params->pair_count = 1;
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci   switch (squery->b.type) {
1286bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER:
1287bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
1288bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
1289bf215546Sopenharmony_ci      params->start_offset = 0;
1290bf215546Sopenharmony_ci      params->end_offset = 8;
1291bf215546Sopenharmony_ci      params->fence_offset = max_rbs * 16;
1292bf215546Sopenharmony_ci      params->pair_stride = 16;
1293bf215546Sopenharmony_ci      params->pair_count = max_rbs;
1294bf215546Sopenharmony_ci      break;
1295bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
1296bf215546Sopenharmony_ci      params->start_offset = 0;
1297bf215546Sopenharmony_ci      params->end_offset = 8;
1298bf215546Sopenharmony_ci      params->fence_offset = 16;
1299bf215546Sopenharmony_ci      break;
1300bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP:
1301bf215546Sopenharmony_ci      params->start_offset = 0;
1302bf215546Sopenharmony_ci      params->end_offset = 0;
1303bf215546Sopenharmony_ci      params->fence_offset = 8;
1304bf215546Sopenharmony_ci      break;
1305bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
1306bf215546Sopenharmony_ci      params->start_offset = 8;
1307bf215546Sopenharmony_ci      params->end_offset = 24;
1308bf215546Sopenharmony_ci      params->fence_offset = params->end_offset + 4;
1309bf215546Sopenharmony_ci      break;
1310bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
1311bf215546Sopenharmony_ci      params->start_offset = 0;
1312bf215546Sopenharmony_ci      params->end_offset = 16;
1313bf215546Sopenharmony_ci      params->fence_offset = params->end_offset + 4;
1314bf215546Sopenharmony_ci      break;
1315bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
1316bf215546Sopenharmony_ci      params->start_offset = 8 - index * 8;
1317bf215546Sopenharmony_ci      params->end_offset = 24 - index * 8;
1318bf215546Sopenharmony_ci      params->fence_offset = params->end_offset + 4;
1319bf215546Sopenharmony_ci      break;
1320bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1321bf215546Sopenharmony_ci      params->pair_count = SI_MAX_STREAMS;
1322bf215546Sopenharmony_ci      params->pair_stride = 32;
1323bf215546Sopenharmony_ci      FALLTHROUGH;
1324bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1325bf215546Sopenharmony_ci      params->start_offset = 0;
1326bf215546Sopenharmony_ci      params->end_offset = 16;
1327bf215546Sopenharmony_ci
1328bf215546Sopenharmony_ci      /* We can re-use the high dword of the last 64-bit value as a
1329bf215546Sopenharmony_ci       * fence: it is initialized as 0, and the high bit is set by
1330bf215546Sopenharmony_ci       * the write of the streamout stats event.
1331bf215546Sopenharmony_ci       */
1332bf215546Sopenharmony_ci      params->fence_offset = squery->result_size - 4;
1333bf215546Sopenharmony_ci      break;
1334bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS: {
1335bf215546Sopenharmony_ci      params->start_offset = si_query_pipestat_dw_offset(index) * 4;
1336bf215546Sopenharmony_ci      params->end_offset = si_query_pipestat_end_dw_offset(sctx->screen, index) * 4;
1337bf215546Sopenharmony_ci      params->fence_offset = si_query_pipestats_num_results(sctx->screen) * 16;
1338bf215546Sopenharmony_ci      break;
1339bf215546Sopenharmony_ci   }
1340bf215546Sopenharmony_ci   default:
1341bf215546Sopenharmony_ci      unreachable("si_get_hw_query_params unsupported");
1342bf215546Sopenharmony_ci   }
1343bf215546Sopenharmony_ci}
1344bf215546Sopenharmony_ci
/**
 * Return the difference between two 64-bit counter snapshots in a mapped
 * query buffer.
 *
 * Each snapshot is read as two consecutive dwords, low half first. The
 * difference is returned at full 64-bit width: returning it as `unsigned`
 * silently dropped the upper 32 bits of large deltas.
 *
 * \param map              memory holding the snapshots, read as an array of
 *                         32-bit words
 * \param start_index      dword index of the low half of the begin snapshot
 * \param end_index        dword index of the low half of the end snapshot
 * \param test_status_bit  when true, the result only counts if bit 63 is set
 *                         in both snapshots
 * \return end - start, or 0 when the status-bit test was requested and failed
 */
static uint64_t si_query_read_result(void *map, unsigned start_index, unsigned end_index,
                                     bool test_status_bit)
{
   const uint32_t *dwords = (const uint32_t *)map;
   const uint64_t status_bit = (uint64_t)1 << 63;

   const uint64_t start =
      (uint64_t)dwords[start_index] | ((uint64_t)dwords[start_index + 1] << 32);
   const uint64_t end = (uint64_t)dwords[end_index] | ((uint64_t)dwords[end_index + 1] << 32);

   if (test_status_bit && !((start & status_bit) && (end & status_bit)))
      return 0;

   return end - start;
}
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_cistatic void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw *query,
1361bf215546Sopenharmony_ci                                   void *buffer, union pipe_query_result *result)
1362bf215546Sopenharmony_ci{
1363bf215546Sopenharmony_ci   unsigned max_rbs = sscreen->info.max_render_backends;
1364bf215546Sopenharmony_ci
1365bf215546Sopenharmony_ci   switch (query->b.type) {
1366bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_COUNTER: {
1367bf215546Sopenharmony_ci      for (unsigned i = 0; i < max_rbs; ++i) {
1368bf215546Sopenharmony_ci         unsigned results_base = i * 16;
1369bf215546Sopenharmony_ci         result->u64 += si_query_read_result(buffer + results_base, 0, 2, true);
1370bf215546Sopenharmony_ci      }
1371bf215546Sopenharmony_ci      break;
1372bf215546Sopenharmony_ci   }
1373bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE:
1374bf215546Sopenharmony_ci   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
1375bf215546Sopenharmony_ci      for (unsigned i = 0; i < max_rbs; ++i) {
1376bf215546Sopenharmony_ci         unsigned results_base = i * 16;
1377bf215546Sopenharmony_ci         result->b = result->b || si_query_read_result(buffer + results_base, 0, 2, true) != 0;
1378bf215546Sopenharmony_ci      }
1379bf215546Sopenharmony_ci      break;
1380bf215546Sopenharmony_ci   }
1381bf215546Sopenharmony_ci   case PIPE_QUERY_TIME_ELAPSED:
1382bf215546Sopenharmony_ci      result->u64 += si_query_read_result(buffer, 0, 2, false);
1383bf215546Sopenharmony_ci      break;
1384bf215546Sopenharmony_ci   case PIPE_QUERY_TIMESTAMP:
1385bf215546Sopenharmony_ci      result->u64 = *(uint64_t *)buffer;
1386bf215546Sopenharmony_ci      break;
1387bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
1388bf215546Sopenharmony_ci      /* SAMPLE_STREAMOUTSTATS stores this structure:
1389bf215546Sopenharmony_ci       * {
1390bf215546Sopenharmony_ci       *    u64 NumPrimitivesWritten;
1391bf215546Sopenharmony_ci       *    u64 PrimitiveStorageNeeded;
1392bf215546Sopenharmony_ci       * }
1393bf215546Sopenharmony_ci       * We only need NumPrimitivesWritten here. */
1394bf215546Sopenharmony_ci      result->u64 += si_query_read_result(buffer, 2, 6, true);
1395bf215546Sopenharmony_ci      break;
1396bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
1397bf215546Sopenharmony_ci      /* Here we read PrimitiveStorageNeeded. */
1398bf215546Sopenharmony_ci      result->u64 += si_query_read_result(buffer, 0, 4, true);
1399bf215546Sopenharmony_ci      break;
1400bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
1401bf215546Sopenharmony_ci      result->so_statistics.num_primitives_written += si_query_read_result(buffer, 2, 6, true);
1402bf215546Sopenharmony_ci      result->so_statistics.primitives_storage_needed += si_query_read_result(buffer, 0, 4, true);
1403bf215546Sopenharmony_ci      break;
1404bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1405bf215546Sopenharmony_ci      result->b = result->b || si_query_read_result(buffer, 2, 6, true) !=
1406bf215546Sopenharmony_ci                                  si_query_read_result(buffer, 0, 4, true);
1407bf215546Sopenharmony_ci      break;
1408bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1409bf215546Sopenharmony_ci      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
1410bf215546Sopenharmony_ci         result->b = result->b || si_query_read_result(buffer, 2, 6, true) !=
1411bf215546Sopenharmony_ci                                     si_query_read_result(buffer, 0, 4, true);
1412bf215546Sopenharmony_ci         buffer = (char *)buffer + 32;
1413bf215546Sopenharmony_ci      }
1414bf215546Sopenharmony_ci      break;
1415bf215546Sopenharmony_ci   case PIPE_QUERY_PIPELINE_STATISTICS:
1416bf215546Sopenharmony_ci      for (int i = 0; i < 11; i++) {
1417bf215546Sopenharmony_ci         result->pipeline_statistics.counters[i] +=
1418bf215546Sopenharmony_ci            si_query_read_result(buffer, si_query_pipestat_dw_offset(i),
1419bf215546Sopenharmony_ci                                 si_query_pipestat_end_dw_offset(sscreen, i), false);
1420bf215546Sopenharmony_ci      }
1421bf215546Sopenharmony_ci#if 0 /* for testing */
1422bf215546Sopenharmony_ci      printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
1423bf215546Sopenharmony_ci             "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
1424bf215546Sopenharmony_ci             "Clipper prims=%llu, PS=%llu, CS=%llu\n",
1425bf215546Sopenharmony_ci             result->pipeline_statistics.ia_vertices,
1426bf215546Sopenharmony_ci             result->pipeline_statistics.ia_primitives,
1427bf215546Sopenharmony_ci             result->pipeline_statistics.vs_invocations,
1428bf215546Sopenharmony_ci             result->pipeline_statistics.hs_invocations,
1429bf215546Sopenharmony_ci             result->pipeline_statistics.ds_invocations,
1430bf215546Sopenharmony_ci             result->pipeline_statistics.gs_invocations,
1431bf215546Sopenharmony_ci             result->pipeline_statistics.gs_primitives,
1432bf215546Sopenharmony_ci             result->pipeline_statistics.c_invocations,
1433bf215546Sopenharmony_ci             result->pipeline_statistics.c_primitives,
1434bf215546Sopenharmony_ci             result->pipeline_statistics.ps_invocations,
1435bf215546Sopenharmony_ci             result->pipeline_statistics.cs_invocations);
1436bf215546Sopenharmony_ci#endif
1437bf215546Sopenharmony_ci      break;
1438bf215546Sopenharmony_ci   default:
1439bf215546Sopenharmony_ci      assert(0);
1440bf215546Sopenharmony_ci   }
1441bf215546Sopenharmony_ci}
1442bf215546Sopenharmony_ci
/* Suspend a hardware query by emitting its stop sample. */
void si_query_hw_suspend(struct si_context *sctx, struct si_query *query)
{
   struct si_query_hw *hwq = (struct si_query_hw *)query;

   si_query_hw_emit_stop(sctx, hwq);
}
1447bf215546Sopenharmony_ci
/* Resume a hardware query by emitting a new start sample. */
void si_query_hw_resume(struct si_context *sctx, struct si_query *query)
{
   struct si_query_hw *hwq = (struct si_query_hw *)query;

   si_query_hw_emit_start(sctx, hwq);
}
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_cistatic const struct si_query_ops query_hw_ops = {
1454bf215546Sopenharmony_ci   .destroy = si_query_hw_destroy,
1455bf215546Sopenharmony_ci   .begin = si_query_hw_begin,
1456bf215546Sopenharmony_ci   .end = si_query_hw_end,
1457bf215546Sopenharmony_ci   .get_result = si_query_hw_get_result,
1458bf215546Sopenharmony_ci   .get_result_resource = si_query_hw_get_result_resource,
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci   .suspend = si_query_hw_suspend,
1461bf215546Sopenharmony_ci   .resume = si_query_hw_resume,
1462bf215546Sopenharmony_ci};
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_cistatic bool si_get_query_result(struct pipe_context *ctx, struct pipe_query *query, bool wait,
1465bf215546Sopenharmony_ci                                union pipe_query_result *result)
1466bf215546Sopenharmony_ci{
1467bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1468bf215546Sopenharmony_ci   struct si_query *squery = (struct si_query *)query;
1469bf215546Sopenharmony_ci
1470bf215546Sopenharmony_ci   return squery->ops->get_result(sctx, squery, wait, result);
1471bf215546Sopenharmony_ci}
1472bf215546Sopenharmony_ci
1473bf215546Sopenharmony_cistatic void si_get_query_result_resource(struct pipe_context *ctx, struct pipe_query *query,
1474bf215546Sopenharmony_ci                                         enum pipe_query_flags flags, enum pipe_query_value_type result_type,
1475bf215546Sopenharmony_ci                                         int index, struct pipe_resource *resource, unsigned offset)
1476bf215546Sopenharmony_ci{
1477bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1478bf215546Sopenharmony_ci   struct si_query *squery = (struct si_query *)query;
1479bf215546Sopenharmony_ci
1480bf215546Sopenharmony_ci   squery->ops->get_result_resource(sctx, squery, flags, result_type, index, resource, offset);
1481bf215546Sopenharmony_ci}
1482bf215546Sopenharmony_ci
1483bf215546Sopenharmony_cistatic void si_query_hw_clear_result(struct si_query_hw *query, union pipe_query_result *result)
1484bf215546Sopenharmony_ci{
1485bf215546Sopenharmony_ci   util_query_clear_result(result, query->b.type);
1486bf215546Sopenharmony_ci}
1487bf215546Sopenharmony_ci
1488bf215546Sopenharmony_cibool si_query_hw_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
1489bf215546Sopenharmony_ci                            union pipe_query_result *result)
1490bf215546Sopenharmony_ci{
1491bf215546Sopenharmony_ci   struct si_screen *sscreen = sctx->screen;
1492bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)squery;
1493bf215546Sopenharmony_ci   struct si_query_buffer *qbuf;
1494bf215546Sopenharmony_ci
1495bf215546Sopenharmony_ci   query->ops->clear_result(query, result);
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci   for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
1498bf215546Sopenharmony_ci      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
1499bf215546Sopenharmony_ci      unsigned results_base = 0;
1500bf215546Sopenharmony_ci      void *map;
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci      if (squery->b.flushed)
1503bf215546Sopenharmony_ci         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
1504bf215546Sopenharmony_ci      else
1505bf215546Sopenharmony_ci         map = si_buffer_map(sctx, qbuf->buf, usage);
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci      if (!map)
1508bf215546Sopenharmony_ci         return false;
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_ci      while (results_base != qbuf->results_end) {
1511bf215546Sopenharmony_ci         query->ops->add_result(sscreen, query, map + results_base, result);
1512bf215546Sopenharmony_ci         results_base += query->result_size;
1513bf215546Sopenharmony_ci      }
1514bf215546Sopenharmony_ci   }
1515bf215546Sopenharmony_ci
1516bf215546Sopenharmony_ci   /* Convert the time to expected units. */
1517bf215546Sopenharmony_ci   if (squery->type == PIPE_QUERY_TIME_ELAPSED ||
1518bf215546Sopenharmony_ci       squery->type == PIPE_QUERY_TIMESTAMP) {
1519bf215546Sopenharmony_ci      result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq;
1520bf215546Sopenharmony_ci   }
1521bf215546Sopenharmony_ci   return true;
1522bf215546Sopenharmony_ci}
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_cistatic void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *squery,
1525bf215546Sopenharmony_ci                                            enum pipe_query_flags flags,
1526bf215546Sopenharmony_ci                                            enum pipe_query_value_type result_type,
1527bf215546Sopenharmony_ci                                            int index, struct pipe_resource *resource,
1528bf215546Sopenharmony_ci                                            unsigned offset)
1529bf215546Sopenharmony_ci{
1530bf215546Sopenharmony_ci   struct si_query_hw *query = (struct si_query_hw *)squery;
1531bf215546Sopenharmony_ci   struct si_query_buffer *qbuf;
1532bf215546Sopenharmony_ci   struct si_query_buffer *qbuf_prev;
1533bf215546Sopenharmony_ci   struct pipe_resource *tmp_buffer = NULL;
1534bf215546Sopenharmony_ci   unsigned tmp_buffer_offset = 0;
1535bf215546Sopenharmony_ci   struct si_qbo_state saved_state = {};
1536bf215546Sopenharmony_ci   struct pipe_grid_info grid = {};
1537bf215546Sopenharmony_ci   struct pipe_constant_buffer constant_buffer = {};
1538bf215546Sopenharmony_ci   struct pipe_shader_buffer ssbo[3];
1539bf215546Sopenharmony_ci   struct si_hw_query_params params;
1540bf215546Sopenharmony_ci   struct {
1541bf215546Sopenharmony_ci      uint32_t end_offset;
1542bf215546Sopenharmony_ci      uint32_t result_stride;
1543bf215546Sopenharmony_ci      uint32_t result_count;
1544bf215546Sopenharmony_ci      uint32_t config;
1545bf215546Sopenharmony_ci      uint32_t fence_offset;
1546bf215546Sopenharmony_ci      uint32_t pair_stride;
1547bf215546Sopenharmony_ci      uint32_t pair_count;
1548bf215546Sopenharmony_ci   } consts;
1549bf215546Sopenharmony_ci
1550bf215546Sopenharmony_ci   if (!sctx->query_result_shader) {
1551bf215546Sopenharmony_ci      sctx->query_result_shader = si_create_query_result_cs(sctx);
1552bf215546Sopenharmony_ci      if (!sctx->query_result_shader)
1553bf215546Sopenharmony_ci         return;
1554bf215546Sopenharmony_ci   }
1555bf215546Sopenharmony_ci
1556bf215546Sopenharmony_ci   if (query->buffer.previous) {
1557bf215546Sopenharmony_ci      u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
1558bf215546Sopenharmony_ci      if (!tmp_buffer)
1559bf215546Sopenharmony_ci         return;
1560bf215546Sopenharmony_ci   }
1561bf215546Sopenharmony_ci
1562bf215546Sopenharmony_ci   si_save_qbo_state(sctx, &saved_state);
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_ci   si_get_hw_query_params(sctx, query, index >= 0 ? index : 0, &params);
1565bf215546Sopenharmony_ci   consts.end_offset = params.end_offset - params.start_offset;
1566bf215546Sopenharmony_ci   consts.fence_offset = params.fence_offset - params.start_offset;
1567bf215546Sopenharmony_ci   consts.result_stride = query->result_size;
1568bf215546Sopenharmony_ci   consts.pair_stride = params.pair_stride;
1569bf215546Sopenharmony_ci   consts.pair_count = params.pair_count;
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci   constant_buffer.buffer_size = sizeof(consts);
1572bf215546Sopenharmony_ci   constant_buffer.user_buffer = &consts;
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_ci   ssbo[1].buffer = tmp_buffer;
1575bf215546Sopenharmony_ci   ssbo[1].buffer_offset = tmp_buffer_offset;
1576bf215546Sopenharmony_ci   ssbo[1].buffer_size = 16;
1577bf215546Sopenharmony_ci
1578bf215546Sopenharmony_ci   ssbo[2] = ssbo[1];
1579bf215546Sopenharmony_ci
1580bf215546Sopenharmony_ci   grid.block[0] = 1;
1581bf215546Sopenharmony_ci   grid.block[1] = 1;
1582bf215546Sopenharmony_ci   grid.block[2] = 1;
1583bf215546Sopenharmony_ci   grid.grid[0] = 1;
1584bf215546Sopenharmony_ci   grid.grid[1] = 1;
1585bf215546Sopenharmony_ci   grid.grid[2] = 1;
1586bf215546Sopenharmony_ci
1587bf215546Sopenharmony_ci   consts.config = 0;
1588bf215546Sopenharmony_ci   if (index < 0)
1589bf215546Sopenharmony_ci      consts.config |= 4;
1590bf215546Sopenharmony_ci   if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
1591bf215546Sopenharmony_ci       query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
1592bf215546Sopenharmony_ci      consts.config |= 8;
1593bf215546Sopenharmony_ci   else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
1594bf215546Sopenharmony_ci            query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
1595bf215546Sopenharmony_ci      consts.config |= 8 | 256;
1596bf215546Sopenharmony_ci   else if (query->b.type == PIPE_QUERY_TIMESTAMP || query->b.type == PIPE_QUERY_TIME_ELAPSED)
1597bf215546Sopenharmony_ci      consts.config |= 32;
1598bf215546Sopenharmony_ci
1599bf215546Sopenharmony_ci   switch (result_type) {
1600bf215546Sopenharmony_ci   case PIPE_QUERY_TYPE_U64:
1601bf215546Sopenharmony_ci   case PIPE_QUERY_TYPE_I64:
1602bf215546Sopenharmony_ci      consts.config |= 64;
1603bf215546Sopenharmony_ci      break;
1604bf215546Sopenharmony_ci   case PIPE_QUERY_TYPE_I32:
1605bf215546Sopenharmony_ci      consts.config |= 128;
1606bf215546Sopenharmony_ci      break;
1607bf215546Sopenharmony_ci   case PIPE_QUERY_TYPE_U32:
1608bf215546Sopenharmony_ci      break;
1609bf215546Sopenharmony_ci   }
1610bf215546Sopenharmony_ci
1611bf215546Sopenharmony_ci   sctx->flags |= sctx->screen->barrier_flags.cp_to_L2;
1612bf215546Sopenharmony_ci
1613bf215546Sopenharmony_ci   for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
1614bf215546Sopenharmony_ci      if (query->b.type != PIPE_QUERY_TIMESTAMP) {
1615bf215546Sopenharmony_ci         qbuf_prev = qbuf->previous;
1616bf215546Sopenharmony_ci         consts.result_count = qbuf->results_end / query->result_size;
1617bf215546Sopenharmony_ci         consts.config &= ~3;
1618bf215546Sopenharmony_ci         if (qbuf != &query->buffer)
1619bf215546Sopenharmony_ci            consts.config |= 1;
1620bf215546Sopenharmony_ci         if (qbuf->previous)
1621bf215546Sopenharmony_ci            consts.config |= 2;
1622bf215546Sopenharmony_ci      } else {
1623bf215546Sopenharmony_ci         /* Only read the last timestamp. */
1624bf215546Sopenharmony_ci         qbuf_prev = NULL;
1625bf215546Sopenharmony_ci         consts.result_count = 0;
1626bf215546Sopenharmony_ci         consts.config |= 16;
1627bf215546Sopenharmony_ci         params.start_offset += qbuf->results_end - query->result_size;
1628bf215546Sopenharmony_ci      }
1629bf215546Sopenharmony_ci
1630bf215546Sopenharmony_ci      sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);
1631bf215546Sopenharmony_ci
1632bf215546Sopenharmony_ci      ssbo[0].buffer = &qbuf->buf->b.b;
1633bf215546Sopenharmony_ci      ssbo[0].buffer_offset = params.start_offset;
1634bf215546Sopenharmony_ci      ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
1635bf215546Sopenharmony_ci
1636bf215546Sopenharmony_ci      if (!qbuf->previous) {
1637bf215546Sopenharmony_ci         ssbo[2].buffer = resource;
1638bf215546Sopenharmony_ci         ssbo[2].buffer_offset = offset;
1639bf215546Sopenharmony_ci         ssbo[2].buffer_size = resource->width0 - offset;
1640bf215546Sopenharmony_ci         /* assert size is correct, based on result_type ? */
1641bf215546Sopenharmony_ci
1642bf215546Sopenharmony_ci         si_resource(resource)->TC_L2_dirty = true;
1643bf215546Sopenharmony_ci      }
1644bf215546Sopenharmony_ci
1645bf215546Sopenharmony_ci      if ((flags & PIPE_QUERY_WAIT) && qbuf == &query->buffer) {
1646bf215546Sopenharmony_ci         uint64_t va;
1647bf215546Sopenharmony_ci
1648bf215546Sopenharmony_ci         /* Wait for result availability. Wait only for readiness
1649bf215546Sopenharmony_ci          * of the last entry, since the fence writes should be
1650bf215546Sopenharmony_ci          * serialized in the CP.
1651bf215546Sopenharmony_ci          */
1652bf215546Sopenharmony_ci         va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
1653bf215546Sopenharmony_ci         va += params.fence_offset;
1654bf215546Sopenharmony_ci
1655bf215546Sopenharmony_ci         si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x80000000, 0x80000000, WAIT_REG_MEM_EQUAL);
1656bf215546Sopenharmony_ci      }
1657bf215546Sopenharmony_ci      si_launch_grid_internal_ssbos(sctx, &grid, sctx->query_result_shader,
1658bf215546Sopenharmony_ci                                    SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,
1659bf215546Sopenharmony_ci                                    3, ssbo, 0x4);
1660bf215546Sopenharmony_ci   }
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_ci   si_restore_qbo_state(sctx, &saved_state);
1663bf215546Sopenharmony_ci   pipe_resource_reference(&tmp_buffer, NULL);
1664bf215546Sopenharmony_ci}
1665bf215546Sopenharmony_ci
1666bf215546Sopenharmony_cistatic void si_render_condition(struct pipe_context *ctx, struct pipe_query *query, bool condition,
1667bf215546Sopenharmony_ci                                enum pipe_render_cond_flag mode)
1668bf215546Sopenharmony_ci{
1669bf215546Sopenharmony_ci   struct si_context *sctx = (struct si_context *)ctx;
1670bf215546Sopenharmony_ci   struct si_query_hw *squery = (struct si_query_hw *)query;
1671bf215546Sopenharmony_ci   struct si_atom *atom = &sctx->atoms.s.render_cond;
1672bf215546Sopenharmony_ci
1673bf215546Sopenharmony_ci   if (query) {
1674bf215546Sopenharmony_ci      bool needs_workaround = false;
1675bf215546Sopenharmony_ci
1676bf215546Sopenharmony_ci      /* There was a firmware regression in GFX8 which causes successive
1677bf215546Sopenharmony_ci       * SET_PREDICATION packets to give the wrong answer for
1678bf215546Sopenharmony_ci       * non-inverted stream overflow predication.
1679bf215546Sopenharmony_ci       */
1680bf215546Sopenharmony_ci      if (((sctx->gfx_level == GFX8 && sctx->screen->info.pfp_fw_feature < 49) ||
1681bf215546Sopenharmony_ci           (sctx->gfx_level == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
1682bf215546Sopenharmony_ci          !condition &&
1683bf215546Sopenharmony_ci          (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
1684bf215546Sopenharmony_ci           (squery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
1685bf215546Sopenharmony_ci            (squery->buffer.previous || squery->buffer.results_end > squery->result_size)))) {
1686bf215546Sopenharmony_ci         needs_workaround = true;
1687bf215546Sopenharmony_ci      }
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci      if (needs_workaround && !squery->workaround_buf) {
1690bf215546Sopenharmony_ci         bool old_render_cond_enabled = sctx->render_cond_enabled;
1691bf215546Sopenharmony_ci         sctx->render_cond_enabled = false;
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_ci         u_suballocator_alloc(&sctx->allocator_zeroed_memory, 8, 8, &squery->workaround_offset,
1694bf215546Sopenharmony_ci                              (struct pipe_resource **)&squery->workaround_buf);
1695bf215546Sopenharmony_ci
1696bf215546Sopenharmony_ci         /* Reset to NULL to avoid a redundant SET_PREDICATION
1697bf215546Sopenharmony_ci          * from launching the compute grid.
1698bf215546Sopenharmony_ci          */
1699bf215546Sopenharmony_ci         sctx->render_cond = NULL;
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_ci         ctx->get_query_result_resource(ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
1702bf215546Sopenharmony_ci                                        &squery->workaround_buf->b.b, squery->workaround_offset);
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci         /* Settings this in the render cond atom is too late,
1705bf215546Sopenharmony_ci          * so set it here. */
1706bf215546Sopenharmony_ci         sctx->flags |= sctx->screen->barrier_flags.L2_to_cp | SI_CONTEXT_FLUSH_FOR_RENDER_COND;
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_ci         sctx->render_cond_enabled = old_render_cond_enabled;
1709bf215546Sopenharmony_ci      }
1710bf215546Sopenharmony_ci   }
1711bf215546Sopenharmony_ci
1712bf215546Sopenharmony_ci   sctx->render_cond = query;
1713bf215546Sopenharmony_ci   sctx->render_cond_invert = condition;
1714bf215546Sopenharmony_ci   sctx->render_cond_mode = mode;
1715bf215546Sopenharmony_ci   sctx->render_cond_enabled = query;
1716bf215546Sopenharmony_ci
1717bf215546Sopenharmony_ci   si_set_atom_dirty(sctx, atom, query != NULL);
1718bf215546Sopenharmony_ci}
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_civoid si_suspend_queries(struct si_context *sctx)
1721bf215546Sopenharmony_ci{
1722bf215546Sopenharmony_ci   struct si_query *query;
1723bf215546Sopenharmony_ci
1724bf215546Sopenharmony_ci   LIST_FOR_EACH_ENTRY (query, &sctx->active_queries, active_list)
1725bf215546Sopenharmony_ci      query->ops->suspend(sctx, query);
1726bf215546Sopenharmony_ci}
1727bf215546Sopenharmony_ci
1728bf215546Sopenharmony_civoid si_resume_queries(struct si_context *sctx)
1729bf215546Sopenharmony_ci{
1730bf215546Sopenharmony_ci   struct si_query *query;
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_ci   /* Check CS space here. Resuming must not be interrupted by flushes. */
1733bf215546Sopenharmony_ci   si_need_gfx_cs_space(sctx, 0);
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci   LIST_FOR_EACH_ENTRY (query, &sctx->active_queries, active_list)
1736bf215546Sopenharmony_ci      query->ops->resume(sctx, query);
1737bf215546Sopenharmony_ci}
1738bf215546Sopenharmony_ci
1739bf215546Sopenharmony_ci#define XFULL(name_, query_type_, type_, result_type_, group_id_)                                  \
1740bf215546Sopenharmony_ci   {                                                                                               \
1741bf215546Sopenharmony_ci      .name = name_, .query_type = SI_QUERY_##query_type_, .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
1742bf215546Sopenharmony_ci      .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, .group_id = group_id_           \
1743bf215546Sopenharmony_ci   }
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci#define X(name_, query_type_, type_, result_type_)                                                 \
1746bf215546Sopenharmony_ci   XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
1747bf215546Sopenharmony_ci
1748bf215546Sopenharmony_ci#define XG(group_, name_, query_type_, type_, result_type_)                                        \
1749bf215546Sopenharmony_ci   XFULL(name_, query_type_, type_, result_type_, SI_QUERY_GROUP_##group_)
1750bf215546Sopenharmony_ci
1751bf215546Sopenharmony_cistatic struct pipe_driver_query_info si_driver_query_list[] = {
1752bf215546Sopenharmony_ci   X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
1753bf215546Sopenharmony_ci   X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
1754bf215546Sopenharmony_ci   X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
1755bf215546Sopenharmony_ci   X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
1756bf215546Sopenharmony_ci   X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
1757bf215546Sopenharmony_ci   X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
1758bf215546Sopenharmony_ci   X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
1759bf215546Sopenharmony_ci   X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
1760bf215546Sopenharmony_ci   X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
1761bf215546Sopenharmony_ci   X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
1762bf215546Sopenharmony_ci   X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
1763bf215546Sopenharmony_ci   X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
1764bf215546Sopenharmony_ci   X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
1765bf215546Sopenharmony_ci   X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
1766bf215546Sopenharmony_ci   X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
1767bf215546Sopenharmony_ci   X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
1768bf215546Sopenharmony_ci   X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
1769bf215546Sopenharmony_ci   X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
1770bf215546Sopenharmony_ci   X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
1771bf215546Sopenharmony_ci   X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
1772bf215546Sopenharmony_ci   X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
1773bf215546Sopenharmony_ci   X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
1774bf215546Sopenharmony_ci   X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
1775bf215546Sopenharmony_ci   X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
1776bf215546Sopenharmony_ci   X("slab-wasted-VRAM", SLAB_WASTED_VRAM, BYTES, AVERAGE),
1777bf215546Sopenharmony_ci   X("slab-wasted-GTT", SLAB_WASTED_GTT, BYTES, AVERAGE),
1778bf215546Sopenharmony_ci   X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
1779bf215546Sopenharmony_ci   X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
1780bf215546Sopenharmony_ci   X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
1781bf215546Sopenharmony_ci   X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
1782bf215546Sopenharmony_ci   X("GFX-IB-size", GFX_IB_SIZE, UINT64, AVERAGE),
1783bf215546Sopenharmony_ci   X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
1784bf215546Sopenharmony_ci   X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
1785bf215546Sopenharmony_ci   X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
1786bf215546Sopenharmony_ci   X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
1787bf215546Sopenharmony_ci   X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
1788bf215546Sopenharmony_ci   X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
1789bf215546Sopenharmony_ci   X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
1790bf215546Sopenharmony_ci   X("live-shader-cache-hits", LIVE_SHADER_CACHE_HITS, UINT, CUMULATIVE),
1791bf215546Sopenharmony_ci   X("live-shader-cache-misses", LIVE_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
1792bf215546Sopenharmony_ci   X("memory-shader-cache-hits", MEMORY_SHADER_CACHE_HITS, UINT, CUMULATIVE),
1793bf215546Sopenharmony_ci   X("memory-shader-cache-misses", MEMORY_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
1794bf215546Sopenharmony_ci   X("disk-shader-cache-hits", DISK_SHADER_CACHE_HITS, UINT, CUMULATIVE),
1795bf215546Sopenharmony_ci   X("disk-shader-cache-misses", DISK_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
1796bf215546Sopenharmony_ci
1797bf215546Sopenharmony_ci   /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
1798bf215546Sopenharmony_ci    * which use it as a fallback path to detect the GPU type.
1799bf215546Sopenharmony_ci    *
1800bf215546Sopenharmony_ci    * Note: The names of these queries are significant for GPUPerfStudio
1801bf215546Sopenharmony_ci    * (and possibly their order as well). */
1802bf215546Sopenharmony_ci   XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
1803bf215546Sopenharmony_ci   XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
1804bf215546Sopenharmony_ci   XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
1805bf215546Sopenharmony_ci   XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
1806bf215546Sopenharmony_ci   XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ci   X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
1809bf215546Sopenharmony_ci   X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
1810bf215546Sopenharmony_ci   X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
1811bf215546Sopenharmony_ci
1812bf215546Sopenharmony_ci   /* The following queries must be at the end of the list because their
1813bf215546Sopenharmony_ci    * availability is adjusted dynamically based on the DRM version. */
1814bf215546Sopenharmony_ci   X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
1815bf215546Sopenharmony_ci   X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
1816bf215546Sopenharmony_ci   X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
1817bf215546Sopenharmony_ci   X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
1818bf215546Sopenharmony_ci   X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
1819bf215546Sopenharmony_ci   X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
1820bf215546Sopenharmony_ci   X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
1821bf215546Sopenharmony_ci   X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
1822bf215546Sopenharmony_ci   X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
1823bf215546Sopenharmony_ci   X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
1824bf215546Sopenharmony_ci   X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
1825bf215546Sopenharmony_ci   X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
1826bf215546Sopenharmony_ci   X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
1827bf215546Sopenharmony_ci   X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
1828bf215546Sopenharmony_ci
1829bf215546Sopenharmony_ci   /* SRBM_STATUS2 */
1830bf215546Sopenharmony_ci   X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
1831bf215546Sopenharmony_ci
1832bf215546Sopenharmony_ci   /* CP_STAT */
1833bf215546Sopenharmony_ci   X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
1834bf215546Sopenharmony_ci   X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
1835bf215546Sopenharmony_ci   X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
1836bf215546Sopenharmony_ci   X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
1837bf215546Sopenharmony_ci   X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
1838bf215546Sopenharmony_ci   X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
1839bf215546Sopenharmony_ci};
1840bf215546Sopenharmony_ci
1841bf215546Sopenharmony_ci#undef X
1842bf215546Sopenharmony_ci#undef XG
1843bf215546Sopenharmony_ci#undef XFULL
1844bf215546Sopenharmony_ci
1845bf215546Sopenharmony_cistatic unsigned si_get_num_queries(struct si_screen *sscreen)
1846bf215546Sopenharmony_ci{
1847bf215546Sopenharmony_ci   /* amdgpu */
1848bf215546Sopenharmony_ci   if (sscreen->info.is_amdgpu) {
1849bf215546Sopenharmony_ci      if (sscreen->info.gfx_level >= GFX8)
1850bf215546Sopenharmony_ci         return ARRAY_SIZE(si_driver_query_list);
1851bf215546Sopenharmony_ci      else
1852bf215546Sopenharmony_ci         return ARRAY_SIZE(si_driver_query_list) - 7;
1853bf215546Sopenharmony_ci   }
1854bf215546Sopenharmony_ci
1855bf215546Sopenharmony_ci   /* radeon */
1856bf215546Sopenharmony_ci   if (sscreen->info.gfx_level == GFX7)
1857bf215546Sopenharmony_ci      return ARRAY_SIZE(si_driver_query_list) - 6;
1858bf215546Sopenharmony_ci   else
1859bf215546Sopenharmony_ci      return ARRAY_SIZE(si_driver_query_list) - 7;
1860bf215546Sopenharmony_ci
1861bf215546Sopenharmony_ci   return ARRAY_SIZE(si_driver_query_list) - 21;
1862bf215546Sopenharmony_ci}
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_cistatic int si_get_driver_query_info(struct pipe_screen *screen, unsigned index,
1865bf215546Sopenharmony_ci                                    struct pipe_driver_query_info *info)
1866bf215546Sopenharmony_ci{
1867bf215546Sopenharmony_ci   struct si_screen *sscreen = (struct si_screen *)screen;
1868bf215546Sopenharmony_ci   unsigned num_queries = si_get_num_queries(sscreen);
1869bf215546Sopenharmony_ci
1870bf215546Sopenharmony_ci   if (!info) {
1871bf215546Sopenharmony_ci      unsigned num_perfcounters = si_get_perfcounter_info(sscreen, 0, NULL);
1872bf215546Sopenharmony_ci
1873bf215546Sopenharmony_ci      return num_queries + num_perfcounters;
1874bf215546Sopenharmony_ci   }
1875bf215546Sopenharmony_ci
1876bf215546Sopenharmony_ci   if (index >= num_queries)
1877bf215546Sopenharmony_ci      return si_get_perfcounter_info(sscreen, index - num_queries, info);
1878bf215546Sopenharmony_ci
1879bf215546Sopenharmony_ci   *info = si_driver_query_list[index];
1880bf215546Sopenharmony_ci
1881bf215546Sopenharmony_ci   switch (info->query_type) {
1882bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_VRAM:
1883bf215546Sopenharmony_ci   case SI_QUERY_VRAM_USAGE:
1884bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_VRAM:
1885bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_VRAM:
1886bf215546Sopenharmony_ci      info->max_value.u64 = (uint64_t)sscreen->info.vram_size_kb * 1024;
1887bf215546Sopenharmony_ci      break;
1888bf215546Sopenharmony_ci   case SI_QUERY_REQUESTED_GTT:
1889bf215546Sopenharmony_ci   case SI_QUERY_GTT_USAGE:
1890bf215546Sopenharmony_ci   case SI_QUERY_MAPPED_GTT:
1891bf215546Sopenharmony_ci   case SI_QUERY_SLAB_WASTED_GTT:
1892bf215546Sopenharmony_ci      info->max_value.u64 = (uint64_t)sscreen->info.gart_size_kb * 1024;
1893bf215546Sopenharmony_ci      break;
1894bf215546Sopenharmony_ci   case SI_QUERY_GPU_TEMPERATURE:
1895bf215546Sopenharmony_ci      info->max_value.u64 = 125;
1896bf215546Sopenharmony_ci      break;
1897bf215546Sopenharmony_ci   case SI_QUERY_VRAM_VIS_USAGE:
1898bf215546Sopenharmony_ci      info->max_value.u64 = (uint64_t)sscreen->info.vram_vis_size_kb * 1024;
1899bf215546Sopenharmony_ci      break;
1900bf215546Sopenharmony_ci   }
1901bf215546Sopenharmony_ci
1902bf215546Sopenharmony_ci   if (info->group_id != ~(unsigned)0 && sscreen->perfcounters)
1903bf215546Sopenharmony_ci      info->group_id += sscreen->perfcounters->base.num_groups;
1904bf215546Sopenharmony_ci
1905bf215546Sopenharmony_ci   return 1;
1906bf215546Sopenharmony_ci}
1907bf215546Sopenharmony_ci
1908bf215546Sopenharmony_ci/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
1909bf215546Sopenharmony_ci * performance counter groups, so be careful when changing this and related
1910bf215546Sopenharmony_ci * functions.
1911bf215546Sopenharmony_ci */
1912bf215546Sopenharmony_cistatic int si_get_driver_query_group_info(struct pipe_screen *screen, unsigned index,
1913bf215546Sopenharmony_ci                                          struct pipe_driver_query_group_info *info)
1914bf215546Sopenharmony_ci{
1915bf215546Sopenharmony_ci   struct si_screen *sscreen = (struct si_screen *)screen;
1916bf215546Sopenharmony_ci   unsigned num_pc_groups = 0;
1917bf215546Sopenharmony_ci
1918bf215546Sopenharmony_ci   if (sscreen->perfcounters)
1919bf215546Sopenharmony_ci      num_pc_groups = sscreen->perfcounters->base.num_groups;
1920bf215546Sopenharmony_ci
1921bf215546Sopenharmony_ci   if (!info)
1922bf215546Sopenharmony_ci      return num_pc_groups + SI_NUM_SW_QUERY_GROUPS;
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_ci   if (index < num_pc_groups)
1925bf215546Sopenharmony_ci      return si_get_perfcounter_group_info(sscreen, index, info);
1926bf215546Sopenharmony_ci
1927bf215546Sopenharmony_ci   index -= num_pc_groups;
1928bf215546Sopenharmony_ci   if (index >= SI_NUM_SW_QUERY_GROUPS)
1929bf215546Sopenharmony_ci      return 0;
1930bf215546Sopenharmony_ci
1931bf215546Sopenharmony_ci   info->name = "GPIN";
1932bf215546Sopenharmony_ci   info->max_active_queries = 5;
1933bf215546Sopenharmony_ci   info->num_queries = 5;
1934bf215546Sopenharmony_ci   return 1;
1935bf215546Sopenharmony_ci}
1936bf215546Sopenharmony_ci
1937bf215546Sopenharmony_civoid si_init_query_functions(struct si_context *sctx)
1938bf215546Sopenharmony_ci{
1939bf215546Sopenharmony_ci   sctx->b.create_query = si_create_query;
1940bf215546Sopenharmony_ci   sctx->b.create_batch_query = si_create_batch_query;
1941bf215546Sopenharmony_ci   sctx->b.destroy_query = si_destroy_query;
1942bf215546Sopenharmony_ci   sctx->b.begin_query = si_begin_query;
1943bf215546Sopenharmony_ci   sctx->b.end_query = si_end_query;
1944bf215546Sopenharmony_ci   sctx->b.get_query_result = si_get_query_result;
1945bf215546Sopenharmony_ci   sctx->b.get_query_result_resource = si_get_query_result_resource;
1946bf215546Sopenharmony_ci
1947bf215546Sopenharmony_ci   if (sctx->has_graphics) {
1948bf215546Sopenharmony_ci      sctx->atoms.s.render_cond.emit = si_emit_query_predication;
1949bf215546Sopenharmony_ci      sctx->b.render_condition = si_render_condition;
1950bf215546Sopenharmony_ci   }
1951bf215546Sopenharmony_ci
1952bf215546Sopenharmony_ci   list_inithead(&sctx->active_queries);
1953bf215546Sopenharmony_ci}
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_civoid si_init_screen_query_functions(struct si_screen *sscreen)
1956bf215546Sopenharmony_ci{
1957bf215546Sopenharmony_ci   sscreen->b.get_driver_query_info = si_get_driver_query_info;
1958bf215546Sopenharmony_ci   sscreen->b.get_driver_query_group_info = si_get_driver_query_group_info;
1959bf215546Sopenharmony_ci}
1960