1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci * All Rights Reserved.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "si_pipe.h"
26bf215546Sopenharmony_ci#include "si_query.h"
27bf215546Sopenharmony_ci#include "sid.h"
28bf215546Sopenharmony_ci#include "util/u_memory.h"
29bf215546Sopenharmony_ci#include "util/u_suballoc.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include <stddef.h>
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cistatic void emit_shader_query(struct si_context *sctx)
34bf215546Sopenharmony_ci{
35bf215546Sopenharmony_ci   assert(!list_is_empty(&sctx->shader_query_buffers));
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   struct gfx10_sh_query_buffer *qbuf =
38bf215546Sopenharmony_ci      list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
39bf215546Sopenharmony_ci   qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
40bf215546Sopenharmony_ci}
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cistatic void gfx10_release_query_buffers(struct si_context *sctx,
43bf215546Sopenharmony_ci                                        struct gfx10_sh_query_buffer *first,
44bf215546Sopenharmony_ci                                        struct gfx10_sh_query_buffer *last)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci   while (first) {
47bf215546Sopenharmony_ci      struct gfx10_sh_query_buffer *qbuf = first;
48bf215546Sopenharmony_ci      if (first != last)
49bf215546Sopenharmony_ci         first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
50bf215546Sopenharmony_ci      else
51bf215546Sopenharmony_ci         first = NULL;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci      qbuf->refcount--;
54bf215546Sopenharmony_ci      if (qbuf->refcount)
55bf215546Sopenharmony_ci         continue;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci      if (qbuf->list.next == &sctx->shader_query_buffers)
58bf215546Sopenharmony_ci         continue; /* keep the most recent buffer; it may not be full yet */
59bf215546Sopenharmony_ci      if (qbuf->list.prev == &sctx->shader_query_buffers)
60bf215546Sopenharmony_ci         continue; /* keep the oldest buffer for recycling */
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci      list_del(&qbuf->list);
63bf215546Sopenharmony_ci      si_resource_reference(&qbuf->buf, NULL);
64bf215546Sopenharmony_ci      FREE(qbuf);
65bf215546Sopenharmony_ci   }
66bf215546Sopenharmony_ci}
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_cistatic bool gfx10_alloc_query_buffer(struct si_context *sctx)
69bf215546Sopenharmony_ci{
70bf215546Sopenharmony_ci   if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
71bf215546Sopenharmony_ci      return true;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   struct gfx10_sh_query_buffer *qbuf = NULL;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   if (!list_is_empty(&sctx->shader_query_buffers)) {
76bf215546Sopenharmony_ci      qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
77bf215546Sopenharmony_ci      if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
78bf215546Sopenharmony_ci         goto success;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci      qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
81bf215546Sopenharmony_ci      if (!qbuf->refcount &&
82bf215546Sopenharmony_ci          !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
83bf215546Sopenharmony_ci          sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
84bf215546Sopenharmony_ci         /* Can immediately re-use the oldest buffer */
85bf215546Sopenharmony_ci         list_del(&qbuf->list);
86bf215546Sopenharmony_ci      } else {
87bf215546Sopenharmony_ci         qbuf = NULL;
88bf215546Sopenharmony_ci      }
89bf215546Sopenharmony_ci   }
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   if (!qbuf) {
92bf215546Sopenharmony_ci      qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
93bf215546Sopenharmony_ci      if (unlikely(!qbuf))
94bf215546Sopenharmony_ci         return false;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci      struct si_screen *screen = sctx->screen;
97bf215546Sopenharmony_ci      unsigned buf_size =
98bf215546Sopenharmony_ci         MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);
99bf215546Sopenharmony_ci      qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
100bf215546Sopenharmony_ci      if (unlikely(!qbuf->buf)) {
101bf215546Sopenharmony_ci         FREE(qbuf);
102bf215546Sopenharmony_ci         return false;
103bf215546Sopenharmony_ci      }
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   /* The buffer is currently unused by the GPU. Initialize it.
107bf215546Sopenharmony_ci    *
108bf215546Sopenharmony_ci    * We need to set the high bit of all the primitive counters for
109bf215546Sopenharmony_ci    * compatibility with the SET_PREDICATION packet.
110bf215546Sopenharmony_ci    */
111bf215546Sopenharmony_ci   uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
112bf215546Sopenharmony_ci                                            PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
113bf215546Sopenharmony_ci   assert(results);
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;
116bf215546Sopenharmony_ci        ++i) {
117bf215546Sopenharmony_ci      for (unsigned j = 0; j < 16; ++j)
118bf215546Sopenharmony_ci         results[32 * i + j] = (uint64_t)1 << 63;
119bf215546Sopenharmony_ci      results[32 * i + 16] = 0;
120bf215546Sopenharmony_ci   }
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   list_addtail(&qbuf->list, &sctx->shader_query_buffers);
123bf215546Sopenharmony_ci   qbuf->head = 0;
124bf215546Sopenharmony_ci   qbuf->refcount = sctx->num_active_shader_queries;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_cisuccess:;
127bf215546Sopenharmony_ci   struct pipe_shader_buffer sbuf;
128bf215546Sopenharmony_ci   sbuf.buffer = &qbuf->buf->b.b;
129bf215546Sopenharmony_ci   sbuf.buffer_offset = qbuf->head;
130bf215546Sopenharmony_ci   sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
131bf215546Sopenharmony_ci   si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
132bf215546Sopenharmony_ci   SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
135bf215546Sopenharmony_ci   return true;
136bf215546Sopenharmony_ci}
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_cistatic void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
139bf215546Sopenharmony_ci{
140bf215546Sopenharmony_ci   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
141bf215546Sopenharmony_ci   gfx10_release_query_buffers(sctx, query->first, query->last);
142bf215546Sopenharmony_ci   FREE(query);
143bf215546Sopenharmony_ci}
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_cistatic bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
146bf215546Sopenharmony_ci{
147bf215546Sopenharmony_ci   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   gfx10_release_query_buffers(sctx, query->first, query->last);
150bf215546Sopenharmony_ci   query->first = query->last = NULL;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   if (unlikely(!gfx10_alloc_query_buffer(sctx)))
153bf215546Sopenharmony_ci      return false;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
156bf215546Sopenharmony_ci   query->first_begin = query->first->head;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   sctx->num_active_shader_queries++;
159bf215546Sopenharmony_ci   query->first->refcount++;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   return true;
162bf215546Sopenharmony_ci}
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_cistatic bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
165bf215546Sopenharmony_ci{
166bf215546Sopenharmony_ci   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   if (unlikely(!query->first))
169bf215546Sopenharmony_ci      return false; /* earlier out of memory error */
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
172bf215546Sopenharmony_ci   query->last_end = query->last->head;
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   /* Signal the fence of the previous chunk */
175bf215546Sopenharmony_ci   if (query->last_end != 0) {
176bf215546Sopenharmony_ci      uint64_t fence_va = query->last->buf->gpu_address;
177bf215546Sopenharmony_ci      fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
178bf215546Sopenharmony_ci      fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
179bf215546Sopenharmony_ci      si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
180bf215546Sopenharmony_ci                        EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
181bf215546Sopenharmony_ci                        0xffffffff, PIPE_QUERY_GPU_FINISHED);
182bf215546Sopenharmony_ci   }
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   sctx->num_active_shader_queries--;
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
187bf215546Sopenharmony_ci      si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
188bf215546Sopenharmony_ci      SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0);
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci      /* If a query_begin is followed by a query_end without a draw
191bf215546Sopenharmony_ci       * in-between, we need to clear the atom to ensure that the
192bf215546Sopenharmony_ci       * next query_begin will re-initialize the shader buffer. */
193bf215546Sopenharmony_ci      si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
194bf215546Sopenharmony_ci   }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   return true;
197bf215546Sopenharmony_ci}
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_cistatic void gfx10_sh_query_add_result(struct gfx10_sh_query *query,
200bf215546Sopenharmony_ci                                      struct gfx10_sh_query_buffer_mem *qmem,
201bf215546Sopenharmony_ci                                      union pipe_query_result *result)
202bf215546Sopenharmony_ci{
203bf215546Sopenharmony_ci   static const uint64_t mask = ((uint64_t)1 << 63) - 1;
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   switch (query->b.type) {
206bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_EMITTED:
207bf215546Sopenharmony_ci      result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
208bf215546Sopenharmony_ci      break;
209bf215546Sopenharmony_ci   case PIPE_QUERY_PRIMITIVES_GENERATED:
210bf215546Sopenharmony_ci      result->u64 += qmem->stream[query->stream].generated_primitives & mask;
211bf215546Sopenharmony_ci      break;
212bf215546Sopenharmony_ci   case PIPE_QUERY_SO_STATISTICS:
213bf215546Sopenharmony_ci      result->so_statistics.num_primitives_written +=
214bf215546Sopenharmony_ci         qmem->stream[query->stream].emitted_primitives & mask;
215bf215546Sopenharmony_ci      result->so_statistics.primitives_storage_needed +=
216bf215546Sopenharmony_ci         qmem->stream[query->stream].generated_primitives & mask;
217bf215546Sopenharmony_ci      break;
218bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
219bf215546Sopenharmony_ci      result->b |= qmem->stream[query->stream].emitted_primitives !=
220bf215546Sopenharmony_ci                   qmem->stream[query->stream].generated_primitives;
221bf215546Sopenharmony_ci      break;
222bf215546Sopenharmony_ci   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
223bf215546Sopenharmony_ci      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
224bf215546Sopenharmony_ci         result->b |= qmem->stream[stream].emitted_primitives !=
225bf215546Sopenharmony_ci                      qmem->stream[stream].generated_primitives;
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci      break;
228bf215546Sopenharmony_ci   default:
229bf215546Sopenharmony_ci      assert(0);
230bf215546Sopenharmony_ci   }
231bf215546Sopenharmony_ci}
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
234bf215546Sopenharmony_ci                                      union pipe_query_result *result)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   util_query_clear_result(result, query->b.type);
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   if (unlikely(!query->first))
241bf215546Sopenharmony_ci      return false; /* earlier out of memory error */
242bf215546Sopenharmony_ci   assert(query->last);
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci   for (struct gfx10_sh_query_buffer *qbuf = query->last;;
245bf215546Sopenharmony_ci        qbuf = list_entry(qbuf->list.prev, struct gfx10_sh_query_buffer, list)) {
246bf215546Sopenharmony_ci      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
247bf215546Sopenharmony_ci      void *map;
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci      if (rquery->b.flushed)
250bf215546Sopenharmony_ci         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
251bf215546Sopenharmony_ci      else
252bf215546Sopenharmony_ci         map = si_buffer_map(sctx, qbuf->buf, usage);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci      if (!map)
255bf215546Sopenharmony_ci         return false;
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci      unsigned results_begin = 0;
258bf215546Sopenharmony_ci      unsigned results_end = qbuf->head;
259bf215546Sopenharmony_ci      if (qbuf == query->first)
260bf215546Sopenharmony_ci         results_begin = query->first_begin;
261bf215546Sopenharmony_ci      if (qbuf == query->last)
262bf215546Sopenharmony_ci         results_end = query->last_end;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      while (results_begin != results_end) {
265bf215546Sopenharmony_ci         struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
266bf215546Sopenharmony_ci         results_begin += sizeof(*qmem);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci         gfx10_sh_query_add_result(query, qmem, result);
269bf215546Sopenharmony_ci      }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci      if (qbuf == query->first)
272bf215546Sopenharmony_ci         break;
273bf215546Sopenharmony_ci   }
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci   return true;
276bf215546Sopenharmony_ci}
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_cistatic void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
279bf215546Sopenharmony_ci                                               enum pipe_query_flags flags,
280bf215546Sopenharmony_ci                                               enum pipe_query_value_type result_type,
281bf215546Sopenharmony_ci                                               int index, struct pipe_resource *resource,
282bf215546Sopenharmony_ci                                               unsigned offset)
283bf215546Sopenharmony_ci{
284bf215546Sopenharmony_ci   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
285bf215546Sopenharmony_ci   struct si_qbo_state saved_state = {};
286bf215546Sopenharmony_ci   struct pipe_resource *tmp_buffer = NULL;
287bf215546Sopenharmony_ci   unsigned tmp_buffer_offset = 0;
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   if (!sctx->sh_query_result_shader) {
290bf215546Sopenharmony_ci      sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
291bf215546Sopenharmony_ci      if (!sctx->sh_query_result_shader)
292bf215546Sopenharmony_ci         return;
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   if (query->first != query->last) {
296bf215546Sopenharmony_ci      u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
297bf215546Sopenharmony_ci      if (!tmp_buffer)
298bf215546Sopenharmony_ci         return;
299bf215546Sopenharmony_ci   }
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   si_save_qbo_state(sctx, &saved_state);
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   /* Pre-fill the constants configuring the shader behavior. */
304bf215546Sopenharmony_ci   struct {
305bf215546Sopenharmony_ci      uint32_t config;
306bf215546Sopenharmony_ci      uint32_t offset;
307bf215546Sopenharmony_ci      uint32_t chain;
308bf215546Sopenharmony_ci      uint32_t result_count;
309bf215546Sopenharmony_ci   } consts;
310bf215546Sopenharmony_ci   struct pipe_constant_buffer constant_buffer = {};
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   if (index >= 0) {
313bf215546Sopenharmony_ci      switch (query->b.type) {
314bf215546Sopenharmony_ci      case PIPE_QUERY_PRIMITIVES_GENERATED:
315bf215546Sopenharmony_ci         consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
316bf215546Sopenharmony_ci         consts.config = 0;
317bf215546Sopenharmony_ci         break;
318bf215546Sopenharmony_ci      case PIPE_QUERY_PRIMITIVES_EMITTED:
319bf215546Sopenharmony_ci         consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
320bf215546Sopenharmony_ci         consts.config = 0;
321bf215546Sopenharmony_ci         break;
322bf215546Sopenharmony_ci      case PIPE_QUERY_SO_STATISTICS:
323bf215546Sopenharmony_ci         consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
324bf215546Sopenharmony_ci         consts.config = 0;
325bf215546Sopenharmony_ci         break;
326bf215546Sopenharmony_ci      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
327bf215546Sopenharmony_ci         consts.offset = 4 * sizeof(uint64_t) * query->stream;
328bf215546Sopenharmony_ci         consts.config = 2;
329bf215546Sopenharmony_ci         break;
330bf215546Sopenharmony_ci      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
331bf215546Sopenharmony_ci         consts.offset = 0;
332bf215546Sopenharmony_ci         consts.config = 3;
333bf215546Sopenharmony_ci         break;
334bf215546Sopenharmony_ci      default:
335bf215546Sopenharmony_ci         unreachable("bad query type");
336bf215546Sopenharmony_ci      }
337bf215546Sopenharmony_ci   } else {
338bf215546Sopenharmony_ci      /* Check result availability. */
339bf215546Sopenharmony_ci      consts.offset = 0;
340bf215546Sopenharmony_ci      consts.config = 1;
341bf215546Sopenharmony_ci   }
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci   if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
344bf215546Sopenharmony_ci      consts.config |= 8;
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   constant_buffer.buffer_size = sizeof(consts);
347bf215546Sopenharmony_ci   constant_buffer.user_buffer = &consts;
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   /* Pre-fill the SSBOs and grid. */
350bf215546Sopenharmony_ci   struct pipe_shader_buffer ssbo[3];
351bf215546Sopenharmony_ci   struct pipe_grid_info grid = {};
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   ssbo[1].buffer = tmp_buffer;
354bf215546Sopenharmony_ci   ssbo[1].buffer_offset = tmp_buffer_offset;
355bf215546Sopenharmony_ci   ssbo[1].buffer_size = 16;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   ssbo[2] = ssbo[1];
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   grid.block[0] = 1;
360bf215546Sopenharmony_ci   grid.block[1] = 1;
361bf215546Sopenharmony_ci   grid.block[2] = 1;
362bf215546Sopenharmony_ci   grid.grid[0] = 1;
363bf215546Sopenharmony_ci   grid.grid[1] = 1;
364bf215546Sopenharmony_ci   grid.grid[2] = 1;
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   struct gfx10_sh_query_buffer *qbuf = query->first;
367bf215546Sopenharmony_ci   for (;;) {
368bf215546Sopenharmony_ci      unsigned begin = qbuf == query->first ? query->first_begin : 0;
369bf215546Sopenharmony_ci      unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
370bf215546Sopenharmony_ci      if (!end)
371bf215546Sopenharmony_ci         continue;
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci      ssbo[0].buffer = &qbuf->buf->b.b;
374bf215546Sopenharmony_ci      ssbo[0].buffer_offset = begin;
375bf215546Sopenharmony_ci      ssbo[0].buffer_size = end - begin;
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci      consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
378bf215546Sopenharmony_ci      consts.chain = 0;
379bf215546Sopenharmony_ci      if (qbuf != query->first)
380bf215546Sopenharmony_ci         consts.chain |= 1;
381bf215546Sopenharmony_ci      if (qbuf != query->last)
382bf215546Sopenharmony_ci         consts.chain |= 2;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci      if (qbuf == query->last) {
385bf215546Sopenharmony_ci         ssbo[2].buffer = resource;
386bf215546Sopenharmony_ci         ssbo[2].buffer_offset = offset;
387bf215546Sopenharmony_ci         ssbo[2].buffer_size = 8;
388bf215546Sopenharmony_ci      }
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci      sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci      if (flags & PIPE_QUERY_WAIT) {
393bf215546Sopenharmony_ci         uint64_t va;
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci         /* Wait for result availability. Wait only for readiness
396bf215546Sopenharmony_ci          * of the last entry, since the fence writes should be
397bf215546Sopenharmony_ci          * serialized in the CP.
398bf215546Sopenharmony_ci          */
399bf215546Sopenharmony_ci         va = qbuf->buf->gpu_address;
400bf215546Sopenharmony_ci         va += end - sizeof(struct gfx10_sh_query_buffer_mem);
401bf215546Sopenharmony_ci         va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci         si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
404bf215546Sopenharmony_ci      }
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci      /* ssbo[2] is either tmp_buffer or resource */
407bf215546Sopenharmony_ci      assert(ssbo[2].buffer);
408bf215546Sopenharmony_ci      si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader,
409bf215546Sopenharmony_ci                                    SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,
410bf215546Sopenharmony_ci                                    3, ssbo, (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0));
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci      if (qbuf == query->last)
413bf215546Sopenharmony_ci         break;
414bf215546Sopenharmony_ci      qbuf = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list);
415bf215546Sopenharmony_ci   }
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   si_restore_qbo_state(sctx, &saved_state);
418bf215546Sopenharmony_ci   pipe_resource_reference(&tmp_buffer, NULL);
419bf215546Sopenharmony_ci}
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_cistatic const struct si_query_ops gfx10_sh_query_ops = {
422bf215546Sopenharmony_ci   .destroy = gfx10_sh_query_destroy,
423bf215546Sopenharmony_ci   .begin = gfx10_sh_query_begin,
424bf215546Sopenharmony_ci   .end = gfx10_sh_query_end,
425bf215546Sopenharmony_ci   .get_result = gfx10_sh_query_get_result,
426bf215546Sopenharmony_ci   .get_result_resource = gfx10_sh_query_get_result_resource,
427bf215546Sopenharmony_ci};
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_cistruct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
430bf215546Sopenharmony_ci                                         unsigned index)
431bf215546Sopenharmony_ci{
432bf215546Sopenharmony_ci   struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
433bf215546Sopenharmony_ci   if (unlikely(!query))
434bf215546Sopenharmony_ci      return NULL;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   query->b.ops = &gfx10_sh_query_ops;
437bf215546Sopenharmony_ci   query->b.type = query_type;
438bf215546Sopenharmony_ci   query->stream = index;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   return (struct pipe_query *)query;
441bf215546Sopenharmony_ci}
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_civoid gfx10_init_query(struct si_context *sctx)
444bf215546Sopenharmony_ci{
445bf215546Sopenharmony_ci   list_inithead(&sctx->shader_query_buffers);
446bf215546Sopenharmony_ci   sctx->atoms.s.shader_query.emit = emit_shader_query;
447bf215546Sopenharmony_ci}
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_civoid gfx10_destroy_query(struct si_context *sctx)
450bf215546Sopenharmony_ci{
451bf215546Sopenharmony_ci   if (!sctx->shader_query_buffers.next)
452bf215546Sopenharmony_ci      return;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   while (!list_is_empty(&sctx->shader_query_buffers)) {
455bf215546Sopenharmony_ci      struct gfx10_sh_query_buffer *qbuf =
456bf215546Sopenharmony_ci         list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
457bf215546Sopenharmony_ci      list_del(&qbuf->list);
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci      assert(!qbuf->refcount);
460bf215546Sopenharmony_ci      si_resource_reference(&qbuf->buf, NULL);
461bf215546Sopenharmony_ci      FREE(qbuf);
462bf215546Sopenharmony_ci   }
463bf215546Sopenharmony_ci}
464