1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "si_pipe.h" 26bf215546Sopenharmony_ci#include "si_query.h" 27bf215546Sopenharmony_ci#include "sid.h" 28bf215546Sopenharmony_ci#include "util/u_memory.h" 29bf215546Sopenharmony_ci#include "util/u_suballoc.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include <stddef.h> 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistatic void emit_shader_query(struct si_context *sctx) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci assert(!list_is_empty(&sctx->shader_query_buffers)); 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *qbuf = 38bf215546Sopenharmony_ci list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 39bf215546Sopenharmony_ci qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem); 40bf215546Sopenharmony_ci} 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic void gfx10_release_query_buffers(struct si_context *sctx, 43bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *first, 44bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *last) 45bf215546Sopenharmony_ci{ 46bf215546Sopenharmony_ci while (first) { 47bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *qbuf = first; 48bf215546Sopenharmony_ci if (first != last) 49bf215546Sopenharmony_ci first = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list); 50bf215546Sopenharmony_ci else 51bf215546Sopenharmony_ci first = NULL; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci qbuf->refcount--; 54bf215546Sopenharmony_ci if (qbuf->refcount) 55bf215546Sopenharmony_ci continue; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci if (qbuf->list.next == &sctx->shader_query_buffers) 58bf215546Sopenharmony_ci continue; /* keep the most recent buffer; it may not be full yet */ 59bf215546Sopenharmony_ci if (qbuf->list.prev == &sctx->shader_query_buffers) 60bf215546Sopenharmony_ci continue; /* keep the oldest buffer for recycling */ 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci list_del(&qbuf->list); 63bf215546Sopenharmony_ci si_resource_reference(&qbuf->buf, NULL); 64bf215546Sopenharmony_ci FREE(qbuf); 65bf215546Sopenharmony_ci } 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic bool gfx10_alloc_query_buffer(struct si_context *sctx) 69bf215546Sopenharmony_ci{ 70bf215546Sopenharmony_ci if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) 71bf215546Sopenharmony_ci return true; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *qbuf = NULL; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci if (!list_is_empty(&sctx->shader_query_buffers)) { 76bf215546Sopenharmony_ci qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 77bf215546Sopenharmony_ci if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0) 78bf215546Sopenharmony_ci goto success; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 81bf215546Sopenharmony_ci if (!qbuf->refcount && 82bf215546Sopenharmony_ci !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) && 83bf215546Sopenharmony_ci sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) { 84bf215546Sopenharmony_ci /* Can immediately re-use the oldest buffer */ 85bf215546Sopenharmony_ci list_del(&qbuf->list); 86bf215546Sopenharmony_ci } else { 87bf215546Sopenharmony_ci qbuf = NULL; 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci } 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci if (!qbuf) { 92bf215546Sopenharmony_ci qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer); 93bf215546Sopenharmony_ci if (unlikely(!qbuf)) 94bf215546Sopenharmony_ci return false; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci struct si_screen *screen = sctx->screen; 97bf215546Sopenharmony_ci unsigned buf_size = 98bf215546Sopenharmony_ci MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size); 99bf215546Sopenharmony_ci qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size)); 100bf215546Sopenharmony_ci if (unlikely(!qbuf->buf)) { 101bf215546Sopenharmony_ci FREE(qbuf); 102bf215546Sopenharmony_ci return false; 103bf215546Sopenharmony_ci } 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* The buffer is currently unused by the GPU. Initialize it. 107bf215546Sopenharmony_ci * 108bf215546Sopenharmony_ci * We need to set the high bit of all the primitive counters for 109bf215546Sopenharmony_ci * compatibility with the SET_PREDICATION packet. 110bf215546Sopenharmony_ci */ 111bf215546Sopenharmony_ci uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, 112bf215546Sopenharmony_ci PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED); 113bf215546Sopenharmony_ci assert(results); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e; 116bf215546Sopenharmony_ci ++i) { 117bf215546Sopenharmony_ci for (unsigned j = 0; j < 16; ++j) 118bf215546Sopenharmony_ci results[32 * i + j] = (uint64_t)1 << 63; 119bf215546Sopenharmony_ci results[32 * i + 16] = 0; 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci list_addtail(&qbuf->list, &sctx->shader_query_buffers); 123bf215546Sopenharmony_ci qbuf->head = 0; 124bf215546Sopenharmony_ci qbuf->refcount = sctx->num_active_shader_queries; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_cisuccess:; 127bf215546Sopenharmony_ci struct pipe_shader_buffer sbuf; 128bf215546Sopenharmony_ci sbuf.buffer = &qbuf->buf->b.b; 129bf215546Sopenharmony_ci sbuf.buffer_offset = qbuf->head; 130bf215546Sopenharmony_ci sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem); 131bf215546Sopenharmony_ci si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf); 132bf215546Sopenharmony_ci SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1); 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query); 135bf215546Sopenharmony_ci return true; 136bf215546Sopenharmony_ci} 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_cistatic void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 141bf215546Sopenharmony_ci gfx10_release_query_buffers(sctx, query->first, query->last); 142bf215546Sopenharmony_ci FREE(query); 143bf215546Sopenharmony_ci} 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_cistatic bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci gfx10_release_query_buffers(sctx, query->first, query->last); 150bf215546Sopenharmony_ci query->first = query->last = NULL; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci if (unlikely(!gfx10_alloc_query_buffer(sctx))) 153bf215546Sopenharmony_ci return false; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 156bf215546Sopenharmony_ci query->first_begin = query->first->head; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci sctx->num_active_shader_queries++; 159bf215546Sopenharmony_ci query->first->refcount++; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci return true; 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_cistatic bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery) 165bf215546Sopenharmony_ci{ 166bf215546Sopenharmony_ci struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci if (unlikely(!query->first)) 169bf215546Sopenharmony_ci return false; /* earlier out of memory error */ 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 172bf215546Sopenharmony_ci query->last_end = query->last->head; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* Signal the fence of the previous chunk */ 175bf215546Sopenharmony_ci if (query->last_end != 0) { 176bf215546Sopenharmony_ci uint64_t fence_va = query->last->buf->gpu_address; 177bf215546Sopenharmony_ci fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem); 178bf215546Sopenharmony_ci fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence); 179bf215546Sopenharmony_ci si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, 180bf215546Sopenharmony_ci EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va, 181bf215546Sopenharmony_ci 0xffffffff, PIPE_QUERY_GPU_FINISHED); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci sctx->num_active_shader_queries--; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) { 187bf215546Sopenharmony_ci si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL); 188bf215546Sopenharmony_ci SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* If a query_begin is followed by a query_end without a draw 191bf215546Sopenharmony_ci * in-between, we need to clear the atom to ensure that the 192bf215546Sopenharmony_ci * next query_begin will re-initialize the shader buffer. */ 193bf215546Sopenharmony_ci si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false); 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci return true; 197bf215546Sopenharmony_ci} 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_cistatic void gfx10_sh_query_add_result(struct gfx10_sh_query *query, 200bf215546Sopenharmony_ci struct gfx10_sh_query_buffer_mem *qmem, 201bf215546Sopenharmony_ci union pipe_query_result *result) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci static const uint64_t mask = ((uint64_t)1 << 63) - 1; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci switch (query->b.type) { 206bf215546Sopenharmony_ci case PIPE_QUERY_PRIMITIVES_EMITTED: 207bf215546Sopenharmony_ci result->u64 += qmem->stream[query->stream].emitted_primitives & mask; 208bf215546Sopenharmony_ci break; 209bf215546Sopenharmony_ci case PIPE_QUERY_PRIMITIVES_GENERATED: 210bf215546Sopenharmony_ci result->u64 += qmem->stream[query->stream].generated_primitives & mask; 211bf215546Sopenharmony_ci break; 212bf215546Sopenharmony_ci case PIPE_QUERY_SO_STATISTICS: 213bf215546Sopenharmony_ci result->so_statistics.num_primitives_written += 214bf215546Sopenharmony_ci qmem->stream[query->stream].emitted_primitives & mask; 215bf215546Sopenharmony_ci result->so_statistics.primitives_storage_needed += 216bf215546Sopenharmony_ci qmem->stream[query->stream].generated_primitives & mask; 217bf215546Sopenharmony_ci break; 218bf215546Sopenharmony_ci case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 219bf215546Sopenharmony_ci result->b |= qmem->stream[query->stream].emitted_primitives != 220bf215546Sopenharmony_ci qmem->stream[query->stream].generated_primitives; 221bf215546Sopenharmony_ci break; 222bf215546Sopenharmony_ci case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 223bf215546Sopenharmony_ci for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) { 224bf215546Sopenharmony_ci result->b |= qmem->stream[stream].emitted_primitives != 225bf215546Sopenharmony_ci qmem->stream[stream].generated_primitives; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci break; 228bf215546Sopenharmony_ci default: 229bf215546Sopenharmony_ci assert(0); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci} 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_cistatic bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait, 234bf215546Sopenharmony_ci union pipe_query_result *result) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci util_query_clear_result(result, query->b.type); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci if (unlikely(!query->first)) 241bf215546Sopenharmony_ci return false; /* earlier out of memory error */ 242bf215546Sopenharmony_ci assert(query->last); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci for (struct gfx10_sh_query_buffer *qbuf = query->last;; 245bf215546Sopenharmony_ci qbuf = list_entry(qbuf->list.prev, struct gfx10_sh_query_buffer, list)) { 246bf215546Sopenharmony_ci unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK); 247bf215546Sopenharmony_ci void *map; 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci if (rquery->b.flushed) 250bf215546Sopenharmony_ci map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage); 251bf215546Sopenharmony_ci else 252bf215546Sopenharmony_ci map = si_buffer_map(sctx, qbuf->buf, usage); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci if (!map) 255bf215546Sopenharmony_ci return false; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci unsigned results_begin = 0; 258bf215546Sopenharmony_ci unsigned results_end = qbuf->head; 259bf215546Sopenharmony_ci if (qbuf == query->first) 260bf215546Sopenharmony_ci results_begin = query->first_begin; 261bf215546Sopenharmony_ci if (qbuf == query->last) 262bf215546Sopenharmony_ci results_end = query->last_end; 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci while (results_begin != results_end) { 265bf215546Sopenharmony_ci struct gfx10_sh_query_buffer_mem *qmem = map + results_begin; 266bf215546Sopenharmony_ci results_begin += sizeof(*qmem); 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci gfx10_sh_query_add_result(query, qmem, result); 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci if (qbuf == query->first) 272bf215546Sopenharmony_ci break; 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci return true; 276bf215546Sopenharmony_ci} 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_cistatic void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery, 279bf215546Sopenharmony_ci enum pipe_query_flags flags, 280bf215546Sopenharmony_ci enum pipe_query_value_type result_type, 281bf215546Sopenharmony_ci int index, struct pipe_resource *resource, 282bf215546Sopenharmony_ci unsigned offset) 283bf215546Sopenharmony_ci{ 284bf215546Sopenharmony_ci struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 285bf215546Sopenharmony_ci struct si_qbo_state saved_state = {}; 286bf215546Sopenharmony_ci struct pipe_resource *tmp_buffer = NULL; 287bf215546Sopenharmony_ci unsigned tmp_buffer_offset = 0; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci if (!sctx->sh_query_result_shader) { 290bf215546Sopenharmony_ci sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx); 291bf215546Sopenharmony_ci if (!sctx->sh_query_result_shader) 292bf215546Sopenharmony_ci return; 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (query->first != query->last) { 296bf215546Sopenharmony_ci u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer); 297bf215546Sopenharmony_ci if (!tmp_buffer) 298bf215546Sopenharmony_ci return; 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci si_save_qbo_state(sctx, &saved_state); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci /* Pre-fill the constants configuring the shader behavior. */ 304bf215546Sopenharmony_ci struct { 305bf215546Sopenharmony_ci uint32_t config; 306bf215546Sopenharmony_ci uint32_t offset; 307bf215546Sopenharmony_ci uint32_t chain; 308bf215546Sopenharmony_ci uint32_t result_count; 309bf215546Sopenharmony_ci } consts; 310bf215546Sopenharmony_ci struct pipe_constant_buffer constant_buffer = {}; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci if (index >= 0) { 313bf215546Sopenharmony_ci switch (query->b.type) { 314bf215546Sopenharmony_ci case PIPE_QUERY_PRIMITIVES_GENERATED: 315bf215546Sopenharmony_ci consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t); 316bf215546Sopenharmony_ci consts.config = 0; 317bf215546Sopenharmony_ci break; 318bf215546Sopenharmony_ci case PIPE_QUERY_PRIMITIVES_EMITTED: 319bf215546Sopenharmony_ci consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t); 320bf215546Sopenharmony_ci consts.config = 0; 321bf215546Sopenharmony_ci break; 322bf215546Sopenharmony_ci case PIPE_QUERY_SO_STATISTICS: 323bf215546Sopenharmony_ci consts.offset = sizeof(uint32_t) * (4 * index + query->stream); 324bf215546Sopenharmony_ci consts.config = 0; 325bf215546Sopenharmony_ci break; 326bf215546Sopenharmony_ci case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 327bf215546Sopenharmony_ci consts.offset = 4 * sizeof(uint64_t) * query->stream; 328bf215546Sopenharmony_ci consts.config = 2; 329bf215546Sopenharmony_ci break; 330bf215546Sopenharmony_ci case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 331bf215546Sopenharmony_ci consts.offset = 0; 332bf215546Sopenharmony_ci consts.config = 3; 333bf215546Sopenharmony_ci break; 334bf215546Sopenharmony_ci default: 335bf215546Sopenharmony_ci unreachable("bad query type"); 336bf215546Sopenharmony_ci } 337bf215546Sopenharmony_ci } else { 338bf215546Sopenharmony_ci /* Check result availability. */ 339bf215546Sopenharmony_ci consts.offset = 0; 340bf215546Sopenharmony_ci consts.config = 1; 341bf215546Sopenharmony_ci } 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64) 344bf215546Sopenharmony_ci consts.config |= 8; 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci constant_buffer.buffer_size = sizeof(consts); 347bf215546Sopenharmony_ci constant_buffer.user_buffer = &consts; 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Pre-fill the SSBOs and grid. */ 350bf215546Sopenharmony_ci struct pipe_shader_buffer ssbo[3]; 351bf215546Sopenharmony_ci struct pipe_grid_info grid = {}; 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci ssbo[1].buffer = tmp_buffer; 354bf215546Sopenharmony_ci ssbo[1].buffer_offset = tmp_buffer_offset; 355bf215546Sopenharmony_ci ssbo[1].buffer_size = 16; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci ssbo[2] = ssbo[1]; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci grid.block[0] = 1; 360bf215546Sopenharmony_ci grid.block[1] = 1; 361bf215546Sopenharmony_ci grid.block[2] = 1; 362bf215546Sopenharmony_ci grid.grid[0] = 1; 363bf215546Sopenharmony_ci grid.grid[1] = 1; 364bf215546Sopenharmony_ci grid.grid[2] = 1; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *qbuf = query->first; 367bf215546Sopenharmony_ci for (;;) { 368bf215546Sopenharmony_ci unsigned begin = qbuf == query->first ? query->first_begin : 0; 369bf215546Sopenharmony_ci unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0; 370bf215546Sopenharmony_ci if (!end) 371bf215546Sopenharmony_ci continue; 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci ssbo[0].buffer = &qbuf->buf->b.b; 374bf215546Sopenharmony_ci ssbo[0].buffer_offset = begin; 375bf215546Sopenharmony_ci ssbo[0].buffer_size = end - begin; 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem); 378bf215546Sopenharmony_ci consts.chain = 0; 379bf215546Sopenharmony_ci if (qbuf != query->first) 380bf215546Sopenharmony_ci consts.chain |= 1; 381bf215546Sopenharmony_ci if (qbuf != query->last) 382bf215546Sopenharmony_ci consts.chain |= 2; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci if (qbuf == query->last) { 385bf215546Sopenharmony_ci ssbo[2].buffer = resource; 386bf215546Sopenharmony_ci ssbo[2].buffer_offset = offset; 387bf215546Sopenharmony_ci ssbo[2].buffer_size = 8; 388bf215546Sopenharmony_ci } 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci if (flags & PIPE_QUERY_WAIT) { 393bf215546Sopenharmony_ci uint64_t va; 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci /* Wait for result availability. Wait only for readiness 396bf215546Sopenharmony_ci * of the last entry, since the fence writes should be 397bf215546Sopenharmony_ci * serialized in the CP. 398bf215546Sopenharmony_ci */ 399bf215546Sopenharmony_ci va = qbuf->buf->gpu_address; 400bf215546Sopenharmony_ci va += end - sizeof(struct gfx10_sh_query_buffer_mem); 401bf215546Sopenharmony_ci va += offsetof(struct gfx10_sh_query_buffer_mem, fence); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci /* ssbo[2] is either tmp_buffer or resource */ 407bf215546Sopenharmony_ci assert(ssbo[2].buffer); 408bf215546Sopenharmony_ci si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader, 409bf215546Sopenharmony_ci SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER, 410bf215546Sopenharmony_ci 3, ssbo, (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0)); 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci if (qbuf == query->last) 413bf215546Sopenharmony_ci break; 414bf215546Sopenharmony_ci qbuf = list_entry(qbuf->list.next, struct gfx10_sh_query_buffer, list); 415bf215546Sopenharmony_ci } 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci si_restore_qbo_state(sctx, &saved_state); 418bf215546Sopenharmony_ci pipe_resource_reference(&tmp_buffer, NULL); 419bf215546Sopenharmony_ci} 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_cistatic const struct si_query_ops gfx10_sh_query_ops = { 422bf215546Sopenharmony_ci .destroy = gfx10_sh_query_destroy, 423bf215546Sopenharmony_ci .begin = gfx10_sh_query_begin, 424bf215546Sopenharmony_ci .end = gfx10_sh_query_end, 425bf215546Sopenharmony_ci .get_result = gfx10_sh_query_get_result, 426bf215546Sopenharmony_ci .get_result_resource = gfx10_sh_query_get_result_resource, 427bf215546Sopenharmony_ci}; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_cistruct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type, 430bf215546Sopenharmony_ci unsigned index) 431bf215546Sopenharmony_ci{ 432bf215546Sopenharmony_ci struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query); 433bf215546Sopenharmony_ci if (unlikely(!query)) 434bf215546Sopenharmony_ci return NULL; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci query->b.ops = &gfx10_sh_query_ops; 437bf215546Sopenharmony_ci query->b.type = query_type; 438bf215546Sopenharmony_ci query->stream = index; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci return (struct pipe_query *)query; 441bf215546Sopenharmony_ci} 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_civoid gfx10_init_query(struct si_context *sctx) 444bf215546Sopenharmony_ci{ 445bf215546Sopenharmony_ci list_inithead(&sctx->shader_query_buffers); 446bf215546Sopenharmony_ci sctx->atoms.s.shader_query.emit = emit_shader_query; 447bf215546Sopenharmony_ci} 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_civoid gfx10_destroy_query(struct si_context *sctx) 450bf215546Sopenharmony_ci{ 451bf215546Sopenharmony_ci if (!sctx->shader_query_buffers.next) 452bf215546Sopenharmony_ci return; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci while (!list_is_empty(&sctx->shader_query_buffers)) { 455bf215546Sopenharmony_ci struct gfx10_sh_query_buffer *qbuf = 456bf215546Sopenharmony_ci list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 457bf215546Sopenharmony_ci list_del(&qbuf->list); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci assert(!qbuf->refcount); 460bf215546Sopenharmony_ci si_resource_reference(&qbuf->buf, NULL); 461bf215546Sopenharmony_ci FREE(qbuf); 462bf215546Sopenharmony_ci } 463bf215546Sopenharmony_ci} 464