1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *  Nicolai Hähnle <nicolai.haehnle@amd.com>
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "util/u_memory.h"
29bf215546Sopenharmony_ci#include "r600_query.h"
30bf215546Sopenharmony_ci#include "r600_pipe_common.h"
31bf215546Sopenharmony_ci#include "r600d_common.h"
32bf215546Sopenharmony_ci
/*
 * Upper bound on the number of counters a single HW block may expose.
 * It sizes the per-group selector array and is asserted against when a
 * block is registered.
 */
#define R600_QUERY_MAX_COUNTERS 16
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_cistatic struct r600_perfcounter_block *
37bf215546Sopenharmony_cilookup_counter(struct r600_perfcounters *pc, unsigned index,
38bf215546Sopenharmony_ci	       unsigned *base_gid, unsigned *sub_index)
39bf215546Sopenharmony_ci{
40bf215546Sopenharmony_ci	struct r600_perfcounter_block *block = pc->blocks;
41bf215546Sopenharmony_ci	unsigned bid;
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci	*base_gid = 0;
44bf215546Sopenharmony_ci	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
45bf215546Sopenharmony_ci		unsigned total = block->num_groups * block->num_selectors;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci		if (index < total) {
48bf215546Sopenharmony_ci			*sub_index = index;
49bf215546Sopenharmony_ci			return block;
50bf215546Sopenharmony_ci		}
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci		index -= total;
53bf215546Sopenharmony_ci		*base_gid += block->num_groups;
54bf215546Sopenharmony_ci	}
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci	return NULL;
57bf215546Sopenharmony_ci}
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_cistatic struct r600_perfcounter_block *
60bf215546Sopenharmony_cilookup_group(struct r600_perfcounters *pc, unsigned *index)
61bf215546Sopenharmony_ci{
62bf215546Sopenharmony_ci	unsigned bid;
63bf215546Sopenharmony_ci	struct r600_perfcounter_block *block = pc->blocks;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
66bf215546Sopenharmony_ci		if (*index < block->num_groups)
67bf215546Sopenharmony_ci			return block;
68bf215546Sopenharmony_ci		*index -= block->num_groups;
69bf215546Sopenharmony_ci	}
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci	return NULL;
72bf215546Sopenharmony_ci}
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_cistruct r600_pc_group {
75bf215546Sopenharmony_ci	struct r600_pc_group *next;
76bf215546Sopenharmony_ci	struct r600_perfcounter_block *block;
77bf215546Sopenharmony_ci	unsigned sub_gid; /* only used during init */
78bf215546Sopenharmony_ci	unsigned result_base; /* only used during init */
79bf215546Sopenharmony_ci	int se;
80bf215546Sopenharmony_ci	int instance;
81bf215546Sopenharmony_ci	unsigned num_counters;
82bf215546Sopenharmony_ci	unsigned selectors[R600_QUERY_MAX_COUNTERS];
83bf215546Sopenharmony_ci};
84bf215546Sopenharmony_ci
/*
 * Describes where the samples of one user-requested counter live in the
 * result buffer, viewed as an array of uint64s.
 */
struct r600_pc_counter {
	/* Index of the first sample. */
	unsigned base;
	/* Number of samples that are summed for this counter. */
	unsigned qwords;
	unsigned stride; /* in uint64s */
};
90bf215546Sopenharmony_ci
/*
 * Marker bit stored in r600_query_pc::shaders. It is masked out before
 * shader masks are compared. The constant is unsigned because shifting a
 * signed 1 into bit 31 is undefined behaviour.
 */
#define R600_PC_SHADERS_WINDOWING (1u << 31)
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistruct r600_query_pc {
94bf215546Sopenharmony_ci	struct r600_query_hw b;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci	unsigned shaders;
97bf215546Sopenharmony_ci	unsigned num_counters;
98bf215546Sopenharmony_ci	struct r600_pc_counter *counters;
99bf215546Sopenharmony_ci	struct r600_pc_group *groups;
100bf215546Sopenharmony_ci};
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_cistatic void r600_pc_query_destroy(struct r600_common_screen *rscreen,
103bf215546Sopenharmony_ci				  struct r600_query *rquery)
104bf215546Sopenharmony_ci{
105bf215546Sopenharmony_ci	struct r600_query_pc *query = (struct r600_query_pc *)rquery;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci	while (query->groups) {
108bf215546Sopenharmony_ci		struct r600_pc_group *group = query->groups;
109bf215546Sopenharmony_ci		query->groups = group->next;
110bf215546Sopenharmony_ci		FREE(group);
111bf215546Sopenharmony_ci	}
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci	FREE(query->counters);
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci	r600_query_hw_destroy(rscreen, rquery);
116bf215546Sopenharmony_ci}
117bf215546Sopenharmony_ci
/*
 * prepare_buffer hook for batch perfcounter queries.
 * Nothing has to be written into the buffer up front, so this always
 * reports success.
 */
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
					 struct r600_query_hw *hwquery,
					 struct r600_resource *buffer)
{
	(void)screen;
	(void)hwquery;
	(void)buffer;

	return true;
}
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_cistatic void r600_pc_query_emit_start(struct r600_common_context *ctx,
127bf215546Sopenharmony_ci				     struct r600_query_hw *hwquery,
128bf215546Sopenharmony_ci				     struct r600_resource *buffer, uint64_t va)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci	struct r600_perfcounters *pc = ctx->screen->perfcounters;
131bf215546Sopenharmony_ci	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
132bf215546Sopenharmony_ci	struct r600_pc_group *group;
133bf215546Sopenharmony_ci	int current_se = -1;
134bf215546Sopenharmony_ci	int current_instance = -1;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci	if (query->shaders)
137bf215546Sopenharmony_ci		pc->emit_shaders(ctx, query->shaders);
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci	for (group = query->groups; group; group = group->next) {
140bf215546Sopenharmony_ci		struct r600_perfcounter_block *block = group->block;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci		if (group->se != current_se || group->instance != current_instance) {
143bf215546Sopenharmony_ci			current_se = group->se;
144bf215546Sopenharmony_ci			current_instance = group->instance;
145bf215546Sopenharmony_ci			pc->emit_instance(ctx, group->se, group->instance);
146bf215546Sopenharmony_ci		}
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci		pc->emit_select(ctx, block, group->num_counters, group->selectors);
149bf215546Sopenharmony_ci	}
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci	if (current_se != -1 || current_instance != -1)
152bf215546Sopenharmony_ci		pc->emit_instance(ctx, -1, -1);
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci	pc->emit_start(ctx, buffer, va);
155bf215546Sopenharmony_ci}
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_cistatic void r600_pc_query_emit_stop(struct r600_common_context *ctx,
158bf215546Sopenharmony_ci				    struct r600_query_hw *hwquery,
159bf215546Sopenharmony_ci				    struct r600_resource *buffer, uint64_t va)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci	struct r600_perfcounters *pc = ctx->screen->perfcounters;
162bf215546Sopenharmony_ci	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
163bf215546Sopenharmony_ci	struct r600_pc_group *group;
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci	pc->emit_stop(ctx, buffer, va);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci	for (group = query->groups; group; group = group->next) {
168bf215546Sopenharmony_ci		struct r600_perfcounter_block *block = group->block;
169bf215546Sopenharmony_ci		unsigned se = group->se >= 0 ? group->se : 0;
170bf215546Sopenharmony_ci		unsigned se_end = se + 1;
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci		if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
173bf215546Sopenharmony_ci			se_end = ctx->screen->info.max_se;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci		do {
176bf215546Sopenharmony_ci			unsigned instance = group->instance >= 0 ? group->instance : 0;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci			do {
179bf215546Sopenharmony_ci				pc->emit_instance(ctx, se, instance);
180bf215546Sopenharmony_ci				pc->emit_read(ctx, block,
181bf215546Sopenharmony_ci					      group->num_counters, group->selectors,
182bf215546Sopenharmony_ci					      buffer, va);
183bf215546Sopenharmony_ci				va += sizeof(uint64_t) * group->num_counters;
184bf215546Sopenharmony_ci			} while (group->instance < 0 && ++instance < block->num_instances);
185bf215546Sopenharmony_ci		} while (++se < se_end);
186bf215546Sopenharmony_ci	}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci	pc->emit_instance(ctx, -1, -1);
189bf215546Sopenharmony_ci}
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_cistatic void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
192bf215546Sopenharmony_ci				       union pipe_query_result *result)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci	memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
197bf215546Sopenharmony_ci}
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_cistatic void r600_pc_query_add_result(struct r600_common_screen *rscreen,
200bf215546Sopenharmony_ci				     struct r600_query_hw *hwquery,
201bf215546Sopenharmony_ci				     void *buffer,
202bf215546Sopenharmony_ci				     union pipe_query_result *result)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
205bf215546Sopenharmony_ci	uint64_t *results = buffer;
206bf215546Sopenharmony_ci	unsigned i, j;
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci	for (i = 0; i < query->num_counters; ++i) {
209bf215546Sopenharmony_ci		struct r600_pc_counter *counter = &query->counters[i];
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci		for (j = 0; j < counter->qwords; ++j) {
212bf215546Sopenharmony_ci			uint32_t value = results[counter->base + j * counter->stride];
213bf215546Sopenharmony_ci			result->batch[i].u64 += value;
214bf215546Sopenharmony_ci		}
215bf215546Sopenharmony_ci	}
216bf215546Sopenharmony_ci}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_cistatic struct r600_query_ops batch_query_ops = {
219bf215546Sopenharmony_ci	.destroy = r600_pc_query_destroy,
220bf215546Sopenharmony_ci	.begin = r600_query_hw_begin,
221bf215546Sopenharmony_ci	.end = r600_query_hw_end,
222bf215546Sopenharmony_ci	.get_result = r600_query_hw_get_result
223bf215546Sopenharmony_ci};
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_cistatic struct r600_query_hw_ops batch_query_hw_ops = {
226bf215546Sopenharmony_ci	.prepare_buffer = r600_pc_query_prepare_buffer,
227bf215546Sopenharmony_ci	.emit_start = r600_pc_query_emit_start,
228bf215546Sopenharmony_ci	.emit_stop = r600_pc_query_emit_stop,
229bf215546Sopenharmony_ci	.clear_result = r600_pc_query_clear_result,
230bf215546Sopenharmony_ci	.add_result = r600_pc_query_add_result,
231bf215546Sopenharmony_ci};
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
234bf215546Sopenharmony_ci					     struct r600_query_pc *query,
235bf215546Sopenharmony_ci					     struct r600_perfcounter_block *block,
236bf215546Sopenharmony_ci					     unsigned sub_gid)
237bf215546Sopenharmony_ci{
238bf215546Sopenharmony_ci	struct r600_pc_group *group = query->groups;
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci	while (group) {
241bf215546Sopenharmony_ci		if (group->block == block && group->sub_gid == sub_gid)
242bf215546Sopenharmony_ci			return group;
243bf215546Sopenharmony_ci		group = group->next;
244bf215546Sopenharmony_ci	}
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci	group = CALLOC_STRUCT(r600_pc_group);
247bf215546Sopenharmony_ci	if (!group)
248bf215546Sopenharmony_ci		return NULL;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci	group->block = block;
251bf215546Sopenharmony_ci	group->sub_gid = sub_gid;
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SHADER) {
254bf215546Sopenharmony_ci		unsigned sub_gids = block->num_instances;
255bf215546Sopenharmony_ci		unsigned shader_id;
256bf215546Sopenharmony_ci		unsigned shaders;
257bf215546Sopenharmony_ci		unsigned query_shaders;
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
260bf215546Sopenharmony_ci			sub_gids = sub_gids * screen->info.max_se;
261bf215546Sopenharmony_ci		shader_id = sub_gid / sub_gids;
262bf215546Sopenharmony_ci		sub_gid = sub_gid % sub_gids;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci		shaders = screen->perfcounters->shader_type_bits[shader_id];
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
267bf215546Sopenharmony_ci		if (query_shaders && query_shaders != shaders) {
268bf215546Sopenharmony_ci			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
269bf215546Sopenharmony_ci			FREE(group);
270bf215546Sopenharmony_ci			return NULL;
271bf215546Sopenharmony_ci		}
272bf215546Sopenharmony_ci		query->shaders = shaders;
273bf215546Sopenharmony_ci	}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
276bf215546Sopenharmony_ci		// A non-zero value in query->shaders ensures that the shader
277bf215546Sopenharmony_ci		// masking is reset unless the user explicitly requests one.
278bf215546Sopenharmony_ci		query->shaders = R600_PC_SHADERS_WINDOWING;
279bf215546Sopenharmony_ci	}
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
282bf215546Sopenharmony_ci		group->se = sub_gid / block->num_instances;
283bf215546Sopenharmony_ci		sub_gid = sub_gid % block->num_instances;
284bf215546Sopenharmony_ci	} else {
285bf215546Sopenharmony_ci		group->se = -1;
286bf215546Sopenharmony_ci	}
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
289bf215546Sopenharmony_ci		group->instance = sub_gid;
290bf215546Sopenharmony_ci	} else {
291bf215546Sopenharmony_ci		group->instance = -1;
292bf215546Sopenharmony_ci	}
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci	group->next = query->groups;
295bf215546Sopenharmony_ci	query->groups = group;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci	return group;
298bf215546Sopenharmony_ci}
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_cistruct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
301bf215546Sopenharmony_ci					   unsigned num_queries,
302bf215546Sopenharmony_ci					   unsigned *query_types)
303bf215546Sopenharmony_ci{
304bf215546Sopenharmony_ci	struct r600_common_screen *screen =
305bf215546Sopenharmony_ci		(struct r600_common_screen *)ctx->screen;
306bf215546Sopenharmony_ci	struct r600_perfcounters *pc = screen->perfcounters;
307bf215546Sopenharmony_ci	struct r600_perfcounter_block *block;
308bf215546Sopenharmony_ci	struct r600_pc_group *group;
309bf215546Sopenharmony_ci	struct r600_query_pc *query;
310bf215546Sopenharmony_ci	unsigned base_gid, sub_gid, sub_index;
311bf215546Sopenharmony_ci	unsigned i, j;
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci	if (!pc)
314bf215546Sopenharmony_ci		return NULL;
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci	query = CALLOC_STRUCT(r600_query_pc);
317bf215546Sopenharmony_ci	if (!query)
318bf215546Sopenharmony_ci		return NULL;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci	query->b.b.ops = &batch_query_ops;
321bf215546Sopenharmony_ci	query->b.ops = &batch_query_hw_ops;
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci	query->num_counters = num_queries;
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci	/* Collect selectors per group */
326bf215546Sopenharmony_ci	for (i = 0; i < num_queries; ++i) {
327bf215546Sopenharmony_ci		unsigned sub_gid;
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
330bf215546Sopenharmony_ci			goto error;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
333bf215546Sopenharmony_ci				       &base_gid, &sub_index);
334bf215546Sopenharmony_ci		if (!block)
335bf215546Sopenharmony_ci			goto error;
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci		sub_gid = sub_index / block->num_selectors;
338bf215546Sopenharmony_ci		sub_index = sub_index % block->num_selectors;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci		group = get_group_state(screen, query, block, sub_gid);
341bf215546Sopenharmony_ci		if (!group)
342bf215546Sopenharmony_ci			goto error;
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci		if (group->num_counters >= block->num_counters) {
345bf215546Sopenharmony_ci			fprintf(stderr,
346bf215546Sopenharmony_ci				"perfcounter group %s: too many selected\n",
347bf215546Sopenharmony_ci				block->basename);
348bf215546Sopenharmony_ci			goto error;
349bf215546Sopenharmony_ci		}
350bf215546Sopenharmony_ci		group->selectors[group->num_counters] = sub_index;
351bf215546Sopenharmony_ci		++group->num_counters;
352bf215546Sopenharmony_ci	}
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci	/* Compute result bases and CS size per group */
355bf215546Sopenharmony_ci	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
356bf215546Sopenharmony_ci	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
359bf215546Sopenharmony_ci	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci	i = 0;
362bf215546Sopenharmony_ci	for (group = query->groups; group; group = group->next) {
363bf215546Sopenharmony_ci		struct r600_perfcounter_block *block = group->block;
364bf215546Sopenharmony_ci		unsigned select_dw, read_dw;
365bf215546Sopenharmony_ci		unsigned instances = 1;
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
368bf215546Sopenharmony_ci			instances = screen->info.max_se;
369bf215546Sopenharmony_ci		if (group->instance < 0)
370bf215546Sopenharmony_ci			instances *= block->num_instances;
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci		group->result_base = i;
373bf215546Sopenharmony_ci		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
374bf215546Sopenharmony_ci		i += instances * group->num_counters;
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci		pc->get_size(block, group->num_counters, group->selectors,
377bf215546Sopenharmony_ci			     &select_dw, &read_dw);
378bf215546Sopenharmony_ci		query->b.num_cs_dw_begin += select_dw;
379bf215546Sopenharmony_ci		query->b.num_cs_dw_end += instances * read_dw;
380bf215546Sopenharmony_ci		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
381bf215546Sopenharmony_ci		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
382bf215546Sopenharmony_ci	}
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci	if (query->shaders) {
385bf215546Sopenharmony_ci		if (query->shaders == R600_PC_SHADERS_WINDOWING)
386bf215546Sopenharmony_ci			query->shaders = 0xffffffff;
387bf215546Sopenharmony_ci		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
388bf215546Sopenharmony_ci	}
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci	/* Map user-supplied query array to result indices */
391bf215546Sopenharmony_ci	query->counters = CALLOC(num_queries, sizeof(*query->counters));
392bf215546Sopenharmony_ci	for (i = 0; i < num_queries; ++i) {
393bf215546Sopenharmony_ci		struct r600_pc_counter *counter = &query->counters[i];
394bf215546Sopenharmony_ci		struct r600_perfcounter_block *block;
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
397bf215546Sopenharmony_ci				       &base_gid, &sub_index);
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci		sub_gid = sub_index / block->num_selectors;
400bf215546Sopenharmony_ci		sub_index = sub_index % block->num_selectors;
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci		group = get_group_state(screen, query, block, sub_gid);
403bf215546Sopenharmony_ci		assert(group != NULL);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci		for (j = 0; j < group->num_counters; ++j) {
406bf215546Sopenharmony_ci			if (group->selectors[j] == sub_index)
407bf215546Sopenharmony_ci				break;
408bf215546Sopenharmony_ci		}
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci		counter->base = group->result_base + j;
411bf215546Sopenharmony_ci		counter->stride = group->num_counters;
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci		counter->qwords = 1;
414bf215546Sopenharmony_ci		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
415bf215546Sopenharmony_ci			counter->qwords = screen->info.max_se;
416bf215546Sopenharmony_ci		if (group->instance < 0)
417bf215546Sopenharmony_ci			counter->qwords *= block->num_instances;
418bf215546Sopenharmony_ci	}
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci	if (!r600_query_hw_init(screen, &query->b))
421bf215546Sopenharmony_ci		goto error;
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci	return (struct pipe_query *)query;
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_cierror:
426bf215546Sopenharmony_ci	r600_pc_query_destroy(screen, &query->b.b);
427bf215546Sopenharmony_ci	return NULL;
428bf215546Sopenharmony_ci}
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_cistatic bool r600_init_block_names(struct r600_common_screen *screen,
431bf215546Sopenharmony_ci				  struct r600_perfcounter_block *block)
432bf215546Sopenharmony_ci{
433bf215546Sopenharmony_ci	unsigned i, j, k;
434bf215546Sopenharmony_ci	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
435bf215546Sopenharmony_ci	unsigned namelen;
436bf215546Sopenharmony_ci	char *groupname;
437bf215546Sopenharmony_ci	char *p;
438bf215546Sopenharmony_ci
439bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
440bf215546Sopenharmony_ci		groups_instance = block->num_instances;
441bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
442bf215546Sopenharmony_ci		groups_se = screen->info.max_se;
443bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SHADER)
444bf215546Sopenharmony_ci		groups_shader = screen->perfcounters->num_shader_types;
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci	namelen = strlen(block->basename);
447bf215546Sopenharmony_ci	block->group_name_stride = namelen + 1;
448bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SHADER)
449bf215546Sopenharmony_ci		block->group_name_stride += 3;
450bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
451bf215546Sopenharmony_ci		assert(groups_se <= 10);
452bf215546Sopenharmony_ci		block->group_name_stride += 1;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
455bf215546Sopenharmony_ci			block->group_name_stride += 1;
456bf215546Sopenharmony_ci	}
457bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
458bf215546Sopenharmony_ci		assert(groups_instance <= 100);
459bf215546Sopenharmony_ci		block->group_name_stride += 2;
460bf215546Sopenharmony_ci	}
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
463bf215546Sopenharmony_ci	if (!block->group_names)
464bf215546Sopenharmony_ci		return false;
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci	groupname = block->group_names;
467bf215546Sopenharmony_ci	for (i = 0; i < groups_shader; ++i) {
468bf215546Sopenharmony_ci		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
469bf215546Sopenharmony_ci		unsigned shaderlen = strlen(shader_suffix);
470bf215546Sopenharmony_ci		for (j = 0; j < groups_se; ++j) {
471bf215546Sopenharmony_ci			for (k = 0; k < groups_instance; ++k) {
472bf215546Sopenharmony_ci				strcpy(groupname, block->basename);
473bf215546Sopenharmony_ci				p = groupname + namelen;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci				if (block->flags & R600_PC_BLOCK_SHADER) {
476bf215546Sopenharmony_ci					strcpy(p, shader_suffix);
477bf215546Sopenharmony_ci					p += shaderlen;
478bf215546Sopenharmony_ci				}
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
481bf215546Sopenharmony_ci					p += sprintf(p, "%d", j);
482bf215546Sopenharmony_ci					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
483bf215546Sopenharmony_ci						*p++ = '_';
484bf215546Sopenharmony_ci				}
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
487bf215546Sopenharmony_ci					p += sprintf(p, "%d", k);
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci				groupname += block->group_name_stride;
490bf215546Sopenharmony_ci			}
491bf215546Sopenharmony_ci		}
492bf215546Sopenharmony_ci	}
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci	assert(block->num_selectors <= 1000);
495bf215546Sopenharmony_ci	block->selector_name_stride = block->group_name_stride + 4;
496bf215546Sopenharmony_ci	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
497bf215546Sopenharmony_ci				       block->selector_name_stride);
498bf215546Sopenharmony_ci	if (!block->selector_names)
499bf215546Sopenharmony_ci		return false;
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci	groupname = block->group_names;
502bf215546Sopenharmony_ci	p = block->selector_names;
503bf215546Sopenharmony_ci	for (i = 0; i < block->num_groups; ++i) {
504bf215546Sopenharmony_ci		for (j = 0; j < block->num_selectors; ++j) {
505bf215546Sopenharmony_ci			sprintf(p, "%s_%03d", groupname, j);
506bf215546Sopenharmony_ci			p += block->selector_name_stride;
507bf215546Sopenharmony_ci		}
508bf215546Sopenharmony_ci		groupname += block->group_name_stride;
509bf215546Sopenharmony_ci	}
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci	return true;
512bf215546Sopenharmony_ci}
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ciint r600_get_perfcounter_info(struct r600_common_screen *screen,
515bf215546Sopenharmony_ci			      unsigned index,
516bf215546Sopenharmony_ci			      struct pipe_driver_query_info *info)
517bf215546Sopenharmony_ci{
518bf215546Sopenharmony_ci	struct r600_perfcounters *pc = screen->perfcounters;
519bf215546Sopenharmony_ci	struct r600_perfcounter_block *block;
520bf215546Sopenharmony_ci	unsigned base_gid, sub;
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci	if (!pc)
523bf215546Sopenharmony_ci		return 0;
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci	if (!info) {
526bf215546Sopenharmony_ci		unsigned bid, num_queries = 0;
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci		for (bid = 0; bid < pc->num_blocks; ++bid) {
529bf215546Sopenharmony_ci			num_queries += pc->blocks[bid].num_selectors *
530bf215546Sopenharmony_ci				       pc->blocks[bid].num_groups;
531bf215546Sopenharmony_ci		}
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci		return num_queries;
534bf215546Sopenharmony_ci	}
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci	block = lookup_counter(pc, index, &base_gid, &sub);
537bf215546Sopenharmony_ci	if (!block)
538bf215546Sopenharmony_ci		return 0;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci	if (!block->selector_names) {
541bf215546Sopenharmony_ci		if (!r600_init_block_names(screen, block))
542bf215546Sopenharmony_ci			return 0;
543bf215546Sopenharmony_ci	}
544bf215546Sopenharmony_ci	info->name = block->selector_names + sub * block->selector_name_stride;
545bf215546Sopenharmony_ci	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
546bf215546Sopenharmony_ci	info->max_value.u64 = 0;
547bf215546Sopenharmony_ci	info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
548bf215546Sopenharmony_ci	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
549bf215546Sopenharmony_ci	info->group_id = base_gid + sub / block->num_selectors;
550bf215546Sopenharmony_ci	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
551bf215546Sopenharmony_ci	if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
552bf215546Sopenharmony_ci		info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
553bf215546Sopenharmony_ci	return 1;
554bf215546Sopenharmony_ci}
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ciint r600_get_perfcounter_group_info(struct r600_common_screen *screen,
557bf215546Sopenharmony_ci				    unsigned index,
558bf215546Sopenharmony_ci				    struct pipe_driver_query_group_info *info)
559bf215546Sopenharmony_ci{
560bf215546Sopenharmony_ci	struct r600_perfcounters *pc = screen->perfcounters;
561bf215546Sopenharmony_ci	struct r600_perfcounter_block *block;
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci	if (!pc)
564bf215546Sopenharmony_ci		return 0;
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci	if (!info)
567bf215546Sopenharmony_ci		return pc->num_groups;
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci	block = lookup_group(pc, &index);
570bf215546Sopenharmony_ci	if (!block)
571bf215546Sopenharmony_ci		return 0;
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci	if (!block->group_names) {
574bf215546Sopenharmony_ci		if (!r600_init_block_names(screen, block))
575bf215546Sopenharmony_ci			return 0;
576bf215546Sopenharmony_ci	}
577bf215546Sopenharmony_ci	info->name = block->group_names + index * block->group_name_stride;
578bf215546Sopenharmony_ci	info->num_queries = block->num_selectors;
579bf215546Sopenharmony_ci	info->max_active_queries = block->num_counters;
580bf215546Sopenharmony_ci	return 1;
581bf215546Sopenharmony_ci}
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_civoid r600_perfcounters_destroy(struct r600_common_screen *rscreen)
584bf215546Sopenharmony_ci{
585bf215546Sopenharmony_ci	if (rscreen->perfcounters)
586bf215546Sopenharmony_ci		rscreen->perfcounters->cleanup(rscreen);
587bf215546Sopenharmony_ci}
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_cibool r600_perfcounters_init(struct r600_perfcounters *pc,
590bf215546Sopenharmony_ci			    unsigned num_blocks)
591bf215546Sopenharmony_ci{
592bf215546Sopenharmony_ci	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
593bf215546Sopenharmony_ci	if (!pc->blocks)
594bf215546Sopenharmony_ci		return false;
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci	pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
597bf215546Sopenharmony_ci	pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci	return true;
600bf215546Sopenharmony_ci}
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_civoid r600_perfcounters_add_block(struct r600_common_screen *rscreen,
603bf215546Sopenharmony_ci				 struct r600_perfcounters *pc,
604bf215546Sopenharmony_ci				 const char *name, unsigned flags,
605bf215546Sopenharmony_ci				 unsigned counters, unsigned selectors,
606bf215546Sopenharmony_ci				 unsigned instances, void *data)
607bf215546Sopenharmony_ci{
608bf215546Sopenharmony_ci	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci	assert(counters <= R600_QUERY_MAX_COUNTERS);
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci	block->basename = name;
613bf215546Sopenharmony_ci	block->flags = flags;
614bf215546Sopenharmony_ci	block->num_counters = counters;
615bf215546Sopenharmony_ci	block->num_selectors = selectors;
616bf215546Sopenharmony_ci	block->num_instances = MAX2(instances, 1);
617bf215546Sopenharmony_ci	block->data = data;
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
620bf215546Sopenharmony_ci		block->flags |= R600_PC_BLOCK_SE_GROUPS;
621bf215546Sopenharmony_ci	if (pc->separate_instance && block->num_instances > 1)
622bf215546Sopenharmony_ci		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
625bf215546Sopenharmony_ci		block->num_groups = block->num_instances;
626bf215546Sopenharmony_ci	} else {
627bf215546Sopenharmony_ci		block->num_groups = 1;
628bf215546Sopenharmony_ci	}
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
631bf215546Sopenharmony_ci		block->num_groups *= rscreen->info.max_se;
632bf215546Sopenharmony_ci	if (block->flags & R600_PC_BLOCK_SHADER)
633bf215546Sopenharmony_ci		block->num_groups *= pc->num_shader_types;
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci	++pc->num_blocks;
636bf215546Sopenharmony_ci	pc->num_groups += block->num_groups;
637bf215546Sopenharmony_ci}
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_civoid r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
640bf215546Sopenharmony_ci{
641bf215546Sopenharmony_ci	unsigned i;
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci	for (i = 0; i < pc->num_blocks; ++i) {
644bf215546Sopenharmony_ci		FREE(pc->blocks[i].group_names);
645bf215546Sopenharmony_ci		FREE(pc->blocks[i].selector_names);
646bf215546Sopenharmony_ci	}
647bf215546Sopenharmony_ci	FREE(pc->blocks);
648bf215546Sopenharmony_ci	FREE(pc);
649bf215546Sopenharmony_ci}
650