1/* 2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

/* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */

#include "freedreno_query_acc.h"
#include "freedreno_resource.h"

#include "fd5_context.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_query.h"

/* Layout of one accumulating sample in the query buffer object.  The GPU
 * writes 'start' on resume and 'stop' on pause; 'result' accumulates
 * (stop - start) across the resume/pause pairs of each tile/batch via
 * CP_MEM_TO_MEM.
 */
struct PACKED fd5_query_sample {
   uint64_t start;
   uint64_t result;
   uint64_t stop;
};

/* offset of a single field of an array of fd5_query_sample:
 * (expands to the bo/offset/or/shift argument tuple expected by OUT_RELOC)
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd5_query_sample)) +                                \
         offsetof(struct fd5_query_sample, field),                             \
      0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)

/*
 * Occlusion Query:
 *
 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
 * interpret results
 */

/* Start (or restart) occlusion counting: point the RB sample-counter at the
 * 'start' slot and trigger a ZPASS_DONE to snapshot the current count.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   /* ZPASS_DONE makes the HW copy the sample count to the address above: */
   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   /* bookkeeping so other code knows a samples-passed query is active: */
   fd5_context(batch->ctx)->samples_passed_queries++;
}

/* Stop occlusion counting and accumulate (stop - start) into 'result'.
 * The 'stop' slot is first filled with an 0xffffffff sentinel so the CP
 * can poll for the asynchronous ZPASS_DONE write to land before doing the
 * accumulate.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   /* seed 'stop' (both halves of the u64) with a sentinel value: */
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   /* HW copies the final sample count into 'stop', overwriting the
    * sentinel:
    */
   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   /* poll 'stop' until it no longer reads back the sentinel, ie. until
    * the ZPASS_DONE write has landed (the 0x14/0x10 magic encodes the
    * poll function/source fields — presumably mem-NE-reference; not
    * decoded here, see XXX):
    */
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, 0x00000014); // XXX
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0x00000010); // XXX

   /* result += stop - start:
    * (DOUBLE = 64b operands, NEG_C = subtract srcC: dst = srcA + srcB - srcC)
    */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */

   fd5_context(batch->ctx)->samples_passed_queries--;
}

/* Read back accumulated sample count for OCCLUSION_COUNTER: */
static void
occlusion_counter_result(struct fd_acc_query *aq, void *buf,
                         union pipe_query_result *result)
{
   struct fd5_query_sample *sp = buf;
   result->u64 = sp->result;
}

/* Read back boolean "any samples passed" for OCCLUSION_PREDICATE: */
static void
occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
                           union pipe_query_result *result)
{
   struct fd5_query_sample *sp = buf;
   result->b = !!sp->result;
}

static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};

static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

/*
 * Timestamp Queries:
158 */ 159 160static void 161timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 162{ 163 struct fd_ringbuffer *ring = batch->draw; 164 165 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 166 OUT_RING(ring, 167 CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP); 168 OUT_RELOC(ring, query_sample(aq, start)); 169 OUT_RING(ring, 0x00000000); 170 171 fd_reset_wfi(batch); 172} 173 174static void 175timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 176{ 177 struct fd_ringbuffer *ring = batch->draw; 178 179 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 180 OUT_RING(ring, 181 CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP); 182 OUT_RELOC(ring, query_sample(aq, stop)); 183 OUT_RING(ring, 0x00000000); 184 185 fd_reset_wfi(batch); 186 fd_wfi(batch, ring); 187 188 /* result += stop - start: */ 189 OUT_PKT7(ring, CP_MEM_TO_MEM, 9); 190 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C); 191 OUT_RELOC(ring, query_sample(aq, result)); /* dst */ 192 OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ 193 OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ 194 OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ 195} 196 197static uint64_t 198ticks_to_ns(uint32_t ts) 199{ 200 /* This is based on the 19.2MHz always-on rbbm timer. 201 * 202 * TODO we should probably query this value from kernel.. 
203 */ 204 return ts * (1000000000 / 19200000); 205} 206 207static void 208time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf, 209 union pipe_query_result *result) 210{ 211 struct fd5_query_sample *sp = buf; 212 result->u64 = ticks_to_ns(sp->result); 213} 214 215static void 216timestamp_accumulate_result(struct fd_acc_query *aq, void *buf, 217 union pipe_query_result *result) 218{ 219 struct fd5_query_sample *sp = buf; 220 result->u64 = ticks_to_ns(sp->result); 221} 222 223static const struct fd_acc_sample_provider time_elapsed = { 224 .query_type = PIPE_QUERY_TIME_ELAPSED, 225 .always = true, 226 .size = sizeof(struct fd5_query_sample), 227 .resume = timestamp_resume, 228 .pause = timestamp_pause, 229 .result = time_elapsed_accumulate_result, 230}; 231 232/* NOTE: timestamp query isn't going to give terribly sensible results 233 * on a tiler. But it is needed by qapitrace profile heatmap. If you 234 * add in a binning pass, the results get even more non-sensical. So 235 * we just return the timestamp on the first tile and hope that is 236 * kind of good enough. 237 */ 238 239static const struct fd_acc_sample_provider timestamp = { 240 .query_type = PIPE_QUERY_TIMESTAMP, 241 .always = true, 242 .size = sizeof(struct fd5_query_sample), 243 .resume = timestamp_resume, 244 .pause = timestamp_pause, 245 .result = timestamp_accumulate_result, 246}; 247 248/* 249 * Performance Counter (batch) queries: 250 * 251 * Only one of these is active at a time, per design of the gallium 252 * batch_query API design. On perfcntr query tracks N query_types, 253 * each of which has a 'fd_batch_query_entry' that maps it back to 254 * the associated group and counter. 
255 */ 256 257struct fd_batch_query_entry { 258 uint8_t gid; /* group-id */ 259 uint8_t cid; /* countable-id within the group */ 260}; 261 262struct fd_batch_query_data { 263 struct fd_screen *screen; 264 unsigned num_query_entries; 265 struct fd_batch_query_entry query_entries[]; 266}; 267 268static void 269perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 270{ 271 struct fd_batch_query_data *data = aq->query_data; 272 struct fd_screen *screen = data->screen; 273 struct fd_ringbuffer *ring = batch->draw; 274 275 unsigned counters_per_group[screen->num_perfcntr_groups]; 276 memset(counters_per_group, 0, sizeof(counters_per_group)); 277 278 fd_wfi(batch, ring); 279 280 /* configure performance counters for the requested queries: */ 281 for (unsigned i = 0; i < data->num_query_entries; i++) { 282 struct fd_batch_query_entry *entry = &data->query_entries[i]; 283 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 284 unsigned counter_idx = counters_per_group[entry->gid]++; 285 286 assert(counter_idx < g->num_counters); 287 288 OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1); 289 OUT_RING(ring, g->countables[entry->cid].selector); 290 } 291 292 memset(counters_per_group, 0, sizeof(counters_per_group)); 293 294 /* and snapshot the start values */ 295 for (unsigned i = 0; i < data->num_query_entries; i++) { 296 struct fd_batch_query_entry *entry = &data->query_entries[i]; 297 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 298 unsigned counter_idx = counters_per_group[entry->gid]++; 299 const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; 300 301 OUT_PKT7(ring, CP_REG_TO_MEM, 3); 302 OUT_RING(ring, CP_REG_TO_MEM_0_64B | 303 CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); 304 OUT_RELOC(ring, query_sample_idx(aq, i, start)); 305 } 306} 307 308static void 309perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 310{ 311 struct fd_batch_query_data *data = 
aq->query_data; 312 struct fd_screen *screen = data->screen; 313 struct fd_ringbuffer *ring = batch->draw; 314 315 unsigned counters_per_group[screen->num_perfcntr_groups]; 316 memset(counters_per_group, 0, sizeof(counters_per_group)); 317 318 fd_wfi(batch, ring); 319 320 /* TODO do we need to bother to turn anything off? */ 321 322 /* snapshot the end values: */ 323 for (unsigned i = 0; i < data->num_query_entries; i++) { 324 struct fd_batch_query_entry *entry = &data->query_entries[i]; 325 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 326 unsigned counter_idx = counters_per_group[entry->gid]++; 327 const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; 328 329 OUT_PKT7(ring, CP_REG_TO_MEM, 3); 330 OUT_RING(ring, CP_REG_TO_MEM_0_64B | 331 CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); 332 OUT_RELOC(ring, query_sample_idx(aq, i, stop)); 333 } 334 335 /* and compute the result: */ 336 for (unsigned i = 0; i < data->num_query_entries; i++) { 337 /* result += stop - start: */ 338 OUT_PKT7(ring, CP_MEM_TO_MEM, 9); 339 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C); 340 OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */ 341 OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */ 342 OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */ 343 OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */ 344 } 345} 346 347static void 348perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, 349 union pipe_query_result *result) 350{ 351 struct fd_batch_query_data *data = aq->query_data; 352 struct fd5_query_sample *sp = buf; 353 354 for (unsigned i = 0; i < data->num_query_entries; i++) { 355 result->batch[i].u64 = sp[i].result; 356 } 357} 358 359static const struct fd_acc_sample_provider perfcntr = { 360 .query_type = FD_QUERY_FIRST_PERFCNTR, 361 .always = true, 362 .resume = perfcntr_resume, 363 .pause = perfcntr_pause, 364 .result = perfcntr_accumulate_result, 365}; 366 367static struct 
pipe_query * 368fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries, 369 unsigned *query_types) 370{ 371 struct fd_context *ctx = fd_context(pctx); 372 struct fd_screen *screen = ctx->screen; 373 struct fd_query *q; 374 struct fd_acc_query *aq; 375 struct fd_batch_query_data *data; 376 377 data = CALLOC_VARIANT_LENGTH_STRUCT( 378 fd_batch_query_data, num_queries * sizeof(data->query_entries[0])); 379 380 data->screen = screen; 381 data->num_query_entries = num_queries; 382 383 /* validate the requested query_types and ensure we don't try 384 * to request more query_types of a given group than we have 385 * counters: 386 */ 387 unsigned counters_per_group[screen->num_perfcntr_groups]; 388 memset(counters_per_group, 0, sizeof(counters_per_group)); 389 390 for (unsigned i = 0; i < num_queries; i++) { 391 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; 392 393 /* verify valid query_type, ie. is it actually a perfcntr? */ 394 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || 395 (idx >= screen->num_perfcntr_queries)) { 396 mesa_loge("invalid batch query query_type: %u", query_types[i]); 397 goto error; 398 } 399 400 struct fd_batch_query_entry *entry = &data->query_entries[i]; 401 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; 402 403 entry->gid = pq->group_id; 404 405 /* the perfcntr_queries[] table flattens all the countables 406 * for each group in series, ie: 407 * 408 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... 409 * 410 * So to find the countable index just step back through the 411 * table to find the first entry with the same group-id. 
412 */ 413 while (pq > screen->perfcntr_queries) { 414 pq--; 415 if (pq->group_id == entry->gid) 416 entry->cid++; 417 } 418 419 if (counters_per_group[entry->gid] >= 420 screen->perfcntr_groups[entry->gid].num_counters) { 421 mesa_loge("too many counters for group %u\n", entry->gid); 422 goto error; 423 } 424 425 counters_per_group[entry->gid]++; 426 } 427 428 q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); 429 aq = fd_acc_query(q); 430 431 /* sample buffer size is based on # of queries: */ 432 aq->size = num_queries * sizeof(struct fd5_query_sample); 433 aq->query_data = data; 434 435 return (struct pipe_query *)q; 436 437error: 438 free(data); 439 return NULL; 440} 441 442void 443fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis 444{ 445 struct fd_context *ctx = fd_context(pctx); 446 447 ctx->create_query = fd_acc_create_query; 448 ctx->query_update_batch = fd_acc_query_update_batch; 449 450 pctx->create_batch_query = fd5_create_batch_query; 451 452 fd_acc_query_register_provider(pctx, &occlusion_counter); 453 fd_acc_query_register_provider(pctx, &occlusion_predicate); 454 fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative); 455 456 fd_acc_query_register_provider(pctx, &time_elapsed); 457 fd_acc_query_register_provider(pctx, ×tamp); 458} 459