/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   struct threaded_query b;

   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncobj *syncobj;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

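/* Note: iris_query_snapshots and iris_query_so_overflow (above) begin with
 * the same two fields, predicate_result and snapshots_landed, at matching
 * offsets.  The availability and predication code below uses
 * offsetof(struct iris_query_snapshots, ...) for those fields regardless of
 * which layout actually backs the query, so they must stay in sync.
 */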
static struct mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .access = IRIS_DOMAIN_OTHER_WRITE
   };
   return mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      batch->screen->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GFX_VER == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GFX_VER >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          * bit set prior to programming a PIPE_CONTROL with Write PS Depth
          * Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               q->index == 0 ?
                                               GENX(CL_INVOCATION_COUNT_num) :
                                               SO_PRIM_STORAGE_NEEDED(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               SO_NUM_PRIMS_WRITTEN(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      batch->screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

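/* Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED counters for
 * each stream covered by the query (a single stream for
 * PIPE_QUERY_SO_OVERFLOW_PREDICATE, all four otherwise), into either the
 * "begin" or "end" slots depending on \p end.
 */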
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      batch->screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                               bo, g_idx, false);
      batch->screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                               bo, w_idx, false);
   }
}

static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

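/* A stream has overflowed when the number of primitives that needed storage
 * differs from the number actually written between the two snapshots.  For
 * example (illustrative numbers): if 100 primitives were generated for a
 * stream but only 60 fit in the streamout buffer, the prim_storage_needed
 * delta is 100 while the num_prims delta is 60, so the deltas differ and the
 * query reports an overflow.
 */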
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

static void
calculate_result_on_cpu(const struct intel_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = intel_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = intel_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GFX_VER == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                     mi_isub(b, C(prim_storage_needed, 1),
                                C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct iris_query *q)
{
   struct mi_value stream_result[PIPE_MAX_VERTEX_STREAMS];
   for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct mi_value result = stream_result[0];
   for (int i = 1; i < PIPE_MAX_VERTEX_STREAMS; i++)
      result = mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct mi_value
calculate_result_on_gpu(const struct intel_device_info *devinfo,
                        struct mi_builder *b,
                        struct iris_query *q)
{
   struct mi_value result;
   struct mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
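      /* For example (illustrative numbers only): with a 12 MHz timestamp,
       * 1e9 / 12e6 = 83.33 ns per tick, which truncates to a scale of 83
       * and under-reports elapsed time by roughly 0.4%.
       */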
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_iand(b, mi_imm((1ull << 36) - 1),
                       mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GFX_VER == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = mi_iand(b, mi_nz(b, result), mi_imm(1));

   return result;
}

static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;
   q->monitor = NULL;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncobj_reference(screen->bufmgr, &query->syncobj, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   pipe_resource_reference(&query->query_state_ref.res, NULL);
   free(query);
}


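/* Begin a query: allocate fresh snapshot storage from the query buffer
 * uploader, clear the "snapshots landed" flag, and emit commands to write
 * the starting counter snapshot(s).
 */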
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncobj(batch, &q->syncobj);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncobj(batch, &q->syncobj);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

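/* The gallium get_query_result hook.  Flushes the relevant batch if it still
 * holds the commands that produce the result, then either waits for the
 * snapshots to land (wait == true) or returns false, and finally computes
 * the result on the CPU.  PIPE_QUERY_GPU_FINISHED simply checks its fence.
 */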
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->devinfo.no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncobj(screen->bufmgr, q->syncobj, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

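/* Write a query's result into a buffer object (the "query buffer object"
 * path).  For index == -1, the availability (snapshots_landed) is copied
 * instead.  If the result is already known on the CPU it is stored directly;
 * otherwise it is computed on the GPU with MI_MATH, and, unless the caller
 * asked to wait, the store is predicated on the snapshots having landed so
 * a not-yet-available result leaves the destination untouched.
 */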
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      batch->screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
                                       query_bo, snapshots_landed_offset,
                                       result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         batch->screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         batch->screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure QBO is flushed before its result is used elsewhere. */
      iris_dirty_for_history(ice, res);
      return;
   }

   bool predicated = !(flags & PIPE_QUERY_WAIT) && !q->stalled;

   struct mi_builder b;
   mi_builder_init(&b, &batch->screen->devinfo, batch);

   iris_batch_sync_region_start(batch);

   struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ?
      mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
      mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));

   if (predicated) {
      mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
               mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      mi_store_if(&b, dst, result);
   } else {
      mi_store(&b, dst, result);
   }

   iris_batch_sync_region_end(batch);
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_WM;
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
                             IRIS_STAGE_DIRTY_TCS |
                             IRIS_STAGE_DIRTY_TES |
                             IRIS_STAGE_DIRTY_VS;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

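/* Emit MI_MATH to compute a 0/1 predicate value from the query's snapshots,
 * load it into MI_PREDICATE_RESULT for the render batch, and also save it
 * into the query buffer so a later compute dispatch can reload it.
 */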
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_batch_sync_region_start(batch);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct mi_builder b;
   mi_builder_init(&b, &batch->screen->devinfo, batch);

   struct mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
   result = mi_iand(&b, result, mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   mi_value_ref(&b, result);
   mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), result);
   mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                        predicate_result)), result);
   ice->state.compute_predicate = bo;

   iris_batch_sync_region_end(batch);
}

static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;
}

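/* A minimal sketch (not iris-specific code) of how a state tracker drives
 * the hooks installed above through the gallium interface, assuming a
 * struct pipe_context *ctx backed by this driver:
 *
 *    struct pipe_query *q =
 *       ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
 *    ctx->begin_query(ctx, q);
 *    ... draw calls ...
 *    ctx->end_query(ctx, q);
 *
 *    union pipe_query_result result;
 *    ctx->get_query_result(ctx, q, true, &result);
 *    ctx->destroy_query(ctx, q);
 */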