1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file v3d_job.c 25 * 26 * Functions for submitting V3D render jobs to the kernel. 27 */ 28 29#include <xf86drm.h> 30#include "v3d_context.h" 31/* The OQ/semaphore packets are the same across V3D versions. */ 32#define V3D_VERSION 33 33#include "broadcom/cle/v3dx_pack.h" 34#include "broadcom/common/v3d_macros.h" 35#include "util/hash_table.h" 36#include "util/ralloc.h" 37#include "util/set.h" 38#include "broadcom/clif/clif_dump.h" 39 40void 41v3d_job_free(struct v3d_context *v3d, struct v3d_job *job) 42{ 43 set_foreach(job->bos, entry) { 44 struct v3d_bo *bo = (struct v3d_bo *)entry->key; 45 v3d_bo_unreference(&bo); 46 } 47 48 _mesa_hash_table_remove_key(v3d->jobs, &job->key); 49 50 if (job->write_prscs) { 51 set_foreach(job->write_prscs, entry) { 52 const struct pipe_resource *prsc = entry->key; 53 54 _mesa_hash_table_remove_key(v3d->write_jobs, prsc); 55 } 56 } 57 58 for (int i = 0; i < job->nr_cbufs; i++) { 59 if (job->cbufs[i]) { 60 _mesa_hash_table_remove_key(v3d->write_jobs, 61 job->cbufs[i]->texture); 62 pipe_surface_reference(&job->cbufs[i], NULL); 63 } 64 } 65 if (job->zsbuf) { 66 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture); 67 if (rsc->separate_stencil) 68 _mesa_hash_table_remove_key(v3d->write_jobs, 69 &rsc->separate_stencil->base); 70 71 _mesa_hash_table_remove_key(v3d->write_jobs, 72 job->zsbuf->texture); 73 pipe_surface_reference(&job->zsbuf, NULL); 74 } 75 if (job->bbuf) 76 pipe_surface_reference(&job->bbuf, NULL); 77 78 if (v3d->job == job) 79 v3d->job = NULL; 80 81 v3d_destroy_cl(&job->bcl); 82 v3d_destroy_cl(&job->rcl); 83 v3d_destroy_cl(&job->indirect); 84 v3d_bo_unreference(&job->tile_alloc); 85 v3d_bo_unreference(&job->tile_state); 86 87 ralloc_free(job); 88} 89 90struct v3d_job * 91v3d_job_create(struct v3d_context *v3d) 92{ 93 struct v3d_job *job = rzalloc(v3d, struct v3d_job); 94 95 job->v3d = v3d; 96 97 v3d_init_cl(job, &job->bcl); 98 v3d_init_cl(job, &job->rcl); 99 v3d_init_cl(job, &job->indirect); 100 101 job->draw_min_x = ~0; 102 job->draw_min_y = ~0; 103 job->draw_max_x = 0; 104 job->draw_max_y = 0; 105 106 job->bos = _mesa_set_create(job, 107 _mesa_hash_pointer, 108 _mesa_key_pointer_equal); 109 return job; 110} 111 112void 113v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo) 114{ 115 if (!bo) 116 return; 117 118 if (_mesa_set_search(job->bos, bo)) 119 return; 120 121 v3d_bo_reference(bo); 122 _mesa_set_add(job->bos, bo); 123 job->referenced_size += bo->size; 124 125 uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; 126 127 if (job->submit.bo_handle_count >= job->bo_handles_size) { 128 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2); 129 bo_handles = reralloc(job, bo_handles, 130 uint32_t, job->bo_handles_size); 131 job->submit.bo_handles = (uintptr_t)(void *)bo_handles; 132 } 133 bo_handles[job->submit.bo_handle_count++] = bo->handle; 134} 135 136void 137v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc) 138{ 139 struct v3d_context *v3d = job->v3d; 140 141 if (!job->write_prscs) { 142 job->write_prscs = _mesa_set_create(job, 143 _mesa_hash_pointer, 144 _mesa_key_pointer_equal); 145 } 146 147 _mesa_set_add(job->write_prscs, prsc); 148 _mesa_hash_table_insert(v3d->write_jobs, prsc, job); 149} 150 151void 152v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo) 153{ 154 hash_table_foreach(v3d->jobs, entry) { 155 struct v3d_job *job = entry->data; 156 157 if (_mesa_set_search(job->bos, bo)) 158 v3d_job_submit(v3d, job); 159 } 160} 161 162void 163v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc) 164{ 165 v3d_job_add_write_resource(job, prsc); 166 167 if (!job->tf_write_prscs) 168 job->tf_write_prscs = _mesa_pointer_set_create(job); 169 170 _mesa_set_add(job->tf_write_prscs, prsc); 171} 172 173static bool 174v3d_job_writes_resource_from_tf(struct v3d_job *job, 175 struct pipe_resource *prsc) 176{ 177 if (!job->tf_enabled) 178 return false; 179 180 if (!job->tf_write_prscs) 181 return false; 182 183 return _mesa_set_search(job->tf_write_prscs, prsc) != NULL; 184} 185 186void 187v3d_flush_jobs_writing_resource(struct v3d_context *v3d, 188 struct pipe_resource *prsc, 189 enum v3d_flush_cond flush_cond, 190 bool is_compute_pipeline) 191{ 192 struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs, 193 prsc); 194 struct v3d_resource *rsc = v3d_resource(prsc); 195 196 /* We need to sync if graphics pipeline reads a resource written 197 * by the compute pipeline. The same would be needed for the case of 198 * graphics-compute dependency but nowadays all compute jobs 199 * are serialized with the previous submitted job. 200 */ 201 if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) { 202 v3d->sync_on_last_compute_job = true; 203 rsc->compute_written = false; 204 } 205 206 if (!entry) 207 return; 208 209 struct v3d_job *job = entry->data; 210 211 bool needs_flush; 212 switch (flush_cond) { 213 case V3D_FLUSH_ALWAYS: 214 needs_flush = true; 215 break; 216 case V3D_FLUSH_NOT_CURRENT_JOB: 217 needs_flush = !v3d->job || v3d->job != job; 218 break; 219 case V3D_FLUSH_DEFAULT: 220 default: 221 /* For writes from TF in the same job we use the "Wait for TF" 222 * feature provided by the hardware so we don't want to flush. 223 * The exception to this is when the caller is about to map the 224 * resource since in that case we don't have a 'Wait for TF' 225 * command the in command stream. In this scenario the caller 226 * is expected to set 'always_flush' to True. 227 */ 228 needs_flush = !v3d_job_writes_resource_from_tf(job, prsc); 229 } 230 231 if (needs_flush) 232 v3d_job_submit(v3d, job); 233} 234 235void 236v3d_flush_jobs_reading_resource(struct v3d_context *v3d, 237 struct pipe_resource *prsc, 238 enum v3d_flush_cond flush_cond, 239 bool is_compute_pipeline) 240{ 241 struct v3d_resource *rsc = v3d_resource(prsc); 242 243 /* We only need to force the flush on TF writes, which is the only 244 * case where we might skip the flush to use the 'Wait for TF' 245 * command. Here we are flushing for a read, which means that the 246 * caller intends to write to the resource, so we don't care if 247 * there was a previous TF write to it. 248 */ 249 v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond, 250 is_compute_pipeline); 251 252 hash_table_foreach(v3d->jobs, entry) { 253 struct v3d_job *job = entry->data; 254 255 if (!_mesa_set_search(job->bos, rsc->bo)) 256 continue; 257 258 bool needs_flush; 259 switch (flush_cond) { 260 case V3D_FLUSH_NOT_CURRENT_JOB: 261 needs_flush = !v3d->job || v3d->job != job; 262 break; 263 case V3D_FLUSH_ALWAYS: 264 case V3D_FLUSH_DEFAULT: 265 default: 266 needs_flush = true; 267 } 268 269 if (needs_flush) 270 v3d_job_submit(v3d, job); 271 272 /* Reminder: v3d->jobs is safe to keep iterating even 273 * after deletion of an entry. 274 */ 275 continue; 276 } 277} 278 279/** 280 * Returns a v3d_job struture for tracking V3D rendering to a particular FBO. 281 * 282 * If we've already started rendering to this FBO, then return the same job, 283 * otherwise make a new one. If we're beginning rendering to an FBO, make 284 * sure that any previous reads of the FBO (or writes to its color/Z surfaces) 285 * have been flushed. 286 */ 287struct v3d_job * 288v3d_get_job(struct v3d_context *v3d, 289 uint32_t nr_cbufs, 290 struct pipe_surface **cbufs, 291 struct pipe_surface *zsbuf, 292 struct pipe_surface *bbuf) 293{ 294 /* Return the existing job for this FBO if we have one */ 295 struct v3d_job_key local_key = { 296 .cbufs = { 297 cbufs[0], 298 cbufs[1], 299 cbufs[2], 300 cbufs[3], 301 }, 302 .zsbuf = zsbuf, 303 .bbuf = bbuf, 304 }; 305 struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs, 306 &local_key); 307 if (entry) 308 return entry->data; 309 310 /* Creating a new job. Make sure that any previous jobs reading or 311 * writing these buffers are flushed. 312 */ 313 struct v3d_job *job = v3d_job_create(v3d); 314 job->nr_cbufs = nr_cbufs; 315 316 for (int i = 0; i < job->nr_cbufs; i++) { 317 if (cbufs[i]) { 318 v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture, 319 V3D_FLUSH_DEFAULT, 320 false); 321 pipe_surface_reference(&job->cbufs[i], cbufs[i]); 322 323 if (cbufs[i]->texture->nr_samples > 1) 324 job->msaa = true; 325 } 326 } 327 if (zsbuf) { 328 v3d_flush_jobs_reading_resource(v3d, zsbuf->texture, 329 V3D_FLUSH_DEFAULT, 330 false); 331 pipe_surface_reference(&job->zsbuf, zsbuf); 332 if (zsbuf->texture->nr_samples > 1) 333 job->msaa = true; 334 } 335 if (bbuf) { 336 pipe_surface_reference(&job->bbuf, bbuf); 337 if (bbuf->texture->nr_samples > 1) 338 job->msaa = true; 339 } 340 341 for (int i = 0; i < job->nr_cbufs; i++) { 342 if (cbufs[i]) 343 _mesa_hash_table_insert(v3d->write_jobs, 344 cbufs[i]->texture, job); 345 } 346 if (zsbuf) { 347 _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job); 348 349 struct v3d_resource *rsc = v3d_resource(zsbuf->texture); 350 if (rsc->separate_stencil) { 351 v3d_flush_jobs_reading_resource(v3d, 352 &rsc->separate_stencil->base, 353 V3D_FLUSH_DEFAULT, 354 false); 355 _mesa_hash_table_insert(v3d->write_jobs, 356 &rsc->separate_stencil->base, 357 job); 358 } 359 } 360 361 job->double_buffer = 362 unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !job->msaa; 363 364 memcpy(&job->key, &local_key, sizeof(local_key)); 365 _mesa_hash_table_insert(v3d->jobs, &job->key, job); 366 367 return job; 368} 369 370struct v3d_job * 371v3d_get_job_for_fbo(struct v3d_context *v3d) 372{ 373 if (v3d->job) 374 return v3d->job; 375 376 uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs; 377 struct pipe_surface **cbufs = v3d->framebuffer.cbufs; 378 struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf; 379 struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL); 380 381 if (v3d->framebuffer.samples >= 1) { 382 job->msaa = true; 383 job->double_buffer = false; 384 } 385 386 v3d_get_tile_buffer_size(job->msaa, job->double_buffer, 387 job->nr_cbufs, job->cbufs, job->bbuf, 388 &job->tile_width, &job->tile_height, 389 &job->internal_bpp); 390 391 /* The dirty flags are tracking what's been updated while v3d->job has 392 * been bound, so set them all to ~0 when switching between jobs. We 393 * also need to reset all state at the start of rendering. 394 */ 395 v3d->dirty = ~0; 396 397 /* If we're binding to uninitialized buffers, no need to load their 398 * contents before drawing. 399 */ 400 for (int i = 0; i < nr_cbufs; i++) { 401 if (cbufs[i]) { 402 struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture); 403 if (!rsc->writes) 404 job->clear |= PIPE_CLEAR_COLOR0 << i; 405 } 406 } 407 408 if (zsbuf) { 409 struct v3d_resource *rsc = v3d_resource(zsbuf->texture); 410 if (!rsc->writes) 411 job->clear |= PIPE_CLEAR_DEPTH; 412 413 if (rsc->separate_stencil) 414 rsc = rsc->separate_stencil; 415 416 if (!rsc->writes) 417 job->clear |= PIPE_CLEAR_STENCIL; 418 } 419 420 job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width, 421 job->tile_width); 422 job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height, 423 job->tile_height); 424 425 v3d->job = job; 426 427 return job; 428} 429 430static void 431v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job) 432{ 433 if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL | 434 V3D_DEBUG_CL_NO_BIN | 435 V3D_DEBUG_CLIF)))) 436 return; 437 438 struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo, 439 stderr, 440 V3D_DEBUG & (V3D_DEBUG_CL | 441 V3D_DEBUG_CL_NO_BIN), 442 V3D_DEBUG & V3D_DEBUG_CL_NO_BIN); 443 444 set_foreach(job->bos, entry) { 445 struct v3d_bo *bo = (void *)entry->key; 446 char *name = ralloc_asprintf(NULL, "%s_0x%x", 447 bo->name, bo->offset); 448 449 v3d_bo_map(bo); 450 clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map); 451 452 ralloc_free(name); 453 } 454 455 clif_dump(clif, &job->submit); 456 457 clif_dump_destroy(clif); 458} 459 460static void 461v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d) 462{ 463 assert(v3d->prim_counts); 464 465 perf_debug("stalling on TF counts readback\n"); 466 struct v3d_resource *rsc = v3d_resource(v3d->prim_counts); 467 if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) { 468 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset; 469 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN]; 470 /* When we only have a vertex shader we determine the primitive 471 * count in the CPU so don't update it here again. 472 */ 473 if (v3d->prog.gs) 474 v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN]; 475 } 476} 477 478/** 479 * Submits the job to the kernel and then reinitializes it. 480 */ 481void 482v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job) 483{ 484 struct v3d_screen *screen = v3d->screen; 485 486 if (!job->needs_flush) 487 goto done; 488 489 /* The GL_PRIMITIVES_GENERATED query is included with 490 * OES_geometry_shader. 491 */ 492 job->needs_primitives_generated = 493 v3d->n_primitives_generated_queries_in_flight > 0 && 494 v3d->prog.gs; 495 496 if (job->needs_primitives_generated) 497 v3d_ensure_prim_counts_allocated(v3d); 498 499 if (screen->devinfo.ver >= 41) 500 v3d41_emit_rcl(job); 501 else 502 v3d33_emit_rcl(job); 503 504 if (cl_offset(&job->bcl) > 0) { 505 if (screen->devinfo.ver >= 41) 506 v3d41_bcl_epilogue(v3d, job); 507 else 508 v3d33_bcl_epilogue(v3d, job); 509 } 510 511 /* While the RCL will implicitly depend on the last RCL to have 512 * finished, we also need to block on any previous TFU job we may have 513 * dispatched. 514 */ 515 job->submit.in_sync_rcl = v3d->out_sync; 516 517 /* Update the sync object for the last rendering by our context. */ 518 job->submit.out_sync = v3d->out_sync; 519 520 job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); 521 job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); 522 523 if (v3d->active_perfmon) { 524 assert(screen->has_perfmon); 525 job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id; 526 } 527 528 /* If we are submitting a job with a different perfmon, we need to 529 * ensure the previous one fully finishes before starting this; 530 * otherwise it would wrongly mix counter results. 531 */ 532 if (v3d->active_perfmon != v3d->last_perfmon) { 533 v3d->last_perfmon = v3d->active_perfmon; 534 job->submit.in_sync_bcl = v3d->out_sync; 535 } 536 537 job->submit.flags = 0; 538 if (job->tmu_dirty_rcl && screen->has_cache_flush) 539 job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE; 540 541 /* On V3D 4.1, the tile alloc/state setup moved to register writes 542 * instead of binner packets. 543 */ 544 if (screen->devinfo.ver >= 41) { 545 v3d_job_add_bo(job, job->tile_alloc); 546 job->submit.qma = job->tile_alloc->offset; 547 job->submit.qms = job->tile_alloc->size; 548 549 v3d_job_add_bo(job, job->tile_state); 550 job->submit.qts = job->tile_state->offset; 551 } 552 553 v3d_clif_dump(v3d, job); 554 555 if (!(unlikely(V3D_DEBUG & V3D_DEBUG_NORAST))) { 556 int ret; 557 558 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); 559 static bool warned = false; 560 if (ret && !warned) { 561 fprintf(stderr, "Draw call returned %s. " 562 "Expect corruption.\n", strerror(errno)); 563 warned = true; 564 } else if (!ret) { 565 if (v3d->active_perfmon) 566 v3d->active_perfmon->job_submitted = true; 567 } 568 569 /* If we are submitting a job in the middle of transform 570 * feedback or there is a primitives generated query with a 571 * geometry shader then we need to read the primitive counts 572 * and accumulate them, otherwise they will be reset at the 573 * start of the next draw when we emit the Tile Binning Mode 574 * Configuration packet. 575 * 576 * If the job doesn't have any TF draw calls, then we know 577 * the primitive count must be zero and we can skip stalling 578 * for this. This also fixes a problem because it seems that 579 * in this scenario the counters are not reset with the Tile 580 * Binning Mode Configuration packet, which would translate 581 * to us reading an obsolete (possibly non-zero) value from 582 * the GPU counters. 583 */ 584 if (job->needs_primitives_generated || 585 (v3d->streamout.num_targets && 586 job->tf_draw_calls_queued > 0)) 587 v3d_read_and_accumulate_primitive_counters(v3d); 588 } 589 590done: 591 v3d_job_free(v3d, job); 592} 593 594static bool 595v3d_job_compare(const void *a, const void *b) 596{ 597 return memcmp(a, b, sizeof(struct v3d_job_key)) == 0; 598} 599 600static uint32_t 601v3d_job_hash(const void *key) 602{ 603 return _mesa_hash_data(key, sizeof(struct v3d_job_key)); 604} 605 606void 607v3d_job_init(struct v3d_context *v3d) 608{ 609 v3d->jobs = _mesa_hash_table_create(v3d, 610 v3d_job_hash, 611 v3d_job_compare); 612 v3d->write_jobs = _mesa_hash_table_create(v3d, 613 _mesa_hash_pointer, 614 _mesa_key_pointer_equal); 615} 616 617