1/* 2 * Copyright © 2014-2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file vc4_job.c 25 * 26 * Functions for submitting VC4 render jobs to the kernel. 27 */ 28 29#include <xf86drm.h> 30#include "vc4_cl_dump.h" 31#include "vc4_context.h" 32#include "util/hash_table.h" 33 34static void 35vc4_job_free(struct vc4_context *vc4, struct vc4_job *job) 36{ 37 struct vc4_bo **referenced_bos = job->bo_pointers.base; 38 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { 39 vc4_bo_unreference(&referenced_bos[i]); 40 } 41 42 _mesa_hash_table_remove_key(vc4->jobs, &job->key); 43 44 if (job->color_write) { 45 _mesa_hash_table_remove_key(vc4->write_jobs, 46 job->color_write->texture); 47 pipe_surface_reference(&job->color_write, NULL); 48 } 49 if (job->msaa_color_write) { 50 _mesa_hash_table_remove_key(vc4->write_jobs, 51 job->msaa_color_write->texture); 52 pipe_surface_reference(&job->msaa_color_write, NULL); 53 } 54 if (job->zs_write) { 55 _mesa_hash_table_remove_key(vc4->write_jobs, 56 job->zs_write->texture); 57 pipe_surface_reference(&job->zs_write, NULL); 58 } 59 if (job->msaa_zs_write) { 60 _mesa_hash_table_remove_key(vc4->write_jobs, 61 job->msaa_zs_write->texture); 62 pipe_surface_reference(&job->msaa_zs_write, NULL); 63 } 64 65 pipe_surface_reference(&job->color_read, NULL); 66 pipe_surface_reference(&job->zs_read, NULL); 67 68 if (vc4->job == job) 69 vc4->job = NULL; 70 71 ralloc_free(job); 72} 73 74static struct vc4_job * 75vc4_job_create(struct vc4_context *vc4) 76{ 77 struct vc4_job *job = rzalloc(vc4, struct vc4_job); 78 79 vc4_init_cl(job, &job->bcl); 80 vc4_init_cl(job, &job->shader_rec); 81 vc4_init_cl(job, &job->uniforms); 82 vc4_init_cl(job, &job->bo_handles); 83 vc4_init_cl(job, &job->bo_pointers); 84 85 job->draw_min_x = ~0; 86 job->draw_min_y = ~0; 87 job->draw_max_x = 0; 88 job->draw_max_y = 0; 89 90 job->last_gem_handle_hindex = ~0; 91 92 if (vc4->perfmon) 93 job->perfmon = vc4->perfmon; 94 95 return job; 96} 97 98void 99vc4_flush_jobs_writing_resource(struct vc4_context *vc4, 100 struct pipe_resource *prsc) 101{ 102 struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs, 103 prsc); 104 if (entry) { 105 struct vc4_job *job = entry->data; 106 vc4_job_submit(vc4, job); 107 } 108} 109 110void 111vc4_flush_jobs_reading_resource(struct vc4_context *vc4, 112 struct pipe_resource *prsc) 113{ 114 struct vc4_resource *rsc = vc4_resource(prsc); 115 116 vc4_flush_jobs_writing_resource(vc4, prsc); 117 118 hash_table_foreach(vc4->jobs, entry) { 119 struct vc4_job *job = entry->data; 120 121 struct vc4_bo **referenced_bos = job->bo_pointers.base; 122 bool found = false; 123 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { 124 if (referenced_bos[i] == rsc->bo) { 125 found = true; 126 break; 127 } 128 } 129 if (found) { 130 vc4_job_submit(vc4, job); 131 continue; 132 } 133 134 /* Also check for the Z/color buffers, since the references to 135 * those are only added immediately before submit. 136 */ 137 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) { 138 struct vc4_resource *ctex = 139 vc4_resource(job->color_read->texture); 140 if (ctex->bo == rsc->bo) { 141 vc4_job_submit(vc4, job); 142 continue; 143 } 144 } 145 146 if (job->zs_read && !(job->cleared & 147 (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { 148 struct vc4_resource *ztex = 149 vc4_resource(job->zs_read->texture); 150 if (ztex->bo == rsc->bo) { 151 vc4_job_submit(vc4, job); 152 continue; 153 } 154 } 155 } 156} 157 158/** 159 * Returns a vc4_job struture for tracking V3D rendering to a particular FBO. 160 * 161 * If we've already started rendering to this FBO, then return old same job, 162 * otherwise make a new one. If we're beginning rendering to an FBO, make 163 * sure that any previous reads of the FBO (or writes to its color/Z surfaces) 164 * have been flushed. 165 */ 166struct vc4_job * 167vc4_get_job(struct vc4_context *vc4, 168 struct pipe_surface *cbuf, struct pipe_surface *zsbuf) 169{ 170 /* Return the existing job for this FBO if we have one */ 171 struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf}; 172 struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs, 173 &local_key); 174 if (entry) 175 return entry->data; 176 177 /* Creating a new job. Make sure that any previous jobs reading or 178 * writing these buffers are flushed. 179 */ 180 if (cbuf) 181 vc4_flush_jobs_reading_resource(vc4, cbuf->texture); 182 if (zsbuf) 183 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture); 184 185 struct vc4_job *job = vc4_job_create(vc4); 186 187 if (cbuf) { 188 if (cbuf->texture->nr_samples > 1) { 189 job->msaa = true; 190 pipe_surface_reference(&job->msaa_color_write, cbuf); 191 } else { 192 pipe_surface_reference(&job->color_write, cbuf); 193 } 194 } 195 196 if (zsbuf) { 197 if (zsbuf->texture->nr_samples > 1) { 198 job->msaa = true; 199 pipe_surface_reference(&job->msaa_zs_write, zsbuf); 200 } else { 201 pipe_surface_reference(&job->zs_write, zsbuf); 202 } 203 } 204 205 if (job->msaa) { 206 job->tile_width = 32; 207 job->tile_height = 32; 208 } else { 209 job->tile_width = 64; 210 job->tile_height = 64; 211 } 212 213 if (cbuf) 214 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job); 215 if (zsbuf) 216 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job); 217 218 job->key.cbuf = cbuf; 219 job->key.zsbuf = zsbuf; 220 _mesa_hash_table_insert(vc4->jobs, &job->key, job); 221 222 return job; 223} 224 225struct vc4_job * 226vc4_get_job_for_fbo(struct vc4_context *vc4) 227{ 228 if (vc4->job) 229 return vc4->job; 230 231 struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; 232 struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; 233 struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf); 234 235 /* The dirty flags are tracking what's been updated while vc4->job has 236 * been bound, so set them all to ~0 when switching between jobs. We 237 * also need to reset all state at the start of rendering. 238 */ 239 vc4->dirty = ~0; 240 241 /* Set up the read surfaces in the job. If they aren't actually 242 * getting read (due to a clear starting the frame), job->cleared will 243 * mask out the read. 244 */ 245 pipe_surface_reference(&job->color_read, cbuf); 246 pipe_surface_reference(&job->zs_read, zsbuf); 247 248 /* If we're binding to uninitialized buffers, no need to load their 249 * contents before drawing. 250 */ 251 if (cbuf) { 252 struct vc4_resource *rsc = vc4_resource(cbuf->texture); 253 if (!rsc->writes) 254 job->cleared |= PIPE_CLEAR_COLOR0; 255 } 256 257 if (zsbuf) { 258 struct vc4_resource *rsc = vc4_resource(zsbuf->texture); 259 if (!rsc->writes) 260 job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; 261 } 262 263 job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width, 264 job->tile_width); 265 job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height, 266 job->tile_height); 267 268 /* Initialize the job with the raster order flags -- each draw will 269 * check that we haven't changed the flags, since that requires a 270 * flush. 271 */ 272 if (vc4->rasterizer) 273 job->flags = vc4->rasterizer->tile_raster_order_flags; 274 275 vc4->job = job; 276 277 return job; 278} 279 280static void 281vc4_submit_setup_rcl_surface(struct vc4_job *job, 282 struct drm_vc4_submit_rcl_surface *submit_surf, 283 struct pipe_surface *psurf, 284 bool is_depth, bool is_write) 285{ 286 struct vc4_surface *surf = vc4_surface(psurf); 287 288 if (!surf) 289 return; 290 291 struct vc4_resource *rsc = vc4_resource(psurf->texture); 292 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); 293 submit_surf->offset = surf->offset; 294 295 if (psurf->texture->nr_samples <= 1) { 296 if (is_depth) { 297 submit_surf->bits = 298 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, 299 VC4_LOADSTORE_TILE_BUFFER_BUFFER); 300 301 } else { 302 submit_surf->bits = 303 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, 304 VC4_LOADSTORE_TILE_BUFFER_BUFFER) | 305 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? 306 VC4_LOADSTORE_TILE_BUFFER_BGR565 : 307 VC4_LOADSTORE_TILE_BUFFER_RGBA8888, 308 VC4_LOADSTORE_TILE_BUFFER_FORMAT); 309 } 310 submit_surf->bits |= 311 VC4_SET_FIELD(surf->tiling, 312 VC4_LOADSTORE_TILE_BUFFER_TILING); 313 } else { 314 assert(!is_write); 315 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES; 316 } 317 318 if (is_write) 319 rsc->writes++; 320} 321 322static void 323vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job, 324 struct drm_vc4_submit_rcl_surface *submit_surf, 325 struct pipe_surface *psurf) 326{ 327 struct vc4_surface *surf = vc4_surface(psurf); 328 329 if (!surf) 330 return; 331 332 struct vc4_resource *rsc = vc4_resource(psurf->texture); 333 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); 334 submit_surf->offset = surf->offset; 335 336 if (psurf->texture->nr_samples <= 1) { 337 submit_surf->bits = 338 VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? 339 VC4_RENDER_CONFIG_FORMAT_BGR565 : 340 VC4_RENDER_CONFIG_FORMAT_RGBA8888, 341 VC4_RENDER_CONFIG_FORMAT) | 342 VC4_SET_FIELD(surf->tiling, 343 VC4_RENDER_CONFIG_MEMORY_FORMAT); 344 } 345 346 rsc->writes++; 347} 348 349static void 350vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job, 351 struct drm_vc4_submit_rcl_surface *submit_surf, 352 struct pipe_surface *psurf) 353{ 354 struct vc4_surface *surf = vc4_surface(psurf); 355 356 if (!surf) 357 return; 358 359 struct vc4_resource *rsc = vc4_resource(psurf->texture); 360 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo); 361 submit_surf->offset = surf->offset; 362 submit_surf->bits = 0; 363 rsc->writes++; 364} 365 366/** 367 * Submits the job to the kernel and then reinitializes it. 368 */ 369void 370vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) 371{ 372 if (!job->needs_flush) 373 goto done; 374 375 /* The RCL setup would choke if the draw bounds cause no drawing, so 376 * just drop the drawing if that's the case. 377 */ 378 if (job->draw_max_x <= job->draw_min_x || 379 job->draw_max_y <= job->draw_min_y) { 380 goto done; 381 } 382 383 if (vc4_debug & VC4_DEBUG_CL) { 384 fprintf(stderr, "BCL:\n"); 385 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false); 386 } 387 388 if (cl_offset(&job->bcl) > 0) { 389 /* Increment the semaphore indicating that binning is done and 390 * unblocking the render thread. Note that this doesn't act 391 * until the FLUSH completes. 392 */ 393 cl_ensure_space(&job->bcl, 8); 394 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); 395 /* The FLUSH caps all of our bin lists with a 396 * VC4_PACKET_RETURN. 397 */ 398 cl_emit(&job->bcl, FLUSH, flush); 399 } 400 struct drm_vc4_submit_cl submit = { 401 .color_read.hindex = ~0, 402 .zs_read.hindex = ~0, 403 .color_write.hindex = ~0, 404 .msaa_color_write.hindex = ~0, 405 .zs_write.hindex = ~0, 406 .msaa_zs_write.hindex = ~0, 407 }; 408 409 cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t)); 410 cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *)); 411 412 if (job->resolve & PIPE_CLEAR_COLOR) { 413 if (!(job->cleared & PIPE_CLEAR_COLOR)) { 414 vc4_submit_setup_rcl_surface(job, &submit.color_read, 415 job->color_read, 416 false, false); 417 } 418 vc4_submit_setup_rcl_render_config_surface(job, 419 &submit.color_write, 420 job->color_write); 421 vc4_submit_setup_rcl_msaa_surface(job, 422 &submit.msaa_color_write, 423 job->msaa_color_write); 424 } 425 if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { 426 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { 427 vc4_submit_setup_rcl_surface(job, &submit.zs_read, 428 job->zs_read, true, false); 429 } 430 vc4_submit_setup_rcl_surface(job, &submit.zs_write, 431 job->zs_write, true, true); 432 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write, 433 job->msaa_zs_write); 434 } 435 436 if (job->msaa) { 437 /* This bit controls how many pixels the general 438 * (i.e. subsampled) loads/stores are iterating over 439 * (multisample loads replicate out to the other samples). 440 */ 441 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X; 442 /* Controls whether color_write's 443 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation 444 */ 445 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X; 446 } 447 448 submit.bo_handles = (uintptr_t)job->bo_handles.base; 449 submit.bo_handle_count = cl_offset(&job->bo_handles) / 4; 450 submit.bin_cl = (uintptr_t)job->bcl.base; 451 submit.bin_cl_size = cl_offset(&job->bcl); 452 submit.shader_rec = (uintptr_t)job->shader_rec.base; 453 submit.shader_rec_size = cl_offset(&job->shader_rec); 454 submit.shader_rec_count = job->shader_rec_count; 455 submit.uniforms = (uintptr_t)job->uniforms.base; 456 submit.uniforms_size = cl_offset(&job->uniforms); 457 if (job->perfmon) 458 submit.perfmonid = job->perfmon->id; 459 460 assert(job->draw_min_x != ~0 && job->draw_min_y != ~0); 461 submit.min_x_tile = job->draw_min_x / job->tile_width; 462 submit.min_y_tile = job->draw_min_y / job->tile_height; 463 submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width; 464 submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height; 465 submit.width = job->draw_width; 466 submit.height = job->draw_height; 467 if (job->cleared) { 468 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; 469 submit.clear_color[0] = job->clear_color[0]; 470 submit.clear_color[1] = job->clear_color[1]; 471 submit.clear_z = job->clear_depth; 472 submit.clear_s = job->clear_stencil; 473 } 474 submit.flags |= job->flags; 475 476 if (vc4->screen->has_syncobj) { 477 submit.out_sync = vc4->job_syncobj; 478 479 if (vc4->in_fence_fd >= 0) { 480 /* This replaces the fence in the syncobj. */ 481 drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj, 482 vc4->in_fence_fd); 483 submit.in_sync = vc4->in_syncobj; 484 close(vc4->in_fence_fd); 485 vc4->in_fence_fd = -1; 486 } 487 } 488 489 if (!(vc4_debug & VC4_DEBUG_NORAST)) { 490 int ret; 491 492 ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); 493 static bool warned = false; 494 if (ret && !warned) { 495 fprintf(stderr, "Draw call returned %s. " 496 "Expect corruption.\n", strerror(errno)); 497 warned = true; 498 } else if (!ret) { 499 vc4->last_emit_seqno = submit.seqno; 500 if (job->perfmon) 501 job->perfmon->last_seqno = submit.seqno; 502 } 503 } 504 505 if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) { 506 if (!vc4_wait_seqno(vc4->screen, 507 vc4->last_emit_seqno - 5, 508 PIPE_TIMEOUT_INFINITE, 509 "job throttling")) { 510 fprintf(stderr, "Job throttling failed\n"); 511 } 512 } 513 514 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) { 515 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno, 516 PIPE_TIMEOUT_INFINITE, "sync")) { 517 fprintf(stderr, "Wait failed.\n"); 518 abort(); 519 } 520 } 521 522done: 523 vc4_job_free(vc4, job); 524} 525 526static bool 527vc4_job_compare(const void *a, const void *b) 528{ 529 return memcmp(a, b, sizeof(struct vc4_job_key)) == 0; 530} 531 532static uint32_t 533vc4_job_hash(const void *key) 534{ 535 return _mesa_hash_data(key, sizeof(struct vc4_job_key)); 536} 537 538int 539vc4_job_init(struct vc4_context *vc4) 540{ 541 vc4->jobs = _mesa_hash_table_create(vc4, 542 vc4_job_hash, 543 vc4_job_compare); 544 vc4->write_jobs = _mesa_hash_table_create(vc4, 545 _mesa_hash_pointer, 546 _mesa_key_pointer_equal); 547 548 if (vc4->screen->has_syncobj) { 549 /* Create the syncobj as signaled since with no job executed 550 * there is nothing to wait on. 551 */ 552 int ret = drmSyncobjCreate(vc4->fd, 553 DRM_SYNCOBJ_CREATE_SIGNALED, 554 &vc4->job_syncobj); 555 if (ret) { 556 /* If the screen indicated syncobj support, we should 557 * be able to create a signaled syncobj. 558 * At this point it is too late to pretend the screen 559 * has no syncobj support. 560 */ 561 return ret; 562 } 563 } 564 565 return 0; 566} 567 568