1/* 2 * Copyright (C) 2017-2019 Lima Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24#include <stdlib.h> 25#include <string.h> 26 27#include "xf86drm.h" 28#include "drm-uapi/lima_drm.h" 29 30#include "util/u_math.h" 31#include "util/ralloc.h" 32#include "util/os_time.h" 33#include "util/hash_table.h" 34#include "util/format/u_format.h" 35#include "util/u_upload_mgr.h" 36#include "util/u_inlines.h" 37#include "util/u_framebuffer.h" 38 39#include "lima_screen.h" 40#include "lima_context.h" 41#include "lima_job.h" 42#include "lima_bo.h" 43#include "lima_util.h" 44#include "lima_format.h" 45#include "lima_resource.h" 46#include "lima_texture.h" 47#include "lima_fence.h" 48#include "lima_gpu.h" 49#include "lima_blit.h" 50 51#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) 52 53static void 54lima_get_fb_info(struct lima_job *job) 55{ 56 struct lima_context *ctx = job->ctx; 57 struct lima_job_fb_info *fb = &job->fb; 58 struct lima_surface *surf = lima_surface(job->key.cbuf); 59 60 if (!surf) 61 surf = lima_surface(job->key.zsbuf); 62 63 if (!surf) { 64 /* We don't have neither cbuf nor zsbuf, use dimensions from ctx */ 65 fb->width = ctx->framebuffer.base.width; 66 fb->height = ctx->framebuffer.base.height; 67 } else { 68 fb->width = surf->base.width; 69 fb->height = surf->base.height; 70 } 71 72 int width = align(fb->width, 16) >> 4; 73 int height = align(fb->height, 16) >> 4; 74 75 struct lima_screen *screen = lima_screen(ctx->base.screen); 76 77 fb->tiled_w = width; 78 fb->tiled_h = height; 79 80 fb->shift_h = 0; 81 fb->shift_w = 0; 82 83 int limit = screen->plb_max_blk; 84 while ((width * height) > limit) { 85 if (width >= height) { 86 width = (width + 1) >> 1; 87 fb->shift_w++; 88 } else { 89 height = (height + 1) >> 1; 90 fb->shift_h++; 91 } 92 } 93 94 fb->block_w = width; 95 fb->block_h = height; 96 97 fb->shift_min = MIN3(fb->shift_w, fb->shift_h, 2); 98} 99 100static struct lima_job * 101lima_job_create(struct lima_context *ctx, 102 struct pipe_surface *cbuf, 103 struct pipe_surface *zsbuf) 104{ 105 struct lima_job *s; 106 107 s = rzalloc(ctx, struct lima_job); 108 if (!s) 109 return NULL; 110 111 s->fd = lima_screen(ctx->base.screen)->fd; 112 s->ctx = ctx; 113 114 s->damage_rect.minx = s->damage_rect.miny = 0xffff; 115 s->damage_rect.maxx = s->damage_rect.maxy = 0; 116 s->draws = 0; 117 118 s->clear.depth = 0x00ffffff; 119 120 for (int i = 0; i < 2; i++) { 121 util_dynarray_init(s->gem_bos + i, s); 122 util_dynarray_init(s->bos + i, s); 123 } 124 125 util_dynarray_init(&s->vs_cmd_array, s); 126 util_dynarray_init(&s->plbu_cmd_array, s); 127 util_dynarray_init(&s->plbu_cmd_head, s); 128 129 pipe_surface_reference(&s->key.cbuf, cbuf); 130 pipe_surface_reference(&s->key.zsbuf, zsbuf); 131 132 lima_get_fb_info(s); 133 134 s->dump = lima_dump_create(); 135 136 return s; 137} 138 139static void 140lima_job_free(struct lima_job *job) 141{ 142 struct lima_context *ctx = job->ctx; 143 144 _mesa_hash_table_remove_key(ctx->jobs, &job->key); 145 146 if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) 147 _mesa_hash_table_remove_key(ctx->write_jobs, job->key.cbuf->texture); 148 if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) 149 _mesa_hash_table_remove_key(ctx->write_jobs, job->key.zsbuf->texture); 150 151 pipe_surface_reference(&job->key.cbuf, NULL); 152 pipe_surface_reference(&job->key.zsbuf, NULL); 153 154 lima_dump_free(job->dump); 155 job->dump = NULL; 156 157 /* TODO: do we need a cache for job? */ 158 ralloc_free(job); 159} 160 161struct lima_job * 162lima_job_get_with_fb(struct lima_context *ctx, 163 struct pipe_surface *cbuf, 164 struct pipe_surface *zsbuf) 165{ 166 struct lima_job_key local_key = { 167 .cbuf = cbuf, 168 .zsbuf = zsbuf, 169 }; 170 171 struct hash_entry *entry = _mesa_hash_table_search(ctx->jobs, &local_key); 172 if (entry) 173 return entry->data; 174 175 struct lima_job *job = lima_job_create(ctx, cbuf, zsbuf); 176 if (!job) 177 return NULL; 178 179 _mesa_hash_table_insert(ctx->jobs, &job->key, job); 180 181 return job; 182} 183 184static struct lima_job * 185_lima_job_get(struct lima_context *ctx) 186{ 187 struct lima_context_framebuffer *fb = &ctx->framebuffer; 188 189 return lima_job_get_with_fb(ctx, fb->base.cbufs[0], fb->base.zsbuf); 190} 191 192/* 193 * Note: this function can only be called in draw code path, 194 * must not exist in flush code path. 195 */ 196struct lima_job * 197lima_job_get(struct lima_context *ctx) 198{ 199 if (ctx->job) 200 return ctx->job; 201 202 ctx->job = _lima_job_get(ctx); 203 return ctx->job; 204} 205 206bool lima_job_add_bo(struct lima_job *job, int pipe, 207 struct lima_bo *bo, uint32_t flags) 208{ 209 util_dynarray_foreach(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, gem_bo) { 210 if (bo->handle == gem_bo->handle) { 211 gem_bo->flags |= flags; 212 return true; 213 } 214 } 215 216 struct drm_lima_gem_submit_bo *job_bo = 217 util_dynarray_grow(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, 1); 218 job_bo->handle = bo->handle; 219 job_bo->flags = flags; 220 221 struct lima_bo **jbo = util_dynarray_grow(job->bos + pipe, struct lima_bo *, 1); 222 *jbo = bo; 223 224 /* prevent bo from being freed when job start */ 225 lima_bo_reference(bo); 226 227 return true; 228} 229 230static bool 231lima_job_start(struct lima_job *job, int pipe, void *frame, uint32_t size) 232{ 233 struct lima_context *ctx = job->ctx; 234 struct drm_lima_gem_submit req = { 235 .ctx = ctx->id, 236 .pipe = pipe, 237 .nr_bos = job->gem_bos[pipe].size / sizeof(struct drm_lima_gem_submit_bo), 238 .bos = VOID2U64(util_dynarray_begin(job->gem_bos + pipe)), 239 .frame = VOID2U64(frame), 240 .frame_size = size, 241 .out_sync = ctx->out_sync[pipe], 242 }; 243 244 if (ctx->in_sync_fd >= 0) { 245 int err = drmSyncobjImportSyncFile(job->fd, ctx->in_sync[pipe], 246 ctx->in_sync_fd); 247 if (err) 248 return false; 249 250 req.in_sync[0] = ctx->in_sync[pipe]; 251 close(ctx->in_sync_fd); 252 ctx->in_sync_fd = -1; 253 } 254 255 bool ret = drmIoctl(job->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0; 256 257 util_dynarray_foreach(job->bos + pipe, struct lima_bo *, bo) { 258 lima_bo_unreference(*bo); 259 } 260 261 return ret; 262} 263 264static bool 265lima_job_wait(struct lima_job *job, int pipe, uint64_t timeout_ns) 266{ 267 int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns); 268 if (abs_timeout == OS_TIMEOUT_INFINITE) 269 abs_timeout = INT64_MAX; 270 271 struct lima_context *ctx = job->ctx; 272 return !drmSyncobjWait(job->fd, ctx->out_sync + pipe, 1, abs_timeout, 0, NULL); 273} 274 275static bool 276lima_job_has_bo(struct lima_job *job, struct lima_bo *bo, bool all) 277{ 278 for (int i = 0; i < 2; i++) { 279 util_dynarray_foreach(job->gem_bos + i, struct drm_lima_gem_submit_bo, gem_bo) { 280 if (bo->handle == gem_bo->handle) { 281 if (all || gem_bo->flags & LIMA_SUBMIT_BO_WRITE) 282 return true; 283 else 284 break; 285 } 286 } 287 } 288 289 return false; 290} 291 292void * 293lima_job_create_stream_bo(struct lima_job *job, int pipe, 294 unsigned size, uint32_t *va) 295{ 296 struct lima_context *ctx = job->ctx; 297 298 void *cpu; 299 unsigned offset; 300 struct pipe_resource *pres = NULL; 301 u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu); 302 303 struct lima_resource *res = lima_resource(pres); 304 *va = res->bo->va + offset; 305 306 lima_job_add_bo(job, pipe, res->bo, LIMA_SUBMIT_BO_READ); 307 308 pipe_resource_reference(&pres, NULL); 309 310 return cpu; 311} 312 313static inline struct lima_damage_region * 314lima_job_get_damage(struct lima_job *job) 315{ 316 if (!(job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0))) 317 return NULL; 318 319 struct lima_surface *surf = lima_surface(job->key.cbuf); 320 struct lima_resource *res = lima_resource(surf->base.texture); 321 return &res->damage; 322} 323 324static bool 325lima_fb_cbuf_needs_reload(struct lima_job *job) 326{ 327 if (!job->key.cbuf) 328 return false; 329 330 struct lima_surface *surf = lima_surface(job->key.cbuf); 331 struct lima_resource *res = lima_resource(surf->base.texture); 332 if (res->damage.region) { 333 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled, 334 * we need to reload damage region, otherwise just want to reload 335 * the region not aligned to tile boundary */ 336 //if (!res->damage.aligned) 337 // return true; 338 return true; 339 } 340 else if (surf->reload & PIPE_CLEAR_COLOR0) 341 return true; 342 343 return false; 344} 345 346static bool 347lima_fb_zsbuf_needs_reload(struct lima_job *job) 348{ 349 if (!job->key.zsbuf) 350 return false; 351 352 struct lima_surface *surf = lima_surface(job->key.zsbuf); 353 if (surf->reload & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) 354 return true; 355 356 return false; 357} 358 359static void 360lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf) 361{ 362 struct lima_job_fb_info *fb = &job->fb; 363 struct lima_context *ctx = job->ctx; 364 struct pipe_box src = { 365 .x = 0, 366 .y = 0, 367 .width = fb->width, 368 .height = fb->height, 369 }; 370 371 struct pipe_box dst = { 372 .x = 0, 373 .y = 0, 374 .width = fb->width, 375 .height = fb->height, 376 }; 377 378 if (ctx->framebuffer.base.samples > 1) { 379 for (int i = 0; i < LIMA_MAX_SAMPLES; i++) { 380 lima_pack_blit_cmd(job, &job->plbu_cmd_head, 381 psurf, &src, &dst, 382 PIPE_TEX_FILTER_NEAREST, false, 383 (1 << i), i); 384 } 385 } else { 386 lima_pack_blit_cmd(job, &job->plbu_cmd_head, 387 psurf, &src, &dst, 388 PIPE_TEX_FILTER_NEAREST, false, 389 0xf, 0); 390 } 391} 392 393static void 394lima_pack_head_plbu_cmd(struct lima_job *job) 395{ 396 struct lima_context *ctx = job->ctx; 397 struct lima_job_fb_info *fb = &job->fb; 398 399 PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10); 400 401 PLBU_CMD_UNKNOWN2(); 402 PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w); 403 PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h); 404 PLBU_CMD_BLOCK_STRIDE(fb->block_w); 405 406 PLBU_CMD_ARRAY_ADDRESS( 407 ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size, 408 fb->block_w * fb->block_h); 409 410 PLBU_CMD_END(); 411 412 if (lima_fb_cbuf_needs_reload(job)) { 413 lima_pack_reload_plbu_cmd(job, job->key.cbuf); 414 } 415 416 if (lima_fb_zsbuf_needs_reload(job)) 417 lima_pack_reload_plbu_cmd(job, job->key.zsbuf); 418} 419 420static void 421hilbert_rotate(int n, int *x, int *y, int rx, int ry) 422{ 423 if (ry == 0) { 424 if (rx == 1) { 425 *x = n-1 - *x; 426 *y = n-1 - *y; 427 } 428 429 /* Swap x and y */ 430 int t = *x; 431 *x = *y; 432 *y = t; 433 } 434} 435 436static void 437hilbert_coords(int n, int d, int *x, int *y) 438{ 439 int rx, ry, i, t=d; 440 441 *x = *y = 0; 442 443 for (i = 0; (1 << i) < n; i++) { 444 445 rx = 1 & (t / 2); 446 ry = 1 & (t ^ rx); 447 448 hilbert_rotate(1 << i, x, y, rx, ry); 449 450 *x += rx << i; 451 *y += ry << i; 452 453 t /= 4; 454 } 455} 456 457static int 458lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off) 459{ 460 /* carefully calculate each stream start address: 461 * 1. overflow: each stream size may be different due to 462 * fb->tiled_w * fb->tiled_h can't be divided by num_pp, 463 * extra size should be added to the preceeding stream 464 * 2. alignment: each stream address should be 0x20 aligned 465 */ 466 int delta = tiled_w * tiled_h / num_pp * 16 + 16; 467 int remain = tiled_w * tiled_h % num_pp; 468 int offset = 0; 469 470 for (int i = 0; i < num_pp; i++) { 471 off[i] = offset; 472 473 offset += delta; 474 if (remain) { 475 offset += 16; 476 remain--; 477 } 478 offset = align(offset, 0x20); 479 } 480 481 return offset; 482} 483 484static void 485lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y, 486 int tiled_w, int tiled_h) 487{ 488 struct lima_context *ctx = job->ctx; 489 struct lima_pp_stream_state *ps = &ctx->pp_stream; 490 struct lima_job_fb_info *fb = &job->fb; 491 struct lima_screen *screen = lima_screen(ctx->base.screen); 492 int num_pp = screen->num_pp; 493 assert(num_pp > 0); 494 495 /* use hilbert_coords to generates 1D to 2D relationship. 496 * 1D for pp stream index and 2D for plb block x/y on framebuffer. 497 * if multi pp, interleave the 1D index to make each pp's render target 498 * close enough which should result close workload 499 */ 500 int max = MAX2(tiled_w, tiled_h); 501 int index = 0; 502 uint32_t *stream[8]; 503 int si[8] = {0}; 504 int dim = 0; 505 int count = 0; 506 507 /* Don't update count if we get zero rect. We'll just generate 508 * PP stream with just terminators in it. 509 */ 510 if ((tiled_w * tiled_h) != 0) { 511 dim = util_logbase2_ceil(max); 512 count = 1 << (dim + dim); 513 } 514 515 for (int i = 0; i < num_pp; i++) 516 stream[i] = ps->map + ps->offset[i]; 517 518 for (int i = 0; i < count; i++) { 519 int x, y; 520 hilbert_coords(max, i, &x, &y); 521 if (x < tiled_w && y < tiled_h) { 522 x += off_x; 523 y += off_y; 524 525 int pp = index % num_pp; 526 int offset = ((y >> fb->shift_h) * fb->block_w + 527 (x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE; 528 int plb_va = ctx->plb[ctx->plb_index]->va + offset; 529 530 stream[pp][si[pp]++] = 0; 531 stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8); 532 stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003); 533 stream[pp][si[pp]++] = 0xB0000000; 534 535 index++; 536 } 537 } 538 539 for (int i = 0; i < num_pp; i++) { 540 stream[i][si[i]++] = 0; 541 stream[i][si[i]++] = 0xBC000000; 542 stream[i][si[i]++] = 0; 543 stream[i][si[i]++] = 0; 544 545 lima_dump_command_stream_print( 546 job->dump, stream[i], si[i] * 4, 547 false, "pp plb stream %d at va %x\n", 548 i, ps->va + ps->offset[i]); 549 } 550} 551 552static void 553lima_free_stale_pp_stream_bo(struct lima_context *ctx) 554{ 555 list_for_each_entry_safe(struct lima_ctx_plb_pp_stream, entry, 556 &ctx->plb_pp_stream_lru_list, lru_list) { 557 if (ctx->plb_stream_cache_size <= lima_plb_pp_stream_cache_size) 558 break; 559 560 struct hash_entry *hash_entry = 561 _mesa_hash_table_search(ctx->plb_pp_stream, &entry->key); 562 if (hash_entry) 563 _mesa_hash_table_remove(ctx->plb_pp_stream, hash_entry); 564 list_del(&entry->lru_list); 565 566 ctx->plb_stream_cache_size -= entry->bo->size; 567 lima_bo_unreference(entry->bo); 568 569 ralloc_free(entry); 570 } 571} 572 573static void 574lima_update_damage_pp_stream(struct lima_job *job) 575{ 576 struct lima_context *ctx = job->ctx; 577 struct lima_damage_region *ds = lima_job_get_damage(job); 578 struct lima_job_fb_info *fb = &job->fb; 579 struct pipe_scissor_state bound; 580 struct pipe_scissor_state *dr = &job->damage_rect; 581 582 if (ds && ds->region) { 583 struct pipe_scissor_state *dbound = &ds->bound; 584 bound.minx = MAX2(dbound->minx, dr->minx >> 4); 585 bound.miny = MAX2(dbound->miny, dr->miny >> 4); 586 bound.maxx = MIN2(dbound->maxx, (dr->maxx + 0xf) >> 4); 587 bound.maxy = MIN2(dbound->maxy, (dr->maxy + 0xf) >> 4); 588 } else { 589 bound.minx = dr->minx >> 4; 590 bound.miny = dr->miny >> 4; 591 bound.maxx = (dr->maxx + 0xf) >> 4; 592 bound.maxy = (dr->maxy + 0xf) >> 4; 593 } 594 595 /* Clamp to FB size */ 596 bound.minx = MIN2(bound.minx, fb->tiled_w); 597 bound.miny = MIN2(bound.miny, fb->tiled_h); 598 bound.maxx = MIN2(bound.maxx, fb->tiled_w); 599 bound.maxy = MIN2(bound.maxy, fb->tiled_h); 600 601 struct lima_ctx_plb_pp_stream_key key = { 602 .plb_index = ctx->plb_index, 603 .minx = bound.minx, 604 .miny = bound.miny, 605 .maxx = bound.maxx, 606 .maxy = bound.maxy, 607 .shift_w = fb->shift_w, 608 .shift_h = fb->shift_h, 609 .block_w = fb->block_w, 610 .block_h = fb->block_h, 611 }; 612 613 struct hash_entry *entry = 614 _mesa_hash_table_search(ctx->plb_pp_stream, &key); 615 if (entry) { 616 struct lima_ctx_plb_pp_stream *s = entry->data; 617 618 list_del(&s->lru_list); 619 list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list); 620 621 ctx->pp_stream.map = lima_bo_map(s->bo); 622 ctx->pp_stream.va = s->bo->va; 623 memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset)); 624 625 lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ); 626 627 return; 628 } 629 630 lima_free_stale_pp_stream_bo(ctx); 631 632 struct lima_screen *screen = lima_screen(ctx->base.screen); 633 struct lima_ctx_plb_pp_stream *s = 634 rzalloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream); 635 636 list_inithead(&s->lru_list); 637 s->key.plb_index = ctx->plb_index; 638 s->key.minx = bound.minx; 639 s->key.maxx = bound.maxx; 640 s->key.miny = bound.miny; 641 s->key.maxy = bound.maxy; 642 s->key.shift_w = fb->shift_w; 643 s->key.shift_h = fb->shift_h; 644 s->key.block_w = fb->block_w; 645 s->key.block_h = fb->block_h; 646 647 int tiled_w = bound.maxx - bound.minx; 648 int tiled_h = bound.maxy - bound.miny; 649 int size = lima_get_pp_stream_size( 650 screen->num_pp, tiled_w, tiled_h, s->offset); 651 652 s->bo = lima_bo_create(screen, size, 0); 653 654 ctx->pp_stream.map = lima_bo_map(s->bo); 655 ctx->pp_stream.va = s->bo->va; 656 memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset)); 657 658 lima_generate_pp_stream(job, bound.minx, bound.miny, tiled_w, tiled_h); 659 660 ctx->plb_stream_cache_size += size; 661 list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list); 662 _mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s); 663 664 lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ); 665} 666 667static bool 668lima_damage_fullscreen(struct lima_job *job) 669{ 670 struct pipe_scissor_state *dr = &job->damage_rect; 671 672 return dr->minx == 0 && 673 dr->miny == 0 && 674 dr->maxx == job->fb.width && 675 dr->maxy == job->fb.height; 676} 677 678static void 679lima_update_pp_stream(struct lima_job *job) 680{ 681 struct lima_context *ctx = job->ctx; 682 struct lima_screen *screen = lima_screen(ctx->base.screen); 683 struct lima_damage_region *damage = lima_job_get_damage(job); 684 if ((screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) || 685 (damage && damage->region) || !lima_damage_fullscreen(job)) 686 lima_update_damage_pp_stream(job); 687 else 688 /* Mali450 doesn't need full PP stream */ 689 ctx->pp_stream.map = NULL; 690} 691 692static void 693lima_update_job_bo(struct lima_job *job) 694{ 695 struct lima_context *ctx = job->ctx; 696 697 lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb_gp_stream, 698 LIMA_SUBMIT_BO_READ); 699 lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb[ctx->plb_index], 700 LIMA_SUBMIT_BO_WRITE); 701 lima_job_add_bo(job, LIMA_PIPE_GP, ctx->gp_tile_heap[ctx->plb_index], 702 LIMA_SUBMIT_BO_WRITE); 703 704 lima_dump_command_stream_print( 705 job->dump, ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size, 706 ctx->plb_gp_size, false, "gp plb stream at va %x\n", 707 ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size); 708 709 lima_job_add_bo(job, LIMA_PIPE_PP, ctx->plb[ctx->plb_index], 710 LIMA_SUBMIT_BO_READ); 711 lima_job_add_bo(job, LIMA_PIPE_PP, ctx->gp_tile_heap[ctx->plb_index], 712 LIMA_SUBMIT_BO_READ); 713 714 struct lima_screen *screen = lima_screen(ctx->base.screen); 715 lima_job_add_bo(job, LIMA_PIPE_PP, screen->pp_buffer, LIMA_SUBMIT_BO_READ); 716} 717 718static void 719lima_finish_plbu_cmd(struct util_dynarray *plbu_cmd_array) 720{ 721 int i = 0; 722 uint32_t *plbu_cmd = util_dynarray_ensure_cap(plbu_cmd_array, plbu_cmd_array->size + 2 * 4); 723 724 plbu_cmd[i++] = 0x00000000; 725 plbu_cmd[i++] = 0x50000000; /* END */ 726 727 plbu_cmd_array->size += i * 4; 728} 729 730static void 731lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx) 732{ 733 struct lima_job_fb_info *fb = &job->fb; 734 struct pipe_surface *zsbuf = job->key.zsbuf; 735 struct lima_resource *res = lima_resource(zsbuf->texture); 736 int level = zsbuf->u.tex.level; 737 uint32_t format = lima_format_get_pixel(zsbuf->format); 738 739 struct lima_pp_wb_reg *wb = (void *)wb_reg; 740 wb[wb_idx].type = 0x01; /* 1 for depth, stencil */ 741 wb[wb_idx].address = res->bo->va + res->levels[level].offset; 742 wb[wb_idx].pixel_format = format; 743 if (res->tiled) { 744 wb[wb_idx].pixel_layout = 0x2; 745 wb[wb_idx].pitch = fb->tiled_w; 746 } else { 747 wb[wb_idx].pixel_layout = 0x0; 748 wb[wb_idx].pitch = res->levels[level].stride / 8; 749 } 750 wb[wb_idx].flags = 0; 751 unsigned nr_samples = zsbuf->nr_samples ? 752 zsbuf->nr_samples : MAX2(1, zsbuf->texture->nr_samples); 753 if (nr_samples > 1) { 754 wb[wb_idx].mrt_pitch = res->mrt_pitch; 755 wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples); 756 } 757} 758 759static void 760lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg, 761 uint32_t *wb_reg, int wb_idx) 762{ 763 struct lima_job_fb_info *fb = &job->fb; 764 struct pipe_surface *cbuf = job->key.cbuf; 765 struct lima_resource *res = lima_resource(cbuf->texture); 766 int level = cbuf->u.tex.level; 767 unsigned layer = cbuf->u.tex.first_layer; 768 uint32_t format = lima_format_get_pixel(cbuf->format); 769 bool swap_channels = lima_format_get_pixel_swap_rb(cbuf->format); 770 771 struct lima_pp_frame_reg *frame = (void *)frame_reg; 772 frame->channel_layout = lima_format_get_channel_layout(cbuf->format); 773 774 struct lima_pp_wb_reg *wb = (void *)wb_reg; 775 wb[wb_idx].type = 0x02; /* 2 for color buffer */ 776 wb[wb_idx].address = res->bo->va + res->levels[level].offset + layer * res->levels[level].layer_stride; 777 wb[wb_idx].pixel_format = format; 778 if (res->tiled) { 779 wb[wb_idx].pixel_layout = 0x2; 780 wb[wb_idx].pitch = fb->tiled_w; 781 } else { 782 wb[wb_idx].pixel_layout = 0x0; 783 wb[wb_idx].pitch = res->levels[level].stride / 8; 784 } 785 wb[wb_idx].flags = swap_channels ? 0x4 : 0x0; 786 unsigned nr_samples = cbuf->nr_samples ? 787 cbuf->nr_samples : MAX2(1, cbuf->texture->nr_samples); 788 if (nr_samples > 1) { 789 wb[wb_idx].mrt_pitch = res->mrt_pitch; 790 wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples); 791 } 792} 793 794static void 795lima_pack_pp_frame_reg(struct lima_job *job, uint32_t *frame_reg, 796 uint32_t *wb_reg) 797{ 798 struct lima_context *ctx = job->ctx; 799 struct lima_job_fb_info *fb = &job->fb; 800 struct pipe_surface *cbuf = job->key.cbuf; 801 struct lima_pp_frame_reg *frame = (void *)frame_reg; 802 struct lima_screen *screen = lima_screen(ctx->base.screen); 803 int wb_idx = 0; 804 805 frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset; 806 frame->flags = 0x02; 807 if (cbuf && util_format_is_float(cbuf->format)) { 808 frame->flags |= 0x01; /* enable fp16 */ 809 frame->clear_value_color = (uint32_t)(job->clear.color_16pc & 0xffffffffUL); 810 frame->clear_value_color_1 = (uint32_t)(job->clear.color_16pc >> 32); 811 frame->clear_value_color_2 = 0; 812 frame->clear_value_color_3 = 0; 813 } 814 else { 815 frame->clear_value_color = job->clear.color_8pc; 816 frame->clear_value_color_1 = job->clear.color_8pc; 817 frame->clear_value_color_2 = job->clear.color_8pc; 818 frame->clear_value_color_3 = job->clear.color_8pc; 819 } 820 821 frame->clear_value_depth = job->clear.depth; 822 frame->clear_value_stencil = job->clear.stencil; 823 frame->one = 1; 824 825 frame->width = fb->width - 1; 826 frame->height = fb->height - 1; 827 828 /* frame->fragment_stack_address is overwritten per-pp in the kernel 829 * by the values of pp_frame.fragment_stack_address[i] */ 830 831 /* These are "stack size" and "stack offset" shifted, 832 * here they are assumed to be always the same. */ 833 frame->fragment_stack_size = job->pp_max_stack_size << 16 | job->pp_max_stack_size; 834 835 /* related with MSAA and different value when r4p0/r7p0 */ 836 frame->supersampled_height = fb->height * 2 - 1; 837 frame->scale = 0xE0C; 838 839 frame->dubya = 0x77; 840 frame->onscreen = 1; 841 frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w; 842 843 /* Set default layout to 8888 */ 844 frame->channel_layout = 0x8888; 845 846 if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) 847 lima_pack_wb_cbuf_reg(job, frame_reg, wb_reg, wb_idx++); 848 849 if (job->key.zsbuf && 850 (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) 851 lima_pack_wb_zsbuf_reg(job, wb_reg, wb_idx++); 852} 853 854void 855lima_do_job(struct lima_job *job) 856{ 857 #define pp_stack_pp_size 0x400 858 859 struct lima_context *ctx = job->ctx; 860 861 lima_pack_head_plbu_cmd(job); 862 lima_finish_plbu_cmd(&job->plbu_cmd_array); 863 864 lima_update_job_bo(job); 865 866 int vs_cmd_size = job->vs_cmd_array.size; 867 uint32_t vs_cmd_va = 0; 868 869 if (vs_cmd_size) { 870 void *vs_cmd = lima_job_create_stream_bo( 871 job, LIMA_PIPE_GP, vs_cmd_size, &vs_cmd_va); 872 memcpy(vs_cmd, util_dynarray_begin(&job->vs_cmd_array), vs_cmd_size); 873 874 lima_dump_command_stream_print( 875 job->dump, vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va); 876 lima_dump_vs_command_stream_print(job->dump, vs_cmd, vs_cmd_size, vs_cmd_va); 877 } 878 879 uint32_t plbu_cmd_va; 880 int plbu_cmd_size = job->plbu_cmd_array.size + job->plbu_cmd_head.size; 881 void *plbu_cmd = lima_job_create_stream_bo( 882 job, LIMA_PIPE_GP, plbu_cmd_size, &plbu_cmd_va); 883 memcpy(plbu_cmd, 884 util_dynarray_begin(&job->plbu_cmd_head), 885 job->plbu_cmd_head.size); 886 memcpy(plbu_cmd + job->plbu_cmd_head.size, 887 util_dynarray_begin(&job->plbu_cmd_array), 888 job->plbu_cmd_array.size); 889 890 lima_dump_command_stream_print( 891 job->dump, plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va); 892 lima_dump_plbu_command_stream_print(job->dump, plbu_cmd, plbu_cmd_size, plbu_cmd_va); 893 894 struct lima_screen *screen = lima_screen(ctx->base.screen); 895 struct drm_lima_gp_frame gp_frame; 896 struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame; 897 gp_frame_reg->vs_cmd_start = vs_cmd_va; 898 gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size; 899 gp_frame_reg->plbu_cmd_start = plbu_cmd_va; 900 gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size; 901 gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va; 902 gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + ctx->gp_tile_heap_size; 903 904 lima_dump_command_stream_print( 905 job->dump, &gp_frame, sizeof(gp_frame), false, "add gp frame\n"); 906 907 if (!lima_job_start(job, LIMA_PIPE_GP, &gp_frame, sizeof(gp_frame))) 908 fprintf(stderr, "gp job error\n"); 909 910 if (job->dump) { 911 if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) { 912 if (ctx->gp_output) { 913 float *pos = lima_bo_map(ctx->gp_output); 914 lima_dump_command_stream_print( 915 job->dump, pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n", 916 ctx->gp_output->va); 917 } 918 919 uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]); 920 lima_dump_command_stream_print( 921 job->dump, plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n", 922 ctx->plb[ctx->plb_index]->va); 923 } 924 else { 925 fprintf(stderr, "gp job wait error\n"); 926 exit(1); 927 } 928 } 929 930 uint32_t pp_stack_va = 0; 931 if (job->pp_max_stack_size) { 932 lima_job_create_stream_bo( 933 job, LIMA_PIPE_PP, 934 screen->num_pp * job->pp_max_stack_size * pp_stack_pp_size, 935 &pp_stack_va); 936 } 937 938 lima_update_pp_stream(job); 939 940 struct lima_pp_stream_state *ps = &ctx->pp_stream; 941 if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) { 942 struct drm_lima_m400_pp_frame pp_frame = {0}; 943 lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb); 944 pp_frame.num_pp = screen->num_pp; 945 946 for (int i = 0; i < screen->num_pp; i++) { 947 pp_frame.plbu_array_address[i] = ps->va + ps->offset[i]; 948 if (job->pp_max_stack_size) 949 pp_frame.fragment_stack_address[i] = pp_stack_va + 950 job->pp_max_stack_size * pp_stack_pp_size * i; 951 } 952 953 lima_dump_command_stream_print( 954 job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n"); 955 956 if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame))) 957 fprintf(stderr, "pp job error\n"); 958 } 959 else { 960 struct drm_lima_m450_pp_frame pp_frame = {0}; 961 lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb); 962 pp_frame.num_pp = screen->num_pp; 963 964 if (job->pp_max_stack_size) 965 for (int i = 0; i < screen->num_pp; i++) 966 pp_frame.fragment_stack_address[i] = pp_stack_va + 967 job->pp_max_stack_size * pp_stack_pp_size * i; 968 969 if (ps->map) { 970 for (int i = 0; i < screen->num_pp; i++) 971 pp_frame.plbu_array_address[i] = ps->va + ps->offset[i]; 972 } 973 else { 974 pp_frame.use_dlbu = true; 975 976 struct lima_job_fb_info *fb = &job->fb; 977 pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va; 978 pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1); 979 unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7; 980 pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w; 981 pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16); 982 } 983 984 lima_dump_command_stream_print( 985 job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n"); 986 987 if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame))) 988 fprintf(stderr, "pp job error\n"); 989 } 990 991 if (job->dump) { 992 if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) { 993 fprintf(stderr, "pp wait error\n"); 994 exit(1); 995 } 996 } 997 998 ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb; 999 1000 /* Set reload flags for next draw. It'll be unset if buffer is cleared */ 1001 if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) { 1002 struct lima_surface *surf = lima_surface(job->key.cbuf); 1003 surf->reload = PIPE_CLEAR_COLOR0; 1004 } 1005 1006 if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { 1007 struct lima_surface *surf = lima_surface(job->key.zsbuf); 1008 surf->reload = (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)); 1009 } 1010 1011 if (ctx->job == job) 1012 ctx->job = NULL; 1013 1014 lima_job_free(job); 1015} 1016 1017void 1018lima_flush(struct lima_context *ctx) 1019{ 1020 hash_table_foreach(ctx->jobs, entry) { 1021 struct lima_job *job = entry->data; 1022 lima_do_job(job); 1023 } 1024} 1025 1026void 1027lima_flush_job_accessing_bo( 1028 struct lima_context *ctx, struct lima_bo *bo, bool write) 1029{ 1030 hash_table_foreach(ctx->jobs, entry) { 1031 struct lima_job *job = entry->data; 1032 if (lima_job_has_bo(job, bo, write)) 1033 lima_do_job(job); 1034 } 1035} 1036 1037/* 1038 * This is for current job flush previous job which write to the resource it wants 1039 * to read. Tipical usage is flush the FBO which is used as current task's texture. 1040 */ 1041void 1042lima_flush_previous_job_writing_resource( 1043 struct lima_context *ctx, struct pipe_resource *prsc) 1044{ 1045 struct hash_entry *entry = _mesa_hash_table_search(ctx->write_jobs, prsc); 1046 1047 if (entry) { 1048 struct lima_job *job = entry->data; 1049 1050 /* do not flush current job */ 1051 if (job != ctx->job) 1052 lima_do_job(job); 1053 } 1054} 1055 1056static void 1057lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, 1058 unsigned flags) 1059{ 1060 struct lima_context *ctx = lima_context(pctx); 1061 1062 lima_flush(ctx); 1063 1064 if (fence) { 1065 int drm_fd = lima_screen(ctx->base.screen)->fd; 1066 int fd; 1067 1068 if (!drmSyncobjExportSyncFile(drm_fd, ctx->out_sync[LIMA_PIPE_PP], &fd)) 1069 *fence = lima_fence_create(fd); 1070 } 1071} 1072 1073static void 1074lima_texture_barrier(struct pipe_context *pctx, unsigned flags) 1075{ 1076 struct lima_context *ctx = lima_context(pctx); 1077 1078 lima_flush(ctx); 1079} 1080 1081static bool 1082lima_job_compare(const void *s1, const void *s2) 1083{ 1084 return memcmp(s1, s2, sizeof(struct lima_job_key)) == 0; 1085} 1086 1087static uint32_t 1088lima_job_hash(const void *key) 1089{ 1090 return _mesa_hash_data(key, sizeof(struct lima_job_key)); 1091} 1092 1093bool lima_job_init(struct lima_context *ctx) 1094{ 1095 int fd = lima_screen(ctx->base.screen)->fd; 1096 1097 ctx->jobs = _mesa_hash_table_create(ctx, lima_job_hash, lima_job_compare); 1098 if (!ctx->jobs) 1099 return false; 1100 1101 ctx->write_jobs = _mesa_hash_table_create( 1102 ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); 1103 if (!ctx->write_jobs) 1104 return false; 1105 1106 ctx->in_sync_fd = -1; 1107 1108 for (int i = 0; i < 2; i++) { 1109 if (drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->in_sync + i) || 1110 drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->out_sync + i)) 1111 return false; 1112 } 1113 1114 ctx->base.flush = lima_pipe_flush; 1115 ctx->base.texture_barrier = lima_texture_barrier; 1116 1117 return true; 1118} 1119 1120void lima_job_fini(struct lima_context *ctx) 1121{ 1122 int fd = lima_screen(ctx->base.screen)->fd; 1123 1124 lima_flush(ctx); 1125 1126 for (int i = 0; i < 2; i++) { 1127 if (ctx->in_sync[i]) 1128 drmSyncobjDestroy(fd, ctx->in_sync[i]); 1129 if (ctx->out_sync[i]) 1130 drmSyncobjDestroy(fd, ctx->out_sync[i]); 1131 } 1132 1133 if (ctx->in_sync_fd >= 0) 1134 close(ctx->in_sync_fd); 1135} 1136