/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 *   Boris Brezillon <boris.brezillon@collabora.com>
 */

#include "util/macros.h"

#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_texture.h"

static unsigned
mod_to_block_fmt(uint64_t mod)
{
   switch (mod) {
   case DRM_FORMAT_MOD_LINEAR:
      return MALI_BLOCK_FORMAT_LINEAR;
   case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
      return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
   default:
#if PAN_ARCH >= 5
      if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC;
#endif
#if PAN_ARCH >= 7
      if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC_TILED;
#endif

      unreachable("Unsupported modifier");
   }
}

static enum mali_msaa
mali_sampling_mode(const struct pan_image_view *view)
{
   if (view->image->layout.nr_samples > 1) {
      assert(view->nr_samples == view->image->layout.nr_samples);
      assert(view->image->layout.slices[0].surface_stride != 0);
      return MALI_MSAA_LAYERED;
   }

   if (view->nr_samples > view->image->layout.nr_samples) {
      assert(view->image->layout.nr_samples == 1);
      return MALI_MSAA_AVERAGE;
   }

   assert(view->nr_samples == view->image->layout.nr_samples);
   assert(view->nr_samples == 1);

   return MALI_MSAA_SINGLE;
}

static inline enum mali_sample_pattern
pan_sample_pattern(unsigned samples)
{
   switch (samples) {
   case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
   case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
   case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
   case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
   default: unreachable("Unsupported sample count");
   }
}

int
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
    * tiles are the same size as the tiles of the framebuffer. However,
    * our code only handles 16x16 tiles. Therefore under the current
    * implementation, we must disable CRC when 16x16 tiles are not used.
    *
    * This may hurt performance. However, smaller tile sizes are rare, and
    * CRCs are more expensive at smaller tile sizes, reducing the benefit.
    * Restricting CRC to 16x16 should work in practice.
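    *
    * Note that tile_size here is a pixel count per tile (16x16 = 256
    * pixels), the same unit produced by pan_select_max_tile_size() and
    * passed in from GENX(pan_emit_fbd).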
    */
   if (tile_size != 16 * 16) {
      assert(tile_size < 16 * 16);
      return -1;
   }

#if PAN_ARCH <= 6
   if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
       fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
      return 0;

   return -1;
#else
   bool best_rt_valid = false;
   int best_rt = -1;

   for (unsigned i = 0; i < fb->rt_count; i++) {
      if (!fb->rts[i].view || fb->rts[i].discard ||
          fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
         continue;

      bool valid = *(fb->rts[i].crc_valid);
      bool full = !fb->extent.minx && !fb->extent.miny &&
                  fb->extent.maxx == (fb->width - 1) &&
                  fb->extent.maxy == (fb->height - 1);
      if (!full && !valid)
         continue;

      if (best_rt < 0 || (valid && !best_rt_valid)) {
         best_rt = i;
         best_rt_valid = valid;
      }

      if (valid)
         break;
   }

   return best_rt;
#endif
}

static enum mali_zs_format
translate_zs_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
   case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
   case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
#if PAN_ARCH <= 7
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
#endif
   default: unreachable("Unsupported depth/stencil format.");
   }
}

#if PAN_ARCH >= 5
static enum mali_s_format
translate_s_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   case PIPE_FORMAT_X24S8_UINT:
      return MALI_S_FORMAT_X24S8;

#if PAN_ARCH <= 7
   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   case PIPE_FORMAT_S8X24_UINT:
      return MALI_S_FORMAT_S8X24;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MALI_S_FORMAT_X32_S8X24;
#endif

   default:
      unreachable("Unsupported stencil format.");
   }
}

static void
pan_prepare_s(const struct pan_fb_info *fb,
              struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *s = fb->zs.view.s;

   if (!s)
      return;

   unsigned level = s->first_level;

   ext->s_msaa = mali_sampling_mode(s);

   struct pan_surface surf;
   pan_iview_get_surface(s, 0, 0, 0, &surf);

   assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
          s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
   ext->s_writeback_base = surf.data;
   ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
   ext->s_writeback_surface_stride =
      (s->image->layout.nr_samples > 1) ?
      s->image->layout.slices[level].surface_stride : 0;
   ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
   ext->s_write_format = translate_s_format(s->format);
}

static void
pan_prepare_zs(const struct pan_fb_info *fb,
               struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *zs = fb->zs.view.zs;

   if (!zs)
      return;

   unsigned level = zs->first_level;

   ext->zs_msaa = mali_sampling_mode(zs);

   struct pan_surface surf;
   pan_iview_get_surface(zs, 0, 0, 0, &surf);
   UNUSED const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];

   if (drm_is_afbc(zs->image->layout.modifier)) {
#if PAN_ARCH >= 9
      ext->zs_writeback_base = surf.afbc.header;
      ext->zs_writeback_row_stride = slice->row_stride;
      /* TODO: surface stride? */
      ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;

      /* TODO: stencil AFBC? */
#else
#if PAN_ARCH >= 6
      ext->zs_afbc_row_stride = pan_afbc_stride_blocks(zs->image->layout.modifier,
                                                       slice->row_stride);
#else
      ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
      ext->zs_afbc_body_size = 0x1000;
      ext->zs_afbc_chunk_size = 9;
      ext->zs_afbc_sparse = true;
#endif

      ext->zs_afbc_header = surf.afbc.header;
      ext->zs_afbc_body = surf.afbc.body;
#endif
   } else {
      assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
             zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);

      /* TODO: Z32F(S8) support, which is always linear */

      ext->zs_writeback_base = surf.data;
      ext->zs_writeback_row_stride =
         zs->image->layout.slices[level].row_stride;
      ext->zs_writeback_surface_stride =
         (zs->image->layout.nr_samples > 1) ?
         zs->image->layout.slices[level].surface_stride : 0;
   }

   ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
   ext->zs_write_format = translate_zs_format(zs->format);
   if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
      ext->s_writeback_base = ext->zs_writeback_base;
}

static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
                struct MALI_ZS_CRC_EXTENSION *ext)
{
   if (rt_crc < 0)
      return;

   assert(rt_crc < fb->rt_count);

   const struct pan_image_view *rt = fb->rts[rt_crc].view;
   const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
   ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
                    (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
                    (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
                   slice->crc.offset;
   ext->crc_row_stride = slice->crc.stride;

#if PAN_ARCH >= 7
   ext->crc_render_target = rt_crc;

   if (fb->rts[rt_crc].clear) {
      uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
      ext->crc_clear_color = clear_val | 0xc000000000000000 |
                             (((uint64_t)clear_val & 0xffff) << 32);
   }
#endif
}

static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
                    void *zs_crc_ext)
{
   pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
      pan_prepare_crc(fb, rt_crc, &cfg);
      cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
      pan_prepare_zs(fb, &cfg);
      pan_prepare_s(fb, &cfg);
   }
}

/* Measure format as it appears in the tile buffer */

static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)
{
   if (panfrost_blendable_formats_v7[format].internal) {
      /* Blendable formats are always 32 bits in the tile buffer,
       * extra bits are used as padding or to dither */
      return 4;
   } else {
      /* Non-blendable formats are raw, rounded up to the nearest
       * power-of-two size */
      unsigned bytes = util_format_get_blocksize(format);
      return util_next_power_of_two(bytes);
   }
}

static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
{
   unsigned sum = 0;

   for (int cb = 0; cb < fb->rt_count; ++cb) {
      const struct pan_image_view *rt = fb->rts[cb].view;

      if (!rt)
         continue;

      sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
   }

   return sum;
}

/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * A bit of algebra gives the following formula.
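 *
 * Concretely: with B = tile_buffer_bytes (asserted below to be a power of
 * two) and b = bytes per pixel, the largest power-of-two pixel count P with
 * b * P <= B is P = B >> ceil(log2(b)): since 2^ceil(log2(b)) >= b this
 * keeps b * P <= B, while shifting one bit less would double P and push
 * b * P past B.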
 */
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
   assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
   assert(tile_buffer_bytes >= 1024);

   return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel);
}

static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)
{
   switch (bits) {
   case    8: return MALI_COLOR_FORMAT_RAW8;
   case   16: return MALI_COLOR_FORMAT_RAW16;
   case   24: return MALI_COLOR_FORMAT_RAW24;
   case   32: return MALI_COLOR_FORMAT_RAW32;
   case   48: return MALI_COLOR_FORMAT_RAW48;
   case   64: return MALI_COLOR_FORMAT_RAW64;
   case   96: return MALI_COLOR_FORMAT_RAW96;
   case  128: return MALI_COLOR_FORMAT_RAW128;
   case  192: return MALI_COLOR_FORMAT_RAW192;
   case  256: return MALI_COLOR_FORMAT_RAW256;
   case  384: return MALI_COLOR_FORMAT_RAW384;
   case  512: return MALI_COLOR_FORMAT_RAW512;
   case  768: return MALI_COLOR_FORMAT_RAW768;
   case 1024: return MALI_COLOR_FORMAT_RAW1024;
   case 1536: return MALI_COLOR_FORMAT_RAW1536;
   case 2048: return MALI_COLOR_FORMAT_RAW2048;
   default: unreachable("invalid raw bpp");
   }
}

static void
pan_rt_init_format(const struct pan_image_view *rt,
                   struct MALI_RENDER_TARGET *cfg)
{
   /* Explode details on the format */

   const struct util_format_description *desc =
      util_format_description(rt->format);

   /* The swizzle for rendering is inverted from texturing */

   unsigned char swizzle[4] = {
      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
   };

   /* Fill in accordingly, defaulting to 8-bit UNORM */

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      cfg->srgb = true;

   struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];

   if (fmt.internal) {
      cfg->internal_format = fmt.internal;
      cfg->writeback_format = fmt.writeback;
      panfrost_invert_swizzle(desc->swizzle, swizzle);
   } else {
      /* Construct RAW internal/writeback, where internal is
       * specified logarithmically (round to next power-of-two).
       * Offset specified from RAW8, where 8 = 2^3 */

      unsigned bits = desc->block.bits;
      unsigned offset = util_logbase2_ceil(bits) - 3;
      assert(offset <= 4);

      cfg->internal_format =
         MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;

      cfg->writeback_format = pan_mfbd_raw_format(bits);
   }

   cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
}

#if PAN_ARCH >= 9
enum mali_afbc_compression_mode
pan_afbc_compression_mode(enum pipe_format format)
{
   /* There's a special case for texturing the stencil part from a combined
    * depth/stencil texture, handle it separately.
    */
   if (format == PIPE_FORMAT_X24S8_UINT)
      return MALI_AFBC_COMPRESSION_MODE_X24S8;

   /* Otherwise, map canonical formats to the hardware enum. This only
    * needs to handle the subset of formats returned by
    * panfrost_afbc_format.
    */
   switch (panfrost_afbc_format(PAN_ARCH, format)) {
   case PIPE_FORMAT_R8G8_UNORM:     return MALI_AFBC_COMPRESSION_MODE_R8G8;
   case PIPE_FORMAT_R8G8B8_UNORM:   return MALI_AFBC_COMPRESSION_MODE_R8G8B8;
   case PIPE_FORMAT_R8G8B8A8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8B8A8;
   case PIPE_FORMAT_R5G6B5_UNORM:   return MALI_AFBC_COMPRESSION_MODE_R5G6B5;
   case PIPE_FORMAT_S8_UINT:        return MALI_AFBC_COMPRESSION_MODE_S8;
   case PIPE_FORMAT_NONE:           unreachable("invalid format for AFBC");
   default:                         unreachable("unknown canonical AFBC format");
   }
}
#endif

static void
pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
               unsigned cbuf_offset,
               struct MALI_RENDER_TARGET *cfg)
{
   cfg->clean_pixel_write_enable = fb->rts[idx].clear;
   cfg->internal_buffer_offset = cbuf_offset;
   if (fb->rts[idx].clear) {
      cfg->clear.color_0 = fb->rts[idx].clear_value[0];
      cfg->clear.color_1 = fb->rts[idx].clear_value[1];
      cfg->clear.color_2 = fb->rts[idx].clear_value[2];
      cfg->clear.color_3 = fb->rts[idx].clear_value[3];
   }

   const struct pan_image_view *rt = fb->rts[idx].view;
   if (!rt || fb->rts[idx].discard) {
      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
      cfg->internal_buffer_offset = cbuf_offset;
#if PAN_ARCH >= 7
      cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
      cfg->dithering_enable = true;
#endif
      return;
   }

   cfg->write_enable = true;
   cfg->dithering_enable = true;

   unsigned level = rt->first_level;
   assert(rt->last_level == rt->first_level);
   assert(rt->last_layer == rt->first_layer);

   int row_stride = rt->image->layout.slices[level].row_stride;

   /* Only set layer_stride for layered MSAA rendering */

   unsigned layer_stride =
      (rt->image->layout.nr_samples > 1) ?
      rt->image->layout.slices[level].surface_stride : 0;

   cfg->writeback_msaa = mali_sampling_mode(rt);

   pan_rt_init_format(rt, cfg);

   cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);

   struct pan_surface surf;
   pan_iview_get_surface(rt, 0, 0, 0, &surf);

   if (drm_is_afbc(rt->image->layout.modifier)) {
#if PAN_ARCH >= 9
      if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform = true;

      cfg->afbc.wide_block = panfrost_afbc_is_wide(rt->image->layout.modifier);
      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
      assert(surf.afbc.body >= surf.afbc.header);

      cfg->afbc.compression_mode = pan_afbc_compression_mode(rt->format);
      cfg->afbc.row_stride = row_stride;
#else
      const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];

#if PAN_ARCH >= 6
      cfg->afbc.row_stride = pan_afbc_stride_blocks(rt->image->layout.modifier,
                                                    slice->row_stride);
      cfg->afbc.afbc_wide_block_enable =
         panfrost_afbc_is_wide(rt->image->layout.modifier);
#else
      cfg->afbc.chunk_size = 9;
      cfg->afbc.sparse = true;
      cfg->afbc.body_size = slice->afbc.body_size;
#endif

      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body = surf.afbc.body;

      if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform_enable = true;
#endif
   } else {
      assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
             rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
   }
}
#endif

void
GENX(pan_emit_tls)(const struct pan_tls_info *info,
                   void *out)
{
   pan_pack(out, LOCAL_STORAGE, cfg) {
      if (info->tls.size) {
         unsigned shift =
            panfrost_get_stack_shift(info->tls.size);

         cfg.tls_size = shift;
#if PAN_ARCH >= 9
         /* For now, always use packed TLS addressing. This is
          * better for the cache and requires no fix up code in
          * the shader. We may need to revisit this someday for
          * OpenCL generic pointer support.
          */
         cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;

         assert((info->tls.ptr & 4095) == 0);
         cfg.tls_base_pointer = info->tls.ptr >> 8;
#else
         cfg.tls_base_pointer = info->tls.ptr;
#endif
      }

      if (info->wls.size) {
         assert(!(info->wls.ptr & 4095));
         assert((info->wls.ptr & 0xffffffff00000000ULL) ==
                ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
         cfg.wls_base_pointer = info->wls.ptr;
         unsigned wls_size = pan_wls_adjust_size(info->wls.size);
         cfg.wls_instances = pan_wls_instances(&info->wls.dim);
         cfg.wls_size_scale = util_logbase2(wls_size) + 1;
      } else {
         cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
      }
   }
}

#if PAN_ARCH <= 5
static void
pan_emit_midgard_tiler(const struct panfrost_device *dev,
                       const struct pan_fb_info *fb,
                       const struct pan_tiler_context *tiler_ctx,
                       void *out)
{
   bool hierarchy = !dev->model->quirks.no_hierarchical_tiling;

   assert(tiler_ctx->midgard.polygon_list->ptr.gpu);

   pan_pack(out, TILER_CONTEXT, cfg) {
      unsigned header_size;

      if (tiler_ctx->midgard.disable) {
         cfg.hierarchy_mask =
            hierarchy ?
            MALI_MIDGARD_TILER_DISABLED :
            MALI_MIDGARD_TILER_USER;
         header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
         cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
         cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
         cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
      } else {
         cfg.hierarchy_mask =
            panfrost_choose_hierarchy_mask(fb->width,
                                           fb->height,
                                           1, hierarchy);
         header_size = panfrost_tiler_header_size(fb->width,
                                                  fb->height,
                                                  cfg.hierarchy_mask,
                                                  hierarchy);
         cfg.polygon_list_size =
            panfrost_tiler_full_size(fb->width, fb->height,
                                     cfg.hierarchy_mask,
                                     hierarchy);
         cfg.heap_start = dev->tiler_heap->ptr.gpu;
         cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
      }

      cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
      cfg.polygon_list_body = cfg.polygon_list + header_size;
   }
}
#endif

#if PAN_ARCH >= 5
static void
pan_emit_rt(const struct pan_fb_info *fb,
            unsigned idx, unsigned cbuf_offset, void *out)
{
   pan_pack(out, RENDER_TARGET, cfg) {
      pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
   }
}

#if PAN_ARCH >= 6
/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
 *
 *      Forcing clean_tile_writes breaks INTERSECT readbacks
 *
 * To work around this, use the frame shader mode ALWAYS instead of INTERSECT
 * if clean tile writes are forced. Since INTERSECT is a hint that the hardware
 * may ignore, this cannot affect correctness, only performance */

static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
{
   if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
      return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
   else
      return mode;
}

/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
 * the effective tile size differs from the superblock size of any enabled AFBC
 * render target. Check this condition.
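 *
 * For example, per the check below: a wide-superblock AFBC target (32x8
 * superblocks) can never match the 16x16 tile, so clean writes are always
 * forced for it, while a 16x16-superblock target matches only when the
 * effective tile size is the full 16x16 = 256 pixels.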
 */

static bool
pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
{
   if (!drm_is_afbc(rt->image->layout.modifier))
      return false;

   unsigned superblock = panfrost_afbc_superblock_width(rt->image->layout.modifier);

   assert(superblock >= 16);
   assert(tile_size <= 16 * 16);

   /* Tile size and superblock differ unless they are both 16x16 */
   return !(superblock == 16 && tile_size == 16 * 16);
}

static bool
pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Maximum tile size */
   assert(tile_size <= 16 * 16);

   for (unsigned i = 0; i < fb->rt_count; ++i) {
      if (fb->rts[i].view && !fb->rts[i].discard &&
          pan_force_clean_write_rt(fb->rts[i].view, tile_size))
         return true;
   }

   if (fb->zs.view.zs && !fb->zs.discard.z &&
       pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
      return true;

   if (fb->zs.view.s && !fb->zs.discard.s &&
       pan_force_clean_write_rt(fb->zs.view.s, tile_size))
      return true;

   return false;
}

#endif

unsigned
GENX(pan_emit_fbd)(const struct panfrost_device *dev,
                   const struct pan_fb_info *fb,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx,
                   void *out)
{
   unsigned tags = MALI_FBD_TAG_IS_MFBD;
   void *fbd = out;
   void *rtd = out + pan_size(FRAMEBUFFER);

#if PAN_ARCH <= 5
   GENX(pan_emit_tls)(tls,
                      pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif

   unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
   unsigned tile_size = pan_select_max_tile_size(dev->optimal_tib_size,
                                                 bytes_per_pixel);

   /* Clamp tile size to hardware limits */
   tile_size = MIN2(tile_size, 16 * 16);
   assert(tile_size >= 4 * 4);

   /* Colour buffer allocations must be 1K aligned. */
   unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
   assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big");

   int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
   bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);

   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
      bool force_clean_write = pan_force_clean_write(fb, tile_size);

      cfg.sample_locations =
         panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
                                                  force_clean_write);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
                                                  force_clean_write);
      cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
                                                 force_clean_write);
      cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
      cfg.tiler = tiler_ctx->bifrost;
#endif
      cfg.width = fb->width;
      cfg.height = fb->height;
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;

      cfg.effective_tile_size = tile_size;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      cfg.render_target_count = MAX2(fb->rt_count, 1);

      /* Default to 24 bit depth if there's no surface. */
      cfg.z_internal_format =
         fb->zs.view.zs ?
         panfrost_get_z_internal_format(fb->zs.view.zs->format) :
         MALI_Z_INTERNAL_FORMAT_D24;

      cfg.z_clear = fb->zs.clear_value.depth;
      cfg.s_clear = fb->zs.clear_value.stencil;
      cfg.color_buffer_allocation = cbuf_allocation;
      cfg.sample_count = fb->nr_samples;
      cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
      cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
      cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
      cfg.has_zs_crc_extension = has_zs_crc_ext;

      if (crc_rt >= 0) {
         bool *valid = fb->rts[crc_rt].crc_valid;
         bool full = !fb->extent.minx && !fb->extent.miny &&
                     fb->extent.maxx == (fb->width - 1) &&
                     fb->extent.maxy == (fb->height - 1);

         cfg.crc_read_enable = *valid;

         /* If the data is currently invalid, still write CRC
          * data if we are doing a full write, so that it is
          * valid for next time. */
         cfg.crc_write_enable = *valid || full;

         *valid |= full;
      }

#if PAN_ARCH >= 9
      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
      cfg.first_provoking_vertex = fb->first_provoking_vertex;
#endif
   }

#if PAN_ARCH >= 6
   pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
#else
   pan_emit_midgard_tiler(dev, fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
#endif

   if (has_zs_crc_ext) {
      pan_emit_zs_crc_ext(fb, crc_rt,
                          out + pan_size(FRAMEBUFFER));
      rtd += pan_size(ZS_CRC_EXTENSION);
      tags |= MALI_FBD_TAG_HAS_ZS_RT;
   }

   unsigned rt_count = MAX2(fb->rt_count, 1);
   unsigned cbuf_offset = 0;
   for (unsigned i = 0; i < rt_count; i++) {
      pan_emit_rt(fb, i, cbuf_offset, rtd);
      rtd += pan_size(RENDER_TARGET);
      if (!fb->rts[i].view)
         continue;

      cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                     tile_size * fb->rts[i].view->image->layout.nr_samples;

      if (i != crc_rt)
         *(fb->rts[i].crc_valid) = false;
   }
   tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;

   return tags;
}
#else /* PAN_ARCH == 4 */
unsigned
GENX(pan_emit_fbd)(const struct panfrost_device *dev,
                   const struct pan_fb_info *fb,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx,
                   void *fbd)
{
   assert(fb->rt_count <= 1);

   GENX(pan_emit_tls)(tls,
                      pan_section_ptr(fbd, FRAMEBUFFER,
                                      LOCAL_STORAGE));
   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;
      cfg.dithering_enable = true;
      cfg.clean_pixel_write_enable = true;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      if (fb->rts[0].clear) {
         cfg.clear_color_0 = fb->rts[0].clear_value[0];
         cfg.clear_color_1 = fb->rts[0].clear_value[1];
         cfg.clear_color_2 = fb->rts[0].clear_value[2];
         cfg.clear_color_3 = fb->rts[0].clear_value[3];
      }

      if (fb->zs.clear.z)
         cfg.z_clear = fb->zs.clear_value.depth;

      if (fb->zs.clear.s)
         cfg.s_clear = fb->zs.clear_value.stencil;

      if (fb->rt_count && fb->rts[0].view) {
         const struct pan_image_view *rt = fb->rts[0].view;

         const struct util_format_description *desc =
            util_format_description(rt->format);

         /* The swizzle for rendering is inverted from texturing */
         unsigned char swizzle[4];
         panfrost_invert_swizzle(desc->swizzle, swizzle);
         cfg.swizzle =
            panfrost_translate_swizzle_4(swizzle);

         struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
         if (fmt.internal) {
            cfg.internal_format = fmt.internal;
            cfg.color_writeback_format = fmt.writeback;
         } else {
            unreachable("raw formats not finished for SFBD");
         }

         unsigned level = rt->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(rt, 0, 0, 0, &surf);

         cfg.color_write_enable = !fb->rts[0].discard;
         cfg.color_writeback.base = surf.data;
         cfg.color_writeback.row_stride =
            rt->image->layout.slices[level].row_stride;

         cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
         assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
            const struct pan_image_slice_layout *slice =
               &rt->image->layout.slices[level];

            cfg.crc_buffer.row_stride = slice->crc.stride;
            if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
               cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
                                     rt->image->data.offset +
                                     slice->crc.offset;
            } else {
               cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
                                     rt->image->crc.offset +
                                     slice->crc.offset;
            }
         }
      }

      if (fb->zs.view.zs) {
         const struct pan_image_view *zs = fb->zs.view.zs;
         unsigned level = zs->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(zs, 0, 0, 0, &surf);

         cfg.zs_write_enable = !fb->zs.discard.z;
         cfg.zs_writeback.base = surf.data;
         cfg.zs_writeback.row_stride =
            zs->image->layout.slices[level].row_stride;
         cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
         assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         cfg.zs_format = translate_zs_format(zs->format);
      }

      cfg.sample_count = fb->nr_samples;

      if (fb->rt_count)
         cfg.msaa = mali_sampling_mode(fb->rts[0].view);
   }

   pan_emit_midgard_tiler(dev, fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);

   pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
   pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
   return 0;
}
#endif

#if PAN_ARCH >= 6
void
GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
                          void *out)
{
   pan_pack(out, TILER_HEAP, heap) {
      heap.size = dev->tiler_heap->size;
      heap.base = dev->tiler_heap->ptr.gpu;
      heap.bottom = dev->tiler_heap->ptr.gpu;
      heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
   }
}

void
GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
                         unsigned fb_width, unsigned fb_height,
                         unsigned nr_samples,
                         bool first_provoking_vertex,
                         mali_ptr heap,
                         void *out)
{
   unsigned max_levels = dev->tiler_features.max_levels;
   assert(max_levels >= 2);

   pan_pack(out, TILER_CONTEXT, tiler) {
      /* TODO: Select hierarchy mask more effectively */
      tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;

      /* For large framebuffers, disable the smallest bin size to
       * avoid pathological tiler memory usage. Required to avoid OOM
       * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on
       * Mali-G57.
       */
      if (MAX2(fb_width, fb_height) >= 4096)
         tiler.hierarchy_mask &= ~1;

      tiler.fb_width = fb_width;
      tiler.fb_height = fb_height;
      tiler.heap = heap;
      tiler.sample_pattern = pan_sample_pattern(nr_samples);
#if PAN_ARCH >= 9
      tiler.first_provoking_vertex = first_provoking_vertex;
#endif
   }
}
#endif

void
GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
                            mali_ptr fbd,
                            void *out)
{
   pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
      header.type = MALI_JOB_TYPE_FRAGMENT;
      header.index = 1;
   }

   pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
      payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
      payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
      payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
      payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
      payload.framebuffer = fbd;

#if PAN_ARCH >= 5
      if (fb->tile_map.base) {
         payload.has_tile_enable_map = true;
         payload.tile_enable_map = fb->tile_map.base;
         payload.tile_enable_map_row_stride = fb->tile_map.stride;
      }
#endif
   }
}