1/* 2 * Copyright (C) 2019-2022 Collabora, Ltd. 3 * Copyright (C) 2018-2019 Alyssa Rosenzweig 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 */ 25 26#include "util/macros.h" 27#include "util/u_math.h" 28#include "pan_texture.h" 29 30/* List of supported modifiers, in descending order of preference. AFBC is 31 * faster than u-interleaved tiling which is faster than linear. Within AFBC, 32 * enabling the YUV-like transform is typically a win where possible. 
*/ 33 34uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { 35 DRM_FORMAT_MOD_ARM_AFBC( 36 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 37 AFBC_FORMAT_MOD_TILED | 38 AFBC_FORMAT_MOD_SC | 39 AFBC_FORMAT_MOD_SPARSE | 40 AFBC_FORMAT_MOD_YTR), 41 42 DRM_FORMAT_MOD_ARM_AFBC( 43 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 44 AFBC_FORMAT_MOD_TILED | 45 AFBC_FORMAT_MOD_SC | 46 AFBC_FORMAT_MOD_SPARSE), 47 48 DRM_FORMAT_MOD_ARM_AFBC( 49 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 50 AFBC_FORMAT_MOD_SPARSE | 51 AFBC_FORMAT_MOD_YTR), 52 53 DRM_FORMAT_MOD_ARM_AFBC( 54 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | 55 AFBC_FORMAT_MOD_SPARSE), 56 57 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, 58 DRM_FORMAT_MOD_LINEAR 59}; 60 61/* Table of AFBC superblock sizes */ 62static const struct pan_block_size 63afbc_superblock_sizes[] = { 64 [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = { 16, 16 }, 65 [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = { 32, 8 }, 66 [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = { 64, 4 }, 67}; 68 69/* 70 * Given an AFBC modifier, return the superblock size. 71 * 72 * We do not yet have any use cases for multiplanar YCBCr formats with different 73 * superblock sizes on the luma and chroma planes. These formats are unsupported 74 * for now. 75 */ 76struct pan_block_size 77panfrost_afbc_superblock_size(uint64_t modifier) 78{ 79 unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK); 80 81 assert(drm_is_afbc(modifier)); 82 assert(index < ARRAY_SIZE(afbc_superblock_sizes)); 83 84 return afbc_superblock_sizes[index]; 85} 86 87/* 88 * Given an AFBC modifier, return the width of the superblock. 89 */ 90unsigned 91panfrost_afbc_superblock_width(uint64_t modifier) 92{ 93 return panfrost_afbc_superblock_size(modifier).width; 94} 95 96/* 97 * Given an AFBC modifier, return the height of the superblock. 98 */ 99unsigned 100panfrost_afbc_superblock_height(uint64_t modifier) 101{ 102 return panfrost_afbc_superblock_size(modifier).height; 103} 104 105/* 106 * Given an AFBC modifier, return if "wide blocks" are used. 
Wide blocks are 107 * defined as superblocks wider than 16 pixels, the minimum (and default) super 108 * block width. 109 */ 110bool 111panfrost_afbc_is_wide(uint64_t modifier) 112{ 113 return panfrost_afbc_superblock_width(modifier) > 16; 114} 115 116/* 117 * Given a format, determine the tile size used for u-interleaving. For formats 118 * that are already block compressed, this is 4x4. For all other formats, this 119 * is 16x16, hence the modifier name. 120 */ 121static inline struct pan_block_size 122panfrost_u_interleaved_tile_size(enum pipe_format format) 123{ 124 if (util_format_is_compressed(format)) 125 return (struct pan_block_size) { 4, 4 }; 126 else 127 return (struct pan_block_size) { 16, 16 }; 128} 129 130/* 131 * Determine the block size used for interleaving. For u-interleaving, this is 132 * the tile size. For AFBC, this is the superblock size. For linear textures, 133 * this is trivially 1x1. 134 */ 135struct pan_block_size 136panfrost_block_size(uint64_t modifier, enum pipe_format format) 137{ 138 if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) 139 return panfrost_u_interleaved_tile_size(format); 140 else if (drm_is_afbc(modifier)) 141 return panfrost_afbc_superblock_size(modifier); 142 else 143 return (struct pan_block_size) { 1, 1 }; 144} 145 146/* 147 * Determine the tile size used by AFBC. This tiles superblocks themselves. 148 * Current GPUs support either 8x8 tiling or no tiling (1x1) 149 */ 150static inline unsigned 151pan_afbc_tile_size(uint64_t modifier) 152{ 153 return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1; 154} 155 156/* 157 * Determine the number of bytes between header rows for an AFBC image. For an 158 * image with linear headers, this is simply the number of header blocks 159 * (=superblocks) per row times the numbers of bytes per header block. For an 160 * image with linear headers, this is multipled by the number of rows of 161 * header blocks are in a tile together. 
162 */ 163uint32_t 164pan_afbc_row_stride(uint64_t modifier, uint32_t width) 165{ 166 unsigned block_width = panfrost_afbc_superblock_width(modifier); 167 168 return (width / block_width) * pan_afbc_tile_size(modifier) * 169 AFBC_HEADER_BYTES_PER_TILE; 170} 171 172/* 173 * Determine the number of header blocks between header rows. This is equal to 174 * the number of bytes between header rows divided by the bytes per blocks of a 175 * header tile. This is also divided by the tile size to give a "line stride" in 176 * blocks, rather than a real row stride. This is required by Bifrost. 177 */ 178uint32_t 179pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes) 180{ 181 return row_stride_bytes / 182 (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier)); 183} 184 185/* 186 * Determine the required alignment for the body offset of an AFBC image. For 187 * now, this depends only on whether tiling is in use. These minimum alignments 188 * are required on all current GPUs. 189 */ 190static inline uint32_t 191pan_afbc_body_align(uint64_t modifier) 192{ 193 return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64; 194} 195 196/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. 197 * Checksumming is believed to be a CRC variant (CRC64 based on the size?). 198 * This feature is also known as "transaction elimination". 
*/ 199 200#define CHECKSUM_TILE_WIDTH 16 201#define CHECKSUM_TILE_HEIGHT 16 202#define CHECKSUM_BYTES_PER_TILE 8 203 204unsigned 205panfrost_compute_checksum_size( 206 struct pan_image_slice_layout *slice, 207 unsigned width, 208 unsigned height) 209{ 210 unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH); 211 unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT); 212 213 slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; 214 215 return slice->crc.stride * tile_count_y; 216} 217 218unsigned 219panfrost_get_layer_stride(const struct pan_image_layout *layout, 220 unsigned level) 221{ 222 if (layout->dim != MALI_TEXTURE_DIMENSION_3D) 223 return layout->array_stride; 224 else if (drm_is_afbc(layout->modifier)) 225 return layout->slices[level].afbc.surface_stride; 226 else 227 return layout->slices[level].surface_stride; 228} 229 230unsigned 231panfrost_get_legacy_stride(const struct pan_image_layout *layout, 232 unsigned level) 233{ 234 unsigned row_stride = layout->slices[level].row_stride; 235 struct pan_block_size block_size = 236 panfrost_block_size(layout->modifier, layout->format); 237 238 if (drm_is_afbc(layout->modifier)) { 239 unsigned width = u_minify(layout->width, level); 240 width = ALIGN_POT(width, block_size.width); 241 242 return width * util_format_get_blocksize(layout->format); 243 } else { 244 return row_stride / block_size.height; 245 } 246} 247 248unsigned 249panfrost_from_legacy_stride(unsigned legacy_stride, 250 enum pipe_format format, 251 uint64_t modifier) 252{ 253 struct pan_block_size block_size = 254 panfrost_block_size(modifier, format); 255 256 if (drm_is_afbc(modifier)) { 257 unsigned width = legacy_stride / util_format_get_blocksize(format); 258 259 return pan_afbc_row_stride(modifier, width); 260 } else { 261 return legacy_stride * block_size.height; 262 } 263} 264 265/* Computes the offset into a texture at a particular level/face. 
Add to
 * the base address of a texture to get the address to that level/face */

/* The offset is the level's slice offset, plus array_idx array strides, plus
 * surface_idx surface strides of that level. Note this always uses the generic
 * surface_stride, not afbc.surface_stride. */

unsigned
panfrost_texture_offset(const struct pan_image_layout *layout,
                        unsigned level, unsigned array_idx,
                        unsigned surface_idx)
{
        return layout->slices[level].offset +
               (array_idx * layout->array_stride) +
               (surface_idx * layout->slices[level].surface_stride);
}

/*
 * Return the minimum stride alignment in bytes for a given texture format.
 *
 * There is no format on any supported Mali with a minimum alignment greater
 * than 64 bytes, but 64 bytes is the required alignment of all regular formats
 * in v7 and newer. If this alignment is not met, imprecise faults may be
 * raised.
 *
 * This may not be necessary on older hardware but we enforce it there too for
 * uniformity. If this poses a problem there, we'll need a solution that can
 * handle v7 as well.
 *
 * Certain non-regular formats require smaller power-of-two alignments.
 * This requirement could be loosened in the future if there is a compelling
 * reason, by making this query more precise.
 *
 * The format argument is currently unused: the result is always 64.
 */
uint32_t
pan_stride_align_B(UNUSED enum pipe_format format)
{
        return 64;
}

/*
 * Return whether stride_B (in bytes) is a multiple of the minimum stride
 * alignment reported by pan_stride_align_B() for the format.
 */
bool
pan_is_stride_aligned(enum pipe_format format, uint32_t stride_B)
{
        return (stride_B % pan_stride_align_B(format)) == 0;
}

/*
 * Compute the memory layout of an image.
 *
 * On entry, the descriptive fields of layout that are read here must already
 * be filled in: format, modifier, dim, width, height, depth, nr_samples,
 * nr_slices, array_size and crc_mode. On success, this fills in every
 * layout->slices[] entry (offset, strides, sizes, AFBC and CRC info) as well
 * as layout->array_stride, layout->data_size and layout->crc_size.
 *
 * If explicit_layout is non-NULL, its offset and row stride (e.g. from an
 * imported image) are validated and used instead of being computed.
 *
 * Returns false if an explicit layout was given but cannot be honoured;
 * returns true otherwise. On a false return, layout may have been partially
 * written.
 */
bool
pan_image_layout_init(struct pan_image_layout *layout,
                      const struct pan_image_explicit_layout *explicit_layout)
{
        /* Explicit strides only work with non-mipmapped, non-array,
         * single-sample 2D images, and in-band CRC can't be used.
         */
        if (explicit_layout &&
            (layout->depth > 1 || layout->nr_samples > 1 ||
             layout->array_size > 1 || layout->dim != MALI_TEXTURE_DIMENSION_2D ||
             layout->nr_slices > 1 || layout->crc_mode == PAN_IMAGE_CRC_INBAND))
                return false;

        /* Require both offsets and strides to be aligned to the hardware
         * requirement. Panfrost allocates offsets and strides like this, so
         * this requirement is satisfied by any image that was exported from
         * another process with Panfrost. However, it does restrict imports of
         * EGL external images.
         */
        if (explicit_layout &&
            !(pan_is_stride_aligned(layout->format, explicit_layout->offset) &&
              pan_is_stride_aligned(layout->format, explicit_layout->row_stride)))
                return false;

        unsigned fmt_blocksize = util_format_get_blocksize(layout->format);

        /* MSAA is implemented as a 3D texture with z corresponding to the
         * sample #, horrifyingly enough */

        assert(layout->depth == 1 || layout->nr_samples == 1);

        bool afbc = drm_is_afbc(layout->modifier);
        bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
        bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;

        /* Running offsets: oob_crc_offset tracks the out-of-band checksum
         * region, offset tracks the image data itself */
        unsigned oob_crc_offset = 0;
        unsigned offset = explicit_layout ? explicit_layout->offset : 0;
        struct pan_block_size block_size =
                panfrost_block_size(layout->modifier, layout->format);

        /* Dimensions of the current mip level, minified at the end of each
         * loop iteration */
        unsigned width = layout->width;
        unsigned height = layout->height;
        unsigned depth = layout->depth;

        unsigned align_w = block_size.width;
        unsigned align_h = block_size.height;

        /* For tiled AFBC, align to tiles of superblocks (this can be large) */
        if (afbc) {
                align_w *= pan_afbc_tile_size(layout->modifier);
                align_h *= pan_afbc_tile_size(layout->modifier);
        }

        for (unsigned l = 0; l < layout->nr_slices; ++l) {
                struct pan_image_slice_layout *slice = &layout->slices[l];

                /* Level size in format blocks, padded to the alignment
                 * computed above */
                unsigned effective_width = ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w);
                unsigned effective_height = ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h);

                /* Align levels to cache-line as a performance improvement for
                 * linear/tiled and as a requirement for AFBC */

                offset = ALIGN_POT(offset, 64);

                slice->offset = offset;

                /* Bytes covered by one row of interleaving blocks, i.e.
                 * block_size.height lines of the padded width */
                unsigned row_stride = fmt_blocksize * effective_width * block_size.height;

                if (explicit_layout && !afbc) {
                        /* Make sure the explicit stride is valid */
                        if (explicit_layout->row_stride < row_stride)
                                return false;

                        row_stride = explicit_layout->row_stride;
                } else if (linear) {
                        /* Keep lines alignment on 64 byte for performance.
                         *
                         * Note that this is a multiple of the minimum
                         * stride alignment, so the hardware requirement is
                         * satisfied as a result.
                         */
                        row_stride = ALIGN_POT(row_stride, 64);
                }


                assert(pan_is_stride_aligned(layout->format, row_stride) &&
                       "alignment gauranteed in both code paths");

                /* Size of a single 2D surface at this level (for AFBC, this
                 * is the body only until the header is added below) */
                unsigned slice_one_size = row_stride * (effective_height / block_size.height);

                /* Compute AFBC sizes if necessary */
                if (afbc) {
                        /* For AFBC, the stored row stride is the header row
                         * stride, not the body row stride computed above */
                        slice->row_stride =
                                pan_afbc_row_stride(layout->modifier, effective_width);
                        slice->afbc.header_size =
                                ALIGN_POT(slice->row_stride * (effective_height / align_h),
                                          pan_afbc_body_align(layout->modifier));

                        /* The explicit stride is only validated against the
                         * header row stride; it is not stored */
                        if (explicit_layout && explicit_layout->row_stride < slice->row_stride)
                                return false;

                        /* AFBC body size */
                        slice->afbc.body_size = slice_one_size;

                        /* 3D AFBC resources have all headers placed at the
                         * beginning instead of having them split per depth
                         * level
                         */
                        if (is_3d) {
                                /* afbc.surface_stride steps between headers
                                 * and must be saved before header_size is
                                 * scaled to cover every depth level */
                                slice->afbc.surface_stride =
                                        slice->afbc.header_size;
                                slice->afbc.header_size *= depth;
                                slice->afbc.body_size *= depth;
                                offset += slice->afbc.header_size;
                        } else {
                                /* Header and body are stored together, so a
                                 * surface spans both */
                                slice_one_size += slice->afbc.header_size;
                                slice->afbc.surface_stride = slice_one_size;
                        }
                } else {
                        slice->row_stride = row_stride;
                }

                unsigned slice_full_size =
                        slice_one_size * depth * layout->nr_samples;

                slice->surface_stride = slice_one_size;

                assert(pan_is_stride_aligned(layout->format, slice->surface_stride) &&
                       "integer multiple of aligned is still aligned, "
                       "and AFBC header is at least 64 byte aligned");

                /* Advance past every surface of this level.
                 *
                 * NOTE(review): for 3D AFBC, offset was already advanced past
                 * the headers above, but slice->size only counts
                 * slice_full_size (the bodies) -- confirm this is intended.
                 */

                offset += slice_full_size;
                slice->size = slice_full_size;

                /* Add a checksum region if necessary */
                if (layout->crc_mode != PAN_IMAGE_CRC_NONE) {
                        /* Also sets slice->crc.stride as a side effect */
                        slice->crc.size =
                                panfrost_compute_checksum_size(slice, width, height);

                        if (layout->crc_mode == PAN_IMAGE_CRC_INBAND) {
                                /* In-band: checksums follow the level's data
                                 * and count towards its size */
                                slice->crc.offset = offset;
                                offset += slice->crc.size;
                                slice->size += slice->crc.size;
                        } else {
                                /* Out-of-band: checksums are laid out in a
                                 * separate region, sized by crc_size below */
                                slice->crc.offset = oob_crc_offset;
                                oob_crc_offset += slice->crc.size;
                        }
                }

                width = u_minify(width, 1);
                height = u_minify(height, 1);
                depth = u_minify(depth, 1);
        }

        /* Arrays and cubemaps have the entire miptree duplicated */
        layout->array_stride = ALIGN_POT(offset, 64);
        if (explicit_layout)
                /* Explicit layouts are restricted to a single array element
                 * above; note that offset includes the explicit offset */
                layout->data_size = offset;
        else
                layout->data_size = ALIGN_POT(layout->array_stride * layout->array_size, 4096);
        layout->crc_size = oob_crc_offset;

        return true;
}

/*
 * Compute the GPU address(es) of a single surface of an image view.
 *
 * level and layer are relative to the view: the view's first_level and
 * first_layer are added before indexing the underlying image. For AFBC images
 * the result is written to surf->afbc.header and surf->afbc.body (and sample
 * must be 0); for all other images it is written to surf->data.
 */
void
pan_iview_get_surface(const struct pan_image_view *iview,
                      unsigned level, unsigned layer, unsigned sample,
                      struct pan_surface *surf)
{
        level += iview->first_level;
        assert(level < iview->image->layout.nr_slices);

        layer += iview->first_layer;

        bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
        const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;

        if (drm_is_afbc(iview->image->layout.modifier)) {
                assert(!sample);

                if (is_3d) {
                        /* For 3D images, "layer" selects a depth slice of the
                         * minified level. Headers are packed at the start of
                         * the level, afbc.surface_stride apart; bodies follow
                         * the whole header region, surface_stride apart. */
                        ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
                        assert(layer < depth);
                        surf->afbc.header = base + slice->offset +
                                            (layer * slice->afbc.surface_stride);
                        surf->afbc.body = base + slice->offset +
                                          slice->afbc.header_size +
                                          (slice->surface_stride * layer);
                } else {
                        /* Otherwise "layer" is an array index and the body
                         * directly follows its own header */
                        assert(layer < iview->image->layout.array_size);
                        surf->afbc.header = base +
                                            panfrost_texture_offset(&iview->image->layout,
                                                                    level, layer, 0);
                        surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
                }
        } else {
                /* 3D: the layer picks a surface within the level. Non-3D: the
                 * layer picks an array element and the sample picks the
                 * surface within it. */
                unsigned array_idx = is_3d ? 0 : layer;
                unsigned surface_idx = is_3d ? layer : sample;

                surf->data = base +
                             panfrost_texture_offset(&iview->image->layout, level,
                                                     array_idx, surface_idx);
        }
}