1/*
2 * Copyright (C) 2019-2022 Collabora, Ltd.
3 * Copyright (C) 2018-2019 Alyssa Rosenzweig
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26#include "util/macros.h"
27#include "util/u_math.h"
28#include "pan_texture.h"
29
/* List of supported modifiers, in descending order of preference. AFBC is
 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
 * enabling the YUV-like transform is typically a win where possible.
 *
 * All AFBC entries use 16x16 superblocks and set AFBC_FORMAT_MOD_SPARSE. The
 * variants with AFBC_FORMAT_MOD_TILED (and AFBC_FORMAT_MOD_SC) are listed
 * ahead of the untiled ones, and each YTR variant is listed ahead of its
 * non-YTR counterpart. */

uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
        /* AFBC: TILED | SC | SPARSE | YTR */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_TILED |
                AFBC_FORMAT_MOD_SC |
                AFBC_FORMAT_MOD_SPARSE |
                AFBC_FORMAT_MOD_YTR),

        /* AFBC: TILED | SC | SPARSE (no YTR) */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_TILED |
                AFBC_FORMAT_MOD_SC |
                AFBC_FORMAT_MOD_SPARSE),

        /* AFBC: SPARSE | YTR (untiled) */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE |
                AFBC_FORMAT_MOD_YTR),

        /* AFBC: SPARSE only (untiled, no YTR) */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE),

        /* Uncompressed fallbacks: u-interleaved tiling, then linear */
        DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
        DRM_FORMAT_MOD_LINEAR
};
60
/* Table of AFBC superblock sizes, indexed by the AFBC_FORMAT_MOD_BLOCK_SIZE_*
 * value of a modifier. Each entry initializes a struct pan_block_size
 * positionally; the values follow the WxH order of the index names. */
static const struct pan_block_size
afbc_superblock_sizes[] = {
        [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16]      = { 16, 16 },
        [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8]       = { 32,  8 },
        [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4]       = { 64,  4 },
};
68
69/*
70 * Given an AFBC modifier, return the superblock size.
71 *
72 * We do not yet have any use cases for multiplanar YCBCr formats with different
73 * superblock sizes on the luma and chroma planes. These formats are unsupported
74 * for now.
75 */
76struct pan_block_size
77panfrost_afbc_superblock_size(uint64_t modifier)
78{
79        unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
80
81        assert(drm_is_afbc(modifier));
82        assert(index < ARRAY_SIZE(afbc_superblock_sizes));
83
84        return afbc_superblock_sizes[index];
85}
86
87/*
88 * Given an AFBC modifier, return the width of the superblock.
89 */
90unsigned
91panfrost_afbc_superblock_width(uint64_t modifier)
92{
93        return panfrost_afbc_superblock_size(modifier).width;
94}
95
96/*
97 * Given an AFBC modifier, return the height of the superblock.
98 */
99unsigned
100panfrost_afbc_superblock_height(uint64_t modifier)
101{
102        return panfrost_afbc_superblock_size(modifier).height;
103}
104
/*
 * Report whether an AFBC modifier uses "wide blocks", i.e. superblocks wider
 * than 16 pixels. 16 pixels is the minimum (and default) superblock width.
 */
bool
panfrost_afbc_is_wide(uint64_t modifier)
{
        const unsigned default_width = 16;
        const unsigned superblock_width =
                panfrost_afbc_superblock_width(modifier);

        return superblock_width > default_width;
}
115
116/*
117 * Given a format, determine the tile size used for u-interleaving. For formats
118 * that are already block compressed, this is 4x4. For all other formats, this
119 * is 16x16, hence the modifier name.
120 */
121static inline struct pan_block_size
122panfrost_u_interleaved_tile_size(enum pipe_format format)
123{
124        if (util_format_is_compressed(format))
125                return (struct pan_block_size) {  4,  4 };
126        else
127                return (struct pan_block_size) { 16, 16 };
128}
129
130/*
131 * Determine the block size used for interleaving. For u-interleaving, this is
132 * the tile size. For AFBC, this is the superblock size. For linear textures,
133 * this is trivially 1x1.
134 */
135struct pan_block_size
136panfrost_block_size(uint64_t modifier, enum pipe_format format)
137{
138        if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
139                return panfrost_u_interleaved_tile_size(format);
140        else if (drm_is_afbc(modifier))
141                return panfrost_afbc_superblock_size(modifier);
142        else
143                return (struct pan_block_size) { 1, 1 };
144}
145
146/*
147 * Determine the tile size used by AFBC. This tiles superblocks themselves.
148 * Current GPUs support either 8x8 tiling or no tiling (1x1)
149 */
150static inline unsigned
151pan_afbc_tile_size(uint64_t modifier)
152{
153        return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1;
154}
155
156/*
157 * Determine the number of bytes between header rows for an AFBC image. For an
158 * image with linear headers, this is simply the number of header blocks
159 * (=superblocks) per row times the numbers of bytes per header block. For an
160 * image with linear headers, this is multipled by the number of rows of
161 * header blocks are in a tile together.
162 */
163uint32_t
164pan_afbc_row_stride(uint64_t modifier, uint32_t width)
165{
166        unsigned block_width = panfrost_afbc_superblock_width(modifier);
167
168        return (width / block_width) * pan_afbc_tile_size(modifier) *
169                AFBC_HEADER_BYTES_PER_TILE;
170}
171
172/*
173 * Determine the number of header blocks between header rows. This is equal to
174 * the number of bytes between header rows divided by the bytes per blocks of a
175 * header tile. This is also divided by the tile size to give a "line stride" in
176 * blocks, rather than a real row stride. This is required by Bifrost.
177 */
178uint32_t
179pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes)
180{
181        return row_stride_bytes /
182               (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier));
183}
184
185/*
186 * Determine the required alignment for the body offset of an AFBC image. For
187 * now, this depends only on whether tiling is in use. These minimum alignments
188 * are required on all current GPUs.
189 */
190static inline uint32_t
191pan_afbc_body_align(uint64_t modifier)
192{
193        return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64;
194}
195
196/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
197 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
198 * This feature is also known as "transaction elimination". */
199
200#define CHECKSUM_TILE_WIDTH 16
201#define CHECKSUM_TILE_HEIGHT 16
202#define CHECKSUM_BYTES_PER_TILE 8
203
204unsigned
205panfrost_compute_checksum_size(
206        struct pan_image_slice_layout *slice,
207        unsigned width,
208        unsigned height)
209{
210        unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
211        unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
212
213        slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
214
215        return slice->crc.stride * tile_count_y;
216}
217
218unsigned
219panfrost_get_layer_stride(const struct pan_image_layout *layout,
220                          unsigned level)
221{
222        if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
223                return layout->array_stride;
224        else if (drm_is_afbc(layout->modifier))
225                return layout->slices[level].afbc.surface_stride;
226        else
227                return layout->slices[level].surface_stride;
228}
229
230unsigned
231panfrost_get_legacy_stride(const struct pan_image_layout *layout,
232                           unsigned level)
233{
234        unsigned row_stride = layout->slices[level].row_stride;
235        struct pan_block_size block_size =
236                panfrost_block_size(layout->modifier, layout->format);
237
238        if (drm_is_afbc(layout->modifier)) {
239                unsigned width = u_minify(layout->width, level);
240                width = ALIGN_POT(width, block_size.width);
241
242                return width * util_format_get_blocksize(layout->format);
243        } else {
244                return row_stride / block_size.height;
245        }
246}
247
248unsigned
249panfrost_from_legacy_stride(unsigned legacy_stride,
250                            enum pipe_format format,
251                            uint64_t modifier)
252{
253        struct pan_block_size block_size =
254                panfrost_block_size(modifier, format);
255
256        if (drm_is_afbc(modifier)) {
257                unsigned width = legacy_stride / util_format_get_blocksize(format);
258
259                return pan_afbc_row_stride(modifier, width);
260        } else {
261                return legacy_stride * block_size.height;
262        }
263}
264
265/* Computes the offset into a texture at a particular level/face. Add to
266 * the base address of a texture to get the address to that level/face */
267
268unsigned
269panfrost_texture_offset(const struct pan_image_layout *layout,
270                        unsigned level, unsigned array_idx,
271                        unsigned surface_idx)
272{
273        return layout->slices[level].offset +
274               (array_idx * layout->array_stride) +
275               (surface_idx * layout->slices[level].surface_stride);
276}
277
278/*
279 * Return the minimum stride alignment in bytes for a given texture format.
280 *
281 * There is no format on any supported Mali with a minimum alignment greater
282 * than 64 bytes, but 64 bytes is the required alignment of all regular formats
283 * in v7 and newer. If this alignment is not met, imprecise faults may be
284 * raised.
285 *
286 * This may not be necessary on older hardware but we enforce it there too for
287 * uniformity. If this poses a problem there, we'll need a solution that can
288 * handle v7 as well.
289 *
290 * Certain non-regular formats require smaller power-of-two alignments.
291 * This requirement could be loosened in the future if there is a compelling
292 * reason, by making this query more precise.
293 */
294uint32_t
295pan_stride_align_B(UNUSED enum pipe_format format)
296{
297        return 64;
298}
299
300bool
301pan_is_stride_aligned(enum pipe_format format, uint32_t stride_B)
302{
303        return (stride_B % pan_stride_align_B(format)) == 0;
304}
305
/*
 * Compute the memory layout of an image: the per-level slice layouts and the
 * total sizes.
 *
 * Reads the descriptive fields of layout (format, modifier, dim, width,
 * height, depth, nr_samples, nr_slices, array_size, crc_mode) and fills in
 * layout->slices[0 .. nr_slices - 1], layout->array_stride,
 * layout->data_size and layout->crc_size.
 *
 * explicit_layout may be NULL. When it is non-NULL, its offset is used as the
 * starting offset and its row_stride is validated against the computed
 * minimum (and, for non-AFBC images, used as the row stride).
 *
 * Returns true on success. Returns false if an explicit layout is given for
 * an unsupported image configuration, is misaligned, or has a row stride
 * smaller than required. On a false return, slice fields may already have
 * been partially written.
 */
bool
pan_image_layout_init(struct pan_image_layout *layout,
                      const struct pan_image_explicit_layout *explicit_layout)
{
        /* Explicit strides only work with non-mipmapped, non-array,
         * single-sample 2D images, and in-band CRC can't be used.
         */
        if (explicit_layout &&
	    (layout->depth > 1 || layout->nr_samples > 1 ||
             layout->array_size > 1 || layout->dim != MALI_TEXTURE_DIMENSION_2D ||
             layout->nr_slices > 1 || layout->crc_mode == PAN_IMAGE_CRC_INBAND))
                return false;

        /* Require both offsets and strides to be aligned to the hardware
         * requirement. Panfrost allocates offsets and strides like this, so
         * this requirement is satisfied by any image that was exported from
         * another process with Panfrost. However, it does restrict imports of
         * EGL external images.
         */
        if (explicit_layout &&
            !(pan_is_stride_aligned(layout->format, explicit_layout->offset) &&
              pan_is_stride_aligned(layout->format, explicit_layout->row_stride)))
                return false;

        unsigned fmt_blocksize = util_format_get_blocksize(layout->format);

        /* MSAA is implemented as a 3D texture with z corresponding to the
         * sample #, horrifyingly enough */

        assert(layout->depth == 1 || layout->nr_samples == 1);

        bool afbc = drm_is_afbc(layout->modifier);
        bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
        bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;

        /* Running offsets: oob_crc_offset accumulates out-of-band checksum
         * sizes, offset accumulates image data within one array element. */
        unsigned oob_crc_offset = 0;
        unsigned offset = explicit_layout ? explicit_layout->offset : 0;
        struct pan_block_size block_size =
                panfrost_block_size(layout->modifier, layout->format);

        /* Dimensions of the current mip level, minified at the end of each
         * loop iteration */
        unsigned width = layout->width;
        unsigned height = layout->height;
        unsigned depth = layout->depth;

        unsigned align_w = block_size.width;
        unsigned align_h = block_size.height;

        /* For tiled AFBC, align to tiles of superblocks (this can be large) */
        if (afbc) {
                align_w *= pan_afbc_tile_size(layout->modifier);
                align_h *= pan_afbc_tile_size(layout->modifier);
        }

        for (unsigned l = 0; l < layout->nr_slices; ++l) {
                struct pan_image_slice_layout *slice = &layout->slices[l];

                /* Level size in format blocks, padded up to the alignment
                 * computed above */
                unsigned effective_width = ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w);
                unsigned effective_height = ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h);

                /* Align levels to cache-line as a performance improvement for
                 * linear/tiled and as a requirement for AFBC */

                offset = ALIGN_POT(offset, 64);

                slice->offset = offset;

                /* Tightly packed size of one row of blocks: block_size.height
                 * lines of effective_width format blocks each */
                unsigned row_stride = fmt_blocksize * effective_width * block_size.height;

                if (explicit_layout && !afbc) {
                        /* Make sure the explicit stride is valid */
                        if (explicit_layout->row_stride < row_stride)
                                return false;

                        row_stride = explicit_layout->row_stride;
                } else if (linear) {
                        /* Keep lines alignment on 64 byte for performance.
                         *
                         * Note that this is a multiple of the minimum
                         * stride alignment, so the hardware requirement is
                         * satisfied as a result.
                         */
                        row_stride = ALIGN_POT(row_stride, 64);
                }


                assert(pan_is_stride_aligned(layout->format, row_stride) &&
                       "alignment gauranteed in both code paths");

                /* Size of a single 2D surface: one row_stride per row of
                 * blocks. For AFBC this is the body size only; the header is
                 * accounted for below. */
                unsigned slice_one_size = row_stride * (effective_height / block_size.height);

                /* Compute AFBC sizes if necessary */
                if (afbc) {
                        /* For AFBC, the slice row stride is the header row
                         * stride rather than the body stride computed above */
                        slice->row_stride =
                                pan_afbc_row_stride(layout->modifier, effective_width);
                        slice->afbc.header_size =
                                ALIGN_POT(slice->row_stride * (effective_height / align_h),
                                          pan_afbc_body_align(layout->modifier));

                        /* An explicit AFBC stride is checked against the
                         * header row stride, but is not used in its place */
                        if (explicit_layout && explicit_layout->row_stride < slice->row_stride)
                                return false;

                        /* AFBC body size */
                        slice->afbc.body_size = slice_one_size;

                        /* 3D AFBC resources have all headers placed at the
                         * beginning instead of having them split per depth
                         * level
                         */
                        if (is_3d) {
                                /* The per-layer header stride is the size of
                                 * one header; the headers of all depth layers
                                 * are skipped here, and the bodies are added
                                 * via slice_full_size below */
                                slice->afbc.surface_stride =
                                        slice->afbc.header_size;
                                slice->afbc.header_size *= depth;
                                slice->afbc.body_size *= depth;
                                offset += slice->afbc.header_size;
                        } else {
                                /* Header and body are contiguous per surface */
                                slice_one_size += slice->afbc.header_size;
                                slice->afbc.surface_stride = slice_one_size;
                        }
                } else {
                        slice->row_stride = row_stride;
                }

                /* The assert near the top guarantees that depth and
                 * nr_samples are not both greater than one on entry */
                unsigned slice_full_size =
                        slice_one_size * depth * layout->nr_samples;

                slice->surface_stride = slice_one_size;

                assert(pan_is_stride_aligned(layout->format, slice->surface_stride) &&
                       "integer multiple of aligned is still aligned, "
                       "and AFBC header is at least 64 byte aligned");

                /* Advance past the surfaces of this level and record its size */

                offset += slice_full_size;
                slice->size = slice_full_size;

                /* Add a checksum region if necessary */
                if (layout->crc_mode != PAN_IMAGE_CRC_NONE) {
                        /* Also sets slice->crc.stride as a side effect */
                        slice->crc.size =
                                panfrost_compute_checksum_size(slice, width, height);

                        if (layout->crc_mode == PAN_IMAGE_CRC_INBAND) {
                                /* In-band: checksums follow the level's data */
                                slice->crc.offset = offset;
                                offset += slice->crc.size;
                                slice->size += slice->crc.size;
                        } else {
                                /* Out-of-band: offsets are relative to a
                                 * separate region sized by layout->crc_size */
                                slice->crc.offset = oob_crc_offset;
                                oob_crc_offset += slice->crc.size;
                        }
                }

                /* Step down to the next mip level */
                width = u_minify(width, 1);
                height = u_minify(height, 1);
                depth = u_minify(depth, 1);
        }

        /* Arrays and cubemaps have the entire miptree duplicated */
        layout->array_stride = ALIGN_POT(offset, 64);
        /* With an explicit layout the size is the unpadded end offset;
         * otherwise it covers all array elements, rounded up to 4096 bytes */
        if (explicit_layout)
                layout->data_size = offset;
        else
                layout->data_size = ALIGN_POT(layout->array_stride * layout->array_size, 4096);
        layout->crc_size = oob_crc_offset;

        return true;
}
472
473void
474pan_iview_get_surface(const struct pan_image_view *iview,
475                      unsigned level, unsigned layer, unsigned sample,
476                      struct pan_surface *surf)
477{
478        level += iview->first_level;
479        assert(level < iview->image->layout.nr_slices);
480
481       layer += iview->first_layer;
482
483        bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
484        const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
485        mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
486
487        if (drm_is_afbc(iview->image->layout.modifier)) {
488                assert(!sample);
489
490                if (is_3d) {
491                        ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
492                        assert(layer < depth);
493                        surf->afbc.header = base + slice->offset +
494                                           (layer * slice->afbc.surface_stride);
495                        surf->afbc.body = base + slice->offset +
496                                          slice->afbc.header_size +
497                                          (slice->surface_stride * layer);
498                } else {
499                        assert(layer < iview->image->layout.array_size);
500                        surf->afbc.header = base +
501                                            panfrost_texture_offset(&iview->image->layout,
502                                                                    level, layer, 0);
503                        surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
504                }
505        } else {
506                unsigned array_idx = is_3d ? 0 : layer;
507                unsigned surface_idx = is_3d ? layer : sample;
508
509                surf->data = base +
510                             panfrost_texture_offset(&iview->image->layout, level,
511                                                     array_idx, surface_idx);
512        }
513}
514