xref: /third_party/mesa3d/src/panfrost/lib/pan_cs.c (revision bf215546)
1/*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 *   Boris Brezillon <boris.brezillon@collabora.com>
26 */
27
28#include "util/macros.h"
29
30
31#include "pan_cs.h"
32#include "pan_encoder.h"
33#include "pan_texture.h"
34
35static unsigned
36mod_to_block_fmt(uint64_t mod)
37{
38        switch (mod) {
39        case DRM_FORMAT_MOD_LINEAR:
40                return MALI_BLOCK_FORMAT_LINEAR;
41	case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
42                return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
43        default:
44#if PAN_ARCH >= 5
45                if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
46                        return MALI_BLOCK_FORMAT_AFBC;
47#endif
48#if PAN_ARCH >= 7
49                if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
50                        return MALI_BLOCK_FORMAT_AFBC_TILED;
51#endif
52
53                unreachable("Unsupported modifer");
54        }
55}
56
57static enum mali_msaa
58mali_sampling_mode(const struct pan_image_view *view)
59{
60        if (view->image->layout.nr_samples > 1) {
61                assert(view->nr_samples == view->image->layout.nr_samples);
62                assert(view->image->layout.slices[0].surface_stride != 0);
63                return MALI_MSAA_LAYERED;
64        }
65
66        if (view->nr_samples > view->image->layout.nr_samples) {
67                assert(view->image->layout.nr_samples == 1);
68                return MALI_MSAA_AVERAGE;
69        }
70
71        assert(view->nr_samples == view->image->layout.nr_samples);
72        assert(view->nr_samples == 1);
73
74        return MALI_MSAA_SINGLE;
75}
76
77static inline enum mali_sample_pattern
78pan_sample_pattern(unsigned samples)
79{
80        switch (samples) {
81        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
82        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
83        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
84        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
85        default: unreachable("Unsupported sample count");
86        }
87}
88
89int
90GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
91{
92        /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
93         * tiles are the same size as the tiles of the framebuffer. However,
94         * our code only handles 16x16 tiles. Therefore under the current
95         * implementation, we must disable CRC when 16x16 tiles are not used.
96         *
97         * This may hurt performance. However, smaller tile sizes are rare, and
98         * CRCs are more expensive at smaller tile sizes, reducing the benefit.
99         * Restricting CRC to 16x16 should work in practice.
100         */
101        if (tile_size != 16 * 16) {
102                assert(tile_size < 16 * 16);
103                return -1;
104        }
105
106#if PAN_ARCH <= 6
107        if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
108            fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
109                return 0;
110
111        return -1;
112#else
113        bool best_rt_valid = false;
114        int best_rt = -1;
115
116        for (unsigned i = 0; i < fb->rt_count; i++) {
117		if (!fb->rts[i].view || fb->rts[0].discard ||
118                    fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
119                        continue;
120
121                bool valid = *(fb->rts[i].crc_valid);
122                bool full = !fb->extent.minx && !fb->extent.miny &&
123                            fb->extent.maxx == (fb->width - 1) &&
124                            fb->extent.maxy == (fb->height - 1);
125                if (!full && !valid)
126                        continue;
127
128                if (best_rt < 0 || (valid && !best_rt_valid)) {
129                        best_rt = i;
130                        best_rt_valid = valid;
131                }
132
133                if (valid)
134                        break;
135        }
136
137        return best_rt;
138#endif
139}
140
141static enum mali_zs_format
142translate_zs_format(enum pipe_format in)
143{
144        switch (in) {
145        case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
146        case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
147        case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
148        case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
149#if PAN_ARCH <= 7
150        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
151#endif
152        default: unreachable("Unsupported depth/stencil format.");
153        }
154}
155
156#if PAN_ARCH >= 5
157static enum mali_s_format
158translate_s_format(enum pipe_format in)
159{
160        switch (in) {
161        case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
162        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
163        case PIPE_FORMAT_X24S8_UINT:
164                return MALI_S_FORMAT_X24S8;
165
166#if PAN_ARCH <= 7
167        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
168        case PIPE_FORMAT_S8X24_UINT:
169                return MALI_S_FORMAT_S8X24;
170        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
171                return MALI_S_FORMAT_X32_S8X24;
172#endif
173
174        default:
175                unreachable("Unsupported stencil format.");
176        }
177}
178
179static void
180pan_prepare_s(const struct pan_fb_info *fb,
181              struct MALI_ZS_CRC_EXTENSION *ext)
182{
183        const struct pan_image_view *s = fb->zs.view.s;
184
185        if (!s)
186                return;
187
188        unsigned level = s->first_level;
189
190        ext->s_msaa = mali_sampling_mode(s);
191
192        struct pan_surface surf;
193        pan_iview_get_surface(s, 0, 0, 0, &surf);
194
195        assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
196               s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
197        ext->s_writeback_base = surf.data;
198        ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
199        ext->s_writeback_surface_stride =
200                (s->image->layout.nr_samples > 1) ?
201                s->image->layout.slices[level].surface_stride : 0;
202        ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
203        ext->s_write_format = translate_s_format(s->format);
204}
205
206static void
207pan_prepare_zs(const struct pan_fb_info *fb,
208               struct MALI_ZS_CRC_EXTENSION *ext)
209{
210        const struct pan_image_view *zs = fb->zs.view.zs;
211
212        if (!zs)
213                return;
214
215        unsigned level = zs->first_level;
216
217        ext->zs_msaa = mali_sampling_mode(zs);
218
219        struct pan_surface surf;
220        pan_iview_get_surface(zs, 0, 0, 0, &surf);
221        UNUSED const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];
222
223        if (drm_is_afbc(zs->image->layout.modifier)) {
224#if PAN_ARCH >= 9
225                ext->zs_writeback_base = surf.afbc.header;
226                ext->zs_writeback_row_stride = slice->row_stride;
227                /* TODO: surface stride? */
228                ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;
229
230                /* TODO: stencil AFBC? */
231#else
232#if PAN_ARCH >= 6
233                ext->zs_afbc_row_stride = pan_afbc_stride_blocks(zs->image->layout.modifier, slice->row_stride);
234#else
235                ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
236                ext->zs_afbc_body_size = 0x1000;
237                ext->zs_afbc_chunk_size = 9;
238                ext->zs_afbc_sparse = true;
239#endif
240
241                ext->zs_afbc_header = surf.afbc.header;
242                ext->zs_afbc_body = surf.afbc.body;
243#endif
244        } else {
245                assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
246                       zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
247
248                /* TODO: Z32F(S8) support, which is always linear */
249
250                ext->zs_writeback_base = surf.data;
251                ext->zs_writeback_row_stride =
252                        zs->image->layout.slices[level].row_stride;
253                ext->zs_writeback_surface_stride =
254                        (zs->image->layout.nr_samples > 1) ?
255                        zs->image->layout.slices[level].surface_stride : 0;
256        }
257
258        ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
259        ext->zs_write_format = translate_zs_format(zs->format);
260        if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
261                ext->s_writeback_base = ext->zs_writeback_base;
262}
263
264static void
265pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
266                struct MALI_ZS_CRC_EXTENSION *ext)
267{
268        if (rt_crc < 0)
269                return;
270
271        assert(rt_crc < fb->rt_count);
272
273        const struct pan_image_view *rt = fb->rts[rt_crc].view;
274        const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
275        ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
276                         (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
277                         (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
278                        slice->crc.offset;
279        ext->crc_row_stride = slice->crc.stride;
280
281#if PAN_ARCH >= 7
282        ext->crc_render_target = rt_crc;
283
284        if (fb->rts[rt_crc].clear) {
285                uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
286                ext->crc_clear_color = clear_val | 0xc000000000000000 |
287                                       (((uint64_t)clear_val & 0xffff) << 32);
288        }
289#endif
290}
291
292static void
293pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
294                    void *zs_crc_ext)
295{
296        pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
297                pan_prepare_crc(fb, rt_crc, &cfg);
298                cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
299                pan_prepare_zs(fb, &cfg);
300                pan_prepare_s(fb, &cfg);
301        }
302}
303
304/* Measure format as it appears in the tile buffer */
305
306static unsigned
307pan_bytes_per_pixel_tib(enum pipe_format format)
308{
309        if (panfrost_blendable_formats_v7[format].internal) {
310                /* Blendable formats are always 32-bits in the tile buffer,
311                 * extra bits are used as padding or to dither */
312                return 4;
313        } else {
314                /* Non-blendable formats are raw, rounded up to the nearest
315                 * power-of-two size */
316                unsigned bytes = util_format_get_blocksize(format);
317                return util_next_power_of_two(bytes);
318        }
319}
320
321static unsigned
322pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
323{
324        unsigned sum = 0;
325
326        for (int cb = 0; cb < fb->rt_count; ++cb) {
327                const struct pan_image_view *rt = fb->rts[cb].view;
328
329                if (!rt)
330                        continue;
331
332                sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
333        }
334
335        return sum;
336}
337
/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * Rounding the bytes per pixel up to a power of two and dividing the budget
 * by it (a right shift, as both are powers of two) gives the answer.
 *
 * tile_buffer_bytes must be a power of two no smaller than 1024.
 */
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
        assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
        assert(tile_buffer_bytes >= 1024);

        const unsigned log2_bpp = util_logbase2_ceil(bytes_per_pixel);

        return tile_buffer_bytes >> log2_bpp;
}
354
355static enum mali_color_format
356pan_mfbd_raw_format(unsigned bits)
357{
358        switch (bits) {
359        case    8: return MALI_COLOR_FORMAT_RAW8;
360        case   16: return MALI_COLOR_FORMAT_RAW16;
361        case   24: return MALI_COLOR_FORMAT_RAW24;
362        case   32: return MALI_COLOR_FORMAT_RAW32;
363        case   48: return MALI_COLOR_FORMAT_RAW48;
364        case   64: return MALI_COLOR_FORMAT_RAW64;
365        case   96: return MALI_COLOR_FORMAT_RAW96;
366        case  128: return MALI_COLOR_FORMAT_RAW128;
367        case  192: return MALI_COLOR_FORMAT_RAW192;
368        case  256: return MALI_COLOR_FORMAT_RAW256;
369        case  384: return MALI_COLOR_FORMAT_RAW384;
370        case  512: return MALI_COLOR_FORMAT_RAW512;
371        case  768: return MALI_COLOR_FORMAT_RAW768;
372        case 1024: return MALI_COLOR_FORMAT_RAW1024;
373        case 1536: return MALI_COLOR_FORMAT_RAW1536;
374        case 2048: return MALI_COLOR_FORMAT_RAW2048;
375        default: unreachable("invalid raw bpp");
376        }
377}
378
379static void
380pan_rt_init_format(const struct pan_image_view *rt,
381                   struct MALI_RENDER_TARGET *cfg)
382{
383        /* Explode details on the format */
384
385        const struct util_format_description *desc =
386                util_format_description(rt->format);
387
388        /* The swizzle for rendering is inverted from texturing */
389
390        unsigned char swizzle[4] = {
391                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
392        };
393
394        /* Fill in accordingly, defaulting to 8-bit UNORM */
395
396        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
397                cfg->srgb = true;
398
399        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
400
401        if (fmt.internal) {
402                cfg->internal_format = fmt.internal;
403                cfg->writeback_format = fmt.writeback;
404                panfrost_invert_swizzle(desc->swizzle, swizzle);
405        } else {
406                /* Construct RAW internal/writeback, where internal is
407                 * specified logarithmically (round to next power-of-two).
408                 * Offset specified from RAW8, where 8 = 2^3 */
409
410                unsigned bits = desc->block.bits;
411                unsigned offset = util_logbase2_ceil(bits) - 3;
412                assert(offset <= 4);
413
414                cfg->internal_format =
415                        MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
416
417                cfg->writeback_format = pan_mfbd_raw_format(bits);
418        }
419
420        cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
421}
422
423#if PAN_ARCH >= 9
424enum mali_afbc_compression_mode
425pan_afbc_compression_mode(enum pipe_format format)
426{
427        /* There's a special case for texturing the stencil part from a combined
428         * depth/stencil texture, handle it separately.
429         */
430        if (format == PIPE_FORMAT_X24S8_UINT)
431                return MALI_AFBC_COMPRESSION_MODE_X24S8;
432
433        /* Otherwise, map canonical formats to the hardware enum. This only
434         * needs to handle the subset of formats returned by
435         * panfrost_afbc_format.
436         */
437        switch (panfrost_afbc_format(PAN_ARCH, format)) {
438        case PIPE_FORMAT_R8G8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8;
439        case PIPE_FORMAT_R8G8B8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8B8;
440        case PIPE_FORMAT_R8G8B8A8_UNORM: return MALI_AFBC_COMPRESSION_MODE_R8G8B8A8;
441        case PIPE_FORMAT_R5G6B5_UNORM: return MALI_AFBC_COMPRESSION_MODE_R5G6B5;
442        case PIPE_FORMAT_S8_UINT: return MALI_AFBC_COMPRESSION_MODE_S8;
443        case PIPE_FORMAT_NONE: unreachable("invalid format for AFBC");
444        default: unreachable("unknown canonical AFBC format");
445        }
446}
447#endif
448
449static void
450pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
451               unsigned cbuf_offset,
452               struct MALI_RENDER_TARGET *cfg)
453{
454        cfg->clean_pixel_write_enable = fb->rts[idx].clear;
455        cfg->internal_buffer_offset = cbuf_offset;
456        if (fb->rts[idx].clear) {
457                cfg->clear.color_0 = fb->rts[idx].clear_value[0];
458                cfg->clear.color_1 = fb->rts[idx].clear_value[1];
459                cfg->clear.color_2 = fb->rts[idx].clear_value[2];
460                cfg->clear.color_3 = fb->rts[idx].clear_value[3];
461        }
462
463        const struct pan_image_view *rt = fb->rts[idx].view;
464        if (!rt || fb->rts[idx].discard) {
465                cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
466                cfg->internal_buffer_offset = cbuf_offset;
467#if PAN_ARCH >= 7
468                cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
469                cfg->dithering_enable = true;
470#endif
471                return;
472        }
473
474        cfg->write_enable = true;
475        cfg->dithering_enable = true;
476
477        unsigned level = rt->first_level;
478        assert(rt->last_level == rt->first_level);
479        assert(rt->last_layer == rt->first_layer);
480
481        int row_stride = rt->image->layout.slices[level].row_stride;
482
483        /* Only set layer_stride for layered MSAA rendering  */
484
485        unsigned layer_stride =
486                (rt->image->layout.nr_samples > 1) ?
487                        rt->image->layout.slices[level].surface_stride : 0;
488
489        cfg->writeback_msaa = mali_sampling_mode(rt);
490
491        pan_rt_init_format(rt, cfg);
492
493        cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
494
495        struct pan_surface surf;
496        pan_iview_get_surface(rt, 0, 0, 0, &surf);
497
498        if (drm_is_afbc(rt->image->layout.modifier)) {
499#if PAN_ARCH >= 9
500                if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
501                        cfg->afbc.yuv_transform = true;
502
503                cfg->afbc.wide_block = panfrost_afbc_is_wide(rt->image->layout.modifier);
504                cfg->afbc.header = surf.afbc.header;
505                cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
506                assert(surf.afbc.body >= surf.afbc.header);
507
508                cfg->afbc.compression_mode = pan_afbc_compression_mode(rt->format);
509                cfg->afbc.row_stride = row_stride;
510#else
511                const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];
512
513#if PAN_ARCH >= 6
514                cfg->afbc.row_stride = pan_afbc_stride_blocks(rt->image->layout.modifier, slice->row_stride);
515                cfg->afbc.afbc_wide_block_enable =
516                        panfrost_afbc_is_wide(rt->image->layout.modifier);
517#else
518                cfg->afbc.chunk_size = 9;
519                cfg->afbc.sparse = true;
520                cfg->afbc.body_size = slice->afbc.body_size;
521#endif
522
523                cfg->afbc.header = surf.afbc.header;
524                cfg->afbc.body = surf.afbc.body;
525
526                if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
527                        cfg->afbc.yuv_transform_enable = true;
528#endif
529        } else {
530                assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
531                       rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
532                cfg->rgb.base = surf.data;
533                cfg->rgb.row_stride = row_stride;
534                cfg->rgb.surface_stride = layer_stride;
535        }
536}
537#endif
538
539void
540GENX(pan_emit_tls)(const struct pan_tls_info *info,
541                   void *out)
542{
543        pan_pack(out, LOCAL_STORAGE, cfg) {
544                if (info->tls.size) {
545                        unsigned shift =
546                                panfrost_get_stack_shift(info->tls.size);
547
548                        cfg.tls_size = shift;
549#if PAN_ARCH >= 9
550                        /* For now, always use packed TLS addressing. This is
551                         * better for the cache and requires no fix up code in
552                         * the shader. We may need to revisit this someday for
553                         * OpenCL generic pointer support.
554                         */
555                        cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;
556
557                        assert((info->tls.ptr & 4095) == 0);
558                        cfg.tls_base_pointer = info->tls.ptr >> 8;
559#else
560                        cfg.tls_base_pointer = info->tls.ptr;
561#endif
562                }
563
564                if (info->wls.size) {
565                        assert(!(info->wls.ptr & 4095));
566                        assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
567                        cfg.wls_base_pointer = info->wls.ptr;
568                        unsigned wls_size = pan_wls_adjust_size(info->wls.size);
569                        cfg.wls_instances = pan_wls_instances(&info->wls.dim);
570                        cfg.wls_size_scale = util_logbase2(wls_size) + 1;
571                } else {
572                        cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
573                }
574        }
575}
576
577#if PAN_ARCH <= 5
578static void
579pan_emit_midgard_tiler(const struct panfrost_device *dev,
580                       const struct pan_fb_info *fb,
581                       const struct pan_tiler_context *tiler_ctx,
582                       void *out)
583{
584        bool hierarchy = !dev->model->quirks.no_hierarchical_tiling;
585
586        assert(tiler_ctx->midgard.polygon_list->ptr.gpu);
587
588        pan_pack(out, TILER_CONTEXT, cfg) {
589                unsigned header_size;
590
591                if (tiler_ctx->midgard.disable) {
592                        cfg.hierarchy_mask =
593                                hierarchy ?
594                                MALI_MIDGARD_TILER_DISABLED :
595                                MALI_MIDGARD_TILER_USER;
596                        header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
597                        cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
598                        cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
599                        cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
600		} else {
601                        cfg.hierarchy_mask =
602                                panfrost_choose_hierarchy_mask(fb->width,
603                                                               fb->height,
604                                                               1, hierarchy);
605                        header_size = panfrost_tiler_header_size(fb->width,
606                                                                 fb->height,
607                                                                 cfg.hierarchy_mask,
608                                                                 hierarchy);
609                        cfg.polygon_list_size =
610                                panfrost_tiler_full_size(fb->width, fb->height,
611                                                         cfg.hierarchy_mask,
612                                                         hierarchy);
613                        cfg.heap_start = dev->tiler_heap->ptr.gpu;
614                        cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
615                }
616
617                cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
618                cfg.polygon_list_body = cfg.polygon_list + header_size;
619        }
620}
621#endif
622
623#if PAN_ARCH >= 5
624static void
625pan_emit_rt(const struct pan_fb_info *fb,
626            unsigned idx, unsigned cbuf_offset, void *out)
627{
628        pan_pack(out, RENDER_TARGET, cfg) {
629                pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
630        }
631}
632
633#if PAN_ARCH >= 6
634/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
635 *
636 *      Forcing clean_tile_writes breaks INTERSECT readbacks
637 *
638 * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
639 * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
640 * ignore, this cannot affect correctness, only performance */
641
642static enum mali_pre_post_frame_shader_mode
643pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
644{
645        if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
646                return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
647        else
648                return mode;
649}
650
651/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
652 * the effective tile size differs from the superblock size of any enabled AFBC
653 * render target. Check this condition. */
654
655static bool
656pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
657{
658        if (!drm_is_afbc(rt->image->layout.modifier))
659                return false;
660
661        unsigned superblock = panfrost_afbc_superblock_width(rt->image->layout.modifier);
662
663        assert(superblock >= 16);
664        assert(tile_size <= 16*16);
665
666        /* Tile size and superblock differ unless they are both 16x16 */
667        return !(superblock == 16 && tile_size == 16*16);
668}
669
670static bool
671pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
672{
673        /* Maximum tile size */
674        assert(tile_size <= 16*16);
675
676        for (unsigned i = 0; i < fb->rt_count; ++i) {
677                if (fb->rts[i].view && !fb->rts[i].discard &&
678                    pan_force_clean_write_rt(fb->rts[i].view, tile_size))
679                        return true;
680        }
681
682        if (fb->zs.view.zs && !fb->zs.discard.z &&
683            pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
684                return true;
685
686        if (fb->zs.view.s && !fb->zs.discard.s &&
687            pan_force_clean_write_rt(fb->zs.view.s, tile_size))
688                return true;
689
690        return false;
691}
692
693#endif
694
695unsigned
696GENX(pan_emit_fbd)(const struct panfrost_device *dev,
697                   const struct pan_fb_info *fb,
698                   const struct pan_tls_info *tls,
699                   const struct pan_tiler_context *tiler_ctx,
700                   void *out)
701{
702        unsigned tags = MALI_FBD_TAG_IS_MFBD;
703        void *fbd = out;
704        void *rtd = out + pan_size(FRAMEBUFFER);
705
706#if PAN_ARCH <= 5
707        GENX(pan_emit_tls)(tls,
708                           pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
709#endif
710
711        unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
712        unsigned tile_size = pan_select_max_tile_size(dev->optimal_tib_size,
713                                                      bytes_per_pixel);
714
715        /* Clamp tile size to hardware limits */
716        tile_size = MIN2(tile_size, 16 * 16);
717        assert(tile_size >= 4 * 4);
718
719        /* Colour buffer allocations must be 1K aligned. */
720        unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
721        assert(cbuf_allocation <= dev->optimal_tib_size && "tile too big");
722
723        int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
724        bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);
725
726        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
727#if PAN_ARCH >= 6
728                bool force_clean_write = pan_force_clean_write(fb, tile_size);
729
730                cfg.sample_locations =
731                        panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
732                cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write);
733                cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write);
734                cfg.post_frame  = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write);
735                cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
736                cfg.tiler = tiler_ctx->bifrost;
737#endif
738                cfg.width = fb->width;
739                cfg.height = fb->height;
740                cfg.bound_max_x = fb->width - 1;
741                cfg.bound_max_y = fb->height - 1;
742
743                cfg.effective_tile_size = tile_size;
744                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
745                cfg.render_target_count = MAX2(fb->rt_count, 1);
746
747                /* Default to 24 bit depth if there's no surface. */
748                cfg.z_internal_format =
749                        fb->zs.view.zs ?
750                        panfrost_get_z_internal_format(fb->zs.view.zs->format) :
751                        MALI_Z_INTERNAL_FORMAT_D24;
752
753                cfg.z_clear = fb->zs.clear_value.depth;
754                cfg.s_clear = fb->zs.clear_value.stencil;
755                cfg.color_buffer_allocation = cbuf_allocation;
756                cfg.sample_count = fb->nr_samples;
757                cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
758                cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
759                cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
760                cfg.has_zs_crc_extension = has_zs_crc_ext;
761
762                if (crc_rt >= 0) {
763                        bool *valid = fb->rts[crc_rt].crc_valid;
764                        bool full = !fb->extent.minx && !fb->extent.miny &&
765                                    fb->extent.maxx == (fb->width - 1) &&
766                                    fb->extent.maxy == (fb->height - 1);
767
768                        cfg.crc_read_enable = *valid;
769
770                        /* If the data is currently invalid, still write CRC
771                         * data if we are doing a full write, so that it is
772                         * valid for next time. */
773                        cfg.crc_write_enable = *valid || full;
774
775                        *valid |= full;
776                }
777
778#if PAN_ARCH >= 9
779                cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
780                cfg.first_provoking_vertex = fb->first_provoking_vertex;
781#endif
782        }
783
784#if PAN_ARCH >= 6
785        pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
786#else
787        pan_emit_midgard_tiler(dev, fb, tiler_ctx,
788                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));
789
790        /* All weights set to 0, nothing to do here */
791        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
792#endif
793
794        if (has_zs_crc_ext) {
795                pan_emit_zs_crc_ext(fb, crc_rt,
796                                    out + pan_size(FRAMEBUFFER));
797                rtd += pan_size(ZS_CRC_EXTENSION);
798                tags |= MALI_FBD_TAG_HAS_ZS_RT;
799        }
800
801        unsigned rt_count = MAX2(fb->rt_count, 1);
802        unsigned cbuf_offset = 0;
803        for (unsigned i = 0; i < rt_count; i++) {
804                pan_emit_rt(fb, i, cbuf_offset, rtd);
805                rtd += pan_size(RENDER_TARGET);
806                if (!fb->rts[i].view)
807                        continue;
808
809                cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
810                               tile_size * fb->rts[i].view->image->layout.nr_samples;
811
812                if (i != crc_rt)
813                        *(fb->rts[i].crc_valid) = false;
814        }
815        tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;
816
817        return tags;
818}
819#else /* PAN_ARCH == 4 */
820unsigned
821GENX(pan_emit_fbd)(const struct panfrost_device *dev,
822                   const struct pan_fb_info *fb,
823                   const struct pan_tls_info *tls,
824                   const struct pan_tiler_context *tiler_ctx,
825                   void *fbd)
826{
827        assert(fb->rt_count <= 1);
828
829        GENX(pan_emit_tls)(tls,
830                           pan_section_ptr(fbd, FRAMEBUFFER,
831                                           LOCAL_STORAGE));
832        pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
833                cfg.bound_max_x = fb->width - 1;
834                cfg.bound_max_y = fb->height - 1;
835                cfg.dithering_enable = true;
836                cfg.clean_pixel_write_enable = true;
837                cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
838                if (fb->rts[0].clear) {
839                        cfg.clear_color_0 = fb->rts[0].clear_value[0];
840                        cfg.clear_color_1 = fb->rts[0].clear_value[1];
841                        cfg.clear_color_2 = fb->rts[0].clear_value[2];
842                        cfg.clear_color_3 = fb->rts[0].clear_value[3];
843                }
844
845                if (fb->zs.clear.z)
846                        cfg.z_clear = fb->zs.clear_value.depth;
847
848                if (fb->zs.clear.s)
849                        cfg.s_clear = fb->zs.clear_value.stencil;
850
851                if (fb->rt_count && fb->rts[0].view) {
852                        const struct pan_image_view *rt = fb->rts[0].view;
853
854                        const struct util_format_description *desc =
855                                util_format_description(rt->format);
856
857                        /* The swizzle for rendering is inverted from texturing */
858                        unsigned char swizzle[4];
859                        panfrost_invert_swizzle(desc->swizzle, swizzle);
860                        cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
861
862                        struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
863                        if (fmt.internal) {
864                                cfg.internal_format = fmt.internal;
865                                cfg.color_writeback_format = fmt.writeback;
866                        } else {
867                                unreachable("raw formats not finished for SFBD");
868                        }
869
870                        unsigned level = rt->first_level;
871                        struct pan_surface surf;
872
873                        pan_iview_get_surface(rt, 0, 0, 0, &surf);
874
875                        cfg.color_write_enable = !fb->rts[0].discard;
876                        cfg.color_writeback.base = surf.data;
877                        cfg.color_writeback.row_stride =
878	                        rt->image->layout.slices[level].row_stride;
879
880                        cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
881                        assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
882                               cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
883
884                        if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
885                                const struct pan_image_slice_layout *slice =
886                                        &rt->image->layout.slices[level];
887
888                                cfg.crc_buffer.row_stride = slice->crc.stride;
889                                if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
890                                        cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
891                                                              rt->image->data.offset +
892                                                              slice->crc.offset;
893                                } else {
894                                        cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
895                                                              rt->image->crc.offset +
896                                                              slice->crc.offset;
897                                }
898                        }
899                }
900
901                if (fb->zs.view.zs) {
902                        const struct pan_image_view *zs = fb->zs.view.zs;
903                        unsigned level = zs->first_level;
904                        struct pan_surface surf;
905
906                        pan_iview_get_surface(zs, 0, 0, 0, &surf);
907
908                        cfg.zs_write_enable = !fb->zs.discard.z;
909                        cfg.zs_writeback.base = surf.data;
910                        cfg.zs_writeback.row_stride =
911                                zs->image->layout.slices[level].row_stride;
912                        cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
913                        assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
914                               cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
915
916                        cfg.zs_format = translate_zs_format(zs->format);
917                }
918
919                cfg.sample_count = fb->nr_samples;
920
921                if (fb->rt_count)
922                        cfg.msaa = mali_sampling_mode(fb->rts[0].view);
923        }
924
925        pan_emit_midgard_tiler(dev, fb, tiler_ctx,
926                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));
927
928        /* All weights set to 0, nothing to do here */
929        pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
930
931        pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
932        pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
933        return 0;
934}
935#endif
936
937#if PAN_ARCH >= 6
938void
939GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
940                          void *out)
941{
942        pan_pack(out, TILER_HEAP, heap) {
943                heap.size = dev->tiler_heap->size;
944                heap.base = dev->tiler_heap->ptr.gpu;
945                heap.bottom = dev->tiler_heap->ptr.gpu;
946                heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
947        }
948}
949
950void
951GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
952                         unsigned fb_width, unsigned fb_height,
953                         unsigned nr_samples,
954                         bool first_provoking_vertex,
955                         mali_ptr heap,
956                         void *out)
957{
958        unsigned max_levels = dev->tiler_features.max_levels;
959        assert(max_levels >= 2);
960
961        pan_pack(out, TILER_CONTEXT, tiler) {
962                /* TODO: Select hierarchy mask more effectively */
963                tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
964
965                /* For large framebuffers, disable the smallest bin size to
966                 * avoid pathological tiler memory usage. Required to avoid OOM
967                 * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on
968                 * Mali-G57.
969                 */
970                if (MAX2(fb_width, fb_height) >= 4096)
971                        tiler.hierarchy_mask &= ~1;
972
973                tiler.fb_width = fb_width;
974                tiler.fb_height = fb_height;
975                tiler.heap = heap;
976                tiler.sample_pattern = pan_sample_pattern(nr_samples);
977#if PAN_ARCH >= 9
978                tiler.first_provoking_vertex = first_provoking_vertex;
979#endif
980        }
981}
982#endif
983
984void
985GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
986                            mali_ptr fbd,
987                            void *out)
988{
989        pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
990                header.type = MALI_JOB_TYPE_FRAGMENT;
991                header.index = 1;
992        }
993
994        pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
995                payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
996                payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
997                payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
998                payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
999                payload.framebuffer = fbd;
1000
1001#if PAN_ARCH >= 5
1002                if (fb->tile_map.base) {
1003                        payload.has_tile_enable_map = true;
1004                        payload.tile_enable_map = fb->tile_map.base;
1005                        payload.tile_enable_map_row_stride = fb->tile_map.stride;
1006                }
1007#endif
1008        }
1009}
1010