1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2018 Alyssa Rosenzweig
3bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd.
4bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
9bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
11bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
12bf215546Sopenharmony_ci *
13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
15bf215546Sopenharmony_ci * Software.
16bf215546Sopenharmony_ci *
17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23bf215546Sopenharmony_ci * SOFTWARE.
24bf215546Sopenharmony_ci */
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "util/macros.h"
27bf215546Sopenharmony_ci#include "util/u_prim.h"
28bf215546Sopenharmony_ci#include "util/u_vbuf.h"
29bf215546Sopenharmony_ci#include "util/u_helpers.h"
30bf215546Sopenharmony_ci#include "util/u_draw.h"
31bf215546Sopenharmony_ci#include "util/u_memory.h"
32bf215546Sopenharmony_ci#include "pipe/p_defines.h"
33bf215546Sopenharmony_ci#include "pipe/p_state.h"
34bf215546Sopenharmony_ci#include "gallium/auxiliary/util/u_blend.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "genxml/gen_macros.h"
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#include "pan_pool.h"
39bf215546Sopenharmony_ci#include "pan_bo.h"
40bf215546Sopenharmony_ci#include "pan_blend.h"
41bf215546Sopenharmony_ci#include "pan_context.h"
42bf215546Sopenharmony_ci#include "pan_job.h"
43bf215546Sopenharmony_ci#include "pan_shader.h"
44bf215546Sopenharmony_ci#include "pan_texture.h"
45bf215546Sopenharmony_ci#include "pan_util.h"
46bf215546Sopenharmony_ci#include "pan_indirect_draw.h"
47bf215546Sopenharmony_ci#include "pan_indirect_dispatch.h"
48bf215546Sopenharmony_ci#include "pan_blitter.h"
49bf215546Sopenharmony_ci
/* Evaluates to 1 only when compiling for PAN_ARCH == 7, 0 otherwise.
 * NOTE(review): presumably gates the GPU-side indirect draw/dispatch paths
 * (see pan_indirect_draw.h / pan_indirect_dispatch.h) -- confirm at the users.
 */
#define PAN_GPU_INDIRECTS (PAN_ARCH == 7)
51bf215546Sopenharmony_ci
/* Rasterizer state object: the Gallium rasterizer state plus, on
 * PAN_ARCH <= 7, words prepacked for the renderer state descriptor (RSD).
 */
struct panfrost_rasterizer {
        /* Gallium rasterizer state */
        struct pipe_rasterizer_state base;

#if PAN_ARCH <= 7
        /* Partially packed RSD words */
        struct mali_multisample_misc_packed multisample;
        struct mali_stencil_mask_misc_packed stencil_misc;
#endif
};
61bf215546Sopenharmony_ci
/* Depth/stencil/alpha state object: the Gallium state, a few derived flags,
 * and architecture-specific prepacked hardware words.
 */
struct panfrost_zsa_state {
        /* Gallium depth/stencil/alpha state */
        struct pipe_depth_stencil_alpha_state base;

        /* Is any depth, stencil, or alpha testing enabled? */
        bool enabled;

        /* Do the depth and stencil tests always pass? This ignores write
         * masks, we are only interested in whether pixels may be killed.
         */
        bool zs_always_passes;

        /* Are depth or stencil writes possible? */
        bool writes_zs;

#if PAN_ARCH <= 7
        /* Prepacked words from the RSD */
        struct mali_multisample_misc_packed rsd_depth;
        struct mali_stencil_mask_misc_packed rsd_stencil;
        struct mali_stencil_packed stencil_front, stencil_back;
#else
        /* Depth/stencil descriptor template */
        struct mali_depth_stencil_packed desc;
#endif
};
86bf215546Sopenharmony_ci
/* Sampler state object built by panfrost_create_sampler_state() */
struct panfrost_sampler_state {
        /* Copy of the Gallium sampler state the object was created from */
        struct pipe_sampler_state base;

        /* Hardware SAMPLER descriptor, packed once at creation time */
        struct mali_sampler_packed hw;
};
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci/* Misnomer: Sampler view corresponds to textures, not samplers */
93bf215546Sopenharmony_ci
struct panfrost_sampler_view {
        /* Gallium sampler view */
        struct pipe_sampler_view base;

        /* NOTE(review): presumably the pool allocation backing the texture
         * descriptor -- confirm where the view is created */
        struct panfrost_pool_ref state;

        /* NOTE(review): the name suggests a packed texture descriptor used
         * on Bifrost -- confirm against the code that fills it in */
        struct mali_texture_packed bifrost_descriptor;

        /* NOTE(review): texture_bo and modifier look like a snapshot of the
         * backing resource taken when the descriptor was built -- confirm */
        mali_ptr texture_bo;
        uint64_t modifier;

        /* Pool used to allocate the descriptor. If NULL, defaults to the global
         * descriptor pool. Can be set for short lived descriptors, useful for
         * shader images on Valhall.
         */
        struct panfrost_pool *pool;
};
107bf215546Sopenharmony_ci
/* Vertex elements state object: the Gallium vertex elements plus
 * architecture-specific data derived from them.
 */
struct panfrost_vertex_state {
        /* Number of valid entries in pipe[] */
        unsigned num_elements;
        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];

#if PAN_ARCH >= 9
        /* Packed attribute descriptor. All fields are set at CSO create time
         * except for stride, which must be ORed in at draw time
         */
        struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS];
#else
        /* buffers corresponds to attribute buffer, element_buffers corresponds
         * to an index in buffers for each vertex element */
        struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS];
        unsigned element_buffer[PIPE_MAX_ATTRIBS];

        /* NOTE(review): presumably the number of valid entries in buffers[]
         * -- confirm where the state is created */
        unsigned nr_bufs;

        /* NOTE(review): presumably one hardware format per vertex element
         * -- confirm where the state is created */
        unsigned formats[PIPE_MAX_ATTRIBS];
#endif
};
127bf215546Sopenharmony_ci
/* Statically assert that PIPE_* enums match the hardware enums.
 * (As long as they match, we don't need to translate them.)
 *
 * panfrost_sampler_compare_func() depends on this: it casts a Gallium
 * compare function straight to enum mali_func.
 */
static_assert((int)PIPE_FUNC_NEVER    == MALI_FUNC_NEVER,     "must match");
static_assert((int)PIPE_FUNC_LESS     == MALI_FUNC_LESS,      "must match");
static_assert((int)PIPE_FUNC_EQUAL    == MALI_FUNC_EQUAL,     "must match");
static_assert((int)PIPE_FUNC_LEQUAL   == MALI_FUNC_LEQUAL,    "must match");
static_assert((int)PIPE_FUNC_GREATER  == MALI_FUNC_GREATER,   "must match");
static_assert((int)PIPE_FUNC_NOTEQUAL == MALI_FUNC_NOT_EQUAL, "must match");
static_assert((int)PIPE_FUNC_GEQUAL   == MALI_FUNC_GEQUAL,    "must match");
static_assert((int)PIPE_FUNC_ALWAYS   == MALI_FUNC_ALWAYS,    "must match");
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_cistatic inline enum mali_sample_pattern
141bf215546Sopenharmony_cipanfrost_sample_pattern(unsigned samples)
142bf215546Sopenharmony_ci{
143bf215546Sopenharmony_ci        switch (samples) {
144bf215546Sopenharmony_ci        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
145bf215546Sopenharmony_ci        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
146bf215546Sopenharmony_ci        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
147bf215546Sopenharmony_ci        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
148bf215546Sopenharmony_ci        default: unreachable("Unsupported sample count");
149bf215546Sopenharmony_ci        }
150bf215546Sopenharmony_ci}
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_cistatic unsigned
153bf215546Sopenharmony_citranslate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
154bf215546Sopenharmony_ci{
155bf215546Sopenharmony_ci        /* CLAMP is only supported on Midgard, where it is broken for nearest
156bf215546Sopenharmony_ci         * filtering. Use CLAMP_TO_EDGE in that case.
157bf215546Sopenharmony_ci         */
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci        switch (w) {
160bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
161bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
162bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
163bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
164bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
165bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci#if PAN_ARCH <= 5
168bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_CLAMP:
169bf215546Sopenharmony_ci                return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
170bf215546Sopenharmony_ci                                       MALI_WRAP_MODE_CLAMP;
171bf215546Sopenharmony_ci        case PIPE_TEX_WRAP_MIRROR_CLAMP:
172bf215546Sopenharmony_ci                return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
173bf215546Sopenharmony_ci                                       MALI_WRAP_MODE_MIRRORED_CLAMP;
174bf215546Sopenharmony_ci#endif
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci        default: unreachable("Invalid wrap");
177bf215546Sopenharmony_ci        }
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
/* The hardware compares in the wrong order, so we have to flip before
 * encoding. Yes, really. */
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_cistatic enum mali_func
184bf215546Sopenharmony_cipanfrost_sampler_compare_func(const struct pipe_sampler_state *cso)
185bf215546Sopenharmony_ci{
186bf215546Sopenharmony_ci        return !cso->compare_mode ? MALI_FUNC_NEVER :
187bf215546Sopenharmony_ci                panfrost_flip_compare_func((enum mali_func) cso->compare_func);
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_cistatic enum mali_mipmap_mode
191bf215546Sopenharmony_cipan_pipe_to_mipmode(enum pipe_tex_mipfilter f)
192bf215546Sopenharmony_ci{
193bf215546Sopenharmony_ci        switch (f) {
194bf215546Sopenharmony_ci        case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
195bf215546Sopenharmony_ci        case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
196bf215546Sopenharmony_ci#if PAN_ARCH >= 6
197bf215546Sopenharmony_ci        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE;
198bf215546Sopenharmony_ci#else
199bf215546Sopenharmony_ci        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NEAREST;
200bf215546Sopenharmony_ci#endif
201bf215546Sopenharmony_ci        default: unreachable("Invalid");
202bf215546Sopenharmony_ci        }
203bf215546Sopenharmony_ci}
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_cistatic void *
207bf215546Sopenharmony_cipanfrost_create_sampler_state(
208bf215546Sopenharmony_ci        struct pipe_context *pctx,
209bf215546Sopenharmony_ci        const struct pipe_sampler_state *cso)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
212bf215546Sopenharmony_ci        so->base = *cso;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci        bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST;
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci        pan_pack(&so->hw, SAMPLER, cfg) {
217bf215546Sopenharmony_ci                cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
218bf215546Sopenharmony_ci                cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci                cfg.normalized_coordinates = cso->normalized_coords;
221bf215546Sopenharmony_ci                cfg.lod_bias = FIXED_16(cso->lod_bias, true);
222bf215546Sopenharmony_ci                cfg.minimum_lod = FIXED_16(cso->min_lod, false);
223bf215546Sopenharmony_ci                cfg.maximum_lod = FIXED_16(cso->max_lod, false);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci                cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest);
226bf215546Sopenharmony_ci                cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest);
227bf215546Sopenharmony_ci                cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest);
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci                cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter);
230bf215546Sopenharmony_ci                cfg.compare_function = panfrost_sampler_compare_func(cso);
231bf215546Sopenharmony_ci                cfg.seamless_cube_map = cso->seamless_cube_map;
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci                cfg.border_color_r = cso->border_color.ui[0];
234bf215546Sopenharmony_ci                cfg.border_color_g = cso->border_color.ui[1];
235bf215546Sopenharmony_ci                cfg.border_color_b = cso->border_color.ui[2];
236bf215546Sopenharmony_ci                cfg.border_color_a = cso->border_color.ui[3];
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci#if PAN_ARCH >= 6
239bf215546Sopenharmony_ci                if (cso->max_anisotropy > 1) {
240bf215546Sopenharmony_ci                        cfg.maximum_anisotropy = cso->max_anisotropy;
241bf215546Sopenharmony_ci                        cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC;
242bf215546Sopenharmony_ci                }
243bf215546Sopenharmony_ci#else
244bf215546Sopenharmony_ci                /* Emulate disabled mipmapping by clamping the LOD as tight as
245bf215546Sopenharmony_ci                 * possible (from 0 to epsilon = 1/256) */
246bf215546Sopenharmony_ci                if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
247bf215546Sopenharmony_ci                        cfg.maximum_lod = cfg.minimum_lod + 1;
248bf215546Sopenharmony_ci#endif
249bf215546Sopenharmony_ci        }
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci        return so;
252bf215546Sopenharmony_ci}
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_cistatic bool
255bf215546Sopenharmony_cipanfrost_fs_required(
256bf215546Sopenharmony_ci                struct panfrost_shader_state *fs,
257bf215546Sopenharmony_ci                struct panfrost_blend_state *blend,
258bf215546Sopenharmony_ci                struct pipe_framebuffer_state *state,
259bf215546Sopenharmony_ci                const struct panfrost_zsa_state *zsa)
260bf215546Sopenharmony_ci{
261bf215546Sopenharmony_ci        /* If we generally have side effects. This inclues use of discard,
262bf215546Sopenharmony_ci         * which can affect the results of an occlusion query. */
263bf215546Sopenharmony_ci        if (fs->info.fs.sidefx)
264bf215546Sopenharmony_ci                return true;
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci        /* Using an empty FS requires early-z to be enabled, but alpha test
267bf215546Sopenharmony_ci         * needs it disabled. Alpha test is only native on Midgard, so only
268bf215546Sopenharmony_ci         * check there.
269bf215546Sopenharmony_ci         */
270bf215546Sopenharmony_ci        if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS)
271bf215546Sopenharmony_ci                return true;
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci        /* If colour is written we need to execute */
274bf215546Sopenharmony_ci        for (unsigned i = 0; i < state->nr_cbufs; ++i) {
275bf215546Sopenharmony_ci                if (state->cbufs[i] && !blend->info[i].no_colour)
276bf215546Sopenharmony_ci                        return true;
277bf215546Sopenharmony_ci        }
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci        /* If depth is written and not implied we need to execute.
280bf215546Sopenharmony_ci         * TODO: Predicate on Z/S writes being enabled */
281bf215546Sopenharmony_ci        return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
282bf215546Sopenharmony_ci}
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci/* Get pointers to the blend shaders bound to each active render target. Used
285bf215546Sopenharmony_ci * to emit the blend descriptors, as well as the fragment renderer state
286bf215546Sopenharmony_ci * descriptor.
287bf215546Sopenharmony_ci */
288bf215546Sopenharmony_cistatic void
289bf215546Sopenharmony_cipanfrost_get_blend_shaders(struct panfrost_batch *batch,
290bf215546Sopenharmony_ci                           mali_ptr *blend_shaders)
291bf215546Sopenharmony_ci{
292bf215546Sopenharmony_ci        unsigned shader_offset = 0;
293bf215546Sopenharmony_ci        struct panfrost_bo *shader_bo = NULL;
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci        for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) {
296bf215546Sopenharmony_ci                if (batch->key.cbufs[c]) {
297bf215546Sopenharmony_ci                        blend_shaders[c] = panfrost_get_blend(batch,
298bf215546Sopenharmony_ci                                        c, &shader_bo, &shader_offset);
299bf215546Sopenharmony_ci                }
300bf215546Sopenharmony_ci        }
301bf215546Sopenharmony_ci}
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci#if PAN_ARCH >= 5
304bf215546Sopenharmony_ciUNUSED static uint16_t
305bf215546Sopenharmony_cipack_blend_constant(enum pipe_format format, float cons)
306bf215546Sopenharmony_ci{
307bf215546Sopenharmony_ci        const struct util_format_description *format_desc =
308bf215546Sopenharmony_ci                util_format_description(format);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci        unsigned chan_size = 0;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci        for (unsigned i = 0; i < format_desc->nr_channels; i++)
313bf215546Sopenharmony_ci                chan_size = MAX2(format_desc->channel[0].size, chan_size);
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci        uint16_t unorm = (cons * ((1 << chan_size) - 1));
316bf215546Sopenharmony_ci        return unorm << (16 - chan_size);
317bf215546Sopenharmony_ci}
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci/*
320bf215546Sopenharmony_ci * Determine whether to set the respective overdraw alpha flag.
321bf215546Sopenharmony_ci *
322bf215546Sopenharmony_ci * The overdraw alpha=1 flag should be set when alpha=1 implies full overdraw,
323bf215546Sopenharmony_ci * equivalently, all enabled render targets have alpha_one_store set. Likewise,
324bf215546Sopenharmony_ci * overdraw alpha=0 should be set when alpha=0 implies no overdraw,
325bf215546Sopenharmony_ci * equivalently, all enabled render targets have alpha_zero_nop set.
326bf215546Sopenharmony_ci */
327bf215546Sopenharmony_cistatic bool
328bf215546Sopenharmony_cipanfrost_overdraw_alpha(const struct panfrost_context *ctx, bool zero)
329bf215546Sopenharmony_ci{
330bf215546Sopenharmony_ci        const struct panfrost_blend_state *so = ctx->blend;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci        for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
333bf215546Sopenharmony_ci                const struct pan_blend_info info = so->info[i];
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci                bool enabled = ctx->pipe_framebuffer.cbufs[i] && info.no_colour;
336bf215546Sopenharmony_ci                bool flag = zero ? info.alpha_zero_nop : info.alpha_one_store;
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci                if (enabled && !flag)
339bf215546Sopenharmony_ci                        return false;
340bf215546Sopenharmony_ci        }
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci        return true;
343bf215546Sopenharmony_ci}
344bf215546Sopenharmony_ci
/*
 * Pack one BLEND descriptor per render target into the buffer at rts.
 *
 * batch:         batch whose framebuffer key and context blend state are
 *                consulted.
 * rts:           destination buffer; descriptor i is written at byte offset
 *                i * pan_size(BLEND).
 * blend_shaders: per-render-target blend shader address. A zero entry means
 *                no blend shader is used for that render target, in which
 *                case the blend constant and equation are packed instead.
 *
 * At least one descriptor is written even when there are no colour buffers.
 */
static void
panfrost_emit_blend(struct panfrost_batch *batch, void *rts, mali_ptr *blend_shaders)
{
        unsigned rt_count = batch->key.nr_cbufs;
        struct panfrost_context *ctx = batch->ctx;
        const struct panfrost_blend_state *so = ctx->blend;
        bool dithered = so->base.dither;

        /* Always have at least one render target for depth-only passes */
        for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
                struct mali_blend_packed *packed = rts + (i * pan_size(BLEND));

                /* Disable blending for unbacked render targets. The
                 * rt_count == 0 test comes first so cbufs[] and info[] are
                 * not consulted for the depth-only placeholder. */
                if (rt_count == 0 || !batch->key.cbufs[i] || so->info[i].no_colour) {
                        pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) {
                                cfg.enable = false;
#if PAN_ARCH >= 6
                                cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
                        }

                        continue;
                }

                struct pan_blend_info info = so->info[i];
                enum pipe_format format = batch->key.cbufs[i]->format;
                float cons = pan_blend_get_constant(info.constant_mask,
                                                    ctx->blend_color.color);

                /* Word 0: Flags and constant */
                pan_pack(packed, BLEND, cfg) {
                        cfg.srgb = util_format_is_srgb(format);
                        cfg.load_destination = info.load_dest;
                        cfg.round_to_fb_precision = !dithered;
                        cfg.alpha_to_one = ctx->blend->base.alpha_to_one;
#if PAN_ARCH >= 6
                        /* The constant is only packed when no blend shader
                         * is in use */
                        if (!blend_shaders[i])
                                cfg.constant = pack_blend_constant(format, cons);
#else
                        cfg.blend_shader = (blend_shaders[i] != 0);

                        /* Either the shader address or the constant is
                         * packed, never both */
                        if (blend_shaders[i])
                                cfg.shader_pc = blend_shaders[i];
                        else
                                cfg.constant = cons;
#endif
                }

                if (!blend_shaders[i]) {
                        /* Word 1: Blend Equation */
                        STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
                        /* The equation goes in word 1 on PAN_ARCH >= 6 and
                         * in word 2 otherwise */
                        packed->opaque[PAN_ARCH >= 6 ? 1 : 2] = so->equation[i];
                }

#if PAN_ARCH >= 6
                const struct panfrost_device *dev = pan_device(ctx->base.screen);
                struct panfrost_shader_state *fs =
                        panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

                /* Words 2 and 3: Internal blend */
                if (blend_shaders[i]) {
                        /* The blend shader's address needs to be at
                         * the same top 32 bit as the fragment shader.
                         * TODO: Ensure that's always the case.
                         */
                        assert(!fs->bin.bo ||
                                        (blend_shaders[i] & (0xffffffffull << 32)) ==
                                        (fs->bin.gpu & (0xffffffffull << 32)));

                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = MALI_BLEND_MODE_SHADER;
                                /* Only the low 32 bits of the address are
                                 * stored */
                                cfg.shader.pc = (u32) blend_shaders[i];

#if PAN_ARCH <= 7
                                unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
                                /* The return offset must be a multiple of 8 */
                                assert(!(ret_offset & 0x7));

                                /* A zero offset leaves the return value
                                 * at zero */
                                cfg.shader.return_value = ret_offset ?
                                        fs->bin.gpu + ret_offset : 0;
#endif
                        }
                } else {
                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = info.opaque ?
                                        MALI_BLEND_MODE_OPAQUE :
                                        MALI_BLEND_MODE_FIXED_FUNCTION;

                                /* If we want the conversion to work properly,
                                 * num_comps must be set to 4
                                 */
                                cfg.fixed_function.num_comps = 4;
                                cfg.fixed_function.conversion.memory_format =
                                        panfrost_format_to_bifrost_blend(dev, format, dithered);
                                cfg.fixed_function.conversion.register_format =
                                        fs->info.bifrost.blend[i].format;
                                cfg.fixed_function.rt = i;

#if PAN_ARCH <= 7
                                /* The alpha flags are only set for
                                 * non-opaque blending */
                                if (!info.opaque) {
                                        cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop;
                                        cfg.fixed_function.alpha_one_store = info.alpha_one_store;
                                }
#endif
                        }
                }
#endif
        }
}
453bf215546Sopenharmony_ci#endif
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_cistatic inline bool
456bf215546Sopenharmony_cipan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, struct panfrost_shader_state *fs)
457bf215546Sopenharmony_ci{
458bf215546Sopenharmony_ci        /* Track if any colour buffer is reused across draws, either
459bf215546Sopenharmony_ci         * from reading it directly, or from failing to write it
460bf215546Sopenharmony_ci         */
461bf215546Sopenharmony_ci        unsigned rt_mask = ctx->fb_rt_mask;
462bf215546Sopenharmony_ci        uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0);
463bf215546Sopenharmony_ci        bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask);
464bf215546Sopenharmony_ci        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci        return fs->info.fs.can_fpk &&
467bf215546Sopenharmony_ci                !(rt_mask & ~rt_written) &&
468bf215546Sopenharmony_ci                !alpha_to_coverage &&
469bf215546Sopenharmony_ci                !blend_reads_dest;
470bf215546Sopenharmony_ci}
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_cistatic mali_ptr
473bf215546Sopenharmony_cipanfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage)
474bf215546Sopenharmony_ci{
475bf215546Sopenharmony_ci        struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage);
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX);
478bf215546Sopenharmony_ci        panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX);
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci        return ss->state.gpu;
481bf215546Sopenharmony_ci}
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci#if PAN_ARCH <= 7
484bf215546Sopenharmony_ci/* Construct a partial RSD corresponding to no executed fragment shader, and
485bf215546Sopenharmony_ci * merge with the existing partial RSD. */
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_cistatic void
488bf215546Sopenharmony_cipan_merge_empty_fs(struct mali_renderer_state_packed *rsd)
489bf215546Sopenharmony_ci{
490bf215546Sopenharmony_ci        struct mali_renderer_state_packed empty_rsd;
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci        pan_pack(&empty_rsd, RENDERER_STATE, cfg) {
493bf215546Sopenharmony_ci#if PAN_ARCH >= 6
494bf215546Sopenharmony_ci                cfg.properties.shader_modifies_coverage = true;
495bf215546Sopenharmony_ci                cfg.properties.allow_forward_pixel_to_kill = true;
496bf215546Sopenharmony_ci                cfg.properties.allow_forward_pixel_to_be_killed = true;
497bf215546Sopenharmony_ci                cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci                /* Alpha isn't written so these are vacuous */
500bf215546Sopenharmony_ci                cfg.multisample_misc.overdraw_alpha0 = true;
501bf215546Sopenharmony_ci                cfg.multisample_misc.overdraw_alpha1 = true;
502bf215546Sopenharmony_ci#else
503bf215546Sopenharmony_ci                cfg.shader.shader = 0x1;
504bf215546Sopenharmony_ci                cfg.properties.work_register_count = 1;
505bf215546Sopenharmony_ci                cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
506bf215546Sopenharmony_ci                cfg.properties.force_early_z = true;
507bf215546Sopenharmony_ci#endif
508bf215546Sopenharmony_ci        }
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci        pan_merge((*rsd), empty_rsd, RENDERER_STATE);
511bf215546Sopenharmony_ci}
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_cistatic void
514bf215546Sopenharmony_cipanfrost_prepare_fs_state(struct panfrost_context *ctx,
515bf215546Sopenharmony_ci                          mali_ptr *blend_shaders,
516bf215546Sopenharmony_ci                          struct mali_renderer_state_packed *rsd)
517bf215546Sopenharmony_ci{
518bf215546Sopenharmony_ci        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
519bf215546Sopenharmony_ci        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
520bf215546Sopenharmony_ci        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
521bf215546Sopenharmony_ci        struct panfrost_blend_state *so = ctx->blend;
522bf215546Sopenharmony_ci        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
523bf215546Sopenharmony_ci        bool msaa = rast->multisample;
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci        unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci        bool has_blend_shader = false;
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci        for (unsigned c = 0; c < rt_count; ++c)
530bf215546Sopenharmony_ci                has_blend_shader |= (blend_shaders[c] != 0);
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci        bool has_oq = ctx->occlusion_query && ctx->active_queries;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci        pan_pack(rsd, RENDERER_STATE, cfg) {
535bf215546Sopenharmony_ci                if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) {
536bf215546Sopenharmony_ci#if PAN_ARCH >= 6
537bf215546Sopenharmony_ci                        struct pan_earlyzs_state earlyzs =
538bf215546Sopenharmony_ci                               pan_earlyzs_get(fs->earlyzs,
539bf215546Sopenharmony_ci                                               ctx->depth_stencil->writes_zs ||
540bf215546Sopenharmony_ci                                               has_oq,
541bf215546Sopenharmony_ci                                               ctx->blend->base.alpha_to_coverage,
542bf215546Sopenharmony_ci                                               ctx->depth_stencil->zs_always_passes);
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci                        cfg.properties.pixel_kill_operation = earlyzs.kill;
545bf215546Sopenharmony_ci                        cfg.properties.zs_update_operation = earlyzs.update;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci                        cfg.properties.allow_forward_pixel_to_kill =
548bf215546Sopenharmony_ci                                pan_allow_forward_pixel_to_kill(ctx, fs);
549bf215546Sopenharmony_ci#else
550bf215546Sopenharmony_ci                        cfg.properties.force_early_z =
551bf215546Sopenharmony_ci                                fs->info.fs.can_early_z && !alpha_to_coverage &&
552bf215546Sopenharmony_ci                                ((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS);
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_ci                        /* TODO: Reduce this limit? */
555bf215546Sopenharmony_ci                        if (has_blend_shader)
556bf215546Sopenharmony_ci                                cfg.properties.work_register_count = MAX2(fs->info.work_reg_count, 8);
557bf215546Sopenharmony_ci                        else
558bf215546Sopenharmony_ci                                cfg.properties.work_register_count = fs->info.work_reg_count;
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci                        /* Hardware quirks around early-zs forcing without a
561bf215546Sopenharmony_ci                         * depth buffer. Note this breaks occlusion queries. */
562bf215546Sopenharmony_ci                        bool force_ez_with_discard = !zsa->enabled && !has_oq;
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci                        cfg.properties.shader_reads_tilebuffer =
565bf215546Sopenharmony_ci                                force_ez_with_discard && fs->info.fs.can_discard;
566bf215546Sopenharmony_ci                        cfg.properties.shader_contains_discard =
567bf215546Sopenharmony_ci                                !force_ez_with_discard && fs->info.fs.can_discard;
568bf215546Sopenharmony_ci#endif
569bf215546Sopenharmony_ci                }
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci#if PAN_ARCH == 4
572bf215546Sopenharmony_ci                if (rt_count > 0) {
573bf215546Sopenharmony_ci                        cfg.multisample_misc.load_destination = so->info[0].load_dest;
574bf215546Sopenharmony_ci                        cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0);
575bf215546Sopenharmony_ci                        cfg.stencil_mask_misc.write_enable = !so->info[0].no_colour;
576bf215546Sopenharmony_ci                        cfg.stencil_mask_misc.srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format);
577bf215546Sopenharmony_ci                        cfg.stencil_mask_misc.dither_disable = !so->base.dither;
578bf215546Sopenharmony_ci                        cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one;
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci                        if (blend_shaders[0]) {
581bf215546Sopenharmony_ci                                cfg.blend_shader = blend_shaders[0];
582bf215546Sopenharmony_ci                        } else {
583bf215546Sopenharmony_ci                                cfg.blend_constant = pan_blend_get_constant(
584bf215546Sopenharmony_ci                                                so->info[0].constant_mask,
585bf215546Sopenharmony_ci                                                ctx->blend_color.color);
586bf215546Sopenharmony_ci                        }
587bf215546Sopenharmony_ci                } else {
588bf215546Sopenharmony_ci                        /* If there is no colour buffer, leaving fields default is
589bf215546Sopenharmony_ci                         * fine, except for blending which is nonnullable */
590bf215546Sopenharmony_ci                        cfg.blend_equation.color_mask = 0xf;
591bf215546Sopenharmony_ci                        cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
592bf215546Sopenharmony_ci                        cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
593bf215546Sopenharmony_ci                        cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
594bf215546Sopenharmony_ci                        cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
595bf215546Sopenharmony_ci                        cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
596bf215546Sopenharmony_ci                        cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
597bf215546Sopenharmony_ci                }
598bf215546Sopenharmony_ci#elif PAN_ARCH == 5
599bf215546Sopenharmony_ci                /* Workaround */
600bf215546Sopenharmony_ci                cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count);
601bf215546Sopenharmony_ci#endif
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci                cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF;
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci                cfg.multisample_misc.evaluate_per_sample =
606bf215546Sopenharmony_ci                        msaa && (ctx->min_samples > 1);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci#if PAN_ARCH >= 6
609bf215546Sopenharmony_ci                /* MSAA blend shaders need to pass their sample ID to
610bf215546Sopenharmony_ci                 * LD_TILE/ST_TILE, so we must preload it. Additionally, we
611bf215546Sopenharmony_ci                 * need per-sample shading for the blend shader, accomplished
612bf215546Sopenharmony_ci                 * by forcing per-sample shading for the whole program. */
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci                if (msaa && has_blend_shader) {
615bf215546Sopenharmony_ci                        cfg.multisample_misc.evaluate_per_sample = true;
616bf215546Sopenharmony_ci                        cfg.preload.fragment.sample_mask_id = true;
617bf215546Sopenharmony_ci                }
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci                /* Flip gl_PointCoord (and point sprites) depending on API
620bf215546Sopenharmony_ci                 * setting on framebuffer orientation. We do not use
621bf215546Sopenharmony_ci                 * lower_wpos_pntc on Bifrost.
622bf215546Sopenharmony_ci                 */
623bf215546Sopenharmony_ci                cfg.properties.point_sprite_coord_origin_max_y =
624bf215546Sopenharmony_ci                        (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci                cfg.multisample_misc.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0);
627bf215546Sopenharmony_ci                cfg.multisample_misc.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1);
628bf215546Sopenharmony_ci#endif
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci                cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
631bf215546Sopenharmony_ci                cfg.depth_units = rast->offset_units * 2.0f;
632bf215546Sopenharmony_ci                cfg.depth_factor = rast->offset_scale;
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci                bool back_enab = zsa->base.stencil[1].enabled;
635bf215546Sopenharmony_ci                cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0];
636bf215546Sopenharmony_ci                cfg.stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci#if PAN_ARCH <= 5
639bf215546Sopenharmony_ci                /* v6+ fits register preload here, no alpha testing */
640bf215546Sopenharmony_ci                cfg.alpha_reference = zsa->base.alpha_ref_value;
641bf215546Sopenharmony_ci#endif
642bf215546Sopenharmony_ci        }
643bf215546Sopenharmony_ci}
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_cistatic void
646bf215546Sopenharmony_cipanfrost_emit_frag_shader(struct panfrost_context *ctx,
647bf215546Sopenharmony_ci                          struct mali_renderer_state_packed *fragmeta,
648bf215546Sopenharmony_ci                          mali_ptr *blend_shaders)
649bf215546Sopenharmony_ci{
650bf215546Sopenharmony_ci        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
651bf215546Sopenharmony_ci        const struct panfrost_rasterizer *rast = ctx->rasterizer;
652bf215546Sopenharmony_ci        struct panfrost_shader_state *fs =
653bf215546Sopenharmony_ci                panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci        /* We need to merge several several partial renderer state descriptors,
656bf215546Sopenharmony_ci         * so stage to temporary storage rather than reading back write-combine
657bf215546Sopenharmony_ci         * memory, which will trash performance. */
658bf215546Sopenharmony_ci        struct mali_renderer_state_packed rsd;
659bf215546Sopenharmony_ci        panfrost_prepare_fs_state(ctx, blend_shaders, &rsd);
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci#if PAN_ARCH == 4
662bf215546Sopenharmony_ci        if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) {
663bf215546Sopenharmony_ci                /* Word 14: SFBD Blend Equation */
664bf215546Sopenharmony_ci                STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
665bf215546Sopenharmony_ci                rsd.opaque[14] = ctx->blend->equation[0];
666bf215546Sopenharmony_ci        }
667bf215546Sopenharmony_ci#endif
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci        /* Merge with CSO state and upload */
670bf215546Sopenharmony_ci        if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) {
671bf215546Sopenharmony_ci                struct mali_renderer_state_packed *partial_rsd =
672bf215546Sopenharmony_ci                        (struct mali_renderer_state_packed *)&fs->partial_rsd;
673bf215546Sopenharmony_ci                STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd));
674bf215546Sopenharmony_ci                pan_merge(rsd, *partial_rsd, RENDERER_STATE);
675bf215546Sopenharmony_ci        } else {
676bf215546Sopenharmony_ci                pan_merge_empty_fs(&rsd);
677bf215546Sopenharmony_ci        }
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci        /* Word 8, 9 Misc state */
680bf215546Sopenharmony_ci        rsd.opaque[8] |= zsa->rsd_depth.opaque[0]
681bf215546Sopenharmony_ci                       | rast->multisample.opaque[0];
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci        rsd.opaque[9] |= zsa->rsd_stencil.opaque[0]
684bf215546Sopenharmony_ci                       | rast->stencil_misc.opaque[0];
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci        /* Word 10, 11 Stencil Front and Back */
687bf215546Sopenharmony_ci        rsd.opaque[10] |= zsa->stencil_front.opaque[0];
688bf215546Sopenharmony_ci        rsd.opaque[11] |= zsa->stencil_back.opaque[0];
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci        memcpy(fragmeta, &rsd, sizeof(rsd));
691bf215546Sopenharmony_ci}
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_cistatic mali_ptr
694bf215546Sopenharmony_cipanfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
695bf215546Sopenharmony_ci{
696bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
697bf215546Sopenharmony_ci        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT);
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci        struct panfrost_ptr xfer;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci#if PAN_ARCH == 4
704bf215546Sopenharmony_ci        xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE);
705bf215546Sopenharmony_ci#else
706bf215546Sopenharmony_ci        unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci        xfer = pan_pool_alloc_desc_aggregate(&batch->pool.base,
709bf215546Sopenharmony_ci                                             PAN_DESC(RENDERER_STATE),
710bf215546Sopenharmony_ci                                             PAN_DESC_ARRAY(rt_count, BLEND));
711bf215546Sopenharmony_ci#endif
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
714bf215546Sopenharmony_ci        panfrost_get_blend_shaders(batch, blend_shaders);
715bf215546Sopenharmony_ci
716bf215546Sopenharmony_ci        panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend_shaders);
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci#if PAN_ARCH >= 5
719bf215546Sopenharmony_ci        panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), blend_shaders);
720bf215546Sopenharmony_ci#endif
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci        return xfer.gpu;
723bf215546Sopenharmony_ci}
724bf215546Sopenharmony_ci#endif
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_cistatic mali_ptr
727bf215546Sopenharmony_cipanfrost_emit_viewport(struct panfrost_batch *batch)
728bf215546Sopenharmony_ci{
729bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
730bf215546Sopenharmony_ci        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
731bf215546Sopenharmony_ci        const struct pipe_scissor_state *ss = &ctx->scissor;
732bf215546Sopenharmony_ci        const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci        /* Derive min/max from translate/scale. Note since |x| >= 0 by
735bf215546Sopenharmony_ci         * definition, we have that -|x| <= |x| hence translate - |scale| <=
736bf215546Sopenharmony_ci         * translate + |scale|, so the ordering is correct here. */
737bf215546Sopenharmony_ci        float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
738bf215546Sopenharmony_ci        float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
739bf215546Sopenharmony_ci        float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
740bf215546Sopenharmony_ci        float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
741bf215546Sopenharmony_ci        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
742bf215546Sopenharmony_ci        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci        /* Scissor to the intersection of viewport and to the scissor, clamped
745bf215546Sopenharmony_ci         * to the framebuffer */
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci        unsigned minx = MIN2(batch->key.width, MAX2((int) vp_minx, 0));
748bf215546Sopenharmony_ci        unsigned maxx = MIN2(batch->key.width, MAX2((int) vp_maxx, 0));
749bf215546Sopenharmony_ci        unsigned miny = MIN2(batch->key.height, MAX2((int) vp_miny, 0));
750bf215546Sopenharmony_ci        unsigned maxy = MIN2(batch->key.height, MAX2((int) vp_maxy, 0));
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci        if (ss && rast->scissor) {
753bf215546Sopenharmony_ci                minx = MAX2(ss->minx, minx);
754bf215546Sopenharmony_ci                miny = MAX2(ss->miny, miny);
755bf215546Sopenharmony_ci                maxx = MIN2(ss->maxx, maxx);
756bf215546Sopenharmony_ci                maxy = MIN2(ss->maxy, maxy);
757bf215546Sopenharmony_ci        }
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci        /* Set the range to [1, 1) so max values don't wrap round */
760bf215546Sopenharmony_ci        if (maxx == 0 || maxy == 0)
761bf215546Sopenharmony_ci                maxx = maxy = minx = miny = 1;
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci        panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
764bf215546Sopenharmony_ci        batch->scissor_culls_everything = (minx >= maxx || miny >= maxy);
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci        /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */
767bf215546Sopenharmony_ci        maxx--;
768bf215546Sopenharmony_ci        maxy--;
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci        batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY;
771bf215546Sopenharmony_ci        batch->maximum_z = rast->depth_clip_far  ? maxz : +INFINITY;
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci#if PAN_ARCH <= 7
774bf215546Sopenharmony_ci        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT);
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci        pan_pack(T.cpu, VIEWPORT, cfg) {
777bf215546Sopenharmony_ci                cfg.scissor_minimum_x = minx;
778bf215546Sopenharmony_ci                cfg.scissor_minimum_y = miny;
779bf215546Sopenharmony_ci                cfg.scissor_maximum_x = maxx;
780bf215546Sopenharmony_ci                cfg.scissor_maximum_y = maxy;
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci                cfg.minimum_z = batch->minimum_z;
783bf215546Sopenharmony_ci                cfg.maximum_z = batch->maximum_z;
784bf215546Sopenharmony_ci        }
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci        return T.gpu;
787bf215546Sopenharmony_ci#else
788bf215546Sopenharmony_ci        pan_pack(&batch->scissor, SCISSOR, cfg) {
789bf215546Sopenharmony_ci                cfg.scissor_minimum_x = minx;
790bf215546Sopenharmony_ci                cfg.scissor_minimum_y = miny;
791bf215546Sopenharmony_ci                cfg.scissor_maximum_x = maxx;
792bf215546Sopenharmony_ci                cfg.scissor_maximum_y = maxy;
793bf215546Sopenharmony_ci        }
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci        return 0;
796bf215546Sopenharmony_ci#endif
797bf215546Sopenharmony_ci}
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci#if PAN_ARCH >= 9
800bf215546Sopenharmony_ci/**
801bf215546Sopenharmony_ci * Emit a Valhall depth/stencil descriptor at draw-time. The bulk of the
802bf215546Sopenharmony_ci * descriptor corresponds to a pipe_depth_stencil_alpha CSO and is packed at
803bf215546Sopenharmony_ci * CSO create time. However, the stencil reference values and shader
804bf215546Sopenharmony_ci * interactions are dynamic state. Pack only the dynamic state here and OR
805bf215546Sopenharmony_ci * together.
806bf215546Sopenharmony_ci */
807bf215546Sopenharmony_cistatic mali_ptr
808bf215546Sopenharmony_cipanfrost_emit_depth_stencil(struct panfrost_batch *batch)
809bf215546Sopenharmony_ci{
810bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
811bf215546Sopenharmony_ci        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
812bf215546Sopenharmony_ci        struct panfrost_rasterizer *rast = ctx->rasterizer;
813bf215546Sopenharmony_ci        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
814bf215546Sopenharmony_ci        bool back_enab = zsa->base.stencil[1].enabled;
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL);
817bf215546Sopenharmony_ci        struct mali_depth_stencil_packed dynamic;
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci        pan_pack(&dynamic, DEPTH_STENCIL, cfg) {
820bf215546Sopenharmony_ci                cfg.front_reference_value = ctx->stencil_ref.ref_value[0];
821bf215546Sopenharmony_ci                cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci                cfg.stencil_from_shader = fs->info.fs.writes_stencil;
824bf215546Sopenharmony_ci                cfg.depth_source = pan_depth_source(&fs->info);
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_ci                cfg.depth_bias_enable = rast->base.offset_tri;
827bf215546Sopenharmony_ci                cfg.depth_units = rast->base.offset_units * 2.0f;
828bf215546Sopenharmony_ci                cfg.depth_factor = rast->base.offset_scale;
829bf215546Sopenharmony_ci                cfg.depth_bias_clamp = rast->base.offset_clamp;
830bf215546Sopenharmony_ci        }
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci        pan_merge(dynamic, zsa->desc, DEPTH_STENCIL);
833bf215546Sopenharmony_ci        memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL));
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci        return T.gpu;
836bf215546Sopenharmony_ci}
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci/**
839bf215546Sopenharmony_ci * Emit Valhall blend descriptor at draw-time. The descriptor itself is shared
840bf215546Sopenharmony_ci * with Bifrost, but the container data structure is simplified.
841bf215546Sopenharmony_ci */
842bf215546Sopenharmony_cistatic mali_ptr
843bf215546Sopenharmony_cipanfrost_emit_blend_valhall(struct panfrost_batch *batch)
844bf215546Sopenharmony_ci{
845bf215546Sopenharmony_ci        unsigned rt_count = MAX2(batch->key.nr_cbufs, 1);
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND);
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
850bf215546Sopenharmony_ci        panfrost_get_blend_shaders(batch, blend_shaders);
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci        panfrost_emit_blend(batch, T.cpu, blend_shaders);
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci        /* Precalculate for the per-draw path */
855bf215546Sopenharmony_ci        bool has_blend_shader = false;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci        for (unsigned i = 0; i < rt_count; ++i)
858bf215546Sopenharmony_ci                has_blend_shader |= !!blend_shaders[i];
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci        batch->ctx->valhall_has_blend_shader = has_blend_shader;
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci        return T.gpu;
863bf215546Sopenharmony_ci}
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci/**
866bf215546Sopenharmony_ci * Emit Valhall buffer descriptors for bound vertex buffers at draw-time.
867bf215546Sopenharmony_ci */
868bf215546Sopenharmony_cistatic mali_ptr
869bf215546Sopenharmony_cipanfrost_emit_vertex_buffers(struct panfrost_batch *batch)
870bf215546Sopenharmony_ci{
871bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
872bf215546Sopenharmony_ci        unsigned buffer_count = util_last_bit(ctx->vb_mask);
873bf215546Sopenharmony_ci        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
874bf215546Sopenharmony_ci                                                          buffer_count, BUFFER);
875bf215546Sopenharmony_ci        struct mali_buffer_packed *buffers = T.cpu;
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci        u_foreach_bit(i, ctx->vb_mask) {
878bf215546Sopenharmony_ci                struct pipe_vertex_buffer vb = ctx->vertex_buffers[i];
879bf215546Sopenharmony_ci                struct pipe_resource *prsrc = vb.buffer.resource;
880bf215546Sopenharmony_ci                struct panfrost_resource *rsrc = pan_resource(prsrc);
881bf215546Sopenharmony_ci                assert(!vb.is_user_buffer);
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci                pan_pack(buffers + i, BUFFER, cfg) {
886bf215546Sopenharmony_ci                        cfg.address = rsrc->image.data.bo->ptr.gpu +
887bf215546Sopenharmony_ci                                      vb.buffer_offset;
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci                        cfg.size = prsrc->width0 - vb.buffer_offset;
890bf215546Sopenharmony_ci                }
891bf215546Sopenharmony_ci        }
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci        return T.gpu;
894bf215546Sopenharmony_ci}
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci/**
897bf215546Sopenharmony_ci * Emit Valhall attribute descriptors and associated (vertex) buffer
898bf215546Sopenharmony_ci * descriptors at draw-time. The attribute descriptors are packed at draw time
899bf215546Sopenharmony_ci * except for the stride field. The buffer descriptors are packed here, though
900bf215546Sopenharmony_ci * that could be moved into panfrost_set_vertex_buffers if needed.
901bf215546Sopenharmony_ci */
902bf215546Sopenharmony_cistatic mali_ptr
903bf215546Sopenharmony_cipanfrost_emit_vertex_data(struct panfrost_batch *batch)
904bf215546Sopenharmony_ci{
905bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
906bf215546Sopenharmony_ci        struct panfrost_vertex_state *vtx = ctx->vertex;
907bf215546Sopenharmony_ci        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
908bf215546Sopenharmony_ci                                                          vtx->num_elements,
909bf215546Sopenharmony_ci                                                          ATTRIBUTE);
910bf215546Sopenharmony_ci        struct mali_attribute_packed *attributes = T.cpu;
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci        for (unsigned i = 0; i < vtx->num_elements; ++i) {
913bf215546Sopenharmony_ci                struct mali_attribute_packed packed;
914bf215546Sopenharmony_ci                unsigned vbi = vtx->pipe[i].vertex_buffer_index;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci                pan_pack(&packed, ATTRIBUTE, cfg) {
917bf215546Sopenharmony_ci                        cfg.stride = ctx->vertex_buffers[vbi].stride;
918bf215546Sopenharmony_ci                }
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci                pan_merge(packed, vtx->attributes[i], ATTRIBUTE);
921bf215546Sopenharmony_ci                attributes[i] = packed;
922bf215546Sopenharmony_ci        }
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci        return T.gpu;
925bf215546Sopenharmony_ci}
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci/*
928bf215546Sopenharmony_ci * Emit Valhall descriptors for shader images. Unlike previous generations,
929bf215546Sopenharmony_ci * Valhall does not have a special descriptor for images. Standard texture
930bf215546Sopenharmony_ci * descriptors are used. The binding is different in Gallium, however, so we
931bf215546Sopenharmony_ci * translate.
932bf215546Sopenharmony_ci */
933bf215546Sopenharmony_cistatic struct pipe_sampler_view
934bf215546Sopenharmony_cipanfrost_pipe_image_to_sampler_view(struct pipe_image_view *v)
935bf215546Sopenharmony_ci{
936bf215546Sopenharmony_ci        struct pipe_sampler_view out = {
937bf215546Sopenharmony_ci                .format = v->format,
938bf215546Sopenharmony_ci                .texture = v->resource,
939bf215546Sopenharmony_ci                .target = v->resource->target,
940bf215546Sopenharmony_ci                .swizzle_r = PIPE_SWIZZLE_X,
941bf215546Sopenharmony_ci                .swizzle_g = PIPE_SWIZZLE_Y,
942bf215546Sopenharmony_ci                .swizzle_b = PIPE_SWIZZLE_Z,
943bf215546Sopenharmony_ci                .swizzle_a = PIPE_SWIZZLE_W
944bf215546Sopenharmony_ci        };
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci        if (out.target == PIPE_BUFFER) {
947bf215546Sopenharmony_ci                out.u.buf.offset = v->u.buf.offset;
948bf215546Sopenharmony_ci                out.u.buf.size = v->u.buf.size;
949bf215546Sopenharmony_ci        } else {
950bf215546Sopenharmony_ci                out.u.tex.first_layer = v->u.tex.first_layer;
951bf215546Sopenharmony_ci                out.u.tex.last_layer = v->u.tex.last_layer;
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci                /* Single level only */
954bf215546Sopenharmony_ci                out.u.tex.first_level = v->u.tex.level;
955bf215546Sopenharmony_ci                out.u.tex.last_level = v->u.tex.level;
956bf215546Sopenharmony_ci        }
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci        return out;
959bf215546Sopenharmony_ci}
960bf215546Sopenharmony_ci
961bf215546Sopenharmony_cistatic void
962bf215546Sopenharmony_cipanfrost_update_sampler_view(struct panfrost_sampler_view *view,
963bf215546Sopenharmony_ci                             struct pipe_context *pctx);
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_cistatic mali_ptr
966bf215546Sopenharmony_cipanfrost_emit_images(struct panfrost_batch *batch, enum pipe_shader_type stage)
967bf215546Sopenharmony_ci{
968bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
969bf215546Sopenharmony_ci        unsigned last_bit = util_last_bit(ctx->image_mask[stage]);
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci        struct panfrost_ptr T =
972bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE);
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci        struct mali_texture_packed *out = (struct mali_texture_packed *) T.cpu;
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci        for (int i = 0; i < last_bit; ++i) {
977bf215546Sopenharmony_ci                struct pipe_image_view *image = &ctx->images[stage][i];
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci                if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) {
980bf215546Sopenharmony_ci                        memset(&out[i], 0, sizeof(out[i]));
981bf215546Sopenharmony_ci                        continue;
982bf215546Sopenharmony_ci                }
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci                /* Construct a synthetic sampler view so we can use our usual
985bf215546Sopenharmony_ci                 * sampler view code for the actual descriptor packing.
986bf215546Sopenharmony_ci                 *
987bf215546Sopenharmony_ci                 * Use the batch pool for a transient allocation, rather than
988bf215546Sopenharmony_ci                 * allocating a long-lived descriptor.
989bf215546Sopenharmony_ci                 */
990bf215546Sopenharmony_ci                struct panfrost_sampler_view view = {
991bf215546Sopenharmony_ci                        .base = panfrost_pipe_image_to_sampler_view(image),
992bf215546Sopenharmony_ci                        .pool = &batch->pool
993bf215546Sopenharmony_ci                };
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci                /* If we specify a cube map, the hardware internally treat it as
996bf215546Sopenharmony_ci                 * a 2D array. Since cube maps as images can confuse our common
997bf215546Sopenharmony_ci                 * texturing code, explicitly use a 2D array.
998bf215546Sopenharmony_ci                 *
999bf215546Sopenharmony_ci                 * Similar concerns apply to 3D textures.
1000bf215546Sopenharmony_ci                 */
1001bf215546Sopenharmony_ci                if (view.base.target == PIPE_BUFFER)
1002bf215546Sopenharmony_ci                        view.base.target = PIPE_BUFFER;
1003bf215546Sopenharmony_ci                else
1004bf215546Sopenharmony_ci                        view.base.target = PIPE_TEXTURE_2D_ARRAY;
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci                panfrost_update_sampler_view(&view, &ctx->base);
1007bf215546Sopenharmony_ci                out[i] = view.bifrost_descriptor;
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci                panfrost_track_image_access(batch, stage, image);
1010bf215546Sopenharmony_ci        }
1011bf215546Sopenharmony_ci
1012bf215546Sopenharmony_ci        return T.gpu;
1013bf215546Sopenharmony_ci}
1014bf215546Sopenharmony_ci#endif
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_cistatic mali_ptr
1017bf215546Sopenharmony_cipanfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1018bf215546Sopenharmony_ci                                 enum pipe_shader_type st,
1019bf215546Sopenharmony_ci                                 struct panfrost_constant_buffer *buf,
1020bf215546Sopenharmony_ci                                 unsigned index)
1021bf215546Sopenharmony_ci{
1022bf215546Sopenharmony_ci        struct pipe_constant_buffer *cb = &buf->cb[index];
1023bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci        if (rsrc) {
1026bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, st);
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci                /* Alignment gauranteed by
1029bf215546Sopenharmony_ci                 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1030bf215546Sopenharmony_ci                return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset;
1031bf215546Sopenharmony_ci        } else if (cb->user_buffer) {
1032bf215546Sopenharmony_ci                return pan_pool_upload_aligned(&batch->pool.base,
1033bf215546Sopenharmony_ci                                               cb->user_buffer +
1034bf215546Sopenharmony_ci                                               cb->buffer_offset,
1035bf215546Sopenharmony_ci                                               cb->buffer_size, 16);
1036bf215546Sopenharmony_ci        } else {
1037bf215546Sopenharmony_ci                unreachable("No constant buffer");
1038bf215546Sopenharmony_ci        }
1039bf215546Sopenharmony_ci}
1040bf215546Sopenharmony_ci
/* One system value slot: 16 bytes of storage that the upload helpers fill
 * through whichever typed view suits the value being written. */
struct sysval_uniform {
        union {
                /* Four 32-bit floats */
                float f[4];

                /* Four signed 32-bit integers */
                int32_t i[4];

                /* Four unsigned 32-bit integers */
                uint32_t u[4];

                /* Two unsigned 64-bit integers (used for GPU addresses) */
                uint64_t du[2];
        };
};
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_cistatic void
1051bf215546Sopenharmony_cipanfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1052bf215546Sopenharmony_ci                                      struct sysval_uniform *uniform)
1053bf215546Sopenharmony_ci{
1054bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1055bf215546Sopenharmony_ci        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1056bf215546Sopenharmony_ci
1057bf215546Sopenharmony_ci        uniform->f[0] = vp->scale[0];
1058bf215546Sopenharmony_ci        uniform->f[1] = vp->scale[1];
1059bf215546Sopenharmony_ci        uniform->f[2] = vp->scale[2];
1060bf215546Sopenharmony_ci}
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_cistatic void
1063bf215546Sopenharmony_cipanfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1064bf215546Sopenharmony_ci                                       struct sysval_uniform *uniform)
1065bf215546Sopenharmony_ci{
1066bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1067bf215546Sopenharmony_ci        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci        uniform->f[0] = vp->translate[0];
1070bf215546Sopenharmony_ci        uniform->f[1] = vp->translate[1];
1071bf215546Sopenharmony_ci        uniform->f[2] = vp->translate[2];
1072bf215546Sopenharmony_ci}
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_cistatic void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1075bf215546Sopenharmony_ci                                       enum pipe_shader_type st,
1076bf215546Sopenharmony_ci                                       unsigned int sysvalid,
1077bf215546Sopenharmony_ci                                       struct sysval_uniform *uniform)
1078bf215546Sopenharmony_ci{
1079bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1080bf215546Sopenharmony_ci        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1081bf215546Sopenharmony_ci        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1082bf215546Sopenharmony_ci        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1083bf215546Sopenharmony_ci        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1084bf215546Sopenharmony_ci
1085bf215546Sopenharmony_ci        assert(dim);
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci        if (tex->target == PIPE_BUFFER) {
1088bf215546Sopenharmony_ci                assert(dim == 1);
1089bf215546Sopenharmony_ci                uniform->i[0] =
1090bf215546Sopenharmony_ci                        tex->u.buf.size / util_format_get_blocksize(tex->format);
1091bf215546Sopenharmony_ci                return;
1092bf215546Sopenharmony_ci        }
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci        if (dim > 1)
1097bf215546Sopenharmony_ci                uniform->i[1] = u_minify(tex->texture->height0,
1098bf215546Sopenharmony_ci                                         tex->u.tex.first_level);
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_ci        if (dim > 2)
1101bf215546Sopenharmony_ci                uniform->i[2] = u_minify(tex->texture->depth0,
1102bf215546Sopenharmony_ci                                         tex->u.tex.first_level);
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci        if (is_array) {
1105bf215546Sopenharmony_ci                unsigned size = tex->texture->array_size;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci                /* Internally, we store the number of 2D images (faces * array
1108bf215546Sopenharmony_ci                 * size). Externally, we report the array size in terms of
1109bf215546Sopenharmony_ci                 * complete cubes. So divide by the # of faces per cube.
1110bf215546Sopenharmony_ci                 */
1111bf215546Sopenharmony_ci                if (tex->target == PIPE_TEXTURE_CUBE_ARRAY)
1112bf215546Sopenharmony_ci                        size /= 6;
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci                uniform->i[dim] = size;
1115bf215546Sopenharmony_ci        }
1116bf215546Sopenharmony_ci}
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_cistatic void panfrost_upload_image_size_sysval(struct panfrost_batch *batch,
1119bf215546Sopenharmony_ci                                              enum pipe_shader_type st,
1120bf215546Sopenharmony_ci                                              unsigned int sysvalid,
1121bf215546Sopenharmony_ci                                              struct sysval_uniform *uniform)
1122bf215546Sopenharmony_ci{
1123bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1124bf215546Sopenharmony_ci        unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1125bf215546Sopenharmony_ci        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1126bf215546Sopenharmony_ci        unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci        assert(dim && dim < 4);
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci        struct pipe_image_view *image = &ctx->images[st][idx];
1131bf215546Sopenharmony_ci
1132bf215546Sopenharmony_ci        if (image->resource->target == PIPE_BUFFER) {
1133bf215546Sopenharmony_ci                unsigned blocksize = util_format_get_blocksize(image->format);
1134bf215546Sopenharmony_ci                uniform->i[0] = image->resource->width0 / blocksize;
1135bf215546Sopenharmony_ci                return;
1136bf215546Sopenharmony_ci        }
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci        uniform->i[0] = u_minify(image->resource->width0,
1139bf215546Sopenharmony_ci                                 image->u.tex.level);
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci        if (dim > 1)
1142bf215546Sopenharmony_ci                uniform->i[1] = u_minify(image->resource->height0,
1143bf215546Sopenharmony_ci                                         image->u.tex.level);
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ci        if (dim > 2)
1146bf215546Sopenharmony_ci                uniform->i[2] = u_minify(image->resource->depth0,
1147bf215546Sopenharmony_ci                                         image->u.tex.level);
1148bf215546Sopenharmony_ci
1149bf215546Sopenharmony_ci        if (is_array)
1150bf215546Sopenharmony_ci                uniform->i[dim] = image->resource->array_size;
1151bf215546Sopenharmony_ci}
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_cistatic void
1154bf215546Sopenharmony_cipanfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1155bf215546Sopenharmony_ci                            enum pipe_shader_type st,
1156bf215546Sopenharmony_ci                            unsigned ssbo_id,
1157bf215546Sopenharmony_ci                            struct sysval_uniform *uniform)
1158bf215546Sopenharmony_ci{
1159bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_ci        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1162bf215546Sopenharmony_ci        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci        /* Compute address */
1165bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(sb.buffer);
1166bf215546Sopenharmony_ci        struct panfrost_bo *bo = rsrc->image.data.bo;
1167bf215546Sopenharmony_ci
1168bf215546Sopenharmony_ci        panfrost_batch_write_rsrc(batch, rsrc, st);
1169bf215546Sopenharmony_ci
1170bf215546Sopenharmony_ci        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
1171bf215546Sopenharmony_ci                        sb.buffer_offset, sb.buffer_size);
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci        /* Upload address and size as sysval */
1174bf215546Sopenharmony_ci        uniform->du[0] = bo->ptr.gpu + sb.buffer_offset;
1175bf215546Sopenharmony_ci        uniform->u[2] = sb.buffer_size;
1176bf215546Sopenharmony_ci}
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_cistatic void
1179bf215546Sopenharmony_cipanfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1180bf215546Sopenharmony_ci                               enum pipe_shader_type st,
1181bf215546Sopenharmony_ci                               unsigned samp_idx,
1182bf215546Sopenharmony_ci                               struct sysval_uniform *uniform)
1183bf215546Sopenharmony_ci{
1184bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1185bf215546Sopenharmony_ci        struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1186bf215546Sopenharmony_ci
1187bf215546Sopenharmony_ci        uniform->f[0] = sampl->min_lod;
1188bf215546Sopenharmony_ci        uniform->f[1] = sampl->max_lod;
1189bf215546Sopenharmony_ci        uniform->f[2] = sampl->lod_bias;
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_ci        /* Even without any errata, Midgard represents "no mipmapping" as
1192bf215546Sopenharmony_ci         * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1193bf215546Sopenharmony_ci         * panfrost_create_sampler_state which also explains our choice of
1194bf215546Sopenharmony_ci         * epsilon value (again to keep behaviour consistent) */
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1197bf215546Sopenharmony_ci                uniform->f[1] = uniform->f[0] + (1.0/256.0);
1198bf215546Sopenharmony_ci}
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_cistatic void
1201bf215546Sopenharmony_cipanfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1202bf215546Sopenharmony_ci                                       struct sysval_uniform *uniform)
1203bf215546Sopenharmony_ci{
1204bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci        uniform->u[0] = ctx->compute_grid->grid[0];
1207bf215546Sopenharmony_ci        uniform->u[1] = ctx->compute_grid->grid[1];
1208bf215546Sopenharmony_ci        uniform->u[2] = ctx->compute_grid->grid[2];
1209bf215546Sopenharmony_ci}
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_cistatic void
1212bf215546Sopenharmony_cipanfrost_upload_local_group_size_sysval(struct panfrost_batch *batch,
1213bf215546Sopenharmony_ci                                        struct sysval_uniform *uniform)
1214bf215546Sopenharmony_ci{
1215bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1216bf215546Sopenharmony_ci
1217bf215546Sopenharmony_ci        uniform->u[0] = ctx->compute_grid->block[0];
1218bf215546Sopenharmony_ci        uniform->u[1] = ctx->compute_grid->block[1];
1219bf215546Sopenharmony_ci        uniform->u[2] = ctx->compute_grid->block[2];
1220bf215546Sopenharmony_ci}
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_cistatic void
1223bf215546Sopenharmony_cipanfrost_upload_work_dim_sysval(struct panfrost_batch *batch,
1224bf215546Sopenharmony_ci                                struct sysval_uniform *uniform)
1225bf215546Sopenharmony_ci{
1226bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1227bf215546Sopenharmony_ci
1228bf215546Sopenharmony_ci        uniform->u[0] = ctx->compute_grid->work_dim;
1229bf215546Sopenharmony_ci}
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_ci/* Sample positions are pushed in a Bifrost specific format on Bifrost. On
1232bf215546Sopenharmony_ci * Midgard, we emulate the Bifrost path with some extra arithmetic in the
1233bf215546Sopenharmony_ci * shader, to keep the code as unified as possible. */
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_cistatic void
1236bf215546Sopenharmony_cipanfrost_upload_sample_positions_sysval(struct panfrost_batch *batch,
1237bf215546Sopenharmony_ci                                struct sysval_uniform *uniform)
1238bf215546Sopenharmony_ci{
1239bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1240bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(ctx->base.screen);
1241bf215546Sopenharmony_ci
1242bf215546Sopenharmony_ci        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
1243bf215546Sopenharmony_ci        uniform->du[0] = panfrost_sample_positions(dev, panfrost_sample_pattern(samples));
1244bf215546Sopenharmony_ci}
1245bf215546Sopenharmony_ci
1246bf215546Sopenharmony_cistatic void
1247bf215546Sopenharmony_cipanfrost_upload_multisampled_sysval(struct panfrost_batch *batch,
1248bf215546Sopenharmony_ci                                struct sysval_uniform *uniform)
1249bf215546Sopenharmony_ci{
1250bf215546Sopenharmony_ci        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
1251bf215546Sopenharmony_ci        uniform->u[0] = samples > 1;
1252bf215546Sopenharmony_ci}
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci#if PAN_ARCH >= 6
1255bf215546Sopenharmony_cistatic void
1256bf215546Sopenharmony_cipanfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch,
1257bf215546Sopenharmony_ci                unsigned size_and_rt, struct sysval_uniform *uniform)
1258bf215546Sopenharmony_ci{
1259bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1260bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(ctx->base.screen);
1261bf215546Sopenharmony_ci        unsigned rt = size_and_rt & 0xF;
1262bf215546Sopenharmony_ci        unsigned size = size_and_rt >> 4;
1263bf215546Sopenharmony_ci
1264bf215546Sopenharmony_ci        if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) {
1265bf215546Sopenharmony_ci                enum pipe_format format = batch->key.cbufs[rt]->format;
1266bf215546Sopenharmony_ci                uniform->u[0] =
1267bf215546Sopenharmony_ci                        GENX(pan_blend_get_internal_desc)(dev, format, rt, size, false) >> 32;
1268bf215546Sopenharmony_ci        } else {
1269bf215546Sopenharmony_ci                pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg)
1270bf215546Sopenharmony_ci                        cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw;
1271bf215546Sopenharmony_ci        }
1272bf215546Sopenharmony_ci}
1273bf215546Sopenharmony_ci#endif
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_cistatic unsigned
1276bf215546Sopenharmony_cipanfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target)
1277bf215546Sopenharmony_ci{
1278bf215546Sopenharmony_ci        return target->buffer_offset + (pan_so_target(target)->offset * stride);
1279bf215546Sopenharmony_ci}
1280bf215546Sopenharmony_ci
1281bf215546Sopenharmony_cistatic void
1282bf215546Sopenharmony_cipanfrost_upload_sysvals(struct panfrost_batch *batch,
1283bf215546Sopenharmony_ci                        const struct panfrost_ptr *ptr,
1284bf215546Sopenharmony_ci                        struct panfrost_shader_state *ss,
1285bf215546Sopenharmony_ci                        enum pipe_shader_type st)
1286bf215546Sopenharmony_ci{
1287bf215546Sopenharmony_ci        struct sysval_uniform *uniforms = ptr->cpu;
1288bf215546Sopenharmony_ci
1289bf215546Sopenharmony_ci        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
1290bf215546Sopenharmony_ci                int sysval = ss->info.sysvals.sysvals[i];
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_ci                switch (PAN_SYSVAL_TYPE(sysval)) {
1293bf215546Sopenharmony_ci                case PAN_SYSVAL_VIEWPORT_SCALE:
1294bf215546Sopenharmony_ci                        panfrost_upload_viewport_scale_sysval(batch,
1295bf215546Sopenharmony_ci                                                              &uniforms[i]);
1296bf215546Sopenharmony_ci                        break;
1297bf215546Sopenharmony_ci                case PAN_SYSVAL_VIEWPORT_OFFSET:
1298bf215546Sopenharmony_ci                        panfrost_upload_viewport_offset_sysval(batch,
1299bf215546Sopenharmony_ci                                                               &uniforms[i]);
1300bf215546Sopenharmony_ci                        break;
1301bf215546Sopenharmony_ci                case PAN_SYSVAL_TEXTURE_SIZE:
1302bf215546Sopenharmony_ci                        panfrost_upload_txs_sysval(batch, st,
1303bf215546Sopenharmony_ci                                                   PAN_SYSVAL_ID(sysval),
1304bf215546Sopenharmony_ci                                                   &uniforms[i]);
1305bf215546Sopenharmony_ci                        break;
1306bf215546Sopenharmony_ci                case PAN_SYSVAL_SSBO:
1307bf215546Sopenharmony_ci                        panfrost_upload_ssbo_sysval(batch, st,
1308bf215546Sopenharmony_ci                                                    PAN_SYSVAL_ID(sysval),
1309bf215546Sopenharmony_ci                                                    &uniforms[i]);
1310bf215546Sopenharmony_ci                        break;
1311bf215546Sopenharmony_ci
1312bf215546Sopenharmony_ci                case PAN_SYSVAL_XFB:
1313bf215546Sopenharmony_ci                {
1314bf215546Sopenharmony_ci                        unsigned buf = PAN_SYSVAL_ID(sysval);
1315bf215546Sopenharmony_ci                        struct panfrost_shader_state *vs =
1316bf215546Sopenharmony_ci                                panfrost_get_shader_state(batch->ctx, PIPE_SHADER_VERTEX);
1317bf215546Sopenharmony_ci                        struct pipe_stream_output_info *so = &vs->stream_output;
1318bf215546Sopenharmony_ci                        unsigned stride = so->stride[buf] * 4;
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_ci                        struct pipe_stream_output_target *target = NULL;
1321bf215546Sopenharmony_ci                        if (buf < batch->ctx->streamout.num_targets)
1322bf215546Sopenharmony_ci                                target = batch->ctx->streamout.targets[buf];
1323bf215546Sopenharmony_ci
1324bf215546Sopenharmony_ci                        if (!target) {
1325bf215546Sopenharmony_ci                                /* Memory sink */
1326bf215546Sopenharmony_ci                                uniforms[i].du[0] = 0x8ull << 60;
1327bf215546Sopenharmony_ci                                break;
1328bf215546Sopenharmony_ci                        }
1329bf215546Sopenharmony_ci
1330bf215546Sopenharmony_ci                        struct panfrost_resource *rsrc = pan_resource(target->buffer);
1331bf215546Sopenharmony_ci                        unsigned offset = panfrost_xfb_offset(stride, target);
1332bf215546Sopenharmony_ci
1333bf215546Sopenharmony_ci                        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
1334bf215546Sopenharmony_ci                                offset, target->buffer_size - offset);
1335bf215546Sopenharmony_ci
1336bf215546Sopenharmony_ci                        panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
1337bf215546Sopenharmony_ci
1338bf215546Sopenharmony_ci                        uniforms[i].du[0] = rsrc->image.data.bo->ptr.gpu + offset;
1339bf215546Sopenharmony_ci                        break;
1340bf215546Sopenharmony_ci                }
1341bf215546Sopenharmony_ci
1342bf215546Sopenharmony_ci                case PAN_SYSVAL_NUM_VERTICES:
1343bf215546Sopenharmony_ci                        uniforms[i].u[0] = batch->ctx->vertex_count;
1344bf215546Sopenharmony_ci                        break;
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_ci                case PAN_SYSVAL_NUM_WORK_GROUPS:
1347bf215546Sopenharmony_ci                        for (unsigned j = 0; j < 3; j++) {
1348bf215546Sopenharmony_ci                                batch->num_wg_sysval[j] =
1349bf215546Sopenharmony_ci                                        ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
1350bf215546Sopenharmony_ci                        }
1351bf215546Sopenharmony_ci                        panfrost_upload_num_work_groups_sysval(batch,
1352bf215546Sopenharmony_ci                                                               &uniforms[i]);
1353bf215546Sopenharmony_ci                        break;
1354bf215546Sopenharmony_ci                case PAN_SYSVAL_LOCAL_GROUP_SIZE:
1355bf215546Sopenharmony_ci                        panfrost_upload_local_group_size_sysval(batch,
1356bf215546Sopenharmony_ci                                                                &uniforms[i]);
1357bf215546Sopenharmony_ci                        break;
1358bf215546Sopenharmony_ci                case PAN_SYSVAL_WORK_DIM:
1359bf215546Sopenharmony_ci                        panfrost_upload_work_dim_sysval(batch,
1360bf215546Sopenharmony_ci                                                        &uniforms[i]);
1361bf215546Sopenharmony_ci                        break;
1362bf215546Sopenharmony_ci                case PAN_SYSVAL_SAMPLER:
1363bf215546Sopenharmony_ci                        panfrost_upload_sampler_sysval(batch, st,
1364bf215546Sopenharmony_ci                                                       PAN_SYSVAL_ID(sysval),
1365bf215546Sopenharmony_ci                                                       &uniforms[i]);
1366bf215546Sopenharmony_ci                        break;
1367bf215546Sopenharmony_ci                case PAN_SYSVAL_IMAGE_SIZE:
1368bf215546Sopenharmony_ci                        panfrost_upload_image_size_sysval(batch, st,
1369bf215546Sopenharmony_ci                                                          PAN_SYSVAL_ID(sysval),
1370bf215546Sopenharmony_ci                                                          &uniforms[i]);
1371bf215546Sopenharmony_ci                        break;
1372bf215546Sopenharmony_ci                case PAN_SYSVAL_SAMPLE_POSITIONS:
1373bf215546Sopenharmony_ci                        panfrost_upload_sample_positions_sysval(batch,
1374bf215546Sopenharmony_ci                                                        &uniforms[i]);
1375bf215546Sopenharmony_ci                        break;
1376bf215546Sopenharmony_ci                case PAN_SYSVAL_MULTISAMPLED:
1377bf215546Sopenharmony_ci                        panfrost_upload_multisampled_sysval(batch,
1378bf215546Sopenharmony_ci                                                               &uniforms[i]);
1379bf215546Sopenharmony_ci                        break;
1380bf215546Sopenharmony_ci#if PAN_ARCH >= 6
1381bf215546Sopenharmony_ci                case PAN_SYSVAL_RT_CONVERSION:
1382bf215546Sopenharmony_ci                        panfrost_upload_rt_conversion_sysval(batch,
1383bf215546Sopenharmony_ci                                        PAN_SYSVAL_ID(sysval), &uniforms[i]);
1384bf215546Sopenharmony_ci                        break;
1385bf215546Sopenharmony_ci#endif
1386bf215546Sopenharmony_ci                case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
1387bf215546Sopenharmony_ci                        batch->ctx->first_vertex_sysval_ptr =
1388bf215546Sopenharmony_ci                                ptr->gpu + (i * sizeof(*uniforms));
1389bf215546Sopenharmony_ci                        batch->ctx->base_vertex_sysval_ptr =
1390bf215546Sopenharmony_ci                                batch->ctx->first_vertex_sysval_ptr + 4;
1391bf215546Sopenharmony_ci                        batch->ctx->base_instance_sysval_ptr =
1392bf215546Sopenharmony_ci                                batch->ctx->first_vertex_sysval_ptr + 8;
1393bf215546Sopenharmony_ci
1394bf215546Sopenharmony_ci                        uniforms[i].u[0] = batch->ctx->offset_start;
1395bf215546Sopenharmony_ci                        uniforms[i].u[1] = batch->ctx->base_vertex;
1396bf215546Sopenharmony_ci                        uniforms[i].u[2] = batch->ctx->base_instance;
1397bf215546Sopenharmony_ci                        break;
1398bf215546Sopenharmony_ci                case PAN_SYSVAL_DRAWID:
1399bf215546Sopenharmony_ci                        uniforms[i].u[0] = batch->ctx->drawid;
1400bf215546Sopenharmony_ci                        break;
1401bf215546Sopenharmony_ci                default:
1402bf215546Sopenharmony_ci                        assert(0);
1403bf215546Sopenharmony_ci                }
1404bf215546Sopenharmony_ci        }
1405bf215546Sopenharmony_ci}
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_cistatic const void *
1408bf215546Sopenharmony_cipanfrost_map_constant_buffer_cpu(struct panfrost_context *ctx,
1409bf215546Sopenharmony_ci                                 struct panfrost_constant_buffer *buf,
1410bf215546Sopenharmony_ci                                 unsigned index)
1411bf215546Sopenharmony_ci{
1412bf215546Sopenharmony_ci        struct pipe_constant_buffer *cb = &buf->cb[index];
1413bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1414bf215546Sopenharmony_ci
1415bf215546Sopenharmony_ci        if (rsrc) {
1416bf215546Sopenharmony_ci                panfrost_bo_mmap(rsrc->image.data.bo);
1417bf215546Sopenharmony_ci                panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping");
1418bf215546Sopenharmony_ci                panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false);
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci                return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset;
1421bf215546Sopenharmony_ci        } else if (cb->user_buffer) {
1422bf215546Sopenharmony_ci                return cb->user_buffer + cb->buffer_offset;
1423bf215546Sopenharmony_ci        } else
1424bf215546Sopenharmony_ci                unreachable("No constant buffer");
1425bf215546Sopenharmony_ci}
1426bf215546Sopenharmony_ci
1427bf215546Sopenharmony_ci/* Emit a single UBO record. On Valhall, UBOs are dumb buffers and are
1428bf215546Sopenharmony_ci * implemented with buffer descriptors in the resource table, sized in terms of
1429bf215546Sopenharmony_ci * bytes. On Bifrost and older, UBOs have special uniform buffer data
1430bf215546Sopenharmony_ci * structure, sized in terms of entries.
1431bf215546Sopenharmony_ci */
1432bf215546Sopenharmony_cistatic void
1433bf215546Sopenharmony_cipanfrost_emit_ubo(void *base, unsigned index, mali_ptr address, size_t size)
1434bf215546Sopenharmony_ci{
1435bf215546Sopenharmony_ci#if PAN_ARCH >= 9
1436bf215546Sopenharmony_ci        struct mali_buffer_packed *out = base;
1437bf215546Sopenharmony_ci
1438bf215546Sopenharmony_ci        pan_pack(out + index, BUFFER, cfg) {
1439bf215546Sopenharmony_ci                cfg.size = size;
1440bf215546Sopenharmony_ci                cfg.address = address;
1441bf215546Sopenharmony_ci        }
1442bf215546Sopenharmony_ci#else
1443bf215546Sopenharmony_ci        struct mali_uniform_buffer_packed *out = base;
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci        /* Issue (57) for the ARB_uniform_buffer_object spec says that
1446bf215546Sopenharmony_ci         * the buffer can be larger than the uniform data inside it,
1447bf215546Sopenharmony_ci         * so clamp ubo size to what hardware supports. */
1448bf215546Sopenharmony_ci
1449bf215546Sopenharmony_ci        pan_pack(out + index, UNIFORM_BUFFER, cfg) {
1450bf215546Sopenharmony_ci                cfg.entries = MIN2(DIV_ROUND_UP(size, 16), 1 << 12);
1451bf215546Sopenharmony_ci                cfg.pointer = address;
1452bf215546Sopenharmony_ci        }
1453bf215546Sopenharmony_ci#endif
1454bf215546Sopenharmony_ci}
1455bf215546Sopenharmony_ci
1456bf215546Sopenharmony_cistatic mali_ptr
1457bf215546Sopenharmony_cipanfrost_emit_const_buf(struct panfrost_batch *batch,
1458bf215546Sopenharmony_ci                        enum pipe_shader_type stage,
1459bf215546Sopenharmony_ci                        unsigned *buffer_count,
1460bf215546Sopenharmony_ci                        mali_ptr *push_constants,
1461bf215546Sopenharmony_ci                        unsigned *pushed_words)
1462bf215546Sopenharmony_ci{
1463bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1464bf215546Sopenharmony_ci        struct panfrost_shader_variants *all = ctx->shader[stage];
1465bf215546Sopenharmony_ci
1466bf215546Sopenharmony_ci        if (!all)
1467bf215546Sopenharmony_ci                return 0;
1468bf215546Sopenharmony_ci
1469bf215546Sopenharmony_ci        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1470bf215546Sopenharmony_ci        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1471bf215546Sopenharmony_ci
1472bf215546Sopenharmony_ci        /* Allocate room for the sysval and the uniforms */
1473bf215546Sopenharmony_ci        size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
1474bf215546Sopenharmony_ci        struct panfrost_ptr transfer =
1475bf215546Sopenharmony_ci                pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16);
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci        /* Upload sysvals requested by the shader */
1478bf215546Sopenharmony_ci        panfrost_upload_sysvals(batch, &transfer, ss, stage);
1479bf215546Sopenharmony_ci
1480bf215546Sopenharmony_ci        /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
1481bf215546Sopenharmony_ci        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
1482bf215546Sopenharmony_ci        unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
1483bf215546Sopenharmony_ci        unsigned sysval_ubo = sys_size ? ubo_count : ~0;
1484bf215546Sopenharmony_ci        struct panfrost_ptr ubos = { 0 };
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci#if PAN_ARCH >= 9
1487bf215546Sopenharmony_ci        ubos = pan_pool_alloc_desc_array(&batch->pool.base,
1488bf215546Sopenharmony_ci                                         ubo_count + 1,
1489bf215546Sopenharmony_ci                                         BUFFER);
1490bf215546Sopenharmony_ci#else
1491bf215546Sopenharmony_ci        ubos = pan_pool_alloc_desc_array(&batch->pool.base,
1492bf215546Sopenharmony_ci                                         ubo_count + 1,
1493bf215546Sopenharmony_ci                                         UNIFORM_BUFFER);
1494bf215546Sopenharmony_ci#endif
1495bf215546Sopenharmony_ci
1496bf215546Sopenharmony_ci        if (buffer_count)
1497bf215546Sopenharmony_ci                *buffer_count = ubo_count + (sys_size ? 1 : 0);
1498bf215546Sopenharmony_ci
1499bf215546Sopenharmony_ci        /* Upload sysval as a final UBO */
1500bf215546Sopenharmony_ci
1501bf215546Sopenharmony_ci        if (sys_size)
1502bf215546Sopenharmony_ci                panfrost_emit_ubo(ubos.cpu, ubo_count, transfer.gpu, sys_size);
1503bf215546Sopenharmony_ci
1504bf215546Sopenharmony_ci        /* The rest are honest-to-goodness UBOs */
1505bf215546Sopenharmony_ci
1506bf215546Sopenharmony_ci        u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) {
1507bf215546Sopenharmony_ci                size_t usz = buf->cb[ubo].buffer_size;
1508bf215546Sopenharmony_ci                mali_ptr address = 0;
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_ci                if (usz > 0) {
1511bf215546Sopenharmony_ci                        address = panfrost_map_constant_buffer_gpu(batch,
1512bf215546Sopenharmony_ci                                        stage, buf, ubo);
1513bf215546Sopenharmony_ci                }
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci                panfrost_emit_ubo(ubos.cpu, ubo, address, usz);
1516bf215546Sopenharmony_ci        }
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci        if (pushed_words)
1519bf215546Sopenharmony_ci                *pushed_words = ss->info.push.count;
1520bf215546Sopenharmony_ci
1521bf215546Sopenharmony_ci        if (ss->info.push.count == 0)
1522bf215546Sopenharmony_ci                return ubos.gpu;
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_ci        /* Copy push constants required by the shader */
1525bf215546Sopenharmony_ci        struct panfrost_ptr push_transfer =
1526bf215546Sopenharmony_ci                pan_pool_alloc_aligned(&batch->pool.base,
1527bf215546Sopenharmony_ci                                       ss->info.push.count * 4, 16);
1528bf215546Sopenharmony_ci
1529bf215546Sopenharmony_ci        uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
1530bf215546Sopenharmony_ci        *push_constants = push_transfer.gpu;
1531bf215546Sopenharmony_ci
1532bf215546Sopenharmony_ci        for (unsigned i = 0; i < ss->info.push.count; ++i) {
1533bf215546Sopenharmony_ci                struct panfrost_ubo_word src = ss->info.push.words[i];
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci                if (src.ubo == sysval_ubo) {
1536bf215546Sopenharmony_ci                        unsigned sysval_idx = src.offset / 16;
1537bf215546Sopenharmony_ci                        unsigned sysval_comp = (src.offset % 16) / 4;
1538bf215546Sopenharmony_ci                        unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
1539bf215546Sopenharmony_ci                        mali_ptr ptr = push_transfer.gpu + (4 * i);
1540bf215546Sopenharmony_ci
1541bf215546Sopenharmony_ci                        switch (sysval_type) {
1542bf215546Sopenharmony_ci                        case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
1543bf215546Sopenharmony_ci                                switch (sysval_comp) {
1544bf215546Sopenharmony_ci                                case 0:
1545bf215546Sopenharmony_ci                                        batch->ctx->first_vertex_sysval_ptr = ptr;
1546bf215546Sopenharmony_ci                                        break;
1547bf215546Sopenharmony_ci                                case 1:
1548bf215546Sopenharmony_ci                                        batch->ctx->base_vertex_sysval_ptr = ptr;
1549bf215546Sopenharmony_ci                                        break;
1550bf215546Sopenharmony_ci                                case 2:
1551bf215546Sopenharmony_ci                                        batch->ctx->base_instance_sysval_ptr = ptr;
1552bf215546Sopenharmony_ci                                        break;
1553bf215546Sopenharmony_ci                                case 3:
1554bf215546Sopenharmony_ci                                        /* Spurious (Midgard doesn't pack) */
1555bf215546Sopenharmony_ci                                        break;
1556bf215546Sopenharmony_ci                                default:
1557bf215546Sopenharmony_ci                                        unreachable("Invalid vertex/instance offset component\n");
1558bf215546Sopenharmony_ci                                }
1559bf215546Sopenharmony_ci                                break;
1560bf215546Sopenharmony_ci
1561bf215546Sopenharmony_ci                        case PAN_SYSVAL_NUM_WORK_GROUPS:
1562bf215546Sopenharmony_ci                                batch->num_wg_sysval[sysval_comp] = ptr;
1563bf215546Sopenharmony_ci                                break;
1564bf215546Sopenharmony_ci
1565bf215546Sopenharmony_ci                        default:
1566bf215546Sopenharmony_ci                                break;
1567bf215546Sopenharmony_ci                        }
1568bf215546Sopenharmony_ci                }
1569bf215546Sopenharmony_ci                /* Map the UBO, this should be cheap. However this is reading
1570bf215546Sopenharmony_ci                 * from write-combine memory which is _very_ slow. It might pay
1571bf215546Sopenharmony_ci                 * off to upload sysvals to a staging buffer on the CPU on the
1572bf215546Sopenharmony_ci                 * assumption sysvals will get pushed (TODO) */
1573bf215546Sopenharmony_ci
1574bf215546Sopenharmony_ci                const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu :
1575bf215546Sopenharmony_ci                        panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo);
1576bf215546Sopenharmony_ci
1577bf215546Sopenharmony_ci                /* TODO: Is there any benefit to combining ranges */
1578bf215546Sopenharmony_ci                memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4);
1579bf215546Sopenharmony_ci        }
1580bf215546Sopenharmony_ci
1581bf215546Sopenharmony_ci        return ubos.gpu;
1582bf215546Sopenharmony_ci}
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_cistatic mali_ptr
1585bf215546Sopenharmony_cipanfrost_emit_shared_memory(struct panfrost_batch *batch,
1586bf215546Sopenharmony_ci                            const struct pipe_grid_info *grid)
1587bf215546Sopenharmony_ci{
1588bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1589bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(ctx->base.screen);
1590bf215546Sopenharmony_ci        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1591bf215546Sopenharmony_ci        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1592bf215546Sopenharmony_ci        struct panfrost_ptr t =
1593bf215546Sopenharmony_ci                pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci        struct pan_tls_info info = {
1596bf215546Sopenharmony_ci                .tls.size = ss->info.tls_size,
1597bf215546Sopenharmony_ci                .wls.size = ss->info.wls_size,
1598bf215546Sopenharmony_ci                .wls.dim.x = grid->grid[0],
1599bf215546Sopenharmony_ci                .wls.dim.y = grid->grid[1],
1600bf215546Sopenharmony_ci                .wls.dim.z = grid->grid[2],
1601bf215546Sopenharmony_ci        };
1602bf215546Sopenharmony_ci
1603bf215546Sopenharmony_ci        if (ss->info.tls_size) {
1604bf215546Sopenharmony_ci                struct panfrost_bo *bo =
1605bf215546Sopenharmony_ci                        panfrost_batch_get_scratchpad(batch,
1606bf215546Sopenharmony_ci                                                      ss->info.tls_size,
1607bf215546Sopenharmony_ci                                                      dev->thread_tls_alloc,
1608bf215546Sopenharmony_ci                                                      dev->core_id_range);
1609bf215546Sopenharmony_ci                info.tls.ptr = bo->ptr.gpu;
1610bf215546Sopenharmony_ci        }
1611bf215546Sopenharmony_ci
1612bf215546Sopenharmony_ci        if (ss->info.wls_size) {
1613bf215546Sopenharmony_ci                unsigned size =
1614bf215546Sopenharmony_ci                        pan_wls_adjust_size(info.wls.size) *
1615bf215546Sopenharmony_ci                        pan_wls_instances(&info.wls.dim) *
1616bf215546Sopenharmony_ci                        dev->core_id_range;
1617bf215546Sopenharmony_ci
1618bf215546Sopenharmony_ci                struct panfrost_bo *bo =
1619bf215546Sopenharmony_ci                        panfrost_batch_get_shared_memory(batch, size, 1);
1620bf215546Sopenharmony_ci
1621bf215546Sopenharmony_ci                info.wls.ptr = bo->ptr.gpu;
1622bf215546Sopenharmony_ci        }
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci        GENX(pan_emit_tls)(&info, t.cpu);
1625bf215546Sopenharmony_ci        return t.gpu;
1626bf215546Sopenharmony_ci}
1627bf215546Sopenharmony_ci
1628bf215546Sopenharmony_ci#if PAN_ARCH <= 5
1629bf215546Sopenharmony_cistatic mali_ptr
1630bf215546Sopenharmony_cipanfrost_get_tex_desc(struct panfrost_batch *batch,
1631bf215546Sopenharmony_ci                      enum pipe_shader_type st,
1632bf215546Sopenharmony_ci                      struct panfrost_sampler_view *view)
1633bf215546Sopenharmony_ci{
1634bf215546Sopenharmony_ci        if (!view)
1635bf215546Sopenharmony_ci                return (mali_ptr) 0;
1636bf215546Sopenharmony_ci
1637bf215546Sopenharmony_ci        struct pipe_sampler_view *pview = &view->base;
1638bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(pview->texture);
1639bf215546Sopenharmony_ci
1640bf215546Sopenharmony_ci        panfrost_batch_read_rsrc(batch, rsrc, st);
1641bf215546Sopenharmony_ci        panfrost_batch_add_bo(batch, view->state.bo, st);
1642bf215546Sopenharmony_ci
1643bf215546Sopenharmony_ci        return view->state.gpu;
1644bf215546Sopenharmony_ci}
1645bf215546Sopenharmony_ci#endif
1646bf215546Sopenharmony_ci
1647bf215546Sopenharmony_cistatic void
1648bf215546Sopenharmony_cipanfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
1649bf215546Sopenharmony_ci                                struct pipe_context *pctx,
1650bf215546Sopenharmony_ci                                struct pipe_resource *texture)
1651bf215546Sopenharmony_ci{
1652bf215546Sopenharmony_ci        struct panfrost_device *device = pan_device(pctx->screen);
1653bf215546Sopenharmony_ci        struct panfrost_context *ctx = pan_context(pctx);
1654bf215546Sopenharmony_ci        struct panfrost_resource *prsrc = (struct panfrost_resource *)texture;
1655bf215546Sopenharmony_ci        enum pipe_format format = so->base.format;
1656bf215546Sopenharmony_ci        assert(prsrc->image.data.bo);
1657bf215546Sopenharmony_ci
1658bf215546Sopenharmony_ci        /* Format to access the stencil/depth portion of a Z32_S8 texture */
1659bf215546Sopenharmony_ci        if (format == PIPE_FORMAT_X32_S8X24_UINT) {
1660bf215546Sopenharmony_ci                assert(prsrc->separate_stencil);
1661bf215546Sopenharmony_ci                texture = &prsrc->separate_stencil->base;
1662bf215546Sopenharmony_ci                prsrc = (struct panfrost_resource *)texture;
1663bf215546Sopenharmony_ci                format = texture->format;
1664bf215546Sopenharmony_ci        } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
1665bf215546Sopenharmony_ci                format = PIPE_FORMAT_Z32_FLOAT;
1666bf215546Sopenharmony_ci        }
1667bf215546Sopenharmony_ci
1668bf215546Sopenharmony_ci        const struct util_format_description *desc = util_format_description(format);
1669bf215546Sopenharmony_ci
1670bf215546Sopenharmony_ci        bool fake_rgtc = !panfrost_supports_compressed_format(device, MALI_BC4_UNORM);
1671bf215546Sopenharmony_ci
1672bf215546Sopenharmony_ci        if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC && fake_rgtc) {
1673bf215546Sopenharmony_ci                if (desc->is_snorm)
1674bf215546Sopenharmony_ci                        format = PIPE_FORMAT_R8G8B8A8_SNORM;
1675bf215546Sopenharmony_ci                else
1676bf215546Sopenharmony_ci                        format = PIPE_FORMAT_R8G8B8A8_UNORM;
1677bf215546Sopenharmony_ci                desc = util_format_description(format);
1678bf215546Sopenharmony_ci        }
1679bf215546Sopenharmony_ci
1680bf215546Sopenharmony_ci        so->texture_bo = prsrc->image.data.bo->ptr.gpu;
1681bf215546Sopenharmony_ci        so->modifier = prsrc->image.layout.modifier;
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_ci        /* MSAA only supported for 2D textures */
1684bf215546Sopenharmony_ci
1685bf215546Sopenharmony_ci        assert(texture->nr_samples <= 1 ||
1686bf215546Sopenharmony_ci               so->base.target == PIPE_TEXTURE_2D ||
1687bf215546Sopenharmony_ci               so->base.target == PIPE_TEXTURE_2D_ARRAY);
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci        enum mali_texture_dimension type =
1690bf215546Sopenharmony_ci                panfrost_translate_texture_dimension(so->base.target);
1691bf215546Sopenharmony_ci
1692bf215546Sopenharmony_ci        bool is_buffer = (so->base.target == PIPE_BUFFER);
1693bf215546Sopenharmony_ci
1694bf215546Sopenharmony_ci        unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level;
1695bf215546Sopenharmony_ci        unsigned last_level = is_buffer ? 0 : so->base.u.tex.last_level;
1696bf215546Sopenharmony_ci        unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer;
1697bf215546Sopenharmony_ci        unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer;
1698bf215546Sopenharmony_ci        unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0;
1699bf215546Sopenharmony_ci        unsigned buf_size = (is_buffer ? so->base.u.buf.size : 0) /
1700bf215546Sopenharmony_ci                            util_format_get_blocksize(format);
1701bf215546Sopenharmony_ci
1702bf215546Sopenharmony_ci        if (so->base.target == PIPE_TEXTURE_3D) {
1703bf215546Sopenharmony_ci                first_layer /= prsrc->image.layout.depth;
1704bf215546Sopenharmony_ci                last_layer /= prsrc->image.layout.depth;
1705bf215546Sopenharmony_ci                assert(!first_layer && !last_layer);
1706bf215546Sopenharmony_ci        }
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_ci        struct pan_image_view iview = {
1709bf215546Sopenharmony_ci                .format = format,
1710bf215546Sopenharmony_ci                .dim = type,
1711bf215546Sopenharmony_ci                .first_level = first_level,
1712bf215546Sopenharmony_ci                .last_level = last_level,
1713bf215546Sopenharmony_ci                .first_layer = first_layer,
1714bf215546Sopenharmony_ci                .last_layer = last_layer,
1715bf215546Sopenharmony_ci                .swizzle = {
1716bf215546Sopenharmony_ci                        so->base.swizzle_r,
1717bf215546Sopenharmony_ci                        so->base.swizzle_g,
1718bf215546Sopenharmony_ci                        so->base.swizzle_b,
1719bf215546Sopenharmony_ci                        so->base.swizzle_a,
1720bf215546Sopenharmony_ci                },
1721bf215546Sopenharmony_ci                .image = &prsrc->image,
1722bf215546Sopenharmony_ci
1723bf215546Sopenharmony_ci                .buf.offset = buf_offset,
1724bf215546Sopenharmony_ci                .buf.size = buf_size,
1725bf215546Sopenharmony_ci        };
1726bf215546Sopenharmony_ci
1727bf215546Sopenharmony_ci        unsigned size =
1728bf215546Sopenharmony_ci                (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) +
1729bf215546Sopenharmony_ci                GENX(panfrost_estimate_texture_payload_size)(&iview);
1730bf215546Sopenharmony_ci
1731bf215546Sopenharmony_ci        struct panfrost_pool *pool = so->pool ?: &ctx->descs;
1732bf215546Sopenharmony_ci        struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64);
1733bf215546Sopenharmony_ci        so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu);
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci        void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu;
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_ci        if (PAN_ARCH <= 5) {
1738bf215546Sopenharmony_ci                payload.cpu += pan_size(TEXTURE);
1739bf215546Sopenharmony_ci                payload.gpu += pan_size(TEXTURE);
1740bf215546Sopenharmony_ci        }
1741bf215546Sopenharmony_ci
1742bf215546Sopenharmony_ci        GENX(panfrost_new_texture)(device, &iview, tex, &payload);
1743bf215546Sopenharmony_ci}
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_cistatic void
1746bf215546Sopenharmony_cipanfrost_update_sampler_view(struct panfrost_sampler_view *view,
1747bf215546Sopenharmony_ci                             struct pipe_context *pctx)
1748bf215546Sopenharmony_ci{
1749bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1750bf215546Sopenharmony_ci        if (view->texture_bo != rsrc->image.data.bo->ptr.gpu ||
1751bf215546Sopenharmony_ci            view->modifier != rsrc->image.layout.modifier) {
1752bf215546Sopenharmony_ci                panfrost_bo_unreference(view->state.bo);
1753bf215546Sopenharmony_ci                panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1754bf215546Sopenharmony_ci        }
1755bf215546Sopenharmony_ci}
1756bf215546Sopenharmony_ci
1757bf215546Sopenharmony_cistatic mali_ptr
1758bf215546Sopenharmony_cipanfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1759bf215546Sopenharmony_ci                                  enum pipe_shader_type stage)
1760bf215546Sopenharmony_ci{
1761bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1762bf215546Sopenharmony_ci
1763bf215546Sopenharmony_ci        if (!ctx->sampler_view_count[stage])
1764bf215546Sopenharmony_ci                return 0;
1765bf215546Sopenharmony_ci
1766bf215546Sopenharmony_ci#if PAN_ARCH >= 6
1767bf215546Sopenharmony_ci        struct panfrost_ptr T =
1768bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base,
1769bf215546Sopenharmony_ci                                          ctx->sampler_view_count[stage],
1770bf215546Sopenharmony_ci                                          TEXTURE);
1771bf215546Sopenharmony_ci        struct mali_texture_packed *out =
1772bf215546Sopenharmony_ci                (struct mali_texture_packed *) T.cpu;
1773bf215546Sopenharmony_ci
1774bf215546Sopenharmony_ci        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1775bf215546Sopenharmony_ci                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci                if (!view) {
1778bf215546Sopenharmony_ci                        memset(&out[i], 0, sizeof(out[i]));
1779bf215546Sopenharmony_ci                        continue;
1780bf215546Sopenharmony_ci                }
1781bf215546Sopenharmony_ci
1782bf215546Sopenharmony_ci                struct pipe_sampler_view *pview = &view->base;
1783bf215546Sopenharmony_ci                struct panfrost_resource *rsrc = pan_resource(pview->texture);
1784bf215546Sopenharmony_ci
1785bf215546Sopenharmony_ci                panfrost_update_sampler_view(view, &ctx->base);
1786bf215546Sopenharmony_ci                out[i] = view->bifrost_descriptor;
1787bf215546Sopenharmony_ci
1788bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, stage);
1789bf215546Sopenharmony_ci                panfrost_batch_add_bo(batch, view->state.bo, stage);
1790bf215546Sopenharmony_ci        }
1791bf215546Sopenharmony_ci
1792bf215546Sopenharmony_ci        return T.gpu;
1793bf215546Sopenharmony_ci#else
1794bf215546Sopenharmony_ci        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_ci        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1797bf215546Sopenharmony_ci                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1798bf215546Sopenharmony_ci
1799bf215546Sopenharmony_ci                if (!view) {
1800bf215546Sopenharmony_ci                        trampolines[i] = 0;
1801bf215546Sopenharmony_ci                        continue;
1802bf215546Sopenharmony_ci                }
1803bf215546Sopenharmony_ci
1804bf215546Sopenharmony_ci                panfrost_update_sampler_view(view, &ctx->base);
1805bf215546Sopenharmony_ci
1806bf215546Sopenharmony_ci                trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1807bf215546Sopenharmony_ci        }
1808bf215546Sopenharmony_ci
1809bf215546Sopenharmony_ci        return pan_pool_upload_aligned(&batch->pool.base, trampolines,
1810bf215546Sopenharmony_ci                                       sizeof(uint64_t) *
1811bf215546Sopenharmony_ci                                       ctx->sampler_view_count[stage],
1812bf215546Sopenharmony_ci                                       sizeof(uint64_t));
1813bf215546Sopenharmony_ci#endif
1814bf215546Sopenharmony_ci}
1815bf215546Sopenharmony_ci
1816bf215546Sopenharmony_cistatic mali_ptr
1817bf215546Sopenharmony_cipanfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1818bf215546Sopenharmony_ci                                  enum pipe_shader_type stage)
1819bf215546Sopenharmony_ci{
1820bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1821bf215546Sopenharmony_ci
1822bf215546Sopenharmony_ci        if (!ctx->sampler_count[stage])
1823bf215546Sopenharmony_ci                return 0;
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci        struct panfrost_ptr T =
1826bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base,
1827bf215546Sopenharmony_ci                                          ctx->sampler_count[stage],
1828bf215546Sopenharmony_ci                                          SAMPLER);
1829bf215546Sopenharmony_ci        struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu;
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci        for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) {
1832bf215546Sopenharmony_ci                struct panfrost_sampler_state *st = ctx->samplers[stage][i];
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci                out[i] = st ? st->hw : (struct mali_sampler_packed){0};
1835bf215546Sopenharmony_ci        }
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ci        return T.gpu;
1838bf215546Sopenharmony_ci}
1839bf215546Sopenharmony_ci
1840bf215546Sopenharmony_ci#if PAN_ARCH <= 7
1841bf215546Sopenharmony_ci/* Packs all image attribute descs and attribute buffer descs.
1842bf215546Sopenharmony_ci * `first_image_buf_index` must be the index of the first image attribute buffer descriptor.
1843bf215546Sopenharmony_ci */
1844bf215546Sopenharmony_cistatic void
1845bf215546Sopenharmony_ciemit_image_attribs(struct panfrost_context *ctx, enum pipe_shader_type shader,
1846bf215546Sopenharmony_ci                   struct mali_attribute_packed *attribs, unsigned first_buf)
1847bf215546Sopenharmony_ci{
1848bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(ctx->base.screen);
1849bf215546Sopenharmony_ci        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
1850bf215546Sopenharmony_ci
1851bf215546Sopenharmony_ci        for (unsigned i = 0; i < last_bit; ++i) {
1852bf215546Sopenharmony_ci                enum pipe_format format = ctx->images[shader][i].format;
1853bf215546Sopenharmony_ci
1854bf215546Sopenharmony_ci                pan_pack(attribs + i, ATTRIBUTE, cfg) {
1855bf215546Sopenharmony_ci                        /* Continuation record means 2 buffers per image */
1856bf215546Sopenharmony_ci                        cfg.buffer_index = first_buf + (i * 2);
1857bf215546Sopenharmony_ci                        cfg.offset_enable = (PAN_ARCH <= 5);
1858bf215546Sopenharmony_ci                        cfg.format = dev->formats[format].hw;
1859bf215546Sopenharmony_ci                }
1860bf215546Sopenharmony_ci        }
1861bf215546Sopenharmony_ci}
1862bf215546Sopenharmony_ci
1863bf215546Sopenharmony_cistatic enum mali_attribute_type
1864bf215546Sopenharmony_cipan_modifier_to_attr_type(uint64_t modifier)
1865bf215546Sopenharmony_ci{
1866bf215546Sopenharmony_ci        switch (modifier) {
1867bf215546Sopenharmony_ci        case DRM_FORMAT_MOD_LINEAR:
1868bf215546Sopenharmony_ci                return MALI_ATTRIBUTE_TYPE_3D_LINEAR;
1869bf215546Sopenharmony_ci        case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
1870bf215546Sopenharmony_ci                return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED;
1871bf215546Sopenharmony_ci        default:
1872bf215546Sopenharmony_ci                unreachable("Invalid modifier for attribute record");
1873bf215546Sopenharmony_ci        }
1874bf215546Sopenharmony_ci}
1875bf215546Sopenharmony_ci
1876bf215546Sopenharmony_cistatic void
1877bf215546Sopenharmony_ciemit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader,
1878bf215546Sopenharmony_ci                struct mali_attribute_buffer_packed *bufs,
1879bf215546Sopenharmony_ci                unsigned first_image_buf_index)
1880bf215546Sopenharmony_ci{
1881bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1882bf215546Sopenharmony_ci        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
1883bf215546Sopenharmony_ci
1884bf215546Sopenharmony_ci        for (unsigned i = 0; i < last_bit; ++i) {
1885bf215546Sopenharmony_ci                struct pipe_image_view *image = &ctx->images[shader][i];
1886bf215546Sopenharmony_ci
1887bf215546Sopenharmony_ci                if (!(ctx->image_mask[shader] & (1 << i)) ||
1888bf215546Sopenharmony_ci                    !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) {
1889bf215546Sopenharmony_ci                        /* Unused image bindings */
1890bf215546Sopenharmony_ci                        pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg);
1891bf215546Sopenharmony_ci                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg);
1892bf215546Sopenharmony_ci                        continue;
1893bf215546Sopenharmony_ci                }
1894bf215546Sopenharmony_ci
1895bf215546Sopenharmony_ci                struct panfrost_resource *rsrc = pan_resource(image->resource);
1896bf215546Sopenharmony_ci
1897bf215546Sopenharmony_ci                /* TODO: MSAA */
1898bf215546Sopenharmony_ci                assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported");
1899bf215546Sopenharmony_ci
1900bf215546Sopenharmony_ci                bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D;
1901bf215546Sopenharmony_ci                bool is_buffer = rsrc->base.target == PIPE_BUFFER;
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci                unsigned offset = is_buffer ? image->u.buf.offset :
1904bf215546Sopenharmony_ci                        panfrost_texture_offset(&rsrc->image.layout,
1905bf215546Sopenharmony_ci                                                image->u.tex.level,
1906bf215546Sopenharmony_ci                                                is_3d ? 0 : image->u.tex.first_layer,
1907bf215546Sopenharmony_ci                                                is_3d ? image->u.tex.first_layer : 0);
1908bf215546Sopenharmony_ci
1909bf215546Sopenharmony_ci                panfrost_track_image_access(batch, shader, image);
1910bf215546Sopenharmony_ci
1911bf215546Sopenharmony_ci                pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) {
1912bf215546Sopenharmony_ci                        cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier);
1913bf215546Sopenharmony_ci                        cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset;
1914bf215546Sopenharmony_ci                        cfg.stride = util_format_get_blocksize(image->format);
1915bf215546Sopenharmony_ci                        cfg.size = rsrc->image.data.bo->size - offset;
1916bf215546Sopenharmony_ci                }
1917bf215546Sopenharmony_ci
1918bf215546Sopenharmony_ci                if (is_buffer) {
1919bf215546Sopenharmony_ci                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
1920bf215546Sopenharmony_ci                                cfg.s_dimension = rsrc->base.width0 /
1921bf215546Sopenharmony_ci                                        util_format_get_blocksize(image->format);
1922bf215546Sopenharmony_ci                                cfg.t_dimension = cfg.r_dimension = 1;
1923bf215546Sopenharmony_ci                        }
1924bf215546Sopenharmony_ci
1925bf215546Sopenharmony_ci                        continue;
1926bf215546Sopenharmony_ci                }
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci                pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
1929bf215546Sopenharmony_ci                        unsigned level = image->u.tex.level;
1930bf215546Sopenharmony_ci
1931bf215546Sopenharmony_ci                        cfg.s_dimension = u_minify(rsrc->base.width0, level);
1932bf215546Sopenharmony_ci                        cfg.t_dimension = u_minify(rsrc->base.height0, level);
1933bf215546Sopenharmony_ci                        cfg.r_dimension = is_3d ?
1934bf215546Sopenharmony_ci                                u_minify(rsrc->base.depth0, level) :
1935bf215546Sopenharmony_ci                                image->u.tex.last_layer - image->u.tex.first_layer + 1;
1936bf215546Sopenharmony_ci
1937bf215546Sopenharmony_ci                        cfg.row_stride =
1938bf215546Sopenharmony_ci                                rsrc->image.layout.slices[level].row_stride;
1939bf215546Sopenharmony_ci
1940bf215546Sopenharmony_ci                        if (rsrc->base.target != PIPE_TEXTURE_2D) {
1941bf215546Sopenharmony_ci                                cfg.slice_stride =
1942bf215546Sopenharmony_ci                                        panfrost_get_layer_stride(&rsrc->image.layout,
1943bf215546Sopenharmony_ci                                                                  level);
1944bf215546Sopenharmony_ci                        }
1945bf215546Sopenharmony_ci                }
1946bf215546Sopenharmony_ci        }
1947bf215546Sopenharmony_ci}
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_cistatic mali_ptr
1950bf215546Sopenharmony_cipanfrost_emit_image_attribs(struct panfrost_batch *batch,
1951bf215546Sopenharmony_ci                            mali_ptr *buffers,
1952bf215546Sopenharmony_ci                            enum pipe_shader_type type)
1953bf215546Sopenharmony_ci{
1954bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1955bf215546Sopenharmony_ci        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
1956bf215546Sopenharmony_ci
1957bf215546Sopenharmony_ci        if (!shader->info.attribute_count) {
1958bf215546Sopenharmony_ci                *buffers = 0;
1959bf215546Sopenharmony_ci                return 0;
1960bf215546Sopenharmony_ci        }
1961bf215546Sopenharmony_ci
1962bf215546Sopenharmony_ci        /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */
1963bf215546Sopenharmony_ci        unsigned attr_count = shader->info.attribute_count;
1964bf215546Sopenharmony_ci        unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0);
1965bf215546Sopenharmony_ci
1966bf215546Sopenharmony_ci        struct panfrost_ptr bufs =
1967bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER);
1968bf215546Sopenharmony_ci
1969bf215546Sopenharmony_ci        struct panfrost_ptr attribs =
1970bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE);
1971bf215546Sopenharmony_ci
1972bf215546Sopenharmony_ci        emit_image_attribs(ctx, type, attribs.cpu, 0);
1973bf215546Sopenharmony_ci        emit_image_bufs(batch, type, bufs.cpu, 0);
1974bf215546Sopenharmony_ci
1975bf215546Sopenharmony_ci        /* We need an empty attrib buf to stop the prefetching on Bifrost */
1976bf215546Sopenharmony_ci#if PAN_ARCH >= 6
1977bf215546Sopenharmony_ci        pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)),
1978bf215546Sopenharmony_ci                 ATTRIBUTE_BUFFER, cfg);
1979bf215546Sopenharmony_ci#endif
1980bf215546Sopenharmony_ci
1981bf215546Sopenharmony_ci        *buffers = bufs.gpu;
1982bf215546Sopenharmony_ci        return attribs.gpu;
1983bf215546Sopenharmony_ci}
1984bf215546Sopenharmony_ci
1985bf215546Sopenharmony_cistatic mali_ptr
1986bf215546Sopenharmony_cipanfrost_emit_vertex_data(struct panfrost_batch *batch,
1987bf215546Sopenharmony_ci                          mali_ptr *buffers)
1988bf215546Sopenharmony_ci{
1989bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
1990bf215546Sopenharmony_ci        struct panfrost_vertex_state *so = ctx->vertex;
1991bf215546Sopenharmony_ci        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1992bf215546Sopenharmony_ci        bool instanced = ctx->indirect_draw || ctx->instance_count > 1;
1993bf215546Sopenharmony_ci        uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX];
1994bf215546Sopenharmony_ci        unsigned nr_images = util_last_bit(image_mask);
1995bf215546Sopenharmony_ci
1996bf215546Sopenharmony_ci        /* Worst case: everything is NPOT, which is only possible if instancing
1997bf215546Sopenharmony_ci         * is enabled. Otherwise single record is gauranteed.
1998bf215546Sopenharmony_ci         * Also, we allocate more memory than what's needed here if either instancing
1999bf215546Sopenharmony_ci         * is enabled or images are present, this can be improved. */
2000bf215546Sopenharmony_ci        unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1;
2001bf215546Sopenharmony_ci        unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) +
2002bf215546Sopenharmony_ci                           (PAN_ARCH >= 6 ? 1 : 0);
2003bf215546Sopenharmony_ci
2004bf215546Sopenharmony_ci        unsigned count = vs->info.attribute_count;
2005bf215546Sopenharmony_ci
2006bf215546Sopenharmony_ci        if (vs->xfb)
2007bf215546Sopenharmony_ci                count = MAX2(count, vs->xfb->info.attribute_count);
2008bf215546Sopenharmony_ci
2009bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2010bf215546Sopenharmony_ci        /* Midgard needs vertexid/instanceid handled specially */
2011bf215546Sopenharmony_ci        bool special_vbufs = count >= PAN_VERTEX_ID;
2012bf215546Sopenharmony_ci
2013bf215546Sopenharmony_ci        if (special_vbufs)
2014bf215546Sopenharmony_ci                nr_bufs += 2;
2015bf215546Sopenharmony_ci#endif
2016bf215546Sopenharmony_ci
2017bf215546Sopenharmony_ci        if (!nr_bufs) {
2018bf215546Sopenharmony_ci                *buffers = 0;
2019bf215546Sopenharmony_ci                return 0;
2020bf215546Sopenharmony_ci        }
2021bf215546Sopenharmony_ci
2022bf215546Sopenharmony_ci        struct panfrost_ptr S =
2023bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs,
2024bf215546Sopenharmony_ci                                          ATTRIBUTE_BUFFER);
2025bf215546Sopenharmony_ci        struct panfrost_ptr T =
2026bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base, count,
2027bf215546Sopenharmony_ci                                          ATTRIBUTE);
2028bf215546Sopenharmony_ci
2029bf215546Sopenharmony_ci        struct mali_attribute_buffer_packed *bufs =
2030bf215546Sopenharmony_ci                (struct mali_attribute_buffer_packed *) S.cpu;
2031bf215546Sopenharmony_ci
2032bf215546Sopenharmony_ci        struct mali_attribute_packed *out =
2033bf215546Sopenharmony_ci                (struct mali_attribute_packed *) T.cpu;
2034bf215546Sopenharmony_ci
2035bf215546Sopenharmony_ci        unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 };
2036bf215546Sopenharmony_ci        unsigned k = 0;
2037bf215546Sopenharmony_ci
2038bf215546Sopenharmony_ci        for (unsigned i = 0; i < so->nr_bufs; ++i) {
2039bf215546Sopenharmony_ci                unsigned vbi = so->buffers[i].vbi;
2040bf215546Sopenharmony_ci                unsigned divisor = so->buffers[i].divisor;
2041bf215546Sopenharmony_ci                attrib_to_buffer[i] = k;
2042bf215546Sopenharmony_ci
2043bf215546Sopenharmony_ci                if (!(ctx->vb_mask & (1 << vbi)))
2044bf215546Sopenharmony_ci                        continue;
2045bf215546Sopenharmony_ci
2046bf215546Sopenharmony_ci                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
2047bf215546Sopenharmony_ci                struct panfrost_resource *rsrc;
2048bf215546Sopenharmony_ci
2049bf215546Sopenharmony_ci                rsrc = pan_resource(buf->buffer.resource);
2050bf215546Sopenharmony_ci                if (!rsrc)
2051bf215546Sopenharmony_ci                        continue;
2052bf215546Sopenharmony_ci
2053bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
2054bf215546Sopenharmony_ci
2055bf215546Sopenharmony_ci                /* Mask off lower bits, see offset fixup below */
2056bf215546Sopenharmony_ci                mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset;
2057bf215546Sopenharmony_ci                mali_ptr addr = raw_addr & ~63;
2058bf215546Sopenharmony_ci
2059bf215546Sopenharmony_ci                /* Since we advanced the base pointer, we shrink the buffer
2060bf215546Sopenharmony_ci                 * size, but add the offset we subtracted */
2061bf215546Sopenharmony_ci                unsigned size = rsrc->base.width0 + (raw_addr - addr)
2062bf215546Sopenharmony_ci                        - buf->buffer_offset;
2063bf215546Sopenharmony_ci
2064bf215546Sopenharmony_ci                /* When there is a divisor, the hardware-level divisor is
2065bf215546Sopenharmony_ci                 * the product of the instance divisor and the padded count */
2066bf215546Sopenharmony_ci                unsigned stride = buf->stride;
2067bf215546Sopenharmony_ci
2068bf215546Sopenharmony_ci                if (ctx->indirect_draw) {
2069bf215546Sopenharmony_ci                        /* We allocated 2 records for each attribute buffer */
2070bf215546Sopenharmony_ci                        assert((k & 1) == 0);
2071bf215546Sopenharmony_ci
2072bf215546Sopenharmony_ci                        /* With indirect draws we can't guess the vertex_count.
2073bf215546Sopenharmony_ci                         * Pre-set the address, stride and size fields, the
2074bf215546Sopenharmony_ci                         * compute shader do the rest.
2075bf215546Sopenharmony_ci                         */
2076bf215546Sopenharmony_ci                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2077bf215546Sopenharmony_ci                                cfg.type = MALI_ATTRIBUTE_TYPE_1D;
2078bf215546Sopenharmony_ci                                cfg.pointer = addr;
2079bf215546Sopenharmony_ci                                cfg.stride = stride;
2080bf215546Sopenharmony_ci                                cfg.size = size;
2081bf215546Sopenharmony_ci                        }
2082bf215546Sopenharmony_ci
2083bf215546Sopenharmony_ci                        /* We store the unmodified divisor in the continuation
2084bf215546Sopenharmony_ci                         * slot so the compute shader can retrieve it.
2085bf215546Sopenharmony_ci                         */
2086bf215546Sopenharmony_ci                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
2087bf215546Sopenharmony_ci                                cfg.divisor = divisor;
2088bf215546Sopenharmony_ci                        }
2089bf215546Sopenharmony_ci
2090bf215546Sopenharmony_ci                        k += 2;
2091bf215546Sopenharmony_ci                        continue;
2092bf215546Sopenharmony_ci                }
2093bf215546Sopenharmony_ci
2094bf215546Sopenharmony_ci                unsigned hw_divisor = ctx->padded_count * divisor;
2095bf215546Sopenharmony_ci
2096bf215546Sopenharmony_ci                if (ctx->instance_count <= 1) {
2097bf215546Sopenharmony_ci                        /* Per-instance would be every attribute equal */
2098bf215546Sopenharmony_ci                        if (divisor)
2099bf215546Sopenharmony_ci                                stride = 0;
2100bf215546Sopenharmony_ci
2101bf215546Sopenharmony_ci                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2102bf215546Sopenharmony_ci                                cfg.pointer = addr;
2103bf215546Sopenharmony_ci                                cfg.stride = stride;
2104bf215546Sopenharmony_ci                                cfg.size = size;
2105bf215546Sopenharmony_ci                        }
2106bf215546Sopenharmony_ci                } else if (!divisor) {
2107bf215546Sopenharmony_ci                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2108bf215546Sopenharmony_ci                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
2109bf215546Sopenharmony_ci                                cfg.pointer = addr;
2110bf215546Sopenharmony_ci                                cfg.stride = stride;
2111bf215546Sopenharmony_ci                                cfg.size = size;
2112bf215546Sopenharmony_ci                                cfg.divisor = ctx->padded_count;
2113bf215546Sopenharmony_ci                        }
2114bf215546Sopenharmony_ci                } else if (util_is_power_of_two_or_zero(hw_divisor)) {
2115bf215546Sopenharmony_ci                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2116bf215546Sopenharmony_ci                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
2117bf215546Sopenharmony_ci                                cfg.pointer = addr;
2118bf215546Sopenharmony_ci                                cfg.stride = stride;
2119bf215546Sopenharmony_ci                                cfg.size = size;
2120bf215546Sopenharmony_ci                                cfg.divisor_r = __builtin_ctz(hw_divisor);
2121bf215546Sopenharmony_ci                        }
2122bf215546Sopenharmony_ci
2123bf215546Sopenharmony_ci                } else {
2124bf215546Sopenharmony_ci                        unsigned shift = 0, extra_flags = 0;
2125bf215546Sopenharmony_ci
2126bf215546Sopenharmony_ci                        unsigned magic_divisor =
2127bf215546Sopenharmony_ci                                panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);
2128bf215546Sopenharmony_ci
2129bf215546Sopenharmony_ci                        /* Records with continuations must be aligned */
2130bf215546Sopenharmony_ci                        k = ALIGN_POT(k, 2);
2131bf215546Sopenharmony_ci                        attrib_to_buffer[i] = k;
2132bf215546Sopenharmony_ci
2133bf215546Sopenharmony_ci                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2134bf215546Sopenharmony_ci                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
2135bf215546Sopenharmony_ci                                cfg.pointer = addr;
2136bf215546Sopenharmony_ci                                cfg.stride = stride;
2137bf215546Sopenharmony_ci                                cfg.size = size;
2138bf215546Sopenharmony_ci
2139bf215546Sopenharmony_ci                                cfg.divisor_r = shift;
2140bf215546Sopenharmony_ci                                cfg.divisor_e = extra_flags;
2141bf215546Sopenharmony_ci                        }
2142bf215546Sopenharmony_ci
2143bf215546Sopenharmony_ci                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
2144bf215546Sopenharmony_ci                                cfg.divisor_numerator = magic_divisor;
2145bf215546Sopenharmony_ci                                cfg.divisor = divisor;
2146bf215546Sopenharmony_ci                        }
2147bf215546Sopenharmony_ci
2148bf215546Sopenharmony_ci                        ++k;
2149bf215546Sopenharmony_ci                }
2150bf215546Sopenharmony_ci
2151bf215546Sopenharmony_ci                ++k;
2152bf215546Sopenharmony_ci        }
2153bf215546Sopenharmony_ci
2154bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2155bf215546Sopenharmony_ci        /* Add special gl_VertexID/gl_InstanceID buffers */
2156bf215546Sopenharmony_ci        if (special_vbufs) {
2157bf215546Sopenharmony_ci                panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
2158bf215546Sopenharmony_ci
2159bf215546Sopenharmony_ci                pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
2160bf215546Sopenharmony_ci                        cfg.buffer_index = k++;
2161bf215546Sopenharmony_ci                        cfg.format = so->formats[PAN_VERTEX_ID];
2162bf215546Sopenharmony_ci                }
2163bf215546Sopenharmony_ci
2164bf215546Sopenharmony_ci                panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
2165bf215546Sopenharmony_ci
2166bf215546Sopenharmony_ci                pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
2167bf215546Sopenharmony_ci                        cfg.buffer_index = k++;
2168bf215546Sopenharmony_ci                        cfg.format = so->formats[PAN_INSTANCE_ID];
2169bf215546Sopenharmony_ci                }
2170bf215546Sopenharmony_ci        }
2171bf215546Sopenharmony_ci#endif
2172bf215546Sopenharmony_ci
2173bf215546Sopenharmony_ci        if (nr_images) {
2174bf215546Sopenharmony_ci                k = ALIGN_POT(k, 2);
2175bf215546Sopenharmony_ci                emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k);
2176bf215546Sopenharmony_ci                emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k);
2177bf215546Sopenharmony_ci                k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2);
2178bf215546Sopenharmony_ci        }
2179bf215546Sopenharmony_ci
2180bf215546Sopenharmony_ci#if PAN_ARCH >= 6
2181bf215546Sopenharmony_ci        /* We need an empty attrib buf to stop the prefetching on Bifrost */
2182bf215546Sopenharmony_ci        pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg);
2183bf215546Sopenharmony_ci#endif
2184bf215546Sopenharmony_ci
2185bf215546Sopenharmony_ci        /* Attribute addresses require 64-byte alignment, so let:
2186bf215546Sopenharmony_ci         *
2187bf215546Sopenharmony_ci         *      base' = base & ~63 = base - (base & 63)
2188bf215546Sopenharmony_ci         *      offset' = offset + (base & 63)
2189bf215546Sopenharmony_ci         *
2190bf215546Sopenharmony_ci         * Since base' + offset' = base + offset, these are equivalent
2191bf215546Sopenharmony_ci         * addressing modes and now base is 64 aligned.
2192bf215546Sopenharmony_ci         */
2193bf215546Sopenharmony_ci
2194bf215546Sopenharmony_ci        /* While these are usually equal, they are not required to be. In some
2195bf215546Sopenharmony_ci         * cases, u_blitter passes too high a value for num_elements.
2196bf215546Sopenharmony_ci         */
2197bf215546Sopenharmony_ci        assert(vs->info.attributes_read_count <= so->num_elements);
2198bf215546Sopenharmony_ci
2199bf215546Sopenharmony_ci        for (unsigned i = 0; i < vs->info.attributes_read_count; ++i) {
2200bf215546Sopenharmony_ci                unsigned vbi = so->pipe[i].vertex_buffer_index;
2201bf215546Sopenharmony_ci                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
2202bf215546Sopenharmony_ci
2203bf215546Sopenharmony_ci                /* BOs are aligned; just fixup for buffer_offset */
2204bf215546Sopenharmony_ci                signed src_offset = so->pipe[i].src_offset;
2205bf215546Sopenharmony_ci                src_offset += (buf->buffer_offset & 63);
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_ci                /* Base instance offset */
2208bf215546Sopenharmony_ci                if (ctx->base_instance && so->pipe[i].instance_divisor) {
2209bf215546Sopenharmony_ci                        src_offset += (ctx->base_instance * buf->stride) /
2210bf215546Sopenharmony_ci                                      so->pipe[i].instance_divisor;
2211bf215546Sopenharmony_ci                }
2212bf215546Sopenharmony_ci
2213bf215546Sopenharmony_ci                /* Also, somewhat obscurely per-instance data needs to be
2214bf215546Sopenharmony_ci                 * offset in response to a delayed start in an indexed draw */
2215bf215546Sopenharmony_ci
2216bf215546Sopenharmony_ci                if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
2217bf215546Sopenharmony_ci                        src_offset -= buf->stride * ctx->offset_start;
2218bf215546Sopenharmony_ci
2219bf215546Sopenharmony_ci                pan_pack(out + i, ATTRIBUTE, cfg) {
2220bf215546Sopenharmony_ci                        cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]];
2221bf215546Sopenharmony_ci                        cfg.format = so->formats[i];
2222bf215546Sopenharmony_ci                        cfg.offset = src_offset;
2223bf215546Sopenharmony_ci                }
2224bf215546Sopenharmony_ci        }
2225bf215546Sopenharmony_ci
2226bf215546Sopenharmony_ci        *buffers = S.gpu;
2227bf215546Sopenharmony_ci        return T.gpu;
2228bf215546Sopenharmony_ci}
2229bf215546Sopenharmony_ci
2230bf215546Sopenharmony_cistatic mali_ptr
2231bf215546Sopenharmony_cipanfrost_emit_varyings(struct panfrost_batch *batch,
2232bf215546Sopenharmony_ci                struct mali_attribute_buffer_packed *slot,
2233bf215546Sopenharmony_ci                unsigned stride, unsigned count)
2234bf215546Sopenharmony_ci{
2235bf215546Sopenharmony_ci        unsigned size = stride * count;
2236bf215546Sopenharmony_ci        mali_ptr ptr =
2237bf215546Sopenharmony_ci                batch->ctx->indirect_draw ? 0 :
2238bf215546Sopenharmony_ci                pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu;
2239bf215546Sopenharmony_ci
2240bf215546Sopenharmony_ci        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
2241bf215546Sopenharmony_ci                cfg.stride = stride;
2242bf215546Sopenharmony_ci                cfg.size = size;
2243bf215546Sopenharmony_ci                cfg.pointer = ptr;
2244bf215546Sopenharmony_ci        }
2245bf215546Sopenharmony_ci
2246bf215546Sopenharmony_ci        return ptr;
2247bf215546Sopenharmony_ci}
2248bf215546Sopenharmony_ci
2249bf215546Sopenharmony_ci/* Given a varying, figure out which index it corresponds to */
2250bf215546Sopenharmony_ci
2251bf215546Sopenharmony_cistatic inline unsigned
2252bf215546Sopenharmony_cipan_varying_index(unsigned present, enum pan_special_varying v)
2253bf215546Sopenharmony_ci{
2254bf215546Sopenharmony_ci        return util_bitcount(present & BITFIELD_MASK(v));
2255bf215546Sopenharmony_ci}
2256bf215546Sopenharmony_ci
2257bf215546Sopenharmony_ci/* Determines which varying buffers are required */
2258bf215546Sopenharmony_ci
2259bf215546Sopenharmony_cistatic inline unsigned
2260bf215546Sopenharmony_cipan_varying_present(const struct panfrost_device *dev,
2261bf215546Sopenharmony_ci                    struct pan_shader_info *producer,
2262bf215546Sopenharmony_ci                    struct pan_shader_info *consumer,
2263bf215546Sopenharmony_ci                    uint16_t point_coord_mask)
2264bf215546Sopenharmony_ci{
2265bf215546Sopenharmony_ci        /* At the moment we always emit general and position buffers. Not
2266bf215546Sopenharmony_ci         * strictly necessary but usually harmless */
2267bf215546Sopenharmony_ci
2268bf215546Sopenharmony_ci        unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION);
2269bf215546Sopenharmony_ci
2270bf215546Sopenharmony_ci        /* Enable special buffers by the shader info */
2271bf215546Sopenharmony_ci
2272bf215546Sopenharmony_ci        if (producer->vs.writes_point_size)
2273bf215546Sopenharmony_ci                present |= BITFIELD_BIT(PAN_VARY_PSIZ);
2274bf215546Sopenharmony_ci
2275bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2276bf215546Sopenharmony_ci        /* On Midgard, these exist as real varyings. Later architectures use
2277bf215546Sopenharmony_ci         * LD_VAR_SPECIAL reads instead. */
2278bf215546Sopenharmony_ci
2279bf215546Sopenharmony_ci        if (consumer->fs.reads_point_coord)
2280bf215546Sopenharmony_ci                present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
2281bf215546Sopenharmony_ci
2282bf215546Sopenharmony_ci        if (consumer->fs.reads_face)
2283bf215546Sopenharmony_ci                present |= BITFIELD_BIT(PAN_VARY_FACE);
2284bf215546Sopenharmony_ci
2285bf215546Sopenharmony_ci        if (consumer->fs.reads_frag_coord)
2286bf215546Sopenharmony_ci                present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD);
2287bf215546Sopenharmony_ci
2288bf215546Sopenharmony_ci        /* Also, if we have a point sprite, we need a point coord buffer */
2289bf215546Sopenharmony_ci
2290bf215546Sopenharmony_ci        for (unsigned i = 0; i < consumer->varyings.input_count; i++)  {
2291bf215546Sopenharmony_ci                gl_varying_slot loc = consumer->varyings.input[i].location;
2292bf215546Sopenharmony_ci
2293bf215546Sopenharmony_ci                if (util_varying_is_point_coord(loc, point_coord_mask))
2294bf215546Sopenharmony_ci                        present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
2295bf215546Sopenharmony_ci        }
2296bf215546Sopenharmony_ci#endif
2297bf215546Sopenharmony_ci
2298bf215546Sopenharmony_ci        return present;
2299bf215546Sopenharmony_ci}
2300bf215546Sopenharmony_ci
2301bf215546Sopenharmony_ci/* Emitters for varying records */
2302bf215546Sopenharmony_ci
2303bf215546Sopenharmony_cistatic void
2304bf215546Sopenharmony_cipan_emit_vary(const struct panfrost_device *dev,
2305bf215546Sopenharmony_ci              struct mali_attribute_packed *out,
2306bf215546Sopenharmony_ci              unsigned buffer_index,
2307bf215546Sopenharmony_ci              mali_pixel_format format, unsigned offset)
2308bf215546Sopenharmony_ci{
2309bf215546Sopenharmony_ci        pan_pack(out, ATTRIBUTE, cfg) {
2310bf215546Sopenharmony_ci                cfg.buffer_index = buffer_index;
2311bf215546Sopenharmony_ci                cfg.offset_enable = (PAN_ARCH <= 5);
2312bf215546Sopenharmony_ci                cfg.format = format;
2313bf215546Sopenharmony_ci                cfg.offset = offset;
2314bf215546Sopenharmony_ci        }
2315bf215546Sopenharmony_ci}
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci/* Special records */
2318bf215546Sopenharmony_ci
2319bf215546Sopenharmony_cistatic const struct {
2320bf215546Sopenharmony_ci       unsigned components;
2321bf215546Sopenharmony_ci       enum mali_format format;
2322bf215546Sopenharmony_ci} pan_varying_formats[PAN_VARY_MAX] = {
2323bf215546Sopenharmony_ci        [PAN_VARY_POSITION]     = { 4, MALI_SNAP_4 },
2324bf215546Sopenharmony_ci        [PAN_VARY_PSIZ]         = { 1, MALI_R16F },
2325bf215546Sopenharmony_ci        [PAN_VARY_PNTCOORD]     = { 1, MALI_R16F },
2326bf215546Sopenharmony_ci        [PAN_VARY_FACE]         = { 1, MALI_R32I },
2327bf215546Sopenharmony_ci        [PAN_VARY_FRAGCOORD]    = { 4, MALI_RGBA32F },
2328bf215546Sopenharmony_ci};
2329bf215546Sopenharmony_ci
2330bf215546Sopenharmony_cistatic mali_pixel_format
2331bf215546Sopenharmony_cipan_special_format(const struct panfrost_device *dev,
2332bf215546Sopenharmony_ci                enum pan_special_varying buf)
2333bf215546Sopenharmony_ci{
2334bf215546Sopenharmony_ci        assert(buf < PAN_VARY_MAX);
2335bf215546Sopenharmony_ci        mali_pixel_format format = (pan_varying_formats[buf].format << 12);
2336bf215546Sopenharmony_ci
2337bf215546Sopenharmony_ci#if PAN_ARCH <= 6
2338bf215546Sopenharmony_ci        unsigned nr = pan_varying_formats[buf].components;
2339bf215546Sopenharmony_ci        format |= panfrost_get_default_swizzle(nr);
2340bf215546Sopenharmony_ci#endif
2341bf215546Sopenharmony_ci
2342bf215546Sopenharmony_ci        return format;
2343bf215546Sopenharmony_ci}
2344bf215546Sopenharmony_ci
2345bf215546Sopenharmony_cistatic void
2346bf215546Sopenharmony_cipan_emit_vary_special(const struct panfrost_device *dev,
2347bf215546Sopenharmony_ci                      struct mali_attribute_packed *out,
2348bf215546Sopenharmony_ci                      unsigned present, enum pan_special_varying buf)
2349bf215546Sopenharmony_ci{
2350bf215546Sopenharmony_ci        pan_emit_vary(dev, out, pan_varying_index(present, buf),
2351bf215546Sopenharmony_ci                        pan_special_format(dev, buf), 0);
2352bf215546Sopenharmony_ci}
2353bf215546Sopenharmony_ci
2354bf215546Sopenharmony_ci/* Negative indicates a varying is not found */
2355bf215546Sopenharmony_ci
2356bf215546Sopenharmony_cistatic signed
2357bf215546Sopenharmony_cipan_find_vary(const struct pan_shader_varying *vary,
2358bf215546Sopenharmony_ci                unsigned vary_count, unsigned loc)
2359bf215546Sopenharmony_ci{
2360bf215546Sopenharmony_ci        for (unsigned i = 0; i < vary_count; ++i) {
2361bf215546Sopenharmony_ci                if (vary[i].location == loc)
2362bf215546Sopenharmony_ci                        return i;
2363bf215546Sopenharmony_ci        }
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci        return -1;
2366bf215546Sopenharmony_ci}
2367bf215546Sopenharmony_ci
2368bf215546Sopenharmony_ci/* Assign varying locations for the general buffer. Returns the calculated
2369bf215546Sopenharmony_ci * per-vertex stride, and outputs offsets into the passed array. Negative
2370bf215546Sopenharmony_ci * offset indicates a varying is not used. */
2371bf215546Sopenharmony_ci
2372bf215546Sopenharmony_cistatic unsigned
2373bf215546Sopenharmony_cipan_assign_varyings(const struct panfrost_device *dev,
2374bf215546Sopenharmony_ci                    struct pan_shader_info *producer,
2375bf215546Sopenharmony_ci                    struct pan_shader_info *consumer,
2376bf215546Sopenharmony_ci                    signed *offsets)
2377bf215546Sopenharmony_ci{
2378bf215546Sopenharmony_ci        unsigned producer_count = producer->varyings.output_count;
2379bf215546Sopenharmony_ci        unsigned consumer_count = consumer->varyings.input_count;
2380bf215546Sopenharmony_ci
2381bf215546Sopenharmony_ci        const struct pan_shader_varying *producer_vars = producer->varyings.output;
2382bf215546Sopenharmony_ci        const struct pan_shader_varying *consumer_vars = consumer->varyings.input;
2383bf215546Sopenharmony_ci
2384bf215546Sopenharmony_ci        unsigned stride = 0;
2385bf215546Sopenharmony_ci
2386bf215546Sopenharmony_ci        for (unsigned i = 0; i < producer_count; ++i) {
2387bf215546Sopenharmony_ci                signed loc = pan_find_vary(consumer_vars, consumer_count,
2388bf215546Sopenharmony_ci                                producer_vars[i].location);
2389bf215546Sopenharmony_ci
2390bf215546Sopenharmony_ci                if (loc >= 0) {
2391bf215546Sopenharmony_ci                        offsets[i] = stride;
2392bf215546Sopenharmony_ci
2393bf215546Sopenharmony_ci                        enum pipe_format format = consumer_vars[loc].format;
2394bf215546Sopenharmony_ci                        stride += util_format_get_blocksize(format);
2395bf215546Sopenharmony_ci                } else {
2396bf215546Sopenharmony_ci                        offsets[i] = -1;
2397bf215546Sopenharmony_ci                }
2398bf215546Sopenharmony_ci        }
2399bf215546Sopenharmony_ci
2400bf215546Sopenharmony_ci        return stride;
2401bf215546Sopenharmony_ci}
2402bf215546Sopenharmony_ci
2403bf215546Sopenharmony_ci/* Emitter for a single varying (attribute) descriptor */
2404bf215546Sopenharmony_ci
2405bf215546Sopenharmony_cistatic void
2406bf215546Sopenharmony_cipanfrost_emit_varying(const struct panfrost_device *dev,
2407bf215546Sopenharmony_ci                      struct mali_attribute_packed *out,
2408bf215546Sopenharmony_ci                      const struct pan_shader_varying varying,
2409bf215546Sopenharmony_ci                      enum pipe_format pipe_format,
2410bf215546Sopenharmony_ci                      unsigned present,
2411bf215546Sopenharmony_ci                      uint16_t point_sprite_mask,
2412bf215546Sopenharmony_ci                      signed offset,
2413bf215546Sopenharmony_ci                      enum pan_special_varying pos_varying)
2414bf215546Sopenharmony_ci{
2415bf215546Sopenharmony_ci        /* Note: varying.format != pipe_format in some obscure cases due to a
2416bf215546Sopenharmony_ci         * limitation of the NIR linker. This should be fixed in the future to
2417bf215546Sopenharmony_ci         * eliminate the additional lookups. See:
2418bf215546Sopenharmony_ci         * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex
2419bf215546Sopenharmony_ci         */
2420bf215546Sopenharmony_ci        gl_varying_slot loc = varying.location;
2421bf215546Sopenharmony_ci        mali_pixel_format format = dev->formats[pipe_format].hw;
2422bf215546Sopenharmony_ci
2423bf215546Sopenharmony_ci        if (util_varying_is_point_coord(loc, point_sprite_mask)) {
2424bf215546Sopenharmony_ci                pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
2425bf215546Sopenharmony_ci        } else if (loc == VARYING_SLOT_POS) {
2426bf215546Sopenharmony_ci                pan_emit_vary_special(dev, out, present, pos_varying);
2427bf215546Sopenharmony_ci        } else if (loc == VARYING_SLOT_PSIZ) {
2428bf215546Sopenharmony_ci                pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ);
2429bf215546Sopenharmony_ci        } else if (loc == VARYING_SLOT_FACE) {
2430bf215546Sopenharmony_ci                pan_emit_vary_special(dev, out, present, PAN_VARY_FACE);
2431bf215546Sopenharmony_ci        } else if (offset < 0) {
2432bf215546Sopenharmony_ci                pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0);
2433bf215546Sopenharmony_ci        } else {
2434bf215546Sopenharmony_ci                STATIC_ASSERT(PAN_VARY_GENERAL == 0);
2435bf215546Sopenharmony_ci                pan_emit_vary(dev, out, 0, format, offset);
2436bf215546Sopenharmony_ci        }
2437bf215546Sopenharmony_ci}
2438bf215546Sopenharmony_ci
2439bf215546Sopenharmony_ci/* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time,
2440bf215546Sopenharmony_ci * rather than draw time (under good conditions). */
2441bf215546Sopenharmony_ci
2442bf215546Sopenharmony_cistatic void
2443bf215546Sopenharmony_cipanfrost_emit_varying_descs(
2444bf215546Sopenharmony_ci                struct panfrost_pool *pool,
2445bf215546Sopenharmony_ci                struct panfrost_shader_state *producer,
2446bf215546Sopenharmony_ci                struct panfrost_shader_state *consumer,
2447bf215546Sopenharmony_ci                uint16_t point_coord_mask,
2448bf215546Sopenharmony_ci                struct pan_linkage *out)
2449bf215546Sopenharmony_ci{
2450bf215546Sopenharmony_ci        struct panfrost_device *dev = pool->base.dev;
2451bf215546Sopenharmony_ci        unsigned producer_count = producer->info.varyings.output_count;
2452bf215546Sopenharmony_ci        unsigned consumer_count = consumer->info.varyings.input_count;
2453bf215546Sopenharmony_ci
2454bf215546Sopenharmony_ci        /* Offsets within the general varying buffer, indexed by location */
2455bf215546Sopenharmony_ci        signed offsets[PAN_MAX_VARYINGS];
2456bf215546Sopenharmony_ci        assert(producer_count <= ARRAY_SIZE(offsets));
2457bf215546Sopenharmony_ci        assert(consumer_count <= ARRAY_SIZE(offsets));
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_ci        /* Allocate enough descriptors for both shader stages */
2460bf215546Sopenharmony_ci        struct panfrost_ptr T =
2461bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&pool->base,
2462bf215546Sopenharmony_ci                                          producer_count + consumer_count,
2463bf215546Sopenharmony_ci                                          ATTRIBUTE);
2464bf215546Sopenharmony_ci
2465bf215546Sopenharmony_ci        /* Take a reference if we're being put on the CSO */
2466bf215546Sopenharmony_ci        if (!pool->owned) {
2467bf215546Sopenharmony_ci                out->bo = pool->transient_bo;
2468bf215546Sopenharmony_ci                panfrost_bo_reference(out->bo);
2469bf215546Sopenharmony_ci        }
2470bf215546Sopenharmony_ci
2471bf215546Sopenharmony_ci        struct mali_attribute_packed *descs = T.cpu;
2472bf215546Sopenharmony_ci        out->producer = producer_count ? T.gpu : 0;
2473bf215546Sopenharmony_ci        out->consumer = consumer_count ? T.gpu +
2474bf215546Sopenharmony_ci                (pan_size(ATTRIBUTE) * producer_count) : 0;
2475bf215546Sopenharmony_ci
2476bf215546Sopenharmony_ci        /* Lay out the varyings. Must use producer to lay out, in order to
2477bf215546Sopenharmony_ci         * respect transform feedback precisions. */
2478bf215546Sopenharmony_ci        out->present = pan_varying_present(dev, &producer->info,
2479bf215546Sopenharmony_ci                        &consumer->info, point_coord_mask);
2480bf215546Sopenharmony_ci
2481bf215546Sopenharmony_ci        out->stride = pan_assign_varyings(dev, &producer->info,
2482bf215546Sopenharmony_ci                        &consumer->info, offsets);
2483bf215546Sopenharmony_ci
2484bf215546Sopenharmony_ci        for (unsigned i = 0; i < producer_count; ++i) {
2485bf215546Sopenharmony_ci                signed j = pan_find_vary(consumer->info.varyings.input,
2486bf215546Sopenharmony_ci                                consumer->info.varyings.input_count,
2487bf215546Sopenharmony_ci                                producer->info.varyings.output[i].location);
2488bf215546Sopenharmony_ci
2489bf215546Sopenharmony_ci                enum pipe_format format = (j >= 0) ?
2490bf215546Sopenharmony_ci                        consumer->info.varyings.input[j].format :
2491bf215546Sopenharmony_ci                        producer->info.varyings.output[i].format;
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci                panfrost_emit_varying(dev, descs + i,
2494bf215546Sopenharmony_ci                                producer->info.varyings.output[i], format,
2495bf215546Sopenharmony_ci                                out->present, 0, offsets[i], PAN_VARY_POSITION);
2496bf215546Sopenharmony_ci        }
2497bf215546Sopenharmony_ci
2498bf215546Sopenharmony_ci        for (unsigned i = 0; i < consumer_count; ++i) {
2499bf215546Sopenharmony_ci                signed j = pan_find_vary(producer->info.varyings.output,
2500bf215546Sopenharmony_ci                                producer->info.varyings.output_count,
2501bf215546Sopenharmony_ci                                consumer->info.varyings.input[i].location);
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci                signed offset = (j >= 0) ? offsets[j] : -1;
2504bf215546Sopenharmony_ci
2505bf215546Sopenharmony_ci                panfrost_emit_varying(dev, descs + producer_count + i,
2506bf215546Sopenharmony_ci                                consumer->info.varyings.input[i],
2507bf215546Sopenharmony_ci                                consumer->info.varyings.input[i].format,
2508bf215546Sopenharmony_ci                                out->present, point_coord_mask,
2509bf215546Sopenharmony_ci                                offset, PAN_VARY_FRAGCOORD);
2510bf215546Sopenharmony_ci        }
2511bf215546Sopenharmony_ci}
2512bf215546Sopenharmony_ci
2513bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2514bf215546Sopenharmony_cistatic void
2515bf215546Sopenharmony_cipan_emit_special_input(struct mali_attribute_buffer_packed *out,
2516bf215546Sopenharmony_ci                unsigned present,
2517bf215546Sopenharmony_ci                enum pan_special_varying v,
2518bf215546Sopenharmony_ci                unsigned special)
2519bf215546Sopenharmony_ci{
2520bf215546Sopenharmony_ci        if (present & BITFIELD_BIT(v)) {
2521bf215546Sopenharmony_ci                unsigned idx = pan_varying_index(present, v);
2522bf215546Sopenharmony_ci
2523bf215546Sopenharmony_ci                pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
2524bf215546Sopenharmony_ci                        cfg.special = special;
2525bf215546Sopenharmony_ci                        cfg.type = 0;
2526bf215546Sopenharmony_ci                }
2527bf215546Sopenharmony_ci        }
2528bf215546Sopenharmony_ci}
2529bf215546Sopenharmony_ci#endif
2530bf215546Sopenharmony_ci
2531bf215546Sopenharmony_cistatic void
2532bf215546Sopenharmony_cipanfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2533bf215546Sopenharmony_ci                                 unsigned vertex_count,
2534bf215546Sopenharmony_ci                                 mali_ptr *vs_attribs,
2535bf215546Sopenharmony_ci                                 mali_ptr *fs_attribs,
2536bf215546Sopenharmony_ci                                 mali_ptr *buffers,
2537bf215546Sopenharmony_ci                                 unsigned *buffer_count,
2538bf215546Sopenharmony_ci                                 mali_ptr *position,
2539bf215546Sopenharmony_ci                                 mali_ptr *psiz,
2540bf215546Sopenharmony_ci                                 bool point_coord_replace)
2541bf215546Sopenharmony_ci{
2542bf215546Sopenharmony_ci        /* Load the shaders */
2543bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
2544bf215546Sopenharmony_ci        struct panfrost_shader_state *vs, *fs;
2545bf215546Sopenharmony_ci
2546bf215546Sopenharmony_ci        vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2547bf215546Sopenharmony_ci        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2548bf215546Sopenharmony_ci
2549bf215546Sopenharmony_ci        uint16_t point_coord_mask = 0;
2550bf215546Sopenharmony_ci
2551bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2552bf215546Sopenharmony_ci        /* Point sprites are lowered on Bifrost and newer */
2553bf215546Sopenharmony_ci        if (point_coord_replace)
2554bf215546Sopenharmony_ci                point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
2555bf215546Sopenharmony_ci#endif
2556bf215546Sopenharmony_ci
2557bf215546Sopenharmony_ci        /* In good conditions, we only need to link varyings once */
2558bf215546Sopenharmony_ci        bool prelink =
2559bf215546Sopenharmony_ci                (point_coord_mask == 0) &&
2560bf215546Sopenharmony_ci                !vs->info.separable &&
2561bf215546Sopenharmony_ci                !fs->info.separable;
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci        /* Try to reduce copies */
2564bf215546Sopenharmony_ci        struct pan_linkage _linkage;
2565bf215546Sopenharmony_ci        struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage;
2566bf215546Sopenharmony_ci
2567bf215546Sopenharmony_ci        /* Emit ATTRIBUTE descriptors if needed */
2568bf215546Sopenharmony_ci        if (!prelink || vs->linkage.bo == NULL) {
2569bf215546Sopenharmony_ci                struct panfrost_pool *pool =
2570bf215546Sopenharmony_ci                        prelink ? &ctx->descs : &batch->pool;
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci                panfrost_emit_varying_descs(pool, vs, fs, point_coord_mask, linkage);
2573bf215546Sopenharmony_ci        }
2574bf215546Sopenharmony_ci
2575bf215546Sopenharmony_ci        unsigned present = linkage->present, stride = linkage->stride;
2576bf215546Sopenharmony_ci        unsigned count = util_bitcount(present);
2577bf215546Sopenharmony_ci        struct panfrost_ptr T =
2578bf215546Sopenharmony_ci                pan_pool_alloc_desc_array(&batch->pool.base,
2579bf215546Sopenharmony_ci                                          count + 1,
2580bf215546Sopenharmony_ci                                          ATTRIBUTE_BUFFER);
2581bf215546Sopenharmony_ci        struct mali_attribute_buffer_packed *varyings =
2582bf215546Sopenharmony_ci                (struct mali_attribute_buffer_packed *) T.cpu;
2583bf215546Sopenharmony_ci
2584bf215546Sopenharmony_ci        if (buffer_count)
2585bf215546Sopenharmony_ci                *buffer_count = count;
2586bf215546Sopenharmony_ci
2587bf215546Sopenharmony_ci#if PAN_ARCH >= 6
2588bf215546Sopenharmony_ci        /* Suppress prefetch on Bifrost */
2589bf215546Sopenharmony_ci        memset(varyings + count, 0, sizeof(*varyings));
2590bf215546Sopenharmony_ci#endif
2591bf215546Sopenharmony_ci
2592bf215546Sopenharmony_ci        if (stride) {
2593bf215546Sopenharmony_ci                panfrost_emit_varyings(batch,
2594bf215546Sopenharmony_ci                                &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2595bf215546Sopenharmony_ci                                stride, vertex_count);
2596bf215546Sopenharmony_ci        } else {
2597bf215546Sopenharmony_ci                /* The indirect draw code reads the stride field, make sure
2598bf215546Sopenharmony_ci                 * that it is initialised */
2599bf215546Sopenharmony_ci                memset(varyings + pan_varying_index(present, PAN_VARY_GENERAL), 0,
2600bf215546Sopenharmony_ci                       sizeof(*varyings));
2601bf215546Sopenharmony_ci        }
2602bf215546Sopenharmony_ci
2603bf215546Sopenharmony_ci        /* fp32 vec4 gl_Position */
2604bf215546Sopenharmony_ci        *position = panfrost_emit_varyings(batch,
2605bf215546Sopenharmony_ci                        &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2606bf215546Sopenharmony_ci                        sizeof(float) * 4, vertex_count);
2607bf215546Sopenharmony_ci
2608bf215546Sopenharmony_ci        if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
2609bf215546Sopenharmony_ci                *psiz = panfrost_emit_varyings(batch,
2610bf215546Sopenharmony_ci                                &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2611bf215546Sopenharmony_ci                                2, vertex_count);
2612bf215546Sopenharmony_ci        }
2613bf215546Sopenharmony_ci
2614bf215546Sopenharmony_ci#if PAN_ARCH <= 5
2615bf215546Sopenharmony_ci        pan_emit_special_input(varyings, present,
2616bf215546Sopenharmony_ci                        PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
2617bf215546Sopenharmony_ci        pan_emit_special_input(varyings, present, PAN_VARY_FACE,
2618bf215546Sopenharmony_ci                        MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
2619bf215546Sopenharmony_ci        pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD,
2620bf215546Sopenharmony_ci                        MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
2621bf215546Sopenharmony_ci#endif
2622bf215546Sopenharmony_ci
2623bf215546Sopenharmony_ci        *buffers = T.gpu;
2624bf215546Sopenharmony_ci        *vs_attribs = linkage->producer;
2625bf215546Sopenharmony_ci        *fs_attribs = linkage->consumer;
2626bf215546Sopenharmony_ci}
2627bf215546Sopenharmony_ci
2628bf215546Sopenharmony_ci/*
2629bf215546Sopenharmony_ci * Emit jobs required for the rasterization pipeline. If there are side effects
2630bf215546Sopenharmony_ci * from the vertex shader, these are handled ahead-of-time with a compute
2631bf215546Sopenharmony_ci * shader. This function should not be called if rasterization is skipped.
2632bf215546Sopenharmony_ci */
2633bf215546Sopenharmony_cistatic void
2634bf215546Sopenharmony_cipanfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2635bf215546Sopenharmony_ci                                const struct panfrost_ptr *vertex_job,
2636bf215546Sopenharmony_ci                                const struct panfrost_ptr *tiler_job)
2637bf215546Sopenharmony_ci{
2638bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
2639bf215546Sopenharmony_ci
2640bf215546Sopenharmony_ci        /* XXX - set job_barrier in case buffers get ping-ponged and we need to
2641bf215546Sopenharmony_ci         * enforce ordering, this has a perf hit! See
2642bf215546Sopenharmony_ci         * KHR-GLES31.core.vertex_attrib_binding.advanced-iterations
2643bf215546Sopenharmony_ci         */
2644bf215546Sopenharmony_ci        unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard,
2645bf215546Sopenharmony_ci                                           MALI_JOB_TYPE_VERTEX, true, false,
2646bf215546Sopenharmony_ci                                           ctx->indirect_draw ?
2647bf215546Sopenharmony_ci                                           batch->indirect_draw_job_id : 0,
2648bf215546Sopenharmony_ci                                           0, vertex_job, false);
2649bf215546Sopenharmony_ci
2650bf215546Sopenharmony_ci        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
2651bf215546Sopenharmony_ci                         MALI_JOB_TYPE_TILER, false, false,
2652bf215546Sopenharmony_ci                         vertex, 0, tiler_job, false);
2653bf215546Sopenharmony_ci}
2654bf215546Sopenharmony_ci#endif
2655bf215546Sopenharmony_ci
2656bf215546Sopenharmony_cistatic void
2657bf215546Sopenharmony_ciemit_tls(struct panfrost_batch *batch)
2658bf215546Sopenharmony_ci{
2659bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
2660bf215546Sopenharmony_ci
2661bf215546Sopenharmony_ci        /* Emitted with the FB descriptor on Midgard. */
2662bf215546Sopenharmony_ci        if (PAN_ARCH <= 5 && batch->framebuffer.gpu)
2663bf215546Sopenharmony_ci                return;
2664bf215546Sopenharmony_ci
2665bf215546Sopenharmony_ci        struct panfrost_bo *tls_bo =
2666bf215546Sopenharmony_ci                batch->stack_size ?
2667bf215546Sopenharmony_ci                panfrost_batch_get_scratchpad(batch,
2668bf215546Sopenharmony_ci                                              batch->stack_size,
2669bf215546Sopenharmony_ci                                              dev->thread_tls_alloc,
2670bf215546Sopenharmony_ci                                              dev->core_id_range):
2671bf215546Sopenharmony_ci                NULL;
2672bf215546Sopenharmony_ci        struct pan_tls_info tls = {
2673bf215546Sopenharmony_ci                .tls = {
2674bf215546Sopenharmony_ci                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
2675bf215546Sopenharmony_ci                        .size = batch->stack_size,
2676bf215546Sopenharmony_ci                },
2677bf215546Sopenharmony_ci        };
2678bf215546Sopenharmony_ci
2679bf215546Sopenharmony_ci        assert(batch->tls.cpu);
2680bf215546Sopenharmony_ci        GENX(pan_emit_tls)(&tls, batch->tls.cpu);
2681bf215546Sopenharmony_ci}
2682bf215546Sopenharmony_ci
2683bf215546Sopenharmony_cistatic void
2684bf215546Sopenharmony_ciemit_fbd(struct panfrost_batch *batch, const struct pan_fb_info *fb)
2685bf215546Sopenharmony_ci{
2686bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
2687bf215546Sopenharmony_ci        struct panfrost_bo *tls_bo =
2688bf215546Sopenharmony_ci                batch->stack_size ?
2689bf215546Sopenharmony_ci                panfrost_batch_get_scratchpad(batch,
2690bf215546Sopenharmony_ci                                              batch->stack_size,
2691bf215546Sopenharmony_ci                                              dev->thread_tls_alloc,
2692bf215546Sopenharmony_ci                                              dev->core_id_range):
2693bf215546Sopenharmony_ci                NULL;
2694bf215546Sopenharmony_ci        struct pan_tls_info tls = {
2695bf215546Sopenharmony_ci                .tls = {
2696bf215546Sopenharmony_ci                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
2697bf215546Sopenharmony_ci                        .size = batch->stack_size,
2698bf215546Sopenharmony_ci                },
2699bf215546Sopenharmony_ci        };
2700bf215546Sopenharmony_ci
2701bf215546Sopenharmony_ci        batch->framebuffer.gpu |=
2702bf215546Sopenharmony_ci                GENX(pan_emit_fbd)(dev, fb, &tls, &batch->tiler_ctx,
2703bf215546Sopenharmony_ci                                   batch->framebuffer.cpu);
2704bf215546Sopenharmony_ci}
2705bf215546Sopenharmony_ci
2706bf215546Sopenharmony_ci/* Mark a surface as written */
2707bf215546Sopenharmony_ci
2708bf215546Sopenharmony_cistatic void
2709bf215546Sopenharmony_cipanfrost_initialize_surface(struct panfrost_batch *batch,
2710bf215546Sopenharmony_ci                            struct pipe_surface *surf)
2711bf215546Sopenharmony_ci{
2712bf215546Sopenharmony_ci        if (surf) {
2713bf215546Sopenharmony_ci                struct panfrost_resource *rsrc = pan_resource(surf->texture);
2714bf215546Sopenharmony_ci                BITSET_SET(rsrc->valid.data, surf->u.tex.level);
2715bf215546Sopenharmony_ci        }
2716bf215546Sopenharmony_ci}
2717bf215546Sopenharmony_ci
2718bf215546Sopenharmony_ci/* Generate a fragment job. This should be called once per frame. (Usually,
2719bf215546Sopenharmony_ci * this corresponds to eglSwapBuffers or one of glFlush, glFinish)
2720bf215546Sopenharmony_ci */
2721bf215546Sopenharmony_cistatic mali_ptr
2722bf215546Sopenharmony_ciemit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb)
2723bf215546Sopenharmony_ci{
2724bf215546Sopenharmony_ci        /* Mark the affected buffers as initialized, since we're writing to it.
2725bf215546Sopenharmony_ci         * Also, add the surfaces we're writing to to the batch */
2726bf215546Sopenharmony_ci
2727bf215546Sopenharmony_ci        struct pipe_framebuffer_state *fb = &batch->key;
2728bf215546Sopenharmony_ci
2729bf215546Sopenharmony_ci        for (unsigned i = 0; i < fb->nr_cbufs; ++i)
2730bf215546Sopenharmony_ci                panfrost_initialize_surface(batch, fb->cbufs[i]);
2731bf215546Sopenharmony_ci
2732bf215546Sopenharmony_ci        panfrost_initialize_surface(batch, fb->zsbuf);
2733bf215546Sopenharmony_ci
2734bf215546Sopenharmony_ci        /* The passed tile coords can be out of range in some cases, so we need
2735bf215546Sopenharmony_ci         * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT.
2736bf215546Sopenharmony_ci         * Theoretically we also need to clamp the coordinates positive, but we
2737bf215546Sopenharmony_ci         * avoid that edge case as all four values are unsigned. Also,
2738bf215546Sopenharmony_ci         * theoretically we could clamp the minima, but if that has to happen
2739bf215546Sopenharmony_ci         * the asserts would fail anyway (since the maxima would get clamped
2740bf215546Sopenharmony_ci         * and then be smaller than the minima). An edge case of sorts occurs
2741bf215546Sopenharmony_ci         * when no scissors are added to draw, so by default min=~0 and max=0.
2742bf215546Sopenharmony_ci         * But that can't happen if any actual drawing occurs (beyond a
2743bf215546Sopenharmony_ci         * wallpaper reload), so this is again irrelevant in practice. */
2744bf215546Sopenharmony_ci
2745bf215546Sopenharmony_ci        batch->maxx = MIN2(batch->maxx, fb->width);
2746bf215546Sopenharmony_ci        batch->maxy = MIN2(batch->maxy, fb->height);
2747bf215546Sopenharmony_ci
2748bf215546Sopenharmony_ci        /* Rendering region must be at least 1x1; otherwise, there is nothing
2749bf215546Sopenharmony_ci         * to do and the whole job chain should have been discarded. */
2750bf215546Sopenharmony_ci
2751bf215546Sopenharmony_ci        assert(batch->maxx > batch->minx);
2752bf215546Sopenharmony_ci        assert(batch->maxy > batch->miny);
2753bf215546Sopenharmony_ci
2754bf215546Sopenharmony_ci        struct panfrost_ptr transfer =
2755bf215546Sopenharmony_ci                pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB);
2756bf215546Sopenharmony_ci
2757bf215546Sopenharmony_ci        GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu,
2758bf215546Sopenharmony_ci                                    transfer.cpu);
2759bf215546Sopenharmony_ci
2760bf215546Sopenharmony_ci        return transfer.gpu;
2761bf215546Sopenharmony_ci}
2762bf215546Sopenharmony_ci
2763bf215546Sopenharmony_ci#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c;
2764bf215546Sopenharmony_ci
2765bf215546Sopenharmony_cistatic uint8_t
2766bf215546Sopenharmony_cipan_draw_mode(enum pipe_prim_type mode)
2767bf215546Sopenharmony_ci{
2768bf215546Sopenharmony_ci        switch (mode) {
2769bf215546Sopenharmony_ci                DEFINE_CASE(POINTS);
2770bf215546Sopenharmony_ci                DEFINE_CASE(LINES);
2771bf215546Sopenharmony_ci                DEFINE_CASE(LINE_LOOP);
2772bf215546Sopenharmony_ci                DEFINE_CASE(LINE_STRIP);
2773bf215546Sopenharmony_ci                DEFINE_CASE(TRIANGLES);
2774bf215546Sopenharmony_ci                DEFINE_CASE(TRIANGLE_STRIP);
2775bf215546Sopenharmony_ci                DEFINE_CASE(TRIANGLE_FAN);
2776bf215546Sopenharmony_ci                DEFINE_CASE(QUADS);
2777bf215546Sopenharmony_ci                DEFINE_CASE(POLYGON);
2778bf215546Sopenharmony_ci#if PAN_ARCH <= 6
2779bf215546Sopenharmony_ci                DEFINE_CASE(QUAD_STRIP);
2780bf215546Sopenharmony_ci#endif
2781bf215546Sopenharmony_ci
2782bf215546Sopenharmony_ci        default:
2783bf215546Sopenharmony_ci                unreachable("Invalid draw mode");
2784bf215546Sopenharmony_ci        }
2785bf215546Sopenharmony_ci}
2786bf215546Sopenharmony_ci
2787bf215546Sopenharmony_ci#undef DEFINE_CASE
2788bf215546Sopenharmony_ci
2789bf215546Sopenharmony_ci/* Count generated primitives (when there is no geom/tess shaders) for
2790bf215546Sopenharmony_ci * transform feedback */
2791bf215546Sopenharmony_ci
2792bf215546Sopenharmony_cistatic void
2793bf215546Sopenharmony_cipanfrost_statistics_record(
2794bf215546Sopenharmony_ci                struct panfrost_context *ctx,
2795bf215546Sopenharmony_ci                const struct pipe_draw_info *info,
2796bf215546Sopenharmony_ci                const struct pipe_draw_start_count_bias *draw)
2797bf215546Sopenharmony_ci{
2798bf215546Sopenharmony_ci        if (!ctx->active_queries)
2799bf215546Sopenharmony_ci                return;
2800bf215546Sopenharmony_ci
2801bf215546Sopenharmony_ci        uint32_t prims = u_prims_for_vertices(info->mode, draw->count);
2802bf215546Sopenharmony_ci        ctx->prims_generated += prims;
2803bf215546Sopenharmony_ci
2804bf215546Sopenharmony_ci        if (!ctx->streamout.num_targets)
2805bf215546Sopenharmony_ci                return;
2806bf215546Sopenharmony_ci
2807bf215546Sopenharmony_ci        ctx->tf_prims_generated += prims;
2808bf215546Sopenharmony_ci        ctx->dirty |= PAN_DIRTY_SO;
2809bf215546Sopenharmony_ci}
2810bf215546Sopenharmony_ci
2811bf215546Sopenharmony_cistatic void
2812bf215546Sopenharmony_cipanfrost_update_streamout_offsets(struct panfrost_context *ctx)
2813bf215546Sopenharmony_ci{
2814bf215546Sopenharmony_ci        unsigned count = u_stream_outputs_for_vertices(ctx->active_prim,
2815bf215546Sopenharmony_ci                                                       ctx->vertex_count);
2816bf215546Sopenharmony_ci
2817bf215546Sopenharmony_ci        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2818bf215546Sopenharmony_ci                if (!ctx->streamout.targets[i])
2819bf215546Sopenharmony_ci                        continue;
2820bf215546Sopenharmony_ci
2821bf215546Sopenharmony_ci                pan_so_target(ctx->streamout.targets[i])->offset += count;
2822bf215546Sopenharmony_ci        }
2823bf215546Sopenharmony_ci}
2824bf215546Sopenharmony_ci
2825bf215546Sopenharmony_cistatic inline enum mali_index_type
2826bf215546Sopenharmony_cipanfrost_translate_index_size(unsigned size)
2827bf215546Sopenharmony_ci{
2828bf215546Sopenharmony_ci        STATIC_ASSERT(MALI_INDEX_TYPE_NONE  == 0);
2829bf215546Sopenharmony_ci        STATIC_ASSERT(MALI_INDEX_TYPE_UINT8  == 1);
2830bf215546Sopenharmony_ci        STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2);
2831bf215546Sopenharmony_ci
2832bf215546Sopenharmony_ci        return (size == 4) ? MALI_INDEX_TYPE_UINT32 : size;
2833bf215546Sopenharmony_ci}
2834bf215546Sopenharmony_ci
2835bf215546Sopenharmony_ci#if PAN_ARCH <= 7
2836bf215546Sopenharmony_cistatic inline void
2837bf215546Sopenharmony_cipan_emit_draw_descs(struct panfrost_batch *batch,
2838bf215546Sopenharmony_ci                struct MALI_DRAW *d, enum pipe_shader_type st)
2839bf215546Sopenharmony_ci{
2840bf215546Sopenharmony_ci        d->offset_start = batch->ctx->offset_start;
2841bf215546Sopenharmony_ci        d->instance_size = batch->ctx->instance_count > 1 ?
2842bf215546Sopenharmony_ci                           batch->ctx->padded_count : 1;
2843bf215546Sopenharmony_ci
2844bf215546Sopenharmony_ci        d->uniform_buffers = batch->uniform_buffers[st];
2845bf215546Sopenharmony_ci        d->push_uniforms = batch->push_uniforms[st];
2846bf215546Sopenharmony_ci        d->textures = batch->textures[st];
2847bf215546Sopenharmony_ci        d->samplers = batch->samplers[st];
2848bf215546Sopenharmony_ci}
2849bf215546Sopenharmony_ci
2850bf215546Sopenharmony_cistatic void
2851bf215546Sopenharmony_cipanfrost_draw_emit_vertex_section(struct panfrost_batch *batch,
2852bf215546Sopenharmony_ci                                  mali_ptr vs_vary, mali_ptr varyings,
2853bf215546Sopenharmony_ci                                  mali_ptr attribs, mali_ptr attrib_bufs,
2854bf215546Sopenharmony_ci                                  void *section)
2855bf215546Sopenharmony_ci{
2856bf215546Sopenharmony_ci        pan_pack(section, DRAW, cfg) {
2857bf215546Sopenharmony_ci                cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
2858bf215546Sopenharmony_ci                cfg.attributes = attribs;
2859bf215546Sopenharmony_ci                cfg.attribute_buffers = attrib_bufs;
2860bf215546Sopenharmony_ci                cfg.varyings = vs_vary;
2861bf215546Sopenharmony_ci                cfg.varying_buffers = vs_vary ? varyings : 0;
2862bf215546Sopenharmony_ci                cfg.thread_storage = batch->tls.gpu;
2863bf215546Sopenharmony_ci                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
2864bf215546Sopenharmony_ci        }
2865bf215546Sopenharmony_ci}
2866bf215546Sopenharmony_ci
2867bf215546Sopenharmony_cistatic void
2868bf215546Sopenharmony_cipanfrost_draw_emit_vertex(struct panfrost_batch *batch,
2869bf215546Sopenharmony_ci                          const struct pipe_draw_info *info,
2870bf215546Sopenharmony_ci                          void *invocation_template,
2871bf215546Sopenharmony_ci                          mali_ptr vs_vary, mali_ptr varyings,
2872bf215546Sopenharmony_ci                          mali_ptr attribs, mali_ptr attrib_bufs,
2873bf215546Sopenharmony_ci                          void *job)
2874bf215546Sopenharmony_ci{
2875bf215546Sopenharmony_ci        void *section =
2876bf215546Sopenharmony_ci                pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
2877bf215546Sopenharmony_ci        memcpy(section, invocation_template, pan_size(INVOCATION));
2878bf215546Sopenharmony_ci
2879bf215546Sopenharmony_ci        pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
2880bf215546Sopenharmony_ci                cfg.job_task_split = 5;
2881bf215546Sopenharmony_ci        }
2882bf215546Sopenharmony_ci
2883bf215546Sopenharmony_ci        section = pan_section_ptr(job, COMPUTE_JOB, DRAW);
2884bf215546Sopenharmony_ci        panfrost_draw_emit_vertex_section(batch, vs_vary, varyings,
2885bf215546Sopenharmony_ci                                          attribs, attrib_bufs, section);
2886bf215546Sopenharmony_ci}
2887bf215546Sopenharmony_ci#endif
2888bf215546Sopenharmony_ci
2889bf215546Sopenharmony_cistatic void
2890bf215546Sopenharmony_cipanfrost_emit_primitive_size(struct panfrost_context *ctx,
2891bf215546Sopenharmony_ci                             bool points, mali_ptr size_array,
2892bf215546Sopenharmony_ci                             void *prim_size)
2893bf215546Sopenharmony_ci{
2894bf215546Sopenharmony_ci        struct panfrost_rasterizer *rast = ctx->rasterizer;
2895bf215546Sopenharmony_ci
2896bf215546Sopenharmony_ci        pan_pack(prim_size, PRIMITIVE_SIZE, cfg) {
2897bf215546Sopenharmony_ci                if (panfrost_writes_point_size(ctx)) {
2898bf215546Sopenharmony_ci                        cfg.size_array = size_array;
2899bf215546Sopenharmony_ci                } else {
2900bf215546Sopenharmony_ci                        cfg.constant = points ?
2901bf215546Sopenharmony_ci                                       rast->base.point_size :
2902bf215546Sopenharmony_ci                                       rast->base.line_width;
2903bf215546Sopenharmony_ci                }
2904bf215546Sopenharmony_ci        }
2905bf215546Sopenharmony_ci}
2906bf215546Sopenharmony_ci
2907bf215546Sopenharmony_cistatic bool
2908bf215546Sopenharmony_cipanfrost_is_implicit_prim_restart(const struct pipe_draw_info *info)
2909bf215546Sopenharmony_ci{
2910bf215546Sopenharmony_ci       /* As a reminder primitive_restart should always be checked before any
2911bf215546Sopenharmony_ci          access to restart_index. */
2912bf215546Sopenharmony_ci        return info->primitive_restart &&
2913bf215546Sopenharmony_ci                info->restart_index == (unsigned)BITFIELD_MASK(info->index_size * 8);
2914bf215546Sopenharmony_ci}
2915bf215546Sopenharmony_ci
/* The Renderer State Descriptor (RSD) on Bifrost and older hardware bundles
 * several pieces of 3D state: the fragment shader descriptor is grouped with
 * depth/stencil, blend, polygon offset, and multisampling state. Each of these
 * is dirty tracked on its own, which benefits newer GPUs where the descriptors
 * are separate. FRAGMENT_RSD_DIRTY_MASK is the set of 3D dirty flags that
 * force the fragment RSD to be emitted again.
 *
 * Occlusion queries are part of the mask, which is not obvious: occlusion
 * query state is nominally specified in the draw call descriptor, but it must
 * be taken into account when determining early-Z state, and early-Z state
 * lives in the RSD.
 */
#define FRAGMENT_RSD_DIRTY_MASK ( \
        PAN_DIRTY_ZS         | \
        PAN_DIRTY_BLEND      | \
        PAN_DIRTY_MSAA       | \
        PAN_DIRTY_RASTERIZER | \
        PAN_DIRTY_OQ)
2930bf215546Sopenharmony_ci
2931bf215546Sopenharmony_cistatic inline void
2932bf215546Sopenharmony_cipanfrost_update_shader_state(struct panfrost_batch *batch,
2933bf215546Sopenharmony_ci                             enum pipe_shader_type st)
2934bf215546Sopenharmony_ci{
2935bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
2936bf215546Sopenharmony_ci        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
2937bf215546Sopenharmony_ci
2938bf215546Sopenharmony_ci        bool frag = (st == PIPE_SHADER_FRAGMENT);
2939bf215546Sopenharmony_ci        unsigned dirty_3d = ctx->dirty;
2940bf215546Sopenharmony_ci        unsigned dirty = ctx->dirty_shader[st];
2941bf215546Sopenharmony_ci
2942bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_STAGE_TEXTURE) {
2943bf215546Sopenharmony_ci                batch->textures[st] =
2944bf215546Sopenharmony_ci                        panfrost_emit_texture_descriptors(batch, st);
2945bf215546Sopenharmony_ci        }
2946bf215546Sopenharmony_ci
2947bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_STAGE_SAMPLER) {
2948bf215546Sopenharmony_ci                batch->samplers[st] =
2949bf215546Sopenharmony_ci                        panfrost_emit_sampler_descriptors(batch, st);
2950bf215546Sopenharmony_ci        }
2951bf215546Sopenharmony_ci
2952bf215546Sopenharmony_ci        /* On Bifrost and older, the fragment shader descriptor is fused
2953bf215546Sopenharmony_ci         * together with the renderer state; the combined renderer state
2954bf215546Sopenharmony_ci         * descriptor is emitted below. Otherwise, the shader descriptor is
2955bf215546Sopenharmony_ci         * standalone and is emitted here.
2956bf215546Sopenharmony_ci         */
2957bf215546Sopenharmony_ci        if ((dirty & PAN_DIRTY_STAGE_SHADER) && !((PAN_ARCH <= 7) && frag)) {
2958bf215546Sopenharmony_ci                batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st);
2959bf215546Sopenharmony_ci        }
2960bf215546Sopenharmony_ci
2961bf215546Sopenharmony_ci#if PAN_ARCH >= 9
2962bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_STAGE_IMAGE)
2963bf215546Sopenharmony_ci                batch->images[st] = panfrost_emit_images(batch, st);
2964bf215546Sopenharmony_ci#endif
2965bf215546Sopenharmony_ci
2966bf215546Sopenharmony_ci        if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) {
2967bf215546Sopenharmony_ci                batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st,
2968bf215546Sopenharmony_ci                                NULL, &batch->push_uniforms[st], NULL);
2969bf215546Sopenharmony_ci        }
2970bf215546Sopenharmony_ci
2971bf215546Sopenharmony_ci#if PAN_ARCH <= 7
2972bf215546Sopenharmony_ci        /* On Bifrost and older, if the fragment shader changes OR any renderer
2973bf215546Sopenharmony_ci         * state specified with the fragment shader, the whole renderer state
2974bf215546Sopenharmony_ci         * descriptor is dirtied and must be reemited.
2975bf215546Sopenharmony_ci         */
2976bf215546Sopenharmony_ci        if (frag && ((dirty & PAN_DIRTY_STAGE_SHADER) ||
2977bf215546Sopenharmony_ci                     (dirty_3d & FRAGMENT_RSD_DIRTY_MASK))) {
2978bf215546Sopenharmony_ci
2979bf215546Sopenharmony_ci                batch->rsd[st] = panfrost_emit_frag_shader_meta(batch);
2980bf215546Sopenharmony_ci        }
2981bf215546Sopenharmony_ci
2982bf215546Sopenharmony_ci        if (frag && (dirty & PAN_DIRTY_STAGE_IMAGE)) {
2983bf215546Sopenharmony_ci                batch->attribs[st] = panfrost_emit_image_attribs(batch,
2984bf215546Sopenharmony_ci                                &batch->attrib_bufs[st], st);
2985bf215546Sopenharmony_ci        }
2986bf215546Sopenharmony_ci#endif
2987bf215546Sopenharmony_ci}
2988bf215546Sopenharmony_ci
2989bf215546Sopenharmony_cistatic inline void
2990bf215546Sopenharmony_cipanfrost_update_state_3d(struct panfrost_batch *batch)
2991bf215546Sopenharmony_ci{
2992bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
2993bf215546Sopenharmony_ci        unsigned dirty = ctx->dirty;
2994bf215546Sopenharmony_ci
2995bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_TLS_SIZE)
2996bf215546Sopenharmony_ci                panfrost_batch_adjust_stack_size(batch);
2997bf215546Sopenharmony_ci
2998bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_BLEND)
2999bf215546Sopenharmony_ci                panfrost_set_batch_masks_blend(batch);
3000bf215546Sopenharmony_ci
3001bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_ZS)
3002bf215546Sopenharmony_ci                panfrost_set_batch_masks_zs(batch);
3003bf215546Sopenharmony_ci
3004bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3005bf215546Sopenharmony_ci        if ((dirty & (PAN_DIRTY_ZS | PAN_DIRTY_RASTERIZER)) ||
3006bf215546Sopenharmony_ci            (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & PAN_DIRTY_STAGE_SHADER))
3007bf215546Sopenharmony_ci                batch->depth_stencil = panfrost_emit_depth_stencil(batch);
3008bf215546Sopenharmony_ci
3009bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_BLEND)
3010bf215546Sopenharmony_ci                batch->blend = panfrost_emit_blend_valhall(batch);
3011bf215546Sopenharmony_ci
3012bf215546Sopenharmony_ci        if (dirty & PAN_DIRTY_VERTEX) {
3013bf215546Sopenharmony_ci                batch->attribs[PIPE_SHADER_VERTEX] =
3014bf215546Sopenharmony_ci                        panfrost_emit_vertex_data(batch);
3015bf215546Sopenharmony_ci
3016bf215546Sopenharmony_ci                batch->attrib_bufs[PIPE_SHADER_VERTEX] =
3017bf215546Sopenharmony_ci                        panfrost_emit_vertex_buffers(batch);
3018bf215546Sopenharmony_ci        }
3019bf215546Sopenharmony_ci#endif
3020bf215546Sopenharmony_ci}
3021bf215546Sopenharmony_ci
#if PAN_ARCH >= 6
/* Returns the GPU address of the batch's tiler context descriptor, creating it
 * on first use and caching it in batch->tiler_ctx.bifrost.
 *
 * Returns 0 without allocating anything when vertex_count is 0.
 */
static mali_ptr
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        if (vertex_count == 0)
                return 0;

        /* Reuse the descriptor from an earlier call on this batch */
        if (batch->tiler_ctx.bifrost != 0)
                return batch->tiler_ctx.bifrost;

        /* The tiler context refers to the heap descriptor, so the heap is
         * allocated and emitted first.
         */
        struct panfrost_ptr heap_desc =
                pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);

        GENX(pan_emit_tiler_heap)(dev, heap_desc.cpu);

        struct panfrost_ptr tiler_desc =
                pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);

        GENX(pan_emit_tiler_ctx)(dev, batch->key.width, batch->key.height,
                                 util_framebuffer_get_num_samples(&batch->key),
                                 pan_tristate_get(batch->first_provoking_vertex),
                                 heap_desc.gpu, tiler_desc.cpu);

        batch->tiler_ctx.bifrost = tiler_desc.gpu;

        return batch->tiler_ctx.bifrost;
}
#endif
3051bf215546Sopenharmony_ci
3052bf215546Sopenharmony_ci/* Packs a primitive descriptor, mostly common between Midgard/Bifrost tiler
3053bf215546Sopenharmony_ci * jobs and Valhall IDVS jobs
3054bf215546Sopenharmony_ci */
3055bf215546Sopenharmony_cistatic void
3056bf215546Sopenharmony_cipanfrost_emit_primitive(struct panfrost_context *ctx,
3057bf215546Sopenharmony_ci                        const struct pipe_draw_info *info,
3058bf215546Sopenharmony_ci                        const struct pipe_draw_start_count_bias *draw,
3059bf215546Sopenharmony_ci                        mali_ptr indices, bool secondary_shader, void *out)
3060bf215546Sopenharmony_ci{
3061bf215546Sopenharmony_ci        UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
3062bf215546Sopenharmony_ci
3063bf215546Sopenharmony_ci        bool lines = (info->mode == PIPE_PRIM_LINES ||
3064bf215546Sopenharmony_ci                      info->mode == PIPE_PRIM_LINE_LOOP ||
3065bf215546Sopenharmony_ci                      info->mode == PIPE_PRIM_LINE_STRIP);
3066bf215546Sopenharmony_ci
3067bf215546Sopenharmony_ci        pan_pack(out, PRIMITIVE, cfg) {
3068bf215546Sopenharmony_ci                cfg.draw_mode = pan_draw_mode(info->mode);
3069bf215546Sopenharmony_ci                if (panfrost_writes_point_size(ctx))
3070bf215546Sopenharmony_ci                        cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
3071bf215546Sopenharmony_ci
3072bf215546Sopenharmony_ci#if PAN_ARCH <= 8
3073bf215546Sopenharmony_ci                /* For line primitives, PRIMITIVE.first_provoking_vertex must
3074bf215546Sopenharmony_ci                 * be set to true and the provoking vertex is selected with
3075bf215546Sopenharmony_ci                 * DRAW.flat_shading_vertex.
3076bf215546Sopenharmony_ci                 */
3077bf215546Sopenharmony_ci                if (lines)
3078bf215546Sopenharmony_ci                        cfg.first_provoking_vertex = true;
3079bf215546Sopenharmony_ci                else
3080bf215546Sopenharmony_ci                        cfg.first_provoking_vertex = rast->flatshade_first;
3081bf215546Sopenharmony_ci
3082bf215546Sopenharmony_ci                if (panfrost_is_implicit_prim_restart(info)) {
3083bf215546Sopenharmony_ci                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
3084bf215546Sopenharmony_ci                } else if (info->primitive_restart) {
3085bf215546Sopenharmony_ci                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT;
3086bf215546Sopenharmony_ci                        cfg.primitive_restart_index = info->restart_index;
3087bf215546Sopenharmony_ci                }
3088bf215546Sopenharmony_ci
3089bf215546Sopenharmony_ci                cfg.job_task_split = 6;
3090bf215546Sopenharmony_ci#else
3091bf215546Sopenharmony_ci                struct panfrost_shader_state *fs =
3092bf215546Sopenharmony_ci                        panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
3093bf215546Sopenharmony_ci
3094bf215546Sopenharmony_ci                cfg.allow_rotating_primitives = !(lines || fs->info.bifrost.uses_flat_shading);
3095bf215546Sopenharmony_ci                cfg.primitive_restart = info->primitive_restart;
3096bf215546Sopenharmony_ci
3097bf215546Sopenharmony_ci                /* Non-fixed restart indices should have been lowered */
3098bf215546Sopenharmony_ci                assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info));
3099bf215546Sopenharmony_ci#endif
3100bf215546Sopenharmony_ci
3101bf215546Sopenharmony_ci                cfg.index_count = ctx->indirect_draw ? 1 : draw->count;
3102bf215546Sopenharmony_ci                cfg.index_type = panfrost_translate_index_size(info->index_size);
3103bf215546Sopenharmony_ci
3104bf215546Sopenharmony_ci
3105bf215546Sopenharmony_ci                if (PAN_ARCH >= 9) {
3106bf215546Sopenharmony_ci                        /* Base vertex offset on Valhall is used for both
3107bf215546Sopenharmony_ci                         * indexed and non-indexed draws, in a simple way for
3108bf215546Sopenharmony_ci                         * either. Handle both cases.
3109bf215546Sopenharmony_ci                         */
3110bf215546Sopenharmony_ci                        if (cfg.index_type)
3111bf215546Sopenharmony_ci                                cfg.base_vertex_offset = draw->index_bias;
3112bf215546Sopenharmony_ci                        else
3113bf215546Sopenharmony_ci                                cfg.base_vertex_offset = draw->start;
3114bf215546Sopenharmony_ci
3115bf215546Sopenharmony_ci                        /* Indices are moved outside the primitive descriptor
3116bf215546Sopenharmony_ci                         * on Valhall, so we don't need to set that here
3117bf215546Sopenharmony_ci                         */
3118bf215546Sopenharmony_ci                } else if (cfg.index_type) {
3119bf215546Sopenharmony_ci                        cfg.base_vertex_offset = draw->index_bias - ctx->offset_start;
3120bf215546Sopenharmony_ci
3121bf215546Sopenharmony_ci#if PAN_ARCH <= 7
3122bf215546Sopenharmony_ci                        cfg.indices = indices;
3123bf215546Sopenharmony_ci#endif
3124bf215546Sopenharmony_ci                }
3125bf215546Sopenharmony_ci
3126bf215546Sopenharmony_ci#if PAN_ARCH >= 6
3127bf215546Sopenharmony_ci                cfg.secondary_shader = secondary_shader;
3128bf215546Sopenharmony_ci#endif
3129bf215546Sopenharmony_ci        }
3130bf215546Sopenharmony_ci}
3131bf215546Sopenharmony_ci
3132bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3133bf215546Sopenharmony_cistatic mali_ptr
3134bf215546Sopenharmony_cipanfrost_emit_resources(struct panfrost_batch *batch,
3135bf215546Sopenharmony_ci                        enum pipe_shader_type stage,
3136bf215546Sopenharmony_ci                        mali_ptr ubos, unsigned ubo_count)
3137bf215546Sopenharmony_ci{
3138bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3139bf215546Sopenharmony_ci        struct panfrost_ptr T;
3140bf215546Sopenharmony_ci        unsigned nr_tables = 12;
3141bf215546Sopenharmony_ci
3142bf215546Sopenharmony_ci        /* Although individual resources need only 16 byte alignment, the
3143bf215546Sopenharmony_ci         * resource table as a whole must be 64-byte aligned.
3144bf215546Sopenharmony_ci         */
3145bf215546Sopenharmony_ci        T = pan_pool_alloc_aligned(&batch->pool.base, nr_tables * pan_size(RESOURCE), 64);
3146bf215546Sopenharmony_ci        memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));
3147bf215546Sopenharmony_ci
3148bf215546Sopenharmony_ci        panfrost_make_resource_table(T, PAN_TABLE_UBO, ubos, ubo_count);
3149bf215546Sopenharmony_ci
3150bf215546Sopenharmony_ci        panfrost_make_resource_table(T, PAN_TABLE_TEXTURE,
3151bf215546Sopenharmony_ci                                     batch->textures[stage],
3152bf215546Sopenharmony_ci                                     ctx->sampler_view_count[stage]);
3153bf215546Sopenharmony_ci
3154bf215546Sopenharmony_ci        panfrost_make_resource_table(T, PAN_TABLE_SAMPLER,
3155bf215546Sopenharmony_ci                                     batch->samplers[stage],
3156bf215546Sopenharmony_ci                                     ctx->sampler_count[stage]);
3157bf215546Sopenharmony_ci
3158bf215546Sopenharmony_ci        panfrost_make_resource_table(T, PAN_TABLE_IMAGE,
3159bf215546Sopenharmony_ci                                     batch->images[stage],
3160bf215546Sopenharmony_ci                                     util_last_bit(ctx->image_mask[stage]));
3161bf215546Sopenharmony_ci
3162bf215546Sopenharmony_ci        if (stage == PIPE_SHADER_VERTEX) {
3163bf215546Sopenharmony_ci                panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE,
3164bf215546Sopenharmony_ci                                             batch->attribs[stage],
3165bf215546Sopenharmony_ci                                             ctx->vertex->num_elements);
3166bf215546Sopenharmony_ci
3167bf215546Sopenharmony_ci                panfrost_make_resource_table(T, PAN_TABLE_ATTRIBUTE_BUFFER,
3168bf215546Sopenharmony_ci                                             batch->attrib_bufs[stage],
3169bf215546Sopenharmony_ci                                             util_last_bit(ctx->vb_mask));
3170bf215546Sopenharmony_ci        }
3171bf215546Sopenharmony_ci
3172bf215546Sopenharmony_ci        return T.gpu | nr_tables;
3173bf215546Sopenharmony_ci}
3174bf215546Sopenharmony_ci
3175bf215546Sopenharmony_cistatic void
3176bf215546Sopenharmony_cipanfrost_emit_shader(struct panfrost_batch *batch,
3177bf215546Sopenharmony_ci                     struct MALI_SHADER_ENVIRONMENT *cfg,
3178bf215546Sopenharmony_ci                     enum pipe_shader_type stage,
3179bf215546Sopenharmony_ci                     mali_ptr shader_ptr,
3180bf215546Sopenharmony_ci                     mali_ptr thread_storage)
3181bf215546Sopenharmony_ci{
3182bf215546Sopenharmony_ci        unsigned fau_words = 0, ubo_count = 0;
3183bf215546Sopenharmony_ci        mali_ptr ubos, resources;
3184bf215546Sopenharmony_ci
3185bf215546Sopenharmony_ci        ubos = panfrost_emit_const_buf(batch, stage, &ubo_count, &cfg->fau,
3186bf215546Sopenharmony_ci                                       &fau_words);
3187bf215546Sopenharmony_ci
3188bf215546Sopenharmony_ci        resources = panfrost_emit_resources(batch, stage, ubos, ubo_count);
3189bf215546Sopenharmony_ci
3190bf215546Sopenharmony_ci        cfg->thread_storage = thread_storage;
3191bf215546Sopenharmony_ci        cfg->shader = shader_ptr;
3192bf215546Sopenharmony_ci        cfg->resources = resources;
3193bf215546Sopenharmony_ci
3194bf215546Sopenharmony_ci        /* Each entry of FAU is 64-bits */
3195bf215546Sopenharmony_ci        cfg->fau_count = DIV_ROUND_UP(fau_words, 2);
3196bf215546Sopenharmony_ci}
3197bf215546Sopenharmony_ci#endif
3198bf215546Sopenharmony_ci
3199bf215546Sopenharmony_cistatic void
3200bf215546Sopenharmony_cipanfrost_emit_draw(void *out,
3201bf215546Sopenharmony_ci                   struct panfrost_batch *batch,
3202bf215546Sopenharmony_ci                   bool fs_required,
3203bf215546Sopenharmony_ci                   enum pipe_prim_type prim,
3204bf215546Sopenharmony_ci                   mali_ptr pos, mali_ptr fs_vary, mali_ptr varyings)
3205bf215546Sopenharmony_ci{
3206bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3207bf215546Sopenharmony_ci        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
3208bf215546Sopenharmony_ci        bool polygon = (prim == PIPE_PRIM_TRIANGLES);
3209bf215546Sopenharmony_ci
3210bf215546Sopenharmony_ci        pan_pack(out, DRAW, cfg) {
3211bf215546Sopenharmony_ci                /*
3212bf215546Sopenharmony_ci                 * From the Gallium documentation,
3213bf215546Sopenharmony_ci                 * pipe_rasterizer_state::cull_face "indicates which faces of
3214bf215546Sopenharmony_ci                 * polygons to cull". Points and lines are not considered
3215bf215546Sopenharmony_ci                 * polygons and should be drawn even if all faces are culled.
3216bf215546Sopenharmony_ci                 * The hardware does not take primitive type into account when
3217bf215546Sopenharmony_ci                 * culling, so we need to do that check ourselves.
3218bf215546Sopenharmony_ci                 */
3219bf215546Sopenharmony_ci                cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
3220bf215546Sopenharmony_ci                cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
3221bf215546Sopenharmony_ci                cfg.front_face_ccw = rast->front_ccw;
3222bf215546Sopenharmony_ci
3223bf215546Sopenharmony_ci                if (ctx->occlusion_query && ctx->active_queries) {
3224bf215546Sopenharmony_ci                        if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
3225bf215546Sopenharmony_ci                                cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
3226bf215546Sopenharmony_ci                        else
3227bf215546Sopenharmony_ci                                cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;
3228bf215546Sopenharmony_ci
3229bf215546Sopenharmony_ci                        struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc);
3230bf215546Sopenharmony_ci                        cfg.occlusion = rsrc->image.data.bo->ptr.gpu;
3231bf215546Sopenharmony_ci                        panfrost_batch_write_rsrc(ctx->batch, rsrc,
3232bf215546Sopenharmony_ci                                              PIPE_SHADER_FRAGMENT);
3233bf215546Sopenharmony_ci                }
3234bf215546Sopenharmony_ci
3235bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3236bf215546Sopenharmony_ci                struct panfrost_shader_state *fs =
3237bf215546Sopenharmony_ci                        panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
3238bf215546Sopenharmony_ci
3239bf215546Sopenharmony_ci                cfg.multisample_enable = rast->multisample;
3240bf215546Sopenharmony_ci                cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF;
3241bf215546Sopenharmony_ci
3242bf215546Sopenharmony_ci                /* Use per-sample shading if required by API Also use it when a
3243bf215546Sopenharmony_ci                 * blend shader is used with multisampling, as this is handled
3244bf215546Sopenharmony_ci                 * by a single ST_TILE in the blend shader with the current
3245bf215546Sopenharmony_ci                 * sample ID, requiring per-sample shading.
3246bf215546Sopenharmony_ci                 */
3247bf215546Sopenharmony_ci                cfg.evaluate_per_sample =
3248bf215546Sopenharmony_ci                        (rast->multisample &&
3249bf215546Sopenharmony_ci                         ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader));
3250bf215546Sopenharmony_ci
3251bf215546Sopenharmony_ci                cfg.single_sampled_lines = !rast->multisample;
3252bf215546Sopenharmony_ci
3253bf215546Sopenharmony_ci                cfg.vertex_array.packet = true;
3254bf215546Sopenharmony_ci
3255bf215546Sopenharmony_ci                cfg.minimum_z = batch->minimum_z;
3256bf215546Sopenharmony_ci                cfg.maximum_z = batch->maximum_z;
3257bf215546Sopenharmony_ci
3258bf215546Sopenharmony_ci                cfg.depth_stencil = batch->depth_stencil;
3259bf215546Sopenharmony_ci
3260bf215546Sopenharmony_ci                if (fs_required) {
3261bf215546Sopenharmony_ci                        bool has_oq = ctx->occlusion_query && ctx->active_queries;
3262bf215546Sopenharmony_ci
3263bf215546Sopenharmony_ci                        struct pan_earlyzs_state earlyzs =
3264bf215546Sopenharmony_ci                               pan_earlyzs_get(fs->earlyzs,
3265bf215546Sopenharmony_ci                                               ctx->depth_stencil->writes_zs || has_oq,
3266bf215546Sopenharmony_ci                                               ctx->blend->base.alpha_to_coverage,
3267bf215546Sopenharmony_ci                                               ctx->depth_stencil->zs_always_passes);
3268bf215546Sopenharmony_ci
3269bf215546Sopenharmony_ci                        cfg.pixel_kill_operation = earlyzs.kill;
3270bf215546Sopenharmony_ci                        cfg.zs_update_operation = earlyzs.update;
3271bf215546Sopenharmony_ci
3272bf215546Sopenharmony_ci                        cfg.allow_forward_pixel_to_kill = pan_allow_forward_pixel_to_kill(ctx, fs);
3273bf215546Sopenharmony_ci                        cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global;
3274bf215546Sopenharmony_ci
3275bf215546Sopenharmony_ci                        /* Mask of render targets that may be written. A render
3276bf215546Sopenharmony_ci                         * target may be written if the fragment shader writes
3277bf215546Sopenharmony_ci                         * to it AND it actually exists. If the render target
3278bf215546Sopenharmony_ci                         * doesn't actually exist, the blend descriptor will be
3279bf215546Sopenharmony_ci                         * OFF so it may be omitted from the mask.
3280bf215546Sopenharmony_ci                         *
3281bf215546Sopenharmony_ci                         * Only set when there is a fragment shader, since
3282bf215546Sopenharmony_ci                         * otherwise no colour updates are possible.
3283bf215546Sopenharmony_ci                         */
3284bf215546Sopenharmony_ci                        cfg.render_target_mask =
3285bf215546Sopenharmony_ci                                (fs->info.outputs_written >> FRAG_RESULT_DATA0) &
3286bf215546Sopenharmony_ci                                ctx->fb_rt_mask;
3287bf215546Sopenharmony_ci
3288bf215546Sopenharmony_ci                        /* Also use per-sample shading if required by the shader
3289bf215546Sopenharmony_ci                         */
3290bf215546Sopenharmony_ci                        cfg.evaluate_per_sample |= fs->info.fs.sample_shading;
3291bf215546Sopenharmony_ci
3292bf215546Sopenharmony_ci                        /* Unlike Bifrost, alpha-to-coverage must be included in
3293bf215546Sopenharmony_ci                         * this identically-named flag. Confusing, isn't it?
3294bf215546Sopenharmony_ci                         */
3295bf215546Sopenharmony_ci                        cfg.shader_modifies_coverage = fs->info.fs.writes_coverage ||
3296bf215546Sopenharmony_ci                                                       fs->info.fs.can_discard ||
3297bf215546Sopenharmony_ci                                                       ctx->blend->base.alpha_to_coverage;
3298bf215546Sopenharmony_ci
3299bf215546Sopenharmony_ci                        /* Blend descriptors are only accessed by a BLEND
3300bf215546Sopenharmony_ci                         * instruction on Valhall. It follows that if the
3301bf215546Sopenharmony_ci                         * fragment shader is omitted, we may also emit the
3302bf215546Sopenharmony_ci                         * blend descriptors.
3303bf215546Sopenharmony_ci                         */
3304bf215546Sopenharmony_ci                        cfg.blend = batch->blend;
3305bf215546Sopenharmony_ci                        cfg.blend_count = MAX2(batch->key.nr_cbufs, 1);
3306bf215546Sopenharmony_ci                        cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
3307bf215546Sopenharmony_ci
3308bf215546Sopenharmony_ci                        cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0);
3309bf215546Sopenharmony_ci                        cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1);
3310bf215546Sopenharmony_ci
3311bf215546Sopenharmony_ci                        panfrost_emit_shader(batch, &cfg.shader, PIPE_SHADER_FRAGMENT,
3312bf215546Sopenharmony_ci                                             batch->rsd[PIPE_SHADER_FRAGMENT],
3313bf215546Sopenharmony_ci                                             batch->tls.gpu);
3314bf215546Sopenharmony_ci                } else {
3315bf215546Sopenharmony_ci                        /* These operations need to be FORCE to benefit from the
3316bf215546Sopenharmony_ci                         * depth-only pass optimizations.
3317bf215546Sopenharmony_ci                         */
3318bf215546Sopenharmony_ci                        cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
3319bf215546Sopenharmony_ci                        cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY;
3320bf215546Sopenharmony_ci
3321bf215546Sopenharmony_ci                        /* No shader and no blend => no shader or blend
3322bf215546Sopenharmony_ci                         * reasons to disable FPK. The only FPK-related state
3323bf215546Sopenharmony_ci                         * not covered is alpha-to-coverage which we don't set
3324bf215546Sopenharmony_ci                         * without blend.
3325bf215546Sopenharmony_ci                         */
3326bf215546Sopenharmony_ci                        cfg.allow_forward_pixel_to_kill = true;
3327bf215546Sopenharmony_ci
3328bf215546Sopenharmony_ci                        /* No shader => no shader side effects */
3329bf215546Sopenharmony_ci                        cfg.allow_forward_pixel_to_be_killed = true;
3330bf215546Sopenharmony_ci
3331bf215546Sopenharmony_ci                        /* Alpha isn't written so these are vacuous */
3332bf215546Sopenharmony_ci                        cfg.overdraw_alpha0 = true;
3333bf215546Sopenharmony_ci                        cfg.overdraw_alpha1 = true;
3334bf215546Sopenharmony_ci                }
3335bf215546Sopenharmony_ci#else
3336bf215546Sopenharmony_ci                cfg.position = pos;
3337bf215546Sopenharmony_ci                cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
3338bf215546Sopenharmony_ci                cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
3339bf215546Sopenharmony_ci                cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT];
3340bf215546Sopenharmony_ci                cfg.viewport = batch->viewport;
3341bf215546Sopenharmony_ci                cfg.varyings = fs_vary;
3342bf215546Sopenharmony_ci                cfg.varying_buffers = fs_vary ? varyings : 0;
3343bf215546Sopenharmony_ci                cfg.thread_storage = batch->tls.gpu;
3344bf215546Sopenharmony_ci
3345bf215546Sopenharmony_ci                /* For all primitives but lines DRAW.flat_shading_vertex must
3346bf215546Sopenharmony_ci                 * be set to 0 and the provoking vertex is selected with the
3347bf215546Sopenharmony_ci                 * PRIMITIVE.first_provoking_vertex field.
3348bf215546Sopenharmony_ci                 */
3349bf215546Sopenharmony_ci                if (prim == PIPE_PRIM_LINES) {
3350bf215546Sopenharmony_ci                        /* The logic is inverted across arches. */
3351bf215546Sopenharmony_ci                        cfg.flat_shading_vertex = rast->flatshade_first
3352bf215546Sopenharmony_ci                                                ^ (PAN_ARCH <= 5);
3353bf215546Sopenharmony_ci                }
3354bf215546Sopenharmony_ci
3355bf215546Sopenharmony_ci                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);
3356bf215546Sopenharmony_ci#endif
3357bf215546Sopenharmony_ci        }
3358bf215546Sopenharmony_ci}
3359bf215546Sopenharmony_ci
3360bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3361bf215546Sopenharmony_cistatic void
3362bf215546Sopenharmony_cipanfrost_emit_malloc_vertex(struct panfrost_batch *batch,
3363bf215546Sopenharmony_ci                            const struct pipe_draw_info *info,
3364bf215546Sopenharmony_ci                            const struct pipe_draw_start_count_bias *draw,
3365bf215546Sopenharmony_ci                            mali_ptr indices, bool secondary_shader,
3366bf215546Sopenharmony_ci                            void *job)
3367bf215546Sopenharmony_ci{
3368bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3369bf215546Sopenharmony_ci
3370bf215546Sopenharmony_ci        struct panfrost_shader_state *vs =
3371bf215546Sopenharmony_ci                panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3372bf215546Sopenharmony_ci
3373bf215546Sopenharmony_ci        struct panfrost_shader_state *fs =
3374bf215546Sopenharmony_ci                panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
3375bf215546Sopenharmony_ci
3376bf215546Sopenharmony_ci        bool fs_required = panfrost_fs_required(fs, ctx->blend,
3377bf215546Sopenharmony_ci                                                &ctx->pipe_framebuffer,
3378bf215546Sopenharmony_ci                                                ctx->depth_stencil);
3379bf215546Sopenharmony_ci
3380bf215546Sopenharmony_ci        /* Varying shaders only feed data to the fragment shader, so if we omit
3381bf215546Sopenharmony_ci         * the fragment shader, we should omit the varying shader too.
3382bf215546Sopenharmony_ci         */
3383bf215546Sopenharmony_ci        secondary_shader &= fs_required;
3384bf215546Sopenharmony_ci
3385bf215546Sopenharmony_ci        panfrost_emit_primitive(ctx, info, draw, 0, secondary_shader,
3386bf215546Sopenharmony_ci                                pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE));
3387bf215546Sopenharmony_ci
3388bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) {
3389bf215546Sopenharmony_ci                cfg.count = info->instance_count;
3390bf215546Sopenharmony_ci        }
3391bf215546Sopenharmony_ci
3392bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) {
3393bf215546Sopenharmony_ci                if (secondary_shader) {
3394bf215546Sopenharmony_ci                        unsigned v = vs->info.varyings.output_count;
3395bf215546Sopenharmony_ci                        unsigned f = fs->info.varyings.input_count;
3396bf215546Sopenharmony_ci                        unsigned slots = MAX2(v, f);
3397bf215546Sopenharmony_ci                        slots += util_bitcount(fs->key.fixed_varying_mask);
3398bf215546Sopenharmony_ci                        unsigned size = slots * 16;
3399bf215546Sopenharmony_ci
3400bf215546Sopenharmony_ci                        /* Assumes 16 byte slots. We could do better. */
3401bf215546Sopenharmony_ci                        cfg.vertex_packet_stride = size + 16;
3402bf215546Sopenharmony_ci                        cfg.vertex_attribute_stride = size;
3403bf215546Sopenharmony_ci                } else {
3404bf215546Sopenharmony_ci                        /* Hardware requirement for "no varyings" */
3405bf215546Sopenharmony_ci                        cfg.vertex_packet_stride = 16;
3406bf215546Sopenharmony_ci                        cfg.vertex_attribute_stride = 0;
3407bf215546Sopenharmony_ci                }
3408bf215546Sopenharmony_ci        }
3409bf215546Sopenharmony_ci
3410bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) {
3411bf215546Sopenharmony_ci                cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0);
3412bf215546Sopenharmony_ci        }
3413bf215546Sopenharmony_ci
3414bf215546Sopenharmony_ci        STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR));
3415bf215546Sopenharmony_ci        memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR),
3416bf215546Sopenharmony_ci               &batch->scissor, pan_size(SCISSOR));
3417bf215546Sopenharmony_ci
3418bf215546Sopenharmony_ci        panfrost_emit_primitive_size(ctx, info->mode == PIPE_PRIM_POINTS, 0,
3419bf215546Sopenharmony_ci                                     pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE));
3420bf215546Sopenharmony_ci
3421bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) {
3422bf215546Sopenharmony_ci                cfg.address = indices;
3423bf215546Sopenharmony_ci        }
3424bf215546Sopenharmony_ci
3425bf215546Sopenharmony_ci        panfrost_emit_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW),
3426bf215546Sopenharmony_ci                           batch, fs_required, u_reduced_prim(info->mode), 0, 0, 0);
3427bf215546Sopenharmony_ci
3428bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) {
3429bf215546Sopenharmony_ci                /* IDVS/points vertex shader */
3430bf215546Sopenharmony_ci                mali_ptr vs_ptr = batch->rsd[PIPE_SHADER_VERTEX];
3431bf215546Sopenharmony_ci
3432bf215546Sopenharmony_ci                /* IDVS/triangle vertex shader */
3433bf215546Sopenharmony_ci                if (vs_ptr && info->mode != PIPE_PRIM_POINTS)
3434bf215546Sopenharmony_ci                        vs_ptr += pan_size(SHADER_PROGRAM);
3435bf215546Sopenharmony_ci
3436bf215546Sopenharmony_ci                panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX, vs_ptr,
3437bf215546Sopenharmony_ci                                     batch->tls.gpu);
3438bf215546Sopenharmony_ci        }
3439bf215546Sopenharmony_ci
3440bf215546Sopenharmony_ci        pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) {
3441bf215546Sopenharmony_ci                /* If a varying shader is used, we configure it with the same
3442bf215546Sopenharmony_ci                 * state as the position shader for backwards compatible
3443bf215546Sopenharmony_ci                 * behaviour with Bifrost. This could be optimized.
3444bf215546Sopenharmony_ci                 */
3445bf215546Sopenharmony_ci                if (!secondary_shader) continue;
3446bf215546Sopenharmony_ci
3447bf215546Sopenharmony_ci                mali_ptr ptr = batch->rsd[PIPE_SHADER_VERTEX] +
3448bf215546Sopenharmony_ci                                (2 * pan_size(SHADER_PROGRAM));
3449bf215546Sopenharmony_ci
3450bf215546Sopenharmony_ci                panfrost_emit_shader(batch, &cfg, PIPE_SHADER_VERTEX,
3451bf215546Sopenharmony_ci                             ptr, batch->tls.gpu);
3452bf215546Sopenharmony_ci        }
3453bf215546Sopenharmony_ci}
3454bf215546Sopenharmony_ci#endif
3455bf215546Sopenharmony_ci
3456bf215546Sopenharmony_ci#if PAN_ARCH <= 7
3457bf215546Sopenharmony_cistatic void
3458bf215546Sopenharmony_cipanfrost_draw_emit_tiler(struct panfrost_batch *batch,
3459bf215546Sopenharmony_ci                         const struct pipe_draw_info *info,
3460bf215546Sopenharmony_ci                         const struct pipe_draw_start_count_bias *draw,
3461bf215546Sopenharmony_ci                         void *invocation_template,
3462bf215546Sopenharmony_ci                         mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings,
3463bf215546Sopenharmony_ci                         mali_ptr pos, mali_ptr psiz, bool secondary_shader,
3464bf215546Sopenharmony_ci                         void *job)
3465bf215546Sopenharmony_ci{
3466bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3467bf215546Sopenharmony_ci
3468bf215546Sopenharmony_ci        void *section = pan_section_ptr(job, TILER_JOB, INVOCATION);
3469bf215546Sopenharmony_ci        memcpy(section, invocation_template, pan_size(INVOCATION));
3470bf215546Sopenharmony_ci
3471bf215546Sopenharmony_ci        panfrost_emit_primitive(ctx, info, draw, indices, secondary_shader,
3472bf215546Sopenharmony_ci                                pan_section_ptr(job, TILER_JOB, PRIMITIVE));
3473bf215546Sopenharmony_ci
3474bf215546Sopenharmony_ci        void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
3475bf215546Sopenharmony_ci        enum pipe_prim_type prim = u_reduced_prim(info->mode);
3476bf215546Sopenharmony_ci
3477bf215546Sopenharmony_ci#if PAN_ARCH >= 6
3478bf215546Sopenharmony_ci        pan_section_pack(job, TILER_JOB, TILER, cfg) {
3479bf215546Sopenharmony_ci                cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0);
3480bf215546Sopenharmony_ci        }
3481bf215546Sopenharmony_ci
3482bf215546Sopenharmony_ci        pan_section_pack(job, TILER_JOB, PADDING, cfg);
3483bf215546Sopenharmony_ci#endif
3484bf215546Sopenharmony_ci
3485bf215546Sopenharmony_ci        panfrost_emit_draw(pan_section_ptr(job, TILER_JOB, DRAW),
3486bf215546Sopenharmony_ci                           batch, true, prim, pos, fs_vary, varyings);
3487bf215546Sopenharmony_ci
3488bf215546Sopenharmony_ci        panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size);
3489bf215546Sopenharmony_ci}
3490bf215546Sopenharmony_ci#endif
3491bf215546Sopenharmony_ci
3492bf215546Sopenharmony_cistatic void
3493bf215546Sopenharmony_cipanfrost_launch_xfb(struct panfrost_batch *batch,
3494bf215546Sopenharmony_ci                    const struct pipe_draw_info *info,
3495bf215546Sopenharmony_ci                    mali_ptr attribs, mali_ptr attrib_bufs,
3496bf215546Sopenharmony_ci                    unsigned count)
3497bf215546Sopenharmony_ci{
3498bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3499bf215546Sopenharmony_ci
3500bf215546Sopenharmony_ci        struct panfrost_ptr t =
3501bf215546Sopenharmony_ci                pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3502bf215546Sopenharmony_ci
3503bf215546Sopenharmony_ci        /* Nothing to do */
3504bf215546Sopenharmony_ci        if (batch->ctx->streamout.num_targets == 0)
3505bf215546Sopenharmony_ci                return;
3506bf215546Sopenharmony_ci
3507bf215546Sopenharmony_ci        /* TODO: XFB with index buffers */
3508bf215546Sopenharmony_ci        //assert(info->index_size == 0);
3509bf215546Sopenharmony_ci        u_trim_pipe_prim(info->mode, &count);
3510bf215546Sopenharmony_ci
3511bf215546Sopenharmony_ci        if (count == 0)
3512bf215546Sopenharmony_ci                return;
3513bf215546Sopenharmony_ci
3514bf215546Sopenharmony_ci        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3515bf215546Sopenharmony_ci        struct panfrost_shader_variants v = { .variants = vs->xfb };
3516bf215546Sopenharmony_ci
3517bf215546Sopenharmony_ci        vs->xfb->stream_output = vs->stream_output;
3518bf215546Sopenharmony_ci
3519bf215546Sopenharmony_ci        struct panfrost_shader_variants *saved_vs = ctx->shader[PIPE_SHADER_VERTEX];
3520bf215546Sopenharmony_ci        mali_ptr saved_rsd = batch->rsd[PIPE_SHADER_VERTEX];
3521bf215546Sopenharmony_ci        mali_ptr saved_ubo = batch->uniform_buffers[PIPE_SHADER_VERTEX];
3522bf215546Sopenharmony_ci        mali_ptr saved_push = batch->push_uniforms[PIPE_SHADER_VERTEX];
3523bf215546Sopenharmony_ci
3524bf215546Sopenharmony_ci        ctx->shader[PIPE_SHADER_VERTEX] = &v;
3525bf215546Sopenharmony_ci        batch->rsd[PIPE_SHADER_VERTEX] = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX);
3526bf215546Sopenharmony_ci
3527bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3528bf215546Sopenharmony_ci        pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
3529bf215546Sopenharmony_ci                cfg.workgroup_size_x = 1;
3530bf215546Sopenharmony_ci                cfg.workgroup_size_y = 1;
3531bf215546Sopenharmony_ci                cfg.workgroup_size_z = 1;
3532bf215546Sopenharmony_ci
3533bf215546Sopenharmony_ci                cfg.workgroup_count_x = count;
3534bf215546Sopenharmony_ci                cfg.workgroup_count_y = info->instance_count;
3535bf215546Sopenharmony_ci                cfg.workgroup_count_z = 1;
3536bf215546Sopenharmony_ci
3537bf215546Sopenharmony_ci                panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_VERTEX,
3538bf215546Sopenharmony_ci                                     batch->rsd[PIPE_SHADER_VERTEX],
3539bf215546Sopenharmony_ci                                     batch->tls.gpu);
3540bf215546Sopenharmony_ci
3541bf215546Sopenharmony_ci                /* TODO: Indexing. Also, this is a legacy feature... */
3542bf215546Sopenharmony_ci                cfg.compute.attribute_offset = batch->ctx->offset_start;
3543bf215546Sopenharmony_ci
3544bf215546Sopenharmony_ci                /* Transform feedback shaders do not use barriers or shared
3545bf215546Sopenharmony_ci                 * memory, so we may merge workgroups.
3546bf215546Sopenharmony_ci                 */
3547bf215546Sopenharmony_ci                cfg.allow_merging_workgroups = true;
3548bf215546Sopenharmony_ci                cfg.task_increment = 1;
3549bf215546Sopenharmony_ci                cfg.task_axis = MALI_TASK_AXIS_Z;
3550bf215546Sopenharmony_ci        }
3551bf215546Sopenharmony_ci#else
3552bf215546Sopenharmony_ci        struct mali_invocation_packed invocation;
3553bf215546Sopenharmony_ci
3554bf215546Sopenharmony_ci        panfrost_pack_work_groups_compute(&invocation,
3555bf215546Sopenharmony_ci                        1, count, info->instance_count,
3556bf215546Sopenharmony_ci                        1, 1, 1, PAN_ARCH <= 5, false);
3557bf215546Sopenharmony_ci
3558bf215546Sopenharmony_ci        batch->uniform_buffers[PIPE_SHADER_VERTEX] =
3559bf215546Sopenharmony_ci                panfrost_emit_const_buf(batch, PIPE_SHADER_VERTEX, NULL,
3560bf215546Sopenharmony_ci                                &batch->push_uniforms[PIPE_SHADER_VERTEX], NULL);
3561bf215546Sopenharmony_ci
3562bf215546Sopenharmony_ci        panfrost_draw_emit_vertex(batch, info, &invocation, 0, 0,
3563bf215546Sopenharmony_ci                                  attribs, attrib_bufs, t.cpu);
3564bf215546Sopenharmony_ci#endif
3565bf215546Sopenharmony_ci        enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE;
3566bf215546Sopenharmony_ci#if PAN_ARCH <= 5
3567bf215546Sopenharmony_ci        job_type = MALI_JOB_TYPE_VERTEX;
3568bf215546Sopenharmony_ci#endif
3569bf215546Sopenharmony_ci        panfrost_add_job(&batch->pool.base, &batch->scoreboard, job_type,
3570bf215546Sopenharmony_ci                         true, false, 0, 0, &t, false);
3571bf215546Sopenharmony_ci
3572bf215546Sopenharmony_ci        ctx->shader[PIPE_SHADER_VERTEX] = saved_vs;
3573bf215546Sopenharmony_ci        batch->rsd[PIPE_SHADER_VERTEX] = saved_rsd;
3574bf215546Sopenharmony_ci        batch->uniform_buffers[PIPE_SHADER_VERTEX] = saved_ubo;
3575bf215546Sopenharmony_ci        batch->push_uniforms[PIPE_SHADER_VERTEX] = saved_push;
3576bf215546Sopenharmony_ci}
3577bf215546Sopenharmony_ci
3578bf215546Sopenharmony_cistatic void
3579bf215546Sopenharmony_cipanfrost_direct_draw(struct panfrost_batch *batch,
3580bf215546Sopenharmony_ci                     const struct pipe_draw_info *info,
3581bf215546Sopenharmony_ci                     unsigned drawid_offset,
3582bf215546Sopenharmony_ci                     const struct pipe_draw_start_count_bias *draw)
3583bf215546Sopenharmony_ci{
3584bf215546Sopenharmony_ci        if (!draw->count || !info->instance_count)
3585bf215546Sopenharmony_ci                return;
3586bf215546Sopenharmony_ci
3587bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3588bf215546Sopenharmony_ci
3589bf215546Sopenharmony_ci        /* If we change whether we're drawing points, or whether point sprites
3590bf215546Sopenharmony_ci         * are enabled (specified in the rasterizer), we may need to rebind
3591bf215546Sopenharmony_ci         * shaders accordingly. This implicitly covers the case of rebinding
3592bf215546Sopenharmony_ci         * framebuffers, because all dirty flags are set there.
3593bf215546Sopenharmony_ci         */
3594bf215546Sopenharmony_ci        if ((ctx->dirty & PAN_DIRTY_RASTERIZER) ||
3595bf215546Sopenharmony_ci            ((ctx->active_prim == PIPE_PRIM_POINTS) ^
3596bf215546Sopenharmony_ci             (info->mode       == PIPE_PRIM_POINTS))) {
3597bf215546Sopenharmony_ci
3598bf215546Sopenharmony_ci                ctx->active_prim = info->mode;
3599bf215546Sopenharmony_ci                panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
3600bf215546Sopenharmony_ci        }
3601bf215546Sopenharmony_ci
3602bf215546Sopenharmony_ci        /* Take into account a negative bias */
3603bf215546Sopenharmony_ci        ctx->indirect_draw = false;
3604bf215546Sopenharmony_ci        ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0);
3605bf215546Sopenharmony_ci        ctx->instance_count = info->instance_count;
3606bf215546Sopenharmony_ci        ctx->base_vertex = info->index_size ? draw->index_bias : 0;
3607bf215546Sopenharmony_ci        ctx->base_instance = info->start_instance;
3608bf215546Sopenharmony_ci        ctx->active_prim = info->mode;
3609bf215546Sopenharmony_ci        ctx->drawid = drawid_offset;
3610bf215546Sopenharmony_ci
3611bf215546Sopenharmony_ci        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3612bf215546Sopenharmony_ci
3613bf215546Sopenharmony_ci        bool idvs = vs->info.vs.idvs;
3614bf215546Sopenharmony_ci        bool secondary_shader = vs->info.vs.secondary_enable;
3615bf215546Sopenharmony_ci
3616bf215546Sopenharmony_ci        UNUSED struct panfrost_ptr tiler, vertex;
3617bf215546Sopenharmony_ci
3618bf215546Sopenharmony_ci        if (idvs) {
3619bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3620bf215546Sopenharmony_ci                tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB);
3621bf215546Sopenharmony_ci#elif PAN_ARCH >= 6
3622bf215546Sopenharmony_ci                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
3623bf215546Sopenharmony_ci#else
3624bf215546Sopenharmony_ci                unreachable("IDVS is unsupported on Midgard");
3625bf215546Sopenharmony_ci#endif
3626bf215546Sopenharmony_ci        } else {
3627bf215546Sopenharmony_ci                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3628bf215546Sopenharmony_ci                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
3629bf215546Sopenharmony_ci        }
3630bf215546Sopenharmony_ci
3631bf215546Sopenharmony_ci        unsigned vertex_count = ctx->vertex_count;
3632bf215546Sopenharmony_ci
3633bf215546Sopenharmony_ci        unsigned min_index = 0, max_index = 0;
3634bf215546Sopenharmony_ci        mali_ptr indices = 0;
3635bf215546Sopenharmony_ci
3636bf215546Sopenharmony_ci        if (info->index_size && PAN_ARCH >= 9) {
3637bf215546Sopenharmony_ci                indices = panfrost_get_index_buffer(batch, info, draw);
3638bf215546Sopenharmony_ci        } else if (info->index_size) {
3639bf215546Sopenharmony_ci                indices = panfrost_get_index_buffer_bounded(batch, info, draw,
3640bf215546Sopenharmony_ci                                                            &min_index,
3641bf215546Sopenharmony_ci                                                            &max_index);
3642bf215546Sopenharmony_ci
3643bf215546Sopenharmony_ci                /* Use the corresponding values */
3644bf215546Sopenharmony_ci                vertex_count = max_index - min_index + 1;
3645bf215546Sopenharmony_ci                ctx->offset_start = min_index + draw->index_bias;
3646bf215546Sopenharmony_ci        } else {
3647bf215546Sopenharmony_ci                ctx->offset_start = draw->start;
3648bf215546Sopenharmony_ci        }
3649bf215546Sopenharmony_ci
3650bf215546Sopenharmony_ci        if (info->instance_count > 1) {
3651bf215546Sopenharmony_ci                unsigned count = vertex_count;
3652bf215546Sopenharmony_ci
3653bf215546Sopenharmony_ci                /* Index-Driven Vertex Shading requires different instances to
3654bf215546Sopenharmony_ci                 * have different cache lines for position results. Each vertex
3655bf215546Sopenharmony_ci                 * position is 16 bytes and the Mali cache line is 64 bytes, so
3656bf215546Sopenharmony_ci                 * the instance count must be aligned to 4 vertices.
3657bf215546Sopenharmony_ci                 */
3658bf215546Sopenharmony_ci                if (idvs)
3659bf215546Sopenharmony_ci                        count = ALIGN_POT(count, 4);
3660bf215546Sopenharmony_ci
3661bf215546Sopenharmony_ci                ctx->padded_count = panfrost_padded_vertex_count(count);
3662bf215546Sopenharmony_ci        } else
3663bf215546Sopenharmony_ci                ctx->padded_count = vertex_count;
3664bf215546Sopenharmony_ci
3665bf215546Sopenharmony_ci        panfrost_statistics_record(ctx, info, draw);
3666bf215546Sopenharmony_ci
3667bf215546Sopenharmony_ci#if PAN_ARCH <= 7
3668bf215546Sopenharmony_ci        struct mali_invocation_packed invocation;
3669bf215546Sopenharmony_ci        if (info->instance_count > 1) {
3670bf215546Sopenharmony_ci                panfrost_pack_work_groups_compute(&invocation,
3671bf215546Sopenharmony_ci                                                  1, vertex_count, info->instance_count,
3672bf215546Sopenharmony_ci                                                  1, 1, 1, true, false);
3673bf215546Sopenharmony_ci        } else {
3674bf215546Sopenharmony_ci                pan_pack(&invocation, INVOCATION, cfg) {
3675bf215546Sopenharmony_ci                        cfg.invocations = MALI_POSITIVE(vertex_count);
3676bf215546Sopenharmony_ci                        cfg.size_y_shift = 0;
3677bf215546Sopenharmony_ci                        cfg.size_z_shift = 0;
3678bf215546Sopenharmony_ci                        cfg.workgroups_x_shift = 0;
3679bf215546Sopenharmony_ci                        cfg.workgroups_y_shift = 0;
3680bf215546Sopenharmony_ci                        cfg.workgroups_z_shift = 32;
3681bf215546Sopenharmony_ci                        cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT;
3682bf215546Sopenharmony_ci                }
3683bf215546Sopenharmony_ci        }
3684bf215546Sopenharmony_ci
3685bf215546Sopenharmony_ci        /* Emit all sort of descriptors. */
3686bf215546Sopenharmony_ci        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
3687bf215546Sopenharmony_ci
3688bf215546Sopenharmony_ci        panfrost_emit_varying_descriptor(batch,
3689bf215546Sopenharmony_ci                                         ctx->padded_count *
3690bf215546Sopenharmony_ci                                         ctx->instance_count,
3691bf215546Sopenharmony_ci                                         &vs_vary, &fs_vary, &varyings,
3692bf215546Sopenharmony_ci                                         NULL, &pos, &psiz,
3693bf215546Sopenharmony_ci                                         info->mode == PIPE_PRIM_POINTS);
3694bf215546Sopenharmony_ci
3695bf215546Sopenharmony_ci        mali_ptr attribs, attrib_bufs;
3696bf215546Sopenharmony_ci        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
3697bf215546Sopenharmony_ci#endif
3698bf215546Sopenharmony_ci
3699bf215546Sopenharmony_ci        panfrost_update_state_3d(batch);
3700bf215546Sopenharmony_ci        panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX);
3701bf215546Sopenharmony_ci        panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT);
3702bf215546Sopenharmony_ci        panfrost_clean_state_3d(ctx);
3703bf215546Sopenharmony_ci
3704bf215546Sopenharmony_ci        if (vs->xfb) {
3705bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3706bf215546Sopenharmony_ci                mali_ptr attribs = 0, attrib_bufs = 0;
3707bf215546Sopenharmony_ci#endif
3708bf215546Sopenharmony_ci                panfrost_launch_xfb(batch, info, attribs, attrib_bufs, draw->count);
3709bf215546Sopenharmony_ci        }
3710bf215546Sopenharmony_ci
3711bf215546Sopenharmony_ci        /* Increment transform feedback offsets */
3712bf215546Sopenharmony_ci        panfrost_update_streamout_offsets(ctx);
3713bf215546Sopenharmony_ci
3714bf215546Sopenharmony_ci        /* Any side effects must be handled by the XFB shader, so we only need
3715bf215546Sopenharmony_ci         * to run vertex shaders if we need rasterization.
3716bf215546Sopenharmony_ci         */
3717bf215546Sopenharmony_ci        if (panfrost_batch_skip_rasterization(batch))
3718bf215546Sopenharmony_ci                return;
3719bf215546Sopenharmony_ci
3720bf215546Sopenharmony_ci#if PAN_ARCH >= 9
3721bf215546Sopenharmony_ci        assert(idvs && "Memory allocated IDVS required on Valhall");
3722bf215546Sopenharmony_ci
3723bf215546Sopenharmony_ci        panfrost_emit_malloc_vertex(batch, info, draw, indices,
3724bf215546Sopenharmony_ci                                    secondary_shader, tiler.cpu);
3725bf215546Sopenharmony_ci
3726bf215546Sopenharmony_ci        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3727bf215546Sopenharmony_ci                         MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0,
3728bf215546Sopenharmony_ci                         0, &tiler, false);
3729bf215546Sopenharmony_ci#else
3730bf215546Sopenharmony_ci        /* Fire off the draw itself */
3731bf215546Sopenharmony_ci        panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices,
3732bf215546Sopenharmony_ci                                 fs_vary, varyings, pos, psiz, secondary_shader,
3733bf215546Sopenharmony_ci                                 tiler.cpu);
3734bf215546Sopenharmony_ci        if (idvs) {
3735bf215546Sopenharmony_ci#if PAN_ARCH >= 6
3736bf215546Sopenharmony_ci                panfrost_draw_emit_vertex_section(batch,
3737bf215546Sopenharmony_ci                                  vs_vary, varyings,
3738bf215546Sopenharmony_ci                                  attribs, attrib_bufs,
3739bf215546Sopenharmony_ci                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
3740bf215546Sopenharmony_ci
3741bf215546Sopenharmony_ci                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3742bf215546Sopenharmony_ci                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
3743bf215546Sopenharmony_ci                                 0, 0, &tiler, false);
3744bf215546Sopenharmony_ci#endif
3745bf215546Sopenharmony_ci        } else {
3746bf215546Sopenharmony_ci                panfrost_draw_emit_vertex(batch, info, &invocation,
3747bf215546Sopenharmony_ci                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
3748bf215546Sopenharmony_ci                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
3749bf215546Sopenharmony_ci        }
3750bf215546Sopenharmony_ci#endif
3751bf215546Sopenharmony_ci}
3752bf215546Sopenharmony_ci
3753bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS
3754bf215546Sopenharmony_cistatic void
3755bf215546Sopenharmony_cipanfrost_indirect_draw(struct panfrost_batch *batch,
3756bf215546Sopenharmony_ci                       const struct pipe_draw_info *info,
3757bf215546Sopenharmony_ci                       unsigned drawid_offset,
3758bf215546Sopenharmony_ci                       const struct pipe_draw_indirect_info *indirect,
3759bf215546Sopenharmony_ci                       const struct pipe_draw_start_count_bias *draw)
3760bf215546Sopenharmony_ci{
3761bf215546Sopenharmony_ci        /* Indirect draw count and multi-draw not supported. */
3762bf215546Sopenharmony_ci        assert(indirect->draw_count == 1 && !indirect->indirect_draw_count);
3763bf215546Sopenharmony_ci
3764bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3765bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(ctx->base.screen);
3766bf215546Sopenharmony_ci
3767bf215546Sopenharmony_ci        /* TODO: update statistics (see panfrost_statistics_record()) */
3768bf215546Sopenharmony_ci        /* TODO: Increment transform feedback offsets */
3769bf215546Sopenharmony_ci        assert(ctx->streamout.num_targets == 0);
3770bf215546Sopenharmony_ci
3771bf215546Sopenharmony_ci        ctx->active_prim = info->mode;
3772bf215546Sopenharmony_ci        ctx->drawid = drawid_offset;
3773bf215546Sopenharmony_ci        ctx->indirect_draw = true;
3774bf215546Sopenharmony_ci
3775bf215546Sopenharmony_ci        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3776bf215546Sopenharmony_ci
3777bf215546Sopenharmony_ci        bool idvs = vs->info.vs.idvs;
3778bf215546Sopenharmony_ci        bool secondary_shader = vs->info.vs.secondary_enable;
3779bf215546Sopenharmony_ci
3780bf215546Sopenharmony_ci        struct panfrost_ptr tiler = { 0 }, vertex = { 0 };
3781bf215546Sopenharmony_ci
3782bf215546Sopenharmony_ci        if (idvs) {
3783bf215546Sopenharmony_ci#if PAN_ARCH >= 6
3784bf215546Sopenharmony_ci                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
3785bf215546Sopenharmony_ci#else
3786bf215546Sopenharmony_ci                unreachable("IDVS is unsupported on Midgard");
3787bf215546Sopenharmony_ci#endif
3788bf215546Sopenharmony_ci        } else {
3789bf215546Sopenharmony_ci                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3790bf215546Sopenharmony_ci                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
3791bf215546Sopenharmony_ci        }
3792bf215546Sopenharmony_ci
3793bf215546Sopenharmony_ci        struct panfrost_bo *index_buf = NULL;
3794bf215546Sopenharmony_ci
3795bf215546Sopenharmony_ci        if (info->index_size) {
3796bf215546Sopenharmony_ci                assert(!info->has_user_indices);
3797bf215546Sopenharmony_ci                struct panfrost_resource *rsrc = pan_resource(info->index.resource);
3798bf215546Sopenharmony_ci                index_buf = rsrc->image.data.bo;
3799bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
3800bf215546Sopenharmony_ci        }
3801bf215546Sopenharmony_ci
3802bf215546Sopenharmony_ci        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
3803bf215546Sopenharmony_ci        unsigned varying_buf_count;
3804bf215546Sopenharmony_ci
3805bf215546Sopenharmony_ci        /* We want to create templates, set all count fields to 0 to reflect
3806bf215546Sopenharmony_ci         * that.
3807bf215546Sopenharmony_ci         */
3808bf215546Sopenharmony_ci        ctx->instance_count = ctx->vertex_count = ctx->padded_count = 0;
3809bf215546Sopenharmony_ci        ctx->offset_start = 0;
3810bf215546Sopenharmony_ci
3811bf215546Sopenharmony_ci        /* Set the {first,base}_vertex sysvals to NULL. Will be updated if the
3812bf215546Sopenharmony_ci         * vertex shader uses gl_VertexID or gl_BaseVertex.
3813bf215546Sopenharmony_ci         */
3814bf215546Sopenharmony_ci        ctx->first_vertex_sysval_ptr = 0;
3815bf215546Sopenharmony_ci        ctx->base_vertex_sysval_ptr = 0;
3816bf215546Sopenharmony_ci        ctx->base_instance_sysval_ptr = 0;
3817bf215546Sopenharmony_ci
3818bf215546Sopenharmony_ci        panfrost_update_state_3d(batch);
3819bf215546Sopenharmony_ci        panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX);
3820bf215546Sopenharmony_ci        panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT);
3821bf215546Sopenharmony_ci        panfrost_clean_state_3d(ctx);
3822bf215546Sopenharmony_ci
3823bf215546Sopenharmony_ci        bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS);
3824bf215546Sopenharmony_ci
3825bf215546Sopenharmony_ci        panfrost_emit_varying_descriptor(batch, 0,
3826bf215546Sopenharmony_ci                                         &vs_vary, &fs_vary, &varyings,
3827bf215546Sopenharmony_ci                                         &varying_buf_count, &pos, &psiz,
3828bf215546Sopenharmony_ci                                         point_coord_replace);
3829bf215546Sopenharmony_ci
3830bf215546Sopenharmony_ci        mali_ptr attribs, attrib_bufs;
3831bf215546Sopenharmony_ci        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
3832bf215546Sopenharmony_ci
3833bf215546Sopenharmony_ci        /* Zero-ed invocation, the compute job will update it. */
3834bf215546Sopenharmony_ci        static struct mali_invocation_packed invocation;
3835bf215546Sopenharmony_ci
3836bf215546Sopenharmony_ci        /* Fire off the draw itself */
3837bf215546Sopenharmony_ci        panfrost_draw_emit_tiler(batch, info, draw, &invocation,
3838bf215546Sopenharmony_ci                                 index_buf ? index_buf->ptr.gpu : 0,
3839bf215546Sopenharmony_ci                                 fs_vary, varyings, pos, psiz, secondary_shader,
3840bf215546Sopenharmony_ci                                 tiler.cpu);
3841bf215546Sopenharmony_ci        if (idvs) {
3842bf215546Sopenharmony_ci#if PAN_ARCH >= 6
3843bf215546Sopenharmony_ci                panfrost_draw_emit_vertex_section(batch,
3844bf215546Sopenharmony_ci                                  vs_vary, varyings,
3845bf215546Sopenharmony_ci                                  attribs, attrib_bufs,
3846bf215546Sopenharmony_ci                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
3847bf215546Sopenharmony_ci#endif
3848bf215546Sopenharmony_ci        } else {
3849bf215546Sopenharmony_ci                panfrost_draw_emit_vertex(batch, info, &invocation,
3850bf215546Sopenharmony_ci                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
3851bf215546Sopenharmony_ci        }
3852bf215546Sopenharmony_ci
3853bf215546Sopenharmony_ci        /* Add the varying heap BO to the batch if we're allocating varyings. */
3854bf215546Sopenharmony_ci        if (varyings) {
3855bf215546Sopenharmony_ci                panfrost_batch_add_bo(batch,
3856bf215546Sopenharmony_ci                                      dev->indirect_draw_shaders.varying_heap,
3857bf215546Sopenharmony_ci                                      PIPE_SHADER_VERTEX);
3858bf215546Sopenharmony_ci        }
3859bf215546Sopenharmony_ci
3860bf215546Sopenharmony_ci        assert(indirect->buffer);
3861bf215546Sopenharmony_ci
3862bf215546Sopenharmony_ci        struct panfrost_resource *draw_buf = pan_resource(indirect->buffer);
3863bf215546Sopenharmony_ci
3864bf215546Sopenharmony_ci        /* Don't count images: those attributes don't need to be patched. */
3865bf215546Sopenharmony_ci        unsigned attrib_count =
3866bf215546Sopenharmony_ci                vs->info.attribute_count -
3867bf215546Sopenharmony_ci                util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]);
3868bf215546Sopenharmony_ci
3869bf215546Sopenharmony_ci        panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX);
3870bf215546Sopenharmony_ci
3871bf215546Sopenharmony_ci        struct pan_indirect_draw_info draw_info = {
3872bf215546Sopenharmony_ci                .last_indirect_draw = batch->indirect_draw_job_id,
3873bf215546Sopenharmony_ci                .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset,
3874bf215546Sopenharmony_ci                .index_buf = index_buf ? index_buf->ptr.gpu : 0,
3875bf215546Sopenharmony_ci                .first_vertex_sysval = ctx->first_vertex_sysval_ptr,
3876bf215546Sopenharmony_ci                .base_vertex_sysval = ctx->base_vertex_sysval_ptr,
3877bf215546Sopenharmony_ci                .base_instance_sysval = ctx->base_instance_sysval_ptr,
3878bf215546Sopenharmony_ci                .vertex_job = vertex.gpu,
3879bf215546Sopenharmony_ci                .tiler_job = tiler.gpu,
3880bf215546Sopenharmony_ci                .attrib_bufs = attrib_bufs,
3881bf215546Sopenharmony_ci                .attribs = attribs,
3882bf215546Sopenharmony_ci                .attrib_count = attrib_count,
3883bf215546Sopenharmony_ci                .varying_bufs = varyings,
3884bf215546Sopenharmony_ci                .index_size = info->index_size,
3885bf215546Sopenharmony_ci        };
3886bf215546Sopenharmony_ci
3887bf215546Sopenharmony_ci        if (panfrost_writes_point_size(ctx))
3888bf215546Sopenharmony_ci                draw_info.flags |= PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE;
3889bf215546Sopenharmony_ci
3890bf215546Sopenharmony_ci        if (vs->info.vs.writes_point_size)
3891bf215546Sopenharmony_ci                draw_info.flags |= PAN_INDIRECT_DRAW_HAS_PSIZ;
3892bf215546Sopenharmony_ci
3893bf215546Sopenharmony_ci        if (idvs)
3894bf215546Sopenharmony_ci                draw_info.flags |= PAN_INDIRECT_DRAW_IDVS;
3895bf215546Sopenharmony_ci
3896bf215546Sopenharmony_ci        if (info->primitive_restart) {
3897bf215546Sopenharmony_ci                draw_info.restart_index = info->restart_index;
3898bf215546Sopenharmony_ci                draw_info.flags |= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART;
3899bf215546Sopenharmony_ci        }
3900bf215546Sopenharmony_ci
3901bf215546Sopenharmony_ci        batch->indirect_draw_job_id =
3902bf215546Sopenharmony_ci                GENX(panfrost_emit_indirect_draw)(&batch->pool.base,
3903bf215546Sopenharmony_ci                                                  &batch->scoreboard,
3904bf215546Sopenharmony_ci                                                  &draw_info,
3905bf215546Sopenharmony_ci                                                  &batch->indirect_draw_ctx);
3906bf215546Sopenharmony_ci
3907bf215546Sopenharmony_ci        if (idvs) {
3908bf215546Sopenharmony_ci                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3909bf215546Sopenharmony_ci                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
3910bf215546Sopenharmony_ci                                 0, 0, &tiler, false);
3911bf215546Sopenharmony_ci        } else {
3912bf215546Sopenharmony_ci                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
3913bf215546Sopenharmony_ci        }
3914bf215546Sopenharmony_ci}
3915bf215546Sopenharmony_ci#endif
3916bf215546Sopenharmony_ci
3917bf215546Sopenharmony_cistatic bool
3918bf215546Sopenharmony_cipanfrost_compatible_batch_state(struct panfrost_batch *batch,
3919bf215546Sopenharmony_ci                                bool points)
3920bf215546Sopenharmony_ci{
3921bf215546Sopenharmony_ci        /* Only applies on Valhall */
3922bf215546Sopenharmony_ci        if (PAN_ARCH < 9)
3923bf215546Sopenharmony_ci                return true;
3924bf215546Sopenharmony_ci
3925bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
3926bf215546Sopenharmony_ci        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
3927bf215546Sopenharmony_ci
3928bf215546Sopenharmony_ci        bool coord = (rast->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
3929bf215546Sopenharmony_ci        bool first = rast->flatshade_first;
3930bf215546Sopenharmony_ci
3931bf215546Sopenharmony_ci        /* gl_PointCoord orientation only matters when drawing points, but
3932bf215546Sopenharmony_ci         * provoking vertex doesn't matter for points.
3933bf215546Sopenharmony_ci         */
3934bf215546Sopenharmony_ci        if (points)
3935bf215546Sopenharmony_ci                return pan_tristate_set(&batch->sprite_coord_origin, coord);
3936bf215546Sopenharmony_ci        else
3937bf215546Sopenharmony_ci                return pan_tristate_set(&batch->first_provoking_vertex, first);
3938bf215546Sopenharmony_ci}
3939bf215546Sopenharmony_ci
3940bf215546Sopenharmony_cistatic void
3941bf215546Sopenharmony_cipanfrost_draw_vbo(struct pipe_context *pipe,
3942bf215546Sopenharmony_ci                  const struct pipe_draw_info *info,
3943bf215546Sopenharmony_ci                  unsigned drawid_offset,
3944bf215546Sopenharmony_ci                  const struct pipe_draw_indirect_info *indirect,
3945bf215546Sopenharmony_ci                  const struct pipe_draw_start_count_bias *draws,
3946bf215546Sopenharmony_ci                  unsigned num_draws)
3947bf215546Sopenharmony_ci{
3948bf215546Sopenharmony_ci        struct panfrost_context *ctx = pan_context(pipe);
3949bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(pipe->screen);
3950bf215546Sopenharmony_ci
3951bf215546Sopenharmony_ci        if (!panfrost_render_condition_check(ctx))
3952bf215546Sopenharmony_ci                return;
3953bf215546Sopenharmony_ci
3954bf215546Sopenharmony_ci        /* Emulate indirect draws unless we're using the experimental path */
3955bf215546Sopenharmony_ci        if ((!(dev->debug & PAN_DBG_INDIRECT) || !PAN_GPU_INDIRECTS) && indirect && indirect->buffer) {
3956bf215546Sopenharmony_ci                assert(num_draws == 1);
3957bf215546Sopenharmony_ci                util_draw_indirect(pipe, info, indirect);
3958bf215546Sopenharmony_ci                return;
3959bf215546Sopenharmony_ci        }
3960bf215546Sopenharmony_ci
3961bf215546Sopenharmony_ci        /* Do some common setup */
3962bf215546Sopenharmony_ci        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
3963bf215546Sopenharmony_ci
3964bf215546Sopenharmony_ci        /* Don't add too many jobs to a single batch. Hardware has a hard limit
3965bf215546Sopenharmony_ci         * of 65536 jobs, but we choose a smaller soft limit (arbitrary) to
3966bf215546Sopenharmony_ci         * avoid the risk of timeouts. This might not be a good idea. */
3967bf215546Sopenharmony_ci        if (unlikely(batch->scoreboard.job_index > 10000))
3968bf215546Sopenharmony_ci                batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws");
3969bf215546Sopenharmony_ci
3970bf215546Sopenharmony_ci        bool points = (info->mode == PIPE_PRIM_POINTS);
3971bf215546Sopenharmony_ci
3972bf215546Sopenharmony_ci        if (unlikely(!panfrost_compatible_batch_state(batch, points))) {
3973bf215546Sopenharmony_ci                batch = panfrost_get_fresh_batch_for_fbo(ctx, "State change");
3974bf215546Sopenharmony_ci
3975bf215546Sopenharmony_ci                ASSERTED bool succ = panfrost_compatible_batch_state(batch, points);
3976bf215546Sopenharmony_ci                assert(succ && "must be able to set state for a fresh batch");
3977bf215546Sopenharmony_ci        }
3978bf215546Sopenharmony_ci
3979bf215546Sopenharmony_ci        /* panfrost_batch_skip_rasterization reads
3980bf215546Sopenharmony_ci         * batch->scissor_culls_everything, which is set by
3981bf215546Sopenharmony_ci         * panfrost_emit_viewport, so call that first.
3982bf215546Sopenharmony_ci         */
3983bf215546Sopenharmony_ci        if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR))
3984bf215546Sopenharmony_ci                batch->viewport = panfrost_emit_viewport(batch);
3985bf215546Sopenharmony_ci
3986bf215546Sopenharmony_ci        /* Mark everything dirty when debugging */
3987bf215546Sopenharmony_ci        if (unlikely(dev->debug & PAN_DBG_DIRTY))
3988bf215546Sopenharmony_ci                panfrost_dirty_state_all(ctx);
3989bf215546Sopenharmony_ci
3990bf215546Sopenharmony_ci        /* Conservatively assume draw parameters always change */
3991bf215546Sopenharmony_ci        ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID;
3992bf215546Sopenharmony_ci
3993bf215546Sopenharmony_ci        if (indirect) {
3994bf215546Sopenharmony_ci                assert(num_draws == 1);
3995bf215546Sopenharmony_ci                assert(PAN_GPU_INDIRECTS);
3996bf215546Sopenharmony_ci
3997bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS
3998bf215546Sopenharmony_ci                if (indirect->count_from_stream_output) {
3999bf215546Sopenharmony_ci                        struct pipe_draw_start_count_bias tmp_draw = *draws;
4000bf215546Sopenharmony_ci                        struct panfrost_streamout_target *so =
4001bf215546Sopenharmony_ci                                pan_so_target(indirect->count_from_stream_output);
4002bf215546Sopenharmony_ci
4003bf215546Sopenharmony_ci                        tmp_draw.start = 0;
4004bf215546Sopenharmony_ci                        tmp_draw.count = so->offset;
4005bf215546Sopenharmony_ci                        tmp_draw.index_bias = 0;
4006bf215546Sopenharmony_ci                        panfrost_direct_draw(batch, info, drawid_offset, &tmp_draw);
4007bf215546Sopenharmony_ci                        return;
4008bf215546Sopenharmony_ci                }
4009bf215546Sopenharmony_ci
4010bf215546Sopenharmony_ci                panfrost_indirect_draw(batch, info, drawid_offset, indirect, &draws[0]);
4011bf215546Sopenharmony_ci                return;
4012bf215546Sopenharmony_ci#endif
4013bf215546Sopenharmony_ci        }
4014bf215546Sopenharmony_ci
4015bf215546Sopenharmony_ci        struct pipe_draw_info tmp_info = *info;
4016bf215546Sopenharmony_ci        unsigned drawid = drawid_offset;
4017bf215546Sopenharmony_ci
4018bf215546Sopenharmony_ci        for (unsigned i = 0; i < num_draws; i++) {
4019bf215546Sopenharmony_ci                panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]);
4020bf215546Sopenharmony_ci
4021bf215546Sopenharmony_ci                if (tmp_info.increment_draw_id) {
4022bf215546Sopenharmony_ci                        ctx->dirty |= PAN_DIRTY_DRAWID;
4023bf215546Sopenharmony_ci                        drawid++;
4024bf215546Sopenharmony_ci                }
4025bf215546Sopenharmony_ci        }
4026bf215546Sopenharmony_ci
4027bf215546Sopenharmony_ci}
4028bf215546Sopenharmony_ci
4029bf215546Sopenharmony_ci/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
4030bf215546Sopenharmony_ci * construct the COMPUTE job and some of its payload.
4031bf215546Sopenharmony_ci */
4032bf215546Sopenharmony_ci
4033bf215546Sopenharmony_cistatic void
4034bf215546Sopenharmony_cipanfrost_launch_grid(struct pipe_context *pipe,
4035bf215546Sopenharmony_ci                const struct pipe_grid_info *info)
4036bf215546Sopenharmony_ci{
4037bf215546Sopenharmony_ci        struct panfrost_context *ctx = pan_context(pipe);
4038bf215546Sopenharmony_ci
4039bf215546Sopenharmony_ci        /* XXX - shouldn't be necessary with working memory barriers. Affected
4040bf215546Sopenharmony_ci         * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */
4041bf215546Sopenharmony_ci        panfrost_flush_all_batches(ctx, "Launch grid pre-barrier");
4042bf215546Sopenharmony_ci
4043bf215546Sopenharmony_ci        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
4044bf215546Sopenharmony_ci
4045bf215546Sopenharmony_ci        struct panfrost_shader_state *cs =
4046bf215546Sopenharmony_ci                &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
4047bf215546Sopenharmony_ci
4048bf215546Sopenharmony_ci        /* Indirect dispatch can't handle workgroup local storage since that
4049bf215546Sopenharmony_ci         * would require dynamic memory allocation. Bail in this case. */
4050bf215546Sopenharmony_ci        if (info->indirect && ((cs->info.wls_size != 0) || !PAN_GPU_INDIRECTS)) {
4051bf215546Sopenharmony_ci                struct pipe_transfer *transfer;
4052bf215546Sopenharmony_ci                uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
4053bf215546Sopenharmony_ci                                info->indirect_offset,
4054bf215546Sopenharmony_ci                                3 * sizeof(uint32_t),
4055bf215546Sopenharmony_ci                                PIPE_MAP_READ,
4056bf215546Sopenharmony_ci                                &transfer);
4057bf215546Sopenharmony_ci
4058bf215546Sopenharmony_ci                struct pipe_grid_info direct = *info;
4059bf215546Sopenharmony_ci                direct.indirect = NULL;
4060bf215546Sopenharmony_ci                direct.grid[0] = params[0];
4061bf215546Sopenharmony_ci                direct.grid[1] = params[1];
4062bf215546Sopenharmony_ci                direct.grid[2] = params[2];
4063bf215546Sopenharmony_ci                pipe_buffer_unmap(pipe, transfer);
4064bf215546Sopenharmony_ci
4065bf215546Sopenharmony_ci                if (params[0] && params[1] && params[2])
4066bf215546Sopenharmony_ci                        panfrost_launch_grid(pipe, &direct);
4067bf215546Sopenharmony_ci
4068bf215546Sopenharmony_ci                return;
4069bf215546Sopenharmony_ci        }
4070bf215546Sopenharmony_ci
4071bf215546Sopenharmony_ci        ctx->compute_grid = info;
4072bf215546Sopenharmony_ci
4073bf215546Sopenharmony_ci        struct panfrost_ptr t =
4074bf215546Sopenharmony_ci                pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
4075bf215546Sopenharmony_ci
4076bf215546Sopenharmony_ci        /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
4077bf215546Sopenharmony_ci         * reuse the graphics path for this by lowering to Gallium */
4078bf215546Sopenharmony_ci
4079bf215546Sopenharmony_ci        struct pipe_constant_buffer ubuf = {
4080bf215546Sopenharmony_ci                .buffer = NULL,
4081bf215546Sopenharmony_ci                .buffer_offset = 0,
4082bf215546Sopenharmony_ci                .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->req_input_mem,
4083bf215546Sopenharmony_ci                .user_buffer = info->input
4084bf215546Sopenharmony_ci        };
4085bf215546Sopenharmony_ci
4086bf215546Sopenharmony_ci        if (info->input)
4087bf215546Sopenharmony_ci                pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf);
4088bf215546Sopenharmony_ci
4089bf215546Sopenharmony_ci        /* Invoke according to the grid info */
4090bf215546Sopenharmony_ci
4091bf215546Sopenharmony_ci        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
4092bf215546Sopenharmony_ci
4093bf215546Sopenharmony_ci        if (info->indirect)
4094bf215546Sopenharmony_ci                num_wg[0] = num_wg[1] = num_wg[2] = 1;
4095bf215546Sopenharmony_ci
4096bf215546Sopenharmony_ci        panfrost_update_shader_state(batch, PIPE_SHADER_COMPUTE);
4097bf215546Sopenharmony_ci
4098bf215546Sopenharmony_ci#if PAN_ARCH <= 7
4099bf215546Sopenharmony_ci        panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION),
4100bf215546Sopenharmony_ci                                          num_wg[0], num_wg[1], num_wg[2],
4101bf215546Sopenharmony_ci                                          info->block[0], info->block[1],
4102bf215546Sopenharmony_ci                                          info->block[2],
4103bf215546Sopenharmony_ci                                          false, info->indirect != NULL);
4104bf215546Sopenharmony_ci
4105bf215546Sopenharmony_ci        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
4106bf215546Sopenharmony_ci                cfg.job_task_split =
4107bf215546Sopenharmony_ci                        util_logbase2_ceil(info->block[0] + 1) +
4108bf215546Sopenharmony_ci                        util_logbase2_ceil(info->block[1] + 1) +
4109bf215546Sopenharmony_ci                        util_logbase2_ceil(info->block[2] + 1);
4110bf215546Sopenharmony_ci        }
4111bf215546Sopenharmony_ci
4112bf215546Sopenharmony_ci        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
4113bf215546Sopenharmony_ci                cfg.state = batch->rsd[PIPE_SHADER_COMPUTE];
4114bf215546Sopenharmony_ci                cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE);
4115bf215546Sopenharmony_ci                cfg.thread_storage = panfrost_emit_shared_memory(batch, info);
4116bf215546Sopenharmony_ci                cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE];
4117bf215546Sopenharmony_ci                cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE];
4118bf215546Sopenharmony_ci                cfg.textures = batch->textures[PIPE_SHADER_COMPUTE];
4119bf215546Sopenharmony_ci                cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE];
4120bf215546Sopenharmony_ci        }
4121bf215546Sopenharmony_ci#else
4122bf215546Sopenharmony_ci        pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
4123bf215546Sopenharmony_ci                cfg.workgroup_size_x = info->block[0];
4124bf215546Sopenharmony_ci                cfg.workgroup_size_y = info->block[1];
4125bf215546Sopenharmony_ci                cfg.workgroup_size_z = info->block[2];
4126bf215546Sopenharmony_ci
4127bf215546Sopenharmony_ci                cfg.workgroup_count_x = num_wg[0];
4128bf215546Sopenharmony_ci                cfg.workgroup_count_y = num_wg[1];
4129bf215546Sopenharmony_ci                cfg.workgroup_count_z = num_wg[2];
4130bf215546Sopenharmony_ci
4131bf215546Sopenharmony_ci                panfrost_emit_shader(batch, &cfg.compute, PIPE_SHADER_COMPUTE,
4132bf215546Sopenharmony_ci                                     batch->rsd[PIPE_SHADER_COMPUTE],
4133bf215546Sopenharmony_ci                                     panfrost_emit_shared_memory(batch, info));
4134bf215546Sopenharmony_ci
4135bf215546Sopenharmony_ci                cfg.allow_merging_workgroups = cs->info.cs.allow_merging_workgroups;
4136bf215546Sopenharmony_ci                cfg.task_increment = 1;
4137bf215546Sopenharmony_ci                cfg.task_axis = MALI_TASK_AXIS_Z;
4138bf215546Sopenharmony_ci        }
4139bf215546Sopenharmony_ci#endif
4140bf215546Sopenharmony_ci
4141bf215546Sopenharmony_ci        unsigned indirect_dep = 0;
4142bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS
4143bf215546Sopenharmony_ci        if (info->indirect) {
4144bf215546Sopenharmony_ci                struct pan_indirect_dispatch_info indirect = {
4145bf215546Sopenharmony_ci                        .job = t.gpu,
4146bf215546Sopenharmony_ci                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
4147bf215546Sopenharmony_ci                                        info->indirect_offset,
4148bf215546Sopenharmony_ci                        .num_wg_sysval = {
4149bf215546Sopenharmony_ci                                batch->num_wg_sysval[0],
4150bf215546Sopenharmony_ci                                batch->num_wg_sysval[1],
4151bf215546Sopenharmony_ci                                batch->num_wg_sysval[2],
4152bf215546Sopenharmony_ci                        },
4153bf215546Sopenharmony_ci                };
4154bf215546Sopenharmony_ci
4155bf215546Sopenharmony_ci                indirect_dep = GENX(pan_indirect_dispatch_emit)(&batch->pool.base,
4156bf215546Sopenharmony_ci                                                                &batch->scoreboard,
4157bf215546Sopenharmony_ci                                                                &indirect);
4158bf215546Sopenharmony_ci        }
4159bf215546Sopenharmony_ci#endif
4160bf215546Sopenharmony_ci
4161bf215546Sopenharmony_ci        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
4162bf215546Sopenharmony_ci                         MALI_JOB_TYPE_COMPUTE, true, false,
4163bf215546Sopenharmony_ci                         indirect_dep, 0, &t, false);
4164bf215546Sopenharmony_ci        panfrost_flush_all_batches(ctx, "Launch grid post-barrier");
4165bf215546Sopenharmony_ci}
4166bf215546Sopenharmony_ci
4167bf215546Sopenharmony_cistatic void *
4168bf215546Sopenharmony_cipanfrost_create_rasterizer_state(
4169bf215546Sopenharmony_ci        struct pipe_context *pctx,
4170bf215546Sopenharmony_ci        const struct pipe_rasterizer_state *cso)
4171bf215546Sopenharmony_ci{
4172bf215546Sopenharmony_ci        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
4173bf215546Sopenharmony_ci
4174bf215546Sopenharmony_ci        so->base = *cso;
4175bf215546Sopenharmony_ci
4176bf215546Sopenharmony_ci        /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */
4177bf215546Sopenharmony_ci        assert(cso->offset_clamp == 0.0);
4178bf215546Sopenharmony_ci
4179bf215546Sopenharmony_ci#if PAN_ARCH <= 7
4180bf215546Sopenharmony_ci        pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) {
4181bf215546Sopenharmony_ci                cfg.multisample_enable = cso->multisample;
4182bf215546Sopenharmony_ci                cfg.fixed_function_near_discard = cso->depth_clip_near;
4183bf215546Sopenharmony_ci                cfg.fixed_function_far_discard = cso->depth_clip_far;
4184bf215546Sopenharmony_ci                cfg.shader_depth_range_fixed = true;
4185bf215546Sopenharmony_ci        }
4186bf215546Sopenharmony_ci
4187bf215546Sopenharmony_ci        pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) {
4188bf215546Sopenharmony_ci                cfg.front_facing_depth_bias = cso->offset_tri;
4189bf215546Sopenharmony_ci                cfg.back_facing_depth_bias = cso->offset_tri;
4190bf215546Sopenharmony_ci                cfg.single_sampled_lines = !cso->multisample;
4191bf215546Sopenharmony_ci        }
4192bf215546Sopenharmony_ci#endif
4193bf215546Sopenharmony_ci
4194bf215546Sopenharmony_ci        return so;
4195bf215546Sopenharmony_ci}
4196bf215546Sopenharmony_ci
4197bf215546Sopenharmony_ci#if PAN_ARCH >= 9
4198bf215546Sopenharmony_ci/*
4199bf215546Sopenharmony_ci * Given a pipe_vertex_element, pack the corresponding Valhall attribute
4200bf215546Sopenharmony_ci * descriptor. This function is called at CSO create time. Since
4201bf215546Sopenharmony_ci * pipe_vertex_element lacks a stride, the packed attribute descriptor will not
4202bf215546Sopenharmony_ci * be uploaded until draw time.
4203bf215546Sopenharmony_ci */
4204bf215546Sopenharmony_cistatic void
4205bf215546Sopenharmony_cipanfrost_pack_attribute(struct panfrost_device *dev,
4206bf215546Sopenharmony_ci                        const struct pipe_vertex_element el,
4207bf215546Sopenharmony_ci                        struct mali_attribute_packed *out)
4208bf215546Sopenharmony_ci{
4209bf215546Sopenharmony_ci        pan_pack(out, ATTRIBUTE, cfg) {
4210bf215546Sopenharmony_ci                cfg.table = PAN_TABLE_ATTRIBUTE_BUFFER;
4211bf215546Sopenharmony_ci                cfg.frequency = (el.instance_divisor > 0) ?
4212bf215546Sopenharmony_ci                        MALI_ATTRIBUTE_FREQUENCY_INSTANCE :
4213bf215546Sopenharmony_ci                        MALI_ATTRIBUTE_FREQUENCY_VERTEX;
4214bf215546Sopenharmony_ci                cfg.format = dev->formats[el.src_format].hw;
4215bf215546Sopenharmony_ci                cfg.offset = el.src_offset;
4216bf215546Sopenharmony_ci                cfg.buffer_index = el.vertex_buffer_index;
4217bf215546Sopenharmony_ci
4218bf215546Sopenharmony_ci                if (el.instance_divisor == 0) {
4219bf215546Sopenharmony_ci                        /* Per-vertex */
4220bf215546Sopenharmony_ci                        cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
4221bf215546Sopenharmony_ci                        cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
4222bf215546Sopenharmony_ci                        cfg.offset_enable = true;
4223bf215546Sopenharmony_ci                } else if (util_is_power_of_two_or_zero(el.instance_divisor)) {
4224bf215546Sopenharmony_ci                        /* Per-instance, POT divisor */
4225bf215546Sopenharmony_ci                        cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
4226bf215546Sopenharmony_ci                        cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE;
4227bf215546Sopenharmony_ci                        cfg.divisor_r = __builtin_ctz(el.instance_divisor);
4228bf215546Sopenharmony_ci                } else {
4229bf215546Sopenharmony_ci                        /* Per-instance, NPOT divisor */
4230bf215546Sopenharmony_ci                        cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
4231bf215546Sopenharmony_ci                        cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_INSTANCE;
4232bf215546Sopenharmony_ci
4233bf215546Sopenharmony_ci                        cfg.divisor_d =
4234bf215546Sopenharmony_ci                                panfrost_compute_magic_divisor(el.instance_divisor,
4235bf215546Sopenharmony_ci                                                &cfg.divisor_r, &cfg.divisor_e);
4236bf215546Sopenharmony_ci                }
4237bf215546Sopenharmony_ci        }
4238bf215546Sopenharmony_ci}
4239bf215546Sopenharmony_ci#endif
4240bf215546Sopenharmony_ci
4241bf215546Sopenharmony_cistatic void *
4242bf215546Sopenharmony_cipanfrost_create_vertex_elements_state(
4243bf215546Sopenharmony_ci        struct pipe_context *pctx,
4244bf215546Sopenharmony_ci        unsigned num_elements,
4245bf215546Sopenharmony_ci        const struct pipe_vertex_element *elements)
4246bf215546Sopenharmony_ci{
4247bf215546Sopenharmony_ci        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
4248bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(pctx->screen);
4249bf215546Sopenharmony_ci
4250bf215546Sopenharmony_ci        so->num_elements = num_elements;
4251bf215546Sopenharmony_ci        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
4252bf215546Sopenharmony_ci
4253bf215546Sopenharmony_ci#if PAN_ARCH >= 9
4254bf215546Sopenharmony_ci        for (unsigned i = 0; i < num_elements; ++i)
4255bf215546Sopenharmony_ci                panfrost_pack_attribute(dev, elements[i], &so->attributes[i]);
4256bf215546Sopenharmony_ci#else
4257bf215546Sopenharmony_ci        /* Assign attribute buffers corresponding to the vertex buffers, keyed
4258bf215546Sopenharmony_ci         * for a particular divisor since that's how instancing works on Mali */
4259bf215546Sopenharmony_ci        for (unsigned i = 0; i < num_elements; ++i) {
4260bf215546Sopenharmony_ci                so->element_buffer[i] = pan_assign_vertex_buffer(
4261bf215546Sopenharmony_ci                                so->buffers, &so->nr_bufs,
4262bf215546Sopenharmony_ci                                elements[i].vertex_buffer_index,
4263bf215546Sopenharmony_ci                                elements[i].instance_divisor);
4264bf215546Sopenharmony_ci        }
4265bf215546Sopenharmony_ci
4266bf215546Sopenharmony_ci        for (int i = 0; i < num_elements; ++i) {
4267bf215546Sopenharmony_ci                enum pipe_format fmt = elements[i].src_format;
4268bf215546Sopenharmony_ci                so->formats[i] = dev->formats[fmt].hw;
4269bf215546Sopenharmony_ci        }
4270bf215546Sopenharmony_ci
4271bf215546Sopenharmony_ci        /* Let's also prepare vertex builtins */
4272bf215546Sopenharmony_ci        so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
4273bf215546Sopenharmony_ci        so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
4274bf215546Sopenharmony_ci#endif
4275bf215546Sopenharmony_ci
4276bf215546Sopenharmony_ci        return so;
4277bf215546Sopenharmony_ci}
4278bf215546Sopenharmony_ci
4279bf215546Sopenharmony_cistatic inline unsigned
4280bf215546Sopenharmony_cipan_pipe_to_stencil_op(enum pipe_stencil_op in)
4281bf215546Sopenharmony_ci{
4282bf215546Sopenharmony_ci        switch (in) {
4283bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
4284bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
4285bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
4286bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT;
4287bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT;
4288bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
4289bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
4290bf215546Sopenharmony_ci        case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
4291bf215546Sopenharmony_ci        default: unreachable("Invalid stencil op");
4292bf215546Sopenharmony_ci        }
4293bf215546Sopenharmony_ci}
4294bf215546Sopenharmony_ci
4295bf215546Sopenharmony_ci#if PAN_ARCH <= 7
4296bf215546Sopenharmony_cistatic inline void
4297bf215546Sopenharmony_cipan_pipe_to_stencil(const struct pipe_stencil_state *in,
4298bf215546Sopenharmony_ci                    struct mali_stencil_packed *out)
4299bf215546Sopenharmony_ci{
4300bf215546Sopenharmony_ci        pan_pack(out, STENCIL, s) {
4301bf215546Sopenharmony_ci                s.mask = in->valuemask;
4302bf215546Sopenharmony_ci                s.compare_function = (enum mali_func) in->func;
4303bf215546Sopenharmony_ci                s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op);
4304bf215546Sopenharmony_ci                s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op);
4305bf215546Sopenharmony_ci                s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op);
4306bf215546Sopenharmony_ci        }
4307bf215546Sopenharmony_ci}
4308bf215546Sopenharmony_ci#endif
4309bf215546Sopenharmony_ci
4310bf215546Sopenharmony_cistatic bool
4311bf215546Sopenharmony_cipipe_zs_always_passes(const struct pipe_depth_stencil_alpha_state *zsa)
4312bf215546Sopenharmony_ci{
4313bf215546Sopenharmony_ci        if (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS)
4314bf215546Sopenharmony_ci                return false;
4315bf215546Sopenharmony_ci
4316bf215546Sopenharmony_ci        if (zsa->stencil[0].enabled && zsa->stencil[0].func != PIPE_FUNC_ALWAYS)
4317bf215546Sopenharmony_ci                return false;
4318bf215546Sopenharmony_ci
4319bf215546Sopenharmony_ci        if (zsa->stencil[1].enabled && zsa->stencil[1].func != PIPE_FUNC_ALWAYS)
4320bf215546Sopenharmony_ci                return false;
4321bf215546Sopenharmony_ci
4322bf215546Sopenharmony_ci        return true;
4323bf215546Sopenharmony_ci}
4324bf215546Sopenharmony_ci
4325bf215546Sopenharmony_cistatic void *
4326bf215546Sopenharmony_cipanfrost_create_depth_stencil_state(struct pipe_context *pipe,
4327bf215546Sopenharmony_ci                                    const struct pipe_depth_stencil_alpha_state *zsa)
4328bf215546Sopenharmony_ci{
4329bf215546Sopenharmony_ci        struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state);
4330bf215546Sopenharmony_ci        so->base = *zsa;
4331bf215546Sopenharmony_ci
4332bf215546Sopenharmony_ci        const struct pipe_stencil_state front = zsa->stencil[0];
4333bf215546Sopenharmony_ci        const struct pipe_stencil_state back =
4334bf215546Sopenharmony_ci                zsa->stencil[1].enabled ? zsa->stencil[1] : front;
4335bf215546Sopenharmony_ci
4336bf215546Sopenharmony_ci        enum mali_func depth_func = zsa->depth_enabled ?
4337bf215546Sopenharmony_ci                (enum mali_func) zsa->depth_func : MALI_FUNC_ALWAYS;
4338bf215546Sopenharmony_ci
4339bf215546Sopenharmony_ci        /* Normalize (there's no separate enable) */
4340bf215546Sopenharmony_ci        if (PAN_ARCH <= 5 && !zsa->alpha_enabled)
4341bf215546Sopenharmony_ci                so->base.alpha_func = MALI_FUNC_ALWAYS;
4342bf215546Sopenharmony_ci
4343bf215546Sopenharmony_ci#if PAN_ARCH <= 7
4344bf215546Sopenharmony_ci        /* Prepack relevant parts of the Renderer State Descriptor. They will
4345bf215546Sopenharmony_ci         * be ORed in at draw-time */
4346bf215546Sopenharmony_ci        pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) {
4347bf215546Sopenharmony_ci                cfg.depth_function = depth_func;
4348bf215546Sopenharmony_ci                cfg.depth_write_mask = zsa->depth_writemask;
4349bf215546Sopenharmony_ci        }
4350bf215546Sopenharmony_ci
4351bf215546Sopenharmony_ci        pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) {
4352bf215546Sopenharmony_ci                cfg.stencil_enable = front.enabled;
4353bf215546Sopenharmony_ci                cfg.stencil_mask_front = front.writemask;
4354bf215546Sopenharmony_ci                cfg.stencil_mask_back = back.writemask;
4355bf215546Sopenharmony_ci
4356bf215546Sopenharmony_ci#if PAN_ARCH <= 5
4357bf215546Sopenharmony_ci                cfg.alpha_test_compare_function =
4358bf215546Sopenharmony_ci                        (enum mali_func) so->base.alpha_func;
4359bf215546Sopenharmony_ci#endif
4360bf215546Sopenharmony_ci        }
4361bf215546Sopenharmony_ci
4362bf215546Sopenharmony_ci        /* Stencil tests have their own words in the RSD */
4363bf215546Sopenharmony_ci        pan_pipe_to_stencil(&front, &so->stencil_front);
4364bf215546Sopenharmony_ci        pan_pipe_to_stencil(&back, &so->stencil_back);
4365bf215546Sopenharmony_ci#else
4366bf215546Sopenharmony_ci        pan_pack(&so->desc, DEPTH_STENCIL, cfg) {
4367bf215546Sopenharmony_ci                cfg.front_compare_function = (enum mali_func) front.func;
4368bf215546Sopenharmony_ci                cfg.front_stencil_fail = pan_pipe_to_stencil_op(front.fail_op);
4369bf215546Sopenharmony_ci                cfg.front_depth_fail = pan_pipe_to_stencil_op(front.zfail_op);
4370bf215546Sopenharmony_ci                cfg.front_depth_pass = pan_pipe_to_stencil_op(front.zpass_op);
4371bf215546Sopenharmony_ci
4372bf215546Sopenharmony_ci                cfg.back_compare_function = (enum mali_func) back.func;
4373bf215546Sopenharmony_ci                cfg.back_stencil_fail = pan_pipe_to_stencil_op(back.fail_op);
4374bf215546Sopenharmony_ci                cfg.back_depth_fail = pan_pipe_to_stencil_op(back.zfail_op);
4375bf215546Sopenharmony_ci                cfg.back_depth_pass = pan_pipe_to_stencil_op(back.zpass_op);
4376bf215546Sopenharmony_ci
4377bf215546Sopenharmony_ci                cfg.stencil_test_enable = front.enabled;
4378bf215546Sopenharmony_ci                cfg.front_write_mask = front.writemask;
4379bf215546Sopenharmony_ci                cfg.back_write_mask = back.writemask;
4380bf215546Sopenharmony_ci                cfg.front_value_mask = front.valuemask;
4381bf215546Sopenharmony_ci                cfg.back_value_mask = back.valuemask;
4382bf215546Sopenharmony_ci
4383bf215546Sopenharmony_ci                cfg.depth_write_enable = zsa->depth_writemask;
4384bf215546Sopenharmony_ci                cfg.depth_function = depth_func;
4385bf215546Sopenharmony_ci        }
4386bf215546Sopenharmony_ci#endif
4387bf215546Sopenharmony_ci
4388bf215546Sopenharmony_ci        so->enabled = zsa->stencil[0].enabled ||
4389bf215546Sopenharmony_ci                (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS);
4390bf215546Sopenharmony_ci
4391bf215546Sopenharmony_ci        so->zs_always_passes = pipe_zs_always_passes(zsa);
4392bf215546Sopenharmony_ci        so->writes_zs = util_writes_depth_stencil(zsa);
4393bf215546Sopenharmony_ci
4394bf215546Sopenharmony_ci        /* TODO: Bounds test should be easy */
4395bf215546Sopenharmony_ci        assert(!zsa->depth_bounds_test);
4396bf215546Sopenharmony_ci
4397bf215546Sopenharmony_ci        return so;
4398bf215546Sopenharmony_ci}
4399bf215546Sopenharmony_ci
4400bf215546Sopenharmony_cistatic struct pipe_sampler_view *
4401bf215546Sopenharmony_cipanfrost_create_sampler_view(
4402bf215546Sopenharmony_ci        struct pipe_context *pctx,
4403bf215546Sopenharmony_ci        struct pipe_resource *texture,
4404bf215546Sopenharmony_ci        const struct pipe_sampler_view *template)
4405bf215546Sopenharmony_ci{
4406bf215546Sopenharmony_ci        struct panfrost_context *ctx = pan_context(pctx);
4407bf215546Sopenharmony_ci        struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
4408bf215546Sopenharmony_ci
4409bf215546Sopenharmony_ci        pan_legalize_afbc_format(ctx, pan_resource(texture), template->format);
4410bf215546Sopenharmony_ci
4411bf215546Sopenharmony_ci        pipe_reference(NULL, &texture->reference);
4412bf215546Sopenharmony_ci
4413bf215546Sopenharmony_ci        so->base = *template;
4414bf215546Sopenharmony_ci        so->base.texture = texture;
4415bf215546Sopenharmony_ci        so->base.reference.count = 1;
4416bf215546Sopenharmony_ci        so->base.context = pctx;
4417bf215546Sopenharmony_ci
4418bf215546Sopenharmony_ci        panfrost_create_sampler_view_bo(so, pctx, texture);
4419bf215546Sopenharmony_ci
4420bf215546Sopenharmony_ci        return (struct pipe_sampler_view *) so;
4421bf215546Sopenharmony_ci}
4422bf215546Sopenharmony_ci
/* A given Gallium blend state can be encoded to the hardware in numerous,
 * dramatically divergent ways due to the interactions of blending with
 * framebuffer formats. Conceptually, there are two modes:
 *
 * - Fixed-function blending (for suitable framebuffer formats, suitable blend
 *   state, and suitable blend constant)
 *
 * - Blend shaders (for everything else)
 *
 * A given Gallium blend configuration will compile to exactly one
 * fixed-function blend state, if it compiles to any, although the constant
 * will vary across runs as that is tracked outside of the Gallium CSO.
 *
 * However, that same blend configuration will compile to many different blend
 * shaders, depending on the framebuffer formats active. The rationale is that
 * blend shaders override not just fixed-function blending but also
 * fixed-function format conversion, so blend shaders are keyed to a particular
 * framebuffer format. As an example, the tilebuffer format is identical for
 * RG16F and RG16UI -- both are simply 32-bit raw pixels -- so both require
 * blend shaders.
 *
 * All of this state is encapsulated in the panfrost_blend_state struct
 * (our subclass of pipe_blend_state).
 */
4447bf215546Sopenharmony_ci
4448bf215546Sopenharmony_ci/* Create a blend CSO. Essentially, try to compile a fixed-function
4449bf215546Sopenharmony_ci * expression and initialize blend shaders */
4450bf215546Sopenharmony_ci
4451bf215546Sopenharmony_cistatic void *
4452bf215546Sopenharmony_cipanfrost_create_blend_state(struct pipe_context *pipe,
4453bf215546Sopenharmony_ci                            const struct pipe_blend_state *blend)
4454bf215546Sopenharmony_ci{
4455bf215546Sopenharmony_ci        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
4456bf215546Sopenharmony_ci        so->base = *blend;
4457bf215546Sopenharmony_ci
4458bf215546Sopenharmony_ci        so->pan.logicop_enable = blend->logicop_enable;
4459bf215546Sopenharmony_ci        so->pan.logicop_func = blend->logicop_func;
4460bf215546Sopenharmony_ci        so->pan.rt_count = blend->max_rt + 1;
4461bf215546Sopenharmony_ci
4462bf215546Sopenharmony_ci        for (unsigned c = 0; c < so->pan.rt_count; ++c) {
4463bf215546Sopenharmony_ci                unsigned g = blend->independent_blend_enable ? c : 0;
4464bf215546Sopenharmony_ci                const struct pipe_rt_blend_state pipe = blend->rt[g];
4465bf215546Sopenharmony_ci                struct pan_blend_equation equation = {0};
4466bf215546Sopenharmony_ci
4467bf215546Sopenharmony_ci                equation.color_mask = pipe.colormask;
4468bf215546Sopenharmony_ci                equation.blend_enable = pipe.blend_enable;
4469bf215546Sopenharmony_ci
4470bf215546Sopenharmony_ci                if (pipe.blend_enable) {
4471bf215546Sopenharmony_ci                        equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func);
4472bf215546Sopenharmony_ci                        equation.rgb_src_factor = util_blend_factor_to_shader(pipe.rgb_src_factor);
4473bf215546Sopenharmony_ci                        equation.rgb_invert_src_factor = util_blend_factor_is_inverted(pipe.rgb_src_factor);
4474bf215546Sopenharmony_ci                        equation.rgb_dst_factor = util_blend_factor_to_shader(pipe.rgb_dst_factor);
4475bf215546Sopenharmony_ci                        equation.rgb_invert_dst_factor = util_blend_factor_is_inverted(pipe.rgb_dst_factor);
4476bf215546Sopenharmony_ci                        equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func);
4477bf215546Sopenharmony_ci                        equation.alpha_src_factor = util_blend_factor_to_shader(pipe.alpha_src_factor);
4478bf215546Sopenharmony_ci                        equation.alpha_invert_src_factor = util_blend_factor_is_inverted(pipe.alpha_src_factor);
4479bf215546Sopenharmony_ci                        equation.alpha_dst_factor = util_blend_factor_to_shader(pipe.alpha_dst_factor);
4480bf215546Sopenharmony_ci                        equation.alpha_invert_dst_factor = util_blend_factor_is_inverted(pipe.alpha_dst_factor);
4481bf215546Sopenharmony_ci                }
4482bf215546Sopenharmony_ci
4483bf215546Sopenharmony_ci                /* Determine some common properties */
4484bf215546Sopenharmony_ci                unsigned constant_mask = pan_blend_constant_mask(equation);
4485bf215546Sopenharmony_ci                const bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
4486bf215546Sopenharmony_ci                so->info[c] = (struct pan_blend_info) {
4487bf215546Sopenharmony_ci                        .no_colour = (equation.color_mask == 0),
4488bf215546Sopenharmony_ci                        .opaque = pan_blend_is_opaque(equation),
4489bf215546Sopenharmony_ci                        .constant_mask = constant_mask,
4490bf215546Sopenharmony_ci
4491bf215546Sopenharmony_ci                        /* TODO: check the dest for the logicop */
4492bf215546Sopenharmony_ci                        .load_dest = blend->logicop_enable ||
4493bf215546Sopenharmony_ci                                pan_blend_reads_dest(equation),
4494bf215546Sopenharmony_ci
4495bf215546Sopenharmony_ci                        /* Could this possibly be fixed-function? */
4496bf215546Sopenharmony_ci                        .fixed_function = !blend->logicop_enable &&
4497bf215546Sopenharmony_ci                                pan_blend_can_fixed_function(equation,
4498bf215546Sopenharmony_ci                                                             supports_2src) &&
4499bf215546Sopenharmony_ci                                (!constant_mask ||
4500bf215546Sopenharmony_ci                                 pan_blend_supports_constant(PAN_ARCH, c)),
4501bf215546Sopenharmony_ci
4502bf215546Sopenharmony_ci                        .alpha_zero_nop = pan_blend_alpha_zero_nop(equation),
4503bf215546Sopenharmony_ci                        .alpha_one_store = pan_blend_alpha_one_store(equation),
4504bf215546Sopenharmony_ci                };
4505bf215546Sopenharmony_ci
4506bf215546Sopenharmony_ci                so->pan.rts[c].equation = equation;
4507bf215546Sopenharmony_ci
4508bf215546Sopenharmony_ci                /* Bifrost needs to know if any render target loads its
4509bf215546Sopenharmony_ci                 * destination in the hot draw path, so precompute this */
4510bf215546Sopenharmony_ci                if (so->info[c].load_dest)
4511bf215546Sopenharmony_ci                        so->load_dest_mask |= BITFIELD_BIT(c);
4512bf215546Sopenharmony_ci
4513bf215546Sopenharmony_ci                /* Converting equations to Mali style is expensive, do it at
4514bf215546Sopenharmony_ci                 * CSO create time instead of draw-time */
4515bf215546Sopenharmony_ci                if (so->info[c].fixed_function) {
4516bf215546Sopenharmony_ci                        so->equation[c] = pan_pack_blend(equation);
4517bf215546Sopenharmony_ci                }
4518bf215546Sopenharmony_ci        }
4519bf215546Sopenharmony_ci
4520bf215546Sopenharmony_ci        return so;
4521bf215546Sopenharmony_ci}
4522bf215546Sopenharmony_ci
4523bf215546Sopenharmony_cistatic void
4524bf215546Sopenharmony_ciprepare_shader(struct panfrost_shader_state *state,
4525bf215546Sopenharmony_ci            struct panfrost_pool *pool, bool upload)
4526bf215546Sopenharmony_ci{
4527bf215546Sopenharmony_ci#if PAN_ARCH <= 7
4528bf215546Sopenharmony_ci        void *out = &state->partial_rsd;
4529bf215546Sopenharmony_ci
4530bf215546Sopenharmony_ci        if (upload) {
4531bf215546Sopenharmony_ci                struct panfrost_ptr ptr =
4532bf215546Sopenharmony_ci                        pan_pool_alloc_desc(&pool->base, RENDERER_STATE);
4533bf215546Sopenharmony_ci
4534bf215546Sopenharmony_ci                state->state = panfrost_pool_take_ref(pool, ptr.gpu);
4535bf215546Sopenharmony_ci                out = ptr.cpu;
4536bf215546Sopenharmony_ci        }
4537bf215546Sopenharmony_ci
4538bf215546Sopenharmony_ci        pan_pack(out, RENDERER_STATE, cfg) {
4539bf215546Sopenharmony_ci                pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg);
4540bf215546Sopenharmony_ci
4541bf215546Sopenharmony_ci       }
4542bf215546Sopenharmony_ci#else
4543bf215546Sopenharmony_ci        assert(upload);
4544bf215546Sopenharmony_ci
4545bf215546Sopenharmony_ci        /* The address in the shader program descriptor must be non-null, but
4546bf215546Sopenharmony_ci         * the entire shader program descriptor may be omitted.
4547bf215546Sopenharmony_ci         *
4548bf215546Sopenharmony_ci         * See dEQP-GLES31.functional.compute.basic.empty
4549bf215546Sopenharmony_ci         */
4550bf215546Sopenharmony_ci        if (!state->bin.gpu)
4551bf215546Sopenharmony_ci                return;
4552bf215546Sopenharmony_ci
4553bf215546Sopenharmony_ci        bool vs = (state->info.stage == MESA_SHADER_VERTEX);
4554bf215546Sopenharmony_ci        bool secondary_enable = (vs && state->info.vs.secondary_enable);
4555bf215546Sopenharmony_ci
4556bf215546Sopenharmony_ci        unsigned nr_variants = secondary_enable ? 3 : vs ? 2 : 1;
4557bf215546Sopenharmony_ci        struct panfrost_ptr ptr = pan_pool_alloc_desc_array(&pool->base,
4558bf215546Sopenharmony_ci                                                            nr_variants,
4559bf215546Sopenharmony_ci                                                            SHADER_PROGRAM);
4560bf215546Sopenharmony_ci
4561bf215546Sopenharmony_ci        state->state = panfrost_pool_take_ref(pool, ptr.gpu);
4562bf215546Sopenharmony_ci
4563bf215546Sopenharmony_ci        /* Generic, or IDVS/points */
4564bf215546Sopenharmony_ci        pan_pack(ptr.cpu, SHADER_PROGRAM, cfg) {
4565bf215546Sopenharmony_ci                cfg.stage = pan_shader_stage(&state->info);
4566bf215546Sopenharmony_ci                cfg.primary_shader = true;
4567bf215546Sopenharmony_ci                cfg.register_allocation = pan_register_allocation(state->info.work_reg_count);
4568bf215546Sopenharmony_ci                cfg.binary = state->bin.gpu;
4569bf215546Sopenharmony_ci                cfg.preload.r48_r63 = (state->info.preload >> 48);
4570bf215546Sopenharmony_ci
4571bf215546Sopenharmony_ci                if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
4572bf215546Sopenharmony_ci                        cfg.requires_helper_threads = state->info.contains_barrier;
4573bf215546Sopenharmony_ci        }
4574bf215546Sopenharmony_ci
4575bf215546Sopenharmony_ci        if (!vs)
4576bf215546Sopenharmony_ci                return;
4577bf215546Sopenharmony_ci
4578bf215546Sopenharmony_ci        /* IDVS/triangles */
4579bf215546Sopenharmony_ci        pan_pack(ptr.cpu + pan_size(SHADER_PROGRAM), SHADER_PROGRAM, cfg) {
4580bf215546Sopenharmony_ci                cfg.stage = pan_shader_stage(&state->info);
4581bf215546Sopenharmony_ci                cfg.primary_shader = true;
4582bf215546Sopenharmony_ci                cfg.register_allocation = pan_register_allocation(state->info.work_reg_count);
4583bf215546Sopenharmony_ci                cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
4584bf215546Sopenharmony_ci                cfg.preload.r48_r63 = (state->info.preload >> 48);
4585bf215546Sopenharmony_ci        }
4586bf215546Sopenharmony_ci
4587bf215546Sopenharmony_ci        if (!secondary_enable)
4588bf215546Sopenharmony_ci                return;
4589bf215546Sopenharmony_ci
4590bf215546Sopenharmony_ci        pan_pack(ptr.cpu + (pan_size(SHADER_PROGRAM) * 2), SHADER_PROGRAM, cfg) {
4591bf215546Sopenharmony_ci                unsigned work_count = state->info.vs.secondary_work_reg_count;
4592bf215546Sopenharmony_ci
4593bf215546Sopenharmony_ci                cfg.stage = pan_shader_stage(&state->info);
4594bf215546Sopenharmony_ci                cfg.primary_shader = false;
4595bf215546Sopenharmony_ci                cfg.register_allocation = pan_register_allocation(work_count);
4596bf215546Sopenharmony_ci                cfg.binary = state->bin.gpu + state->info.vs.secondary_offset;
4597bf215546Sopenharmony_ci                cfg.preload.r48_r63 = (state->info.vs.secondary_preload >> 48);
4598bf215546Sopenharmony_ci        }
4599bf215546Sopenharmony_ci#endif
4600bf215546Sopenharmony_ci}
4601bf215546Sopenharmony_ci
/* Gallium get_sample_position hook.
 *
 * Looks up the sample pattern for sample_count with
 * panfrost_sample_pattern() and forwards it, together with sample_index and
 * out_value, to panfrost_query_sample_position(). The context argument is
 * not used. */
static void
panfrost_get_sample_position(struct pipe_context *context,
                             unsigned sample_count,
                             unsigned sample_index,
                             float *out_value)
{
        panfrost_query_sample_position(panfrost_sample_pattern(sample_count),
                                       sample_index, out_value);
}
4613bf215546Sopenharmony_ci
4614bf215546Sopenharmony_cistatic void
4615bf215546Sopenharmony_ciscreen_destroy(struct pipe_screen *pscreen)
4616bf215546Sopenharmony_ci{
4617bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(pscreen);
4618bf215546Sopenharmony_ci        GENX(pan_blitter_cleanup)(dev);
4619bf215546Sopenharmony_ci
4620bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS
4621bf215546Sopenharmony_ci        GENX(panfrost_cleanup_indirect_draw_shaders)(dev);
4622bf215546Sopenharmony_ci        GENX(pan_indirect_dispatch_cleanup)(dev);
4623bf215546Sopenharmony_ci#endif
4624bf215546Sopenharmony_ci}
4625bf215546Sopenharmony_ci
4626bf215546Sopenharmony_cistatic void
4627bf215546Sopenharmony_cipreload(struct panfrost_batch *batch, struct pan_fb_info *fb)
4628bf215546Sopenharmony_ci{
4629bf215546Sopenharmony_ci        GENX(pan_preload_fb)(&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu,
4630bf215546Sopenharmony_ci                             PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL);
4631bf215546Sopenharmony_ci}
4632bf215546Sopenharmony_ci
4633bf215546Sopenharmony_cistatic void
4634bf215546Sopenharmony_ciinit_batch(struct panfrost_batch *batch)
4635bf215546Sopenharmony_ci{
4636bf215546Sopenharmony_ci        /* Reserve the framebuffer and local storage descriptors */
4637bf215546Sopenharmony_ci        batch->framebuffer =
4638bf215546Sopenharmony_ci#if PAN_ARCH == 4
4639bf215546Sopenharmony_ci                pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER);
4640bf215546Sopenharmony_ci#else
4641bf215546Sopenharmony_ci                pan_pool_alloc_desc_aggregate(&batch->pool.base,
4642bf215546Sopenharmony_ci                                              PAN_DESC(FRAMEBUFFER),
4643bf215546Sopenharmony_ci                                              PAN_DESC(ZS_CRC_EXTENSION),
4644bf215546Sopenharmony_ci                                              PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
4645bf215546Sopenharmony_ci
4646bf215546Sopenharmony_ci                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
4647bf215546Sopenharmony_ci#endif
4648bf215546Sopenharmony_ci
4649bf215546Sopenharmony_ci#if PAN_ARCH >= 6
4650bf215546Sopenharmony_ci        batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
4651bf215546Sopenharmony_ci#else
4652bf215546Sopenharmony_ci        /* On Midgard, the TLS is embedded in the FB descriptor */
4653bf215546Sopenharmony_ci        batch->tls = batch->framebuffer;
4654bf215546Sopenharmony_ci#endif
4655bf215546Sopenharmony_ci}
4656bf215546Sopenharmony_ci
4657bf215546Sopenharmony_cistatic void
4658bf215546Sopenharmony_cipanfrost_sampler_view_destroy(
4659bf215546Sopenharmony_ci        struct pipe_context *pctx,
4660bf215546Sopenharmony_ci        struct pipe_sampler_view *pview)
4661bf215546Sopenharmony_ci{
4662bf215546Sopenharmony_ci        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
4663bf215546Sopenharmony_ci
4664bf215546Sopenharmony_ci        pipe_resource_reference(&pview->texture, NULL);
4665bf215546Sopenharmony_ci        panfrost_bo_unreference(view->state.bo);
4666bf215546Sopenharmony_ci        ralloc_free(view);
4667bf215546Sopenharmony_ci}
4668bf215546Sopenharmony_ci
4669bf215546Sopenharmony_cistatic void
4670bf215546Sopenharmony_cicontext_init(struct pipe_context *pipe)
4671bf215546Sopenharmony_ci{
4672bf215546Sopenharmony_ci        pipe->draw_vbo           = panfrost_draw_vbo;
4673bf215546Sopenharmony_ci        pipe->launch_grid        = panfrost_launch_grid;
4674bf215546Sopenharmony_ci
4675bf215546Sopenharmony_ci        pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state;
4676bf215546Sopenharmony_ci        pipe->create_rasterizer_state = panfrost_create_rasterizer_state;
4677bf215546Sopenharmony_ci        pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
4678bf215546Sopenharmony_ci        pipe->create_sampler_view = panfrost_create_sampler_view;
4679bf215546Sopenharmony_ci        pipe->sampler_view_destroy = panfrost_sampler_view_destroy;
4680bf215546Sopenharmony_ci        pipe->create_sampler_state = panfrost_create_sampler_state;
4681bf215546Sopenharmony_ci        pipe->create_blend_state = panfrost_create_blend_state;
4682bf215546Sopenharmony_ci
4683bf215546Sopenharmony_ci        pipe->get_sample_position = panfrost_get_sample_position;
4684bf215546Sopenharmony_ci}
4685bf215546Sopenharmony_ci
4686bf215546Sopenharmony_ci#if PAN_ARCH <= 5
4687bf215546Sopenharmony_ci
4688bf215546Sopenharmony_ci/* Returns the polygon list's GPU address if available, or otherwise allocates
4689bf215546Sopenharmony_ci * the polygon list.  It's perfectly fast to use allocate/free BO directly,
4690bf215546Sopenharmony_ci * since we'll hit the BO cache and this is one-per-batch anyway. */
4691bf215546Sopenharmony_ci
4692bf215546Sopenharmony_cistatic mali_ptr
4693bf215546Sopenharmony_cibatch_get_polygon_list(struct panfrost_batch *batch)
4694bf215546Sopenharmony_ci{
4695bf215546Sopenharmony_ci        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
4696bf215546Sopenharmony_ci
4697bf215546Sopenharmony_ci        if (!batch->tiler_ctx.midgard.polygon_list) {
4698bf215546Sopenharmony_ci                bool has_draws = batch->scoreboard.first_tiler != NULL;
4699bf215546Sopenharmony_ci                unsigned size =
4700bf215546Sopenharmony_ci                        panfrost_tiler_get_polygon_list_size(dev,
4701bf215546Sopenharmony_ci                                                             batch->key.width,
4702bf215546Sopenharmony_ci                                                             batch->key.height,
4703bf215546Sopenharmony_ci                                                             has_draws);
4704bf215546Sopenharmony_ci                size = util_next_power_of_two(size);
4705bf215546Sopenharmony_ci
4706bf215546Sopenharmony_ci                /* Create the BO as invisible if we can. In the non-hierarchical tiler case,
4707bf215546Sopenharmony_ci                 * we need to write the polygon list manually because there's not WRITE_VALUE
4708bf215546Sopenharmony_ci                 * job in the chain (maybe we should add one...). */
4709bf215546Sopenharmony_ci                bool init_polygon_list = !has_draws && dev->model->quirks.no_hierarchical_tiling;
4710bf215546Sopenharmony_ci                batch->tiler_ctx.midgard.polygon_list =
4711bf215546Sopenharmony_ci                        panfrost_batch_create_bo(batch, size,
4712bf215546Sopenharmony_ci                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
4713bf215546Sopenharmony_ci                                                 PIPE_SHADER_VERTEX,
4714bf215546Sopenharmony_ci                                                 "Polygon list");
4715bf215546Sopenharmony_ci                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
4716bf215546Sopenharmony_ci                                PIPE_SHADER_FRAGMENT);
4717bf215546Sopenharmony_ci
4718bf215546Sopenharmony_ci                if (init_polygon_list) {
4719bf215546Sopenharmony_ci                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
4720bf215546Sopenharmony_ci                        uint32_t *polygon_list_body =
4721bf215546Sopenharmony_ci                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
4722bf215546Sopenharmony_ci                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
4723bf215546Sopenharmony_ci
4724bf215546Sopenharmony_ci                        /* Magic for Mali T720 */
4725bf215546Sopenharmony_ci                        polygon_list_body[0] = 0xa0000000;
4726bf215546Sopenharmony_ci                }
4727bf215546Sopenharmony_ci
4728bf215546Sopenharmony_ci                batch->tiler_ctx.midgard.disable = !has_draws;
4729bf215546Sopenharmony_ci        }
4730bf215546Sopenharmony_ci
4731bf215546Sopenharmony_ci        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
4732bf215546Sopenharmony_ci}
4733bf215546Sopenharmony_ci#endif
4734bf215546Sopenharmony_ci
4735bf215546Sopenharmony_cistatic void
4736bf215546Sopenharmony_ciinit_polygon_list(struct panfrost_batch *batch)
4737bf215546Sopenharmony_ci{
4738bf215546Sopenharmony_ci#if PAN_ARCH <= 5
4739bf215546Sopenharmony_ci        mali_ptr polygon_list = batch_get_polygon_list(batch);
4740bf215546Sopenharmony_ci        panfrost_scoreboard_initialize_tiler(&batch->pool.base,
4741bf215546Sopenharmony_ci                                             &batch->scoreboard,
4742bf215546Sopenharmony_ci                                             polygon_list);
4743bf215546Sopenharmony_ci#endif
4744bf215546Sopenharmony_ci}
4745bf215546Sopenharmony_ci
4746bf215546Sopenharmony_civoid
4747bf215546Sopenharmony_ciGENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
4748bf215546Sopenharmony_ci{
4749bf215546Sopenharmony_ci        struct panfrost_device *dev = &screen->dev;
4750bf215546Sopenharmony_ci
4751bf215546Sopenharmony_ci        screen->vtbl.prepare_shader = prepare_shader;
4752bf215546Sopenharmony_ci        screen->vtbl.emit_tls    = emit_tls;
4753bf215546Sopenharmony_ci        screen->vtbl.emit_fbd    = emit_fbd;
4754bf215546Sopenharmony_ci        screen->vtbl.emit_fragment_job = emit_fragment_job;
4755bf215546Sopenharmony_ci        screen->vtbl.screen_destroy = screen_destroy;
4756bf215546Sopenharmony_ci        screen->vtbl.preload     = preload;
4757bf215546Sopenharmony_ci        screen->vtbl.context_init = context_init;
4758bf215546Sopenharmony_ci        screen->vtbl.init_batch = init_batch;
4759bf215546Sopenharmony_ci        screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked);
4760bf215546Sopenharmony_ci        screen->vtbl.init_polygon_list = init_polygon_list;
4761bf215546Sopenharmony_ci        screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
4762bf215546Sopenharmony_ci        screen->vtbl.compile_shader = GENX(pan_shader_compile);
4763bf215546Sopenharmony_ci
4764bf215546Sopenharmony_ci        GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base,
4765bf215546Sopenharmony_ci                               &screen->blitter.desc_pool.base);
4766bf215546Sopenharmony_ci#if PAN_GPU_INDIRECTS
4767bf215546Sopenharmony_ci        GENX(pan_indirect_dispatch_init)(dev);
4768bf215546Sopenharmony_ci        GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base);
4769bf215546Sopenharmony_ci#endif
4770bf215546Sopenharmony_ci}
4771