1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "pan_context.h"
25bf215546Sopenharmony_ci#include "util/u_vbuf.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_civoid
28bf215546Sopenharmony_cipanfrost_analyze_sysvals(struct panfrost_shader_state *ss)
29bf215546Sopenharmony_ci{
30bf215546Sopenharmony_ci        unsigned dirty = 0;
31bf215546Sopenharmony_ci        unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
34bf215546Sopenharmony_ci                switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
35bf215546Sopenharmony_ci                case PAN_SYSVAL_VIEWPORT_SCALE:
36bf215546Sopenharmony_ci                case PAN_SYSVAL_VIEWPORT_OFFSET:
37bf215546Sopenharmony_ci                        dirty |= PAN_DIRTY_VIEWPORT;
38bf215546Sopenharmony_ci                        break;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci                case PAN_SYSVAL_TEXTURE_SIZE:
41bf215546Sopenharmony_ci                        dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
42bf215546Sopenharmony_ci                        break;
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci                case PAN_SYSVAL_SSBO:
45bf215546Sopenharmony_ci                        dirty_shader |= PAN_DIRTY_STAGE_SSBO;
46bf215546Sopenharmony_ci                        break;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci                case PAN_SYSVAL_XFB:
49bf215546Sopenharmony_ci                        dirty |= PAN_DIRTY_SO;
50bf215546Sopenharmony_ci                        break;
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci                case PAN_SYSVAL_SAMPLER:
53bf215546Sopenharmony_ci                        dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
54bf215546Sopenharmony_ci                        break;
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci                case PAN_SYSVAL_IMAGE_SIZE:
57bf215546Sopenharmony_ci                        dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
58bf215546Sopenharmony_ci                        break;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci                case PAN_SYSVAL_NUM_WORK_GROUPS:
61bf215546Sopenharmony_ci                case PAN_SYSVAL_LOCAL_GROUP_SIZE:
62bf215546Sopenharmony_ci                case PAN_SYSVAL_WORK_DIM:
63bf215546Sopenharmony_ci                case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
64bf215546Sopenharmony_ci                case PAN_SYSVAL_NUM_VERTICES:
65bf215546Sopenharmony_ci                        dirty |= PAN_DIRTY_PARAMS;
66bf215546Sopenharmony_ci                        break;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci                case PAN_SYSVAL_DRAWID:
69bf215546Sopenharmony_ci                        dirty |= PAN_DIRTY_DRAWID;
70bf215546Sopenharmony_ci                        break;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci                case PAN_SYSVAL_SAMPLE_POSITIONS:
73bf215546Sopenharmony_ci                case PAN_SYSVAL_MULTISAMPLED:
74bf215546Sopenharmony_ci                case PAN_SYSVAL_RT_CONVERSION:
75bf215546Sopenharmony_ci                        /* Nothing beyond the batch itself */
76bf215546Sopenharmony_ci                        break;
77bf215546Sopenharmony_ci                default:
78bf215546Sopenharmony_ci                        unreachable("Invalid sysval");
79bf215546Sopenharmony_ci                }
80bf215546Sopenharmony_ci        }
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci        ss->dirty_3d = dirty;
83bf215546Sopenharmony_ci        ss->dirty_shader = dirty_shader;
84bf215546Sopenharmony_ci}
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci/*
87bf215546Sopenharmony_ci * Gets a GPU address for the associated index buffer. Only gauranteed to be
88bf215546Sopenharmony_ci * good for the duration of the draw (transient), could last longer. Bounds are
89bf215546Sopenharmony_ci * not calculated.
90bf215546Sopenharmony_ci */
91bf215546Sopenharmony_cimali_ptr
92bf215546Sopenharmony_cipanfrost_get_index_buffer(struct panfrost_batch *batch,
93bf215546Sopenharmony_ci                          const struct pipe_draw_info *info,
94bf215546Sopenharmony_ci                          const struct pipe_draw_start_count_bias *draw)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(info->index.resource);
97bf215546Sopenharmony_ci        off_t offset = draw->start * info->index_size;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci        if (!info->has_user_indices) {
100bf215546Sopenharmony_ci                /* Only resources can be directly mapped */
101bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
102bf215546Sopenharmony_ci                return rsrc->image.data.bo->ptr.gpu + offset;
103bf215546Sopenharmony_ci        } else {
104bf215546Sopenharmony_ci                /* Otherwise, we need to upload to transient memory */
105bf215546Sopenharmony_ci                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
106bf215546Sopenharmony_ci                struct panfrost_ptr T =
107bf215546Sopenharmony_ci                        pan_pool_alloc_aligned(&batch->pool.base,
108bf215546Sopenharmony_ci                                               draw->count *
109bf215546Sopenharmony_ci                                               info->index_size,
110bf215546Sopenharmony_ci                                               info->index_size);
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci                memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
113bf215546Sopenharmony_ci                return T.gpu;
114bf215546Sopenharmony_ci        }
115bf215546Sopenharmony_ci}
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci/* Gets a GPU address for the associated index buffer. Only gauranteed to be
118bf215546Sopenharmony_ci * good for the duration of the draw (transient), could last longer. Also get
119bf215546Sopenharmony_ci * the bounds on the index buffer for the range accessed by the draw. We do
120bf215546Sopenharmony_ci * these operations together because there are natural optimizations which
121bf215546Sopenharmony_ci * require them to be together. */
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_cimali_ptr
124bf215546Sopenharmony_cipanfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
125bf215546Sopenharmony_ci                                  const struct pipe_draw_info *info,
126bf215546Sopenharmony_ci                                  const struct pipe_draw_start_count_bias *draw,
127bf215546Sopenharmony_ci                                  unsigned *min_index, unsigned *max_index)
128bf215546Sopenharmony_ci{
129bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(info->index.resource);
130bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
131bf215546Sopenharmony_ci        bool needs_indices = true;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci        if (info->index_bounds_valid) {
134bf215546Sopenharmony_ci                *min_index = info->min_index;
135bf215546Sopenharmony_ci                *max_index = info->max_index;
136bf215546Sopenharmony_ci                needs_indices = false;
137bf215546Sopenharmony_ci        } else if (!info->has_user_indices) {
138bf215546Sopenharmony_ci                /* Check the cache */
139bf215546Sopenharmony_ci                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
140bf215546Sopenharmony_ci                                                           draw->start,
141bf215546Sopenharmony_ci                                                           draw->count,
142bf215546Sopenharmony_ci                                                           min_index,
143bf215546Sopenharmony_ci                                                           max_index);
144bf215546Sopenharmony_ci        }
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci        if (needs_indices) {
147bf215546Sopenharmony_ci                /* Fallback */
148bf215546Sopenharmony_ci                u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci                if (!info->has_user_indices)
151bf215546Sopenharmony_ci                        panfrost_minmax_cache_add(rsrc->index_cache,
152bf215546Sopenharmony_ci                                                  draw->start, draw->count,
153bf215546Sopenharmony_ci                                                  *min_index, *max_index);
154bf215546Sopenharmony_ci        }
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci        return panfrost_get_index_buffer(batch, info, draw);
157bf215546Sopenharmony_ci}
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci/**
160bf215546Sopenharmony_ci * Given an (index, divisor) tuple, assign a vertex buffer. Midgard and
161bf215546Sopenharmony_ci * Bifrost put divisor information on the attribute buffer descriptor, so this
162bf215546Sopenharmony_ci * is the most we can compact in general. Crucially, this runs at vertex
163bf215546Sopenharmony_ci * elements CSO create time, not at draw time.
164bf215546Sopenharmony_ci */
165bf215546Sopenharmony_ciunsigned
166bf215546Sopenharmony_cipan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
167bf215546Sopenharmony_ci                         unsigned *nr_bufs,
168bf215546Sopenharmony_ci                         unsigned vbi,
169bf215546Sopenharmony_ci                         unsigned divisor)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci        /* Look up the buffer */
172bf215546Sopenharmony_ci        for (unsigned i = 0; i < (*nr_bufs); ++i) {
173bf215546Sopenharmony_ci                if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
174bf215546Sopenharmony_ci                        return i;
175bf215546Sopenharmony_ci        }
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci        /* Else, create a new buffer */
178bf215546Sopenharmony_ci        unsigned idx = (*nr_bufs)++;
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci        buffers[idx] = (struct pan_vertex_buffer) {
181bf215546Sopenharmony_ci                .vbi = vbi,
182bf215546Sopenharmony_ci                .divisor = divisor
183bf215546Sopenharmony_ci        };
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci        return idx;
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci/*
189bf215546Sopenharmony_ci * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together,
190bf215546Sopenharmony_ci * meaning that we draw to a given target. Adding to only one mask does not
191bf215546Sopenharmony_ci * generally make sense, except for clears which add to batch->clear and
192bf215546Sopenharmony_ci * batch->resolve together.
193bf215546Sopenharmony_ci */
194bf215546Sopenharmony_cistatic void
195bf215546Sopenharmony_cipanfrost_draw_target(struct panfrost_batch *batch, unsigned target)
196bf215546Sopenharmony_ci{
197bf215546Sopenharmony_ci        batch->draws |= target;
198bf215546Sopenharmony_ci        batch->resolve |= target;
199bf215546Sopenharmony_ci}
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci/*
202bf215546Sopenharmony_ci * Draw time helper to set batch->{read, draws, resolve} based on current blend
203bf215546Sopenharmony_ci * and depth-stencil state. To be called when blend or depth/stencil dirty state
204bf215546Sopenharmony_ci * respectively changes.
205bf215546Sopenharmony_ci */
206bf215546Sopenharmony_civoid
207bf215546Sopenharmony_cipanfrost_set_batch_masks_blend(struct panfrost_batch *batch)
208bf215546Sopenharmony_ci{
209bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
210bf215546Sopenharmony_ci        struct panfrost_blend_state *blend = ctx->blend;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
213bf215546Sopenharmony_ci                if (!blend->info[i].no_colour && batch->key.cbufs[i])
214bf215546Sopenharmony_ci                        panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
215bf215546Sopenharmony_ci        }
216bf215546Sopenharmony_ci}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_civoid
219bf215546Sopenharmony_cipanfrost_set_batch_masks_zs(struct panfrost_batch *batch)
220bf215546Sopenharmony_ci{
221bf215546Sopenharmony_ci        struct panfrost_context *ctx = batch->ctx;
222bf215546Sopenharmony_ci        struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci        /* Assume depth is read (TODO: perf) */
225bf215546Sopenharmony_ci        if (zsa->depth_enabled)
226bf215546Sopenharmony_ci                batch->read |= PIPE_CLEAR_DEPTH;
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci        if (zsa->depth_writemask)
229bf215546Sopenharmony_ci                panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci        if (zsa->stencil[0].enabled) {
232bf215546Sopenharmony_ci                panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci                /* Assume stencil is read (TODO: perf) */
235bf215546Sopenharmony_ci                batch->read |= PIPE_CLEAR_STENCIL;
236bf215546Sopenharmony_ci        }
237bf215546Sopenharmony_ci}
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_civoid
240bf215546Sopenharmony_cipanfrost_track_image_access(struct panfrost_batch *batch,
241bf215546Sopenharmony_ci                            enum pipe_shader_type stage,
242bf215546Sopenharmony_ci                            struct pipe_image_view *image)
243bf215546Sopenharmony_ci{
244bf215546Sopenharmony_ci        struct panfrost_resource *rsrc = pan_resource(image->resource);
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci        if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
247bf215546Sopenharmony_ci                panfrost_batch_write_rsrc(batch, rsrc, stage);
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci                bool is_buffer = rsrc->base.target == PIPE_BUFFER;
250bf215546Sopenharmony_ci                unsigned level = is_buffer ? 0 : image->u.tex.level;
251bf215546Sopenharmony_ci                BITSET_SET(rsrc->valid.data, level);
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci                if (is_buffer) {
254bf215546Sopenharmony_ci                        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
255bf215546Sopenharmony_ci                                        0, rsrc->base.width0);
256bf215546Sopenharmony_ci                }
257bf215546Sopenharmony_ci        } else {
258bf215546Sopenharmony_ci                panfrost_batch_read_rsrc(batch, rsrc, stage);
259bf215546Sopenharmony_ci        }
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262