1/*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pan_context.h"
25#include "util/u_vbuf.h"
26
27void
28panfrost_analyze_sysvals(struct panfrost_shader_state *ss)
29{
30        unsigned dirty = 0;
31        unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
32
33        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
34                switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
35                case PAN_SYSVAL_VIEWPORT_SCALE:
36                case PAN_SYSVAL_VIEWPORT_OFFSET:
37                        dirty |= PAN_DIRTY_VIEWPORT;
38                        break;
39
40                case PAN_SYSVAL_TEXTURE_SIZE:
41                        dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
42                        break;
43
44                case PAN_SYSVAL_SSBO:
45                        dirty_shader |= PAN_DIRTY_STAGE_SSBO;
46                        break;
47
48                case PAN_SYSVAL_XFB:
49                        dirty |= PAN_DIRTY_SO;
50                        break;
51
52                case PAN_SYSVAL_SAMPLER:
53                        dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
54                        break;
55
56                case PAN_SYSVAL_IMAGE_SIZE:
57                        dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
58                        break;
59
60                case PAN_SYSVAL_NUM_WORK_GROUPS:
61                case PAN_SYSVAL_LOCAL_GROUP_SIZE:
62                case PAN_SYSVAL_WORK_DIM:
63                case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
64                case PAN_SYSVAL_NUM_VERTICES:
65                        dirty |= PAN_DIRTY_PARAMS;
66                        break;
67
68                case PAN_SYSVAL_DRAWID:
69                        dirty |= PAN_DIRTY_DRAWID;
70                        break;
71
72                case PAN_SYSVAL_SAMPLE_POSITIONS:
73                case PAN_SYSVAL_MULTISAMPLED:
74                case PAN_SYSVAL_RT_CONVERSION:
75                        /* Nothing beyond the batch itself */
76                        break;
77                default:
78                        unreachable("Invalid sysval");
79                }
80        }
81
82        ss->dirty_3d = dirty;
83        ss->dirty_shader = dirty_shader;
84}
85
86/*
87 * Gets a GPU address for the associated index buffer. Only gauranteed to be
88 * good for the duration of the draw (transient), could last longer. Bounds are
89 * not calculated.
90 */
91mali_ptr
92panfrost_get_index_buffer(struct panfrost_batch *batch,
93                          const struct pipe_draw_info *info,
94                          const struct pipe_draw_start_count_bias *draw)
95{
96        struct panfrost_resource *rsrc = pan_resource(info->index.resource);
97        off_t offset = draw->start * info->index_size;
98
99        if (!info->has_user_indices) {
100                /* Only resources can be directly mapped */
101                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
102                return rsrc->image.data.bo->ptr.gpu + offset;
103        } else {
104                /* Otherwise, we need to upload to transient memory */
105                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
106                struct panfrost_ptr T =
107                        pan_pool_alloc_aligned(&batch->pool.base,
108                                               draw->count *
109                                               info->index_size,
110                                               info->index_size);
111
112                memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
113                return T.gpu;
114        }
115}
116
/* Gets a GPU address for the associated index buffer. Only guaranteed to be
 * good for the duration of the draw (transient), could last longer. Also gets
 * the bounds on the index buffer for the range accessed by the draw. We do
 * these operations together because there are natural optimizations which
 * require them to be together. */
122
123mali_ptr
124panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
125                                  const struct pipe_draw_info *info,
126                                  const struct pipe_draw_start_count_bias *draw,
127                                  unsigned *min_index, unsigned *max_index)
128{
129        struct panfrost_resource *rsrc = pan_resource(info->index.resource);
130        struct panfrost_context *ctx = batch->ctx;
131        bool needs_indices = true;
132
133        if (info->index_bounds_valid) {
134                *min_index = info->min_index;
135                *max_index = info->max_index;
136                needs_indices = false;
137        } else if (!info->has_user_indices) {
138                /* Check the cache */
139                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
140                                                           draw->start,
141                                                           draw->count,
142                                                           min_index,
143                                                           max_index);
144        }
145
146        if (needs_indices) {
147                /* Fallback */
148                u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
149
150                if (!info->has_user_indices)
151                        panfrost_minmax_cache_add(rsrc->index_cache,
152                                                  draw->start, draw->count,
153                                                  *min_index, *max_index);
154        }
155
156        return panfrost_get_index_buffer(batch, info, draw);
157}
158
159/**
160 * Given an (index, divisor) tuple, assign a vertex buffer. Midgard and
161 * Bifrost put divisor information on the attribute buffer descriptor, so this
162 * is the most we can compact in general. Crucially, this runs at vertex
163 * elements CSO create time, not at draw time.
164 */
165unsigned
166pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
167                         unsigned *nr_bufs,
168                         unsigned vbi,
169                         unsigned divisor)
170{
171        /* Look up the buffer */
172        for (unsigned i = 0; i < (*nr_bufs); ++i) {
173                if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
174                        return i;
175        }
176
177        /* Else, create a new buffer */
178        unsigned idx = (*nr_bufs)++;
179
180        buffers[idx] = (struct pan_vertex_buffer) {
181                .vbi = vbi,
182                .divisor = divisor
183        };
184
185        return idx;
186}
187
188/*
189 * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together,
190 * meaning that we draw to a given target. Adding to only one mask does not
191 * generally make sense, except for clears which add to batch->clear and
192 * batch->resolve together.
193 */
194static void
195panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
196{
197        batch->draws |= target;
198        batch->resolve |= target;
199}
200
201/*
202 * Draw time helper to set batch->{read, draws, resolve} based on current blend
203 * and depth-stencil state. To be called when blend or depth/stencil dirty state
204 * respectively changes.
205 */
206void
207panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
208{
209        struct panfrost_context *ctx = batch->ctx;
210        struct panfrost_blend_state *blend = ctx->blend;
211
212        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
213                if (!blend->info[i].no_colour && batch->key.cbufs[i])
214                        panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
215        }
216}
217
218void
219panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
220{
221        struct panfrost_context *ctx = batch->ctx;
222        struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil;
223
224        /* Assume depth is read (TODO: perf) */
225        if (zsa->depth_enabled)
226                batch->read |= PIPE_CLEAR_DEPTH;
227
228        if (zsa->depth_writemask)
229                panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
230
231        if (zsa->stencil[0].enabled) {
232                panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
233
234                /* Assume stencil is read (TODO: perf) */
235                batch->read |= PIPE_CLEAR_STENCIL;
236        }
237}
238
239void
240panfrost_track_image_access(struct panfrost_batch *batch,
241                            enum pipe_shader_type stage,
242                            struct pipe_image_view *image)
243{
244        struct panfrost_resource *rsrc = pan_resource(image->resource);
245
246        if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
247                panfrost_batch_write_rsrc(batch, rsrc, stage);
248
249                bool is_buffer = rsrc->base.target == PIPE_BUFFER;
250                unsigned level = is_buffer ? 0 : image->u.tex.level;
251                BITSET_SET(rsrc->valid.data, level);
252
253                if (is_buffer) {
254                        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
255                                        0, rsrc->base.width0);
256                }
257        } else {
258                panfrost_batch_read_rsrc(batch, rsrc, stage);
259        }
260}
261
262