1/*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "fd6_const.h"
26#include "fd6_pack.h"
27
28#define emit_const_user fd6_emit_const_user
29#define emit_const_bo   fd6_emit_const_bo
30#include "ir3_const.h"
31
32/* regid:          base const register
33 * prsc or dwords: buffer containing constant values
34 * sizedwords:     size of const value buffer
35 */
36void
37fd6_emit_const_user(struct fd_ringbuffer *ring,
38                    const struct ir3_shader_variant *v, uint32_t regid,
39                    uint32_t sizedwords, const uint32_t *dwords)
40{
41   emit_const_asserts(ring, v, regid, sizedwords);
42
43   /* NOTE we cheat a bit here, since we know mesa is aligning
44    * the size of the user buffer to 16 bytes.  And we want to
45    * cut cycles in a hot path.
46    */
47   uint32_t align_sz = align(sizedwords, 4);
48
49   if (fd6_geom_stage(v->type)) {
50      OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
51         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
52                          .state_src = SS6_DIRECT,
53                          .state_block = fd6_stage2shadersb(v->type),
54                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
55         CP_LOAD_STATE6_1(),
56         CP_LOAD_STATE6_2());
57   } else {
58      OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
59         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
60                          .state_src = SS6_DIRECT,
61                          .state_block = fd6_stage2shadersb(v->type),
62                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
63         CP_LOAD_STATE6_1(),
64         CP_LOAD_STATE6_2());
65   }
66}
67void
68fd6_emit_const_bo(struct fd_ringbuffer *ring,
69                  const struct ir3_shader_variant *v, uint32_t regid,
70                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
71{
72   uint32_t dst_off = regid / 4;
73   assert(dst_off % 4 == 0);
74   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
75   assert(num_unit % 4 == 0);
76
77   emit_const_asserts(ring, v, regid, sizedwords);
78
79   if (fd6_geom_stage(v->type)) {
80      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
81              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
82                               .state_src = SS6_INDIRECT,
83                               .state_block = fd6_stage2shadersb(v->type),
84                               .num_unit = num_unit, ),
85              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
86   } else {
87      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
88              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
89                               .state_src = SS6_INDIRECT,
90                               .state_block = fd6_stage2shadersb(v->type),
91                               .num_unit = num_unit, ),
92              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
93   }
94}
95
/* Reports whether the given ringbuffer is a state object.  This
 * implementation answers "yes" for every ringbuffer and never looks at its
 * argument.
 *
 * NOTE(review): presumably one of the per-generation hooks consumed by
 * ir3_const.h -- confirm against that header.
 */
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   (void)ring; /* intentionally unused */
   return true;
}
101
/* Placeholder that must never run: the body is a single unreachable()
 * and none of the arguments are used.
 *
 * NOTE(review): presumably only defined to satisfy a hook expected by
 * ir3_const.h -- confirm against that header.
 */
static void
emit_const_ptrs(struct fd_ringbuffer *ring,
                const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num,
                struct fd_bo **bos, uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}
109
110static void
111emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
112                       uint32_t *params, int num_params)
113{
114   const struct ir3_const_state *const_state = ir3_const_state(v);
115   const unsigned regid = const_state->offsets.primitive_param;
116   int size = MIN2(1 + regid, v->constlen) - regid;
117   if (size > 0)
118      fd6_emit_const_user(ring, v, regid * 4, num_params, params);
119}
120
121struct fd_ringbuffer *
122fd6_build_tess_consts(struct fd6_emit *emit)
123{
124   struct fd_context *ctx = emit->ctx;
125
126   struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
127      ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
128
129   /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
130    * size is dwords, since that's what LDG/STG use.
131    */
132   unsigned num_vertices = emit->hs
133                              ? emit->patch_vertices
134                              : emit->gs->gs.vertices_in;
135
136   uint32_t vs_params[4] = {
137      emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
138      emit->vs->output_size * 4,                /* vs vertex stride */
139      0, 0};
140
141   emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
142
143   if (emit->hs) {
144      uint32_t hs_params[4] = {
145         emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
146         emit->vs->output_size * 4,                /* vs vertex stride */
147         emit->hs->output_size, emit->patch_vertices};
148
149      emit_stage_tess_consts(constobj, emit->hs, hs_params,
150                             ARRAY_SIZE(hs_params));
151
152      if (emit->gs)
153         num_vertices = emit->gs->gs.vertices_in;
154
155      uint32_t ds_params[4] = {
156         emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
157         emit->ds->output_size * 4,                /* ds vertex stride */
158         emit->hs->output_size, /* hs vertex stride (dwords) */
159         emit->hs->tess.tcs_vertices_out};
160
161      emit_stage_tess_consts(constobj, emit->ds, ds_params,
162                             ARRAY_SIZE(ds_params));
163   }
164
165   if (emit->gs) {
166      struct ir3_shader_variant *prev;
167      if (emit->ds)
168         prev = emit->ds;
169      else
170         prev = emit->vs;
171
172      uint32_t gs_params[4] = {
173         prev->output_size * num_vertices * 4, /* ds primitive stride */
174         prev->output_size * 4,                /* ds vertex stride */
175         0,
176         0,
177      };
178
179      num_vertices = emit->gs->gs.vertices_in;
180      emit_stage_tess_consts(constobj, emit->gs, gs_params,
181                             ARRAY_SIZE(gs_params));
182   }
183
184   return constobj;
185}
186
187static void
188fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
189              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
190{
191   const struct ir3_const_state *const_state = ir3_const_state(v);
192   int num_ubos = const_state->num_ubos;
193
194   if (!num_ubos)
195      return;
196
197   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
198   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
199                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
200                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
201                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
202                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
203   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
204   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
205
206   for (int i = 0; i < num_ubos; i++) {
207      /* NIR constant data is packed into the end of the shader. */
208      if (i == const_state->constant_data_ubo) {
209         int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
210         OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
211                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
212         continue;
213      }
214
215      struct pipe_constant_buffer *cb = &constbuf->cb[i];
216
217      /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
218       * to a buffer now, and save it in the constbuf so that we don't have
219       * to reupload until they get changed.
220       */
221      if (cb->user_buffer) {
222         struct pipe_context *pctx = &ctx->base;
223         u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
224                       cb->user_buffer, &cb->buffer_offset, &cb->buffer);
225         cb->user_buffer = NULL;
226      }
227
228      if (cb->buffer) {
229         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
230         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
231                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
232      } else {
233         OUT_RING(ring, 0xbad00000 | (i << 16));
234         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
235      }
236   }
237}
238
239static unsigned
240user_consts_cmdstream_size(struct ir3_shader_variant *v)
241{
242   struct ir3_const_state *const_state = ir3_const_state(v);
243   struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
244
245   if (unlikely(!ubo_state->cmdstream_size)) {
246      unsigned packets, size;
247
248      /* pre-calculate size required for userconst stateobj: */
249      ir3_user_consts_size(ubo_state, &packets, &size);
250
251      /* also account for UBO addresses: */
252      packets += 1;
253      size += 2 * const_state->num_ubos;
254
255      unsigned sizedwords = (4 * packets) + size;
256      ubo_state->cmdstream_size = sizedwords * 4;
257   }
258
259   return ubo_state->cmdstream_size;
260}
261
262struct fd_ringbuffer *
263fd6_build_user_consts(struct fd6_emit *emit)
264{
265   static const enum pipe_shader_type types[] = {
266      PIPE_SHADER_VERTEX,   PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
267      PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
268   };
269   struct ir3_shader_variant *variants[] = {
270      emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
271   };
272   struct fd_context *ctx = emit->ctx;
273   unsigned sz = 0;
274
275   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
276      if (!variants[i])
277         continue;
278      sz += user_consts_cmdstream_size(variants[i]);
279   }
280
281   struct fd_ringbuffer *constobj =
282      fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
283
284   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
285      if (!variants[i])
286         continue;
287      ir3_emit_user_consts(ctx->screen, variants[i], constobj,
288                           &ctx->constbuf[types[i]]);
289      fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
290   }
291
292   return constobj;
293}
294
295struct fd_ringbuffer *
296fd6_build_driver_params(struct fd6_emit *emit)
297{
298   struct fd_context *ctx = emit->ctx;
299   struct fd6_context *fd6_ctx = fd6_context(ctx);
300   unsigned num_dp = 0;
301
302   if (emit->vs->need_driver_params)
303      num_dp++;
304
305   if (emit->gs && emit->gs->need_driver_params)
306      num_dp++;
307
308   if (emit->ds && emit->ds->need_driver_params)
309      num_dp++;
310
311   if (!num_dp) {
312      fd6_ctx->has_dp_state = false;
313      return NULL;
314   }
315
316   unsigned size_dwords = num_dp * (4 + IR3_DP_VS_COUNT);  /* 4dw PKT7 header */
317   struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
318         ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);
319
320   if (emit->vs->need_driver_params) {
321      ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info,
322                             emit->indirect, emit->draw);
323   }
324
325   if (emit->gs && emit->gs->need_driver_params) {
326      ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
327                             emit->indirect, emit->draw);
328   }
329
330   if (emit->ds && emit->ds->need_driver_params) {
331      ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
332                             emit->indirect, emit->draw);
333   }
334
335   fd6_ctx->has_dp_state = true;
336
337   return dpconstobj;
338}
339
340void
341fd6_emit_cs_consts(const struct ir3_shader_variant *v,
342                   struct fd_ringbuffer *ring, struct fd_context *ctx,
343                   const struct pipe_grid_info *info)
344{
345   ir3_emit_cs_consts(v, ring, ctx, info);
346   fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
347}
348
/* a6xx entry point for emitting a variant's immediates: forwards all
 * arguments unchanged to ir3_emit_immediates().
 */
void
fd6_emit_immediates(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   ir3_emit_immediates(screen, v, ring);
}
356
/* a6xx entry point for emitting the link map between a producer variant and
 * the variant v: forwards all arguments unchanged to ir3_emit_link_map().
 */
void
fd6_emit_link_map(struct fd_screen *screen,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *v,
                  struct fd_ringbuffer *ring)
{
   ir3_emit_link_map(screen, producer, v, ring);
}
365