1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "ir3/ir3_nir.h"
28bf215546Sopenharmony_ci
/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  The logic is identical between
 * ir3 generations; the only differences are small details in the actual
 * CP_LOAD_STATE packets (which are handled inside the generation
 * specific ctx->emit_const(_bo)() fxns).
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions:
 */
39bf215546Sopenharmony_ci
/* Supplied by the including per-generation file.  Within this file the
 * result is used by ring_wfi() to skip the WFI, and by
 * emit_common_consts() to force all const state to be re-emitted.
 */
static bool is_stateobj(struct fd_ringbuffer *ring);
41bf215546Sopenharmony_ci
/* Supplied by the including per-generation file.  Emits consts whose
 * source data is CPU-side memory (user_buffer).  Callers in this file
 * pass regid and size in dwords (vec4 values multiplied by 4, or byte
 * values divided by 4).
 */
static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);
45bf215546Sopenharmony_ci
/* Supplied by the including per-generation file.  Emits consts whose
 * source data lives in a buffer object.  Callers in this file pass regid
 * and size in dwords; offset is passed un-scaled (presumably a byte
 * offset into bo -- confirm against the per-gen implementation).
 */
static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistatic void
51bf215546Sopenharmony_ciemit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
52bf215546Sopenharmony_ci                uint32_t regid, uint32_t offset, uint32_t size,
53bf215546Sopenharmony_ci                struct pipe_resource *buffer)
54bf215546Sopenharmony_ci{
55bf215546Sopenharmony_ci   struct fd_resource *rsc = fd_resource(buffer);
56bf215546Sopenharmony_ci   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
57bf215546Sopenharmony_ci}
58bf215546Sopenharmony_ci
/* Supplied by the including per-generation file.  Emits num buffer
 * addresses starting at dst_offset (callers in this file pass a vec4
 * offset multiplied by 4).  bos[] and offsets[] are parallel arrays;
 * callers store a NULL bo with offset 0 for slots that have no buffer.
 */
static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cistatic void
65bf215546Sopenharmony_ciemit_const_asserts(struct fd_ringbuffer *ring,
66bf215546Sopenharmony_ci                   const struct ir3_shader_variant *v, uint32_t regid,
67bf215546Sopenharmony_ci                   uint32_t sizedwords)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci   assert((regid % 4) == 0);
70bf215546Sopenharmony_ci   assert((sizedwords % 4) == 0);
71bf215546Sopenharmony_ci   assert(regid + sizedwords <= v->constlen * 4);
72bf215546Sopenharmony_ci}
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_cistatic void
75bf215546Sopenharmony_ciring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   /* when we emit const state via ring (IB2) we need a WFI, but when
78bf215546Sopenharmony_ci    * it is emit'd via stateobj, we don't
79bf215546Sopenharmony_ci    */
80bf215546Sopenharmony_ci   if (is_stateobj(ring))
81bf215546Sopenharmony_ci      return;
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   fd_wfi(batch, ring);
84bf215546Sopenharmony_ci}
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci/**
87bf215546Sopenharmony_ci * Indirectly calculates size of cmdstream needed for ir3_emit_user_consts().
88bf215546Sopenharmony_ci * Returns number of packets, and total size of all the payload.
89bf215546Sopenharmony_ci *
90bf215546Sopenharmony_ci * The value can be a worst-case, ie. some shader variants may not read all
91bf215546Sopenharmony_ci * consts, etc.
92bf215546Sopenharmony_ci *
93bf215546Sopenharmony_ci * Returns size in dwords.
94bf215546Sopenharmony_ci */
95bf215546Sopenharmony_cistatic inline void
96bf215546Sopenharmony_ciir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets,
97bf215546Sopenharmony_ci                     unsigned *size)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   *packets = *size = 0;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
102bf215546Sopenharmony_ci      if (state->range[i].start < state->range[i].end) {
103bf215546Sopenharmony_ci         *size += state->range[i].end - state->range[i].start;
104bf215546Sopenharmony_ci         (*packets)++;
105bf215546Sopenharmony_ci      }
106bf215546Sopenharmony_ci   }
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci/**
110bf215546Sopenharmony_ci * Uploads the referenced subranges of the nir constant_data to the hardware's
111bf215546Sopenharmony_ci * constant buffer.
112bf215546Sopenharmony_ci */
113bf215546Sopenharmony_cistatic inline void
114bf215546Sopenharmony_ciir3_emit_constant_data(struct fd_screen *screen,
115bf215546Sopenharmony_ci                       const struct ir3_shader_variant *v,
116bf215546Sopenharmony_ci                       struct fd_ringbuffer *ring)
117bf215546Sopenharmony_ci{
118bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
119bf215546Sopenharmony_ci   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   for (unsigned i = 0; i < state->num_enabled; i++) {
122bf215546Sopenharmony_ci      unsigned ubo = state->range[i].ubo.block;
123bf215546Sopenharmony_ci      if (ubo != const_state->constant_data_ubo)
124bf215546Sopenharmony_ci         continue;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      uint32_t size = state->range[i].end - state->range[i].start;
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
129bf215546Sopenharmony_ci       * used in the binning variant.
130bf215546Sopenharmony_ci       */
131bf215546Sopenharmony_ci      if (16 * v->constlen <= state->range[i].offset)
132bf215546Sopenharmony_ci         continue;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci      /* and even if the start of the const buffer is before
135bf215546Sopenharmony_ci       * first_immediate, the end may not be:
136bf215546Sopenharmony_ci       */
137bf215546Sopenharmony_ci      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci      if (size == 0)
140bf215546Sopenharmony_ci         continue;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci      emit_const_bo(ring, v, state->range[i].offset / 4,
143bf215546Sopenharmony_ci                    v->info.constant_data_offset + state->range[i].start,
144bf215546Sopenharmony_ci                    size / 4, v->bo);
145bf215546Sopenharmony_ci   }
146bf215546Sopenharmony_ci}
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci/**
149bf215546Sopenharmony_ci * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
150bf215546Sopenharmony_ci * outside of these ranges will be done using full UBO accesses in the
151bf215546Sopenharmony_ci * shader).
152bf215546Sopenharmony_ci */
153bf215546Sopenharmony_cistatic inline void
154bf215546Sopenharmony_ciir3_emit_user_consts(struct fd_screen *screen,
155bf215546Sopenharmony_ci                     const struct ir3_shader_variant *v,
156bf215546Sopenharmony_ci                     struct fd_ringbuffer *ring,
157bf215546Sopenharmony_ci                     struct fd_constbuf_stateobj *constbuf)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
160bf215546Sopenharmony_ci   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   for (unsigned i = 0; i < state->num_enabled; i++) {
163bf215546Sopenharmony_ci      assert(!state->range[i].ubo.bindless);
164bf215546Sopenharmony_ci      unsigned ubo = state->range[i].ubo.block;
165bf215546Sopenharmony_ci      if (!(constbuf->enabled_mask & (1 << ubo)) ||
166bf215546Sopenharmony_ci          ubo == const_state->constant_data_ubo) {
167bf215546Sopenharmony_ci         continue;
168bf215546Sopenharmony_ci      }
169bf215546Sopenharmony_ci      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci      uint32_t size = state->range[i].end - state->range[i].start;
172bf215546Sopenharmony_ci      uint32_t offset = cb->buffer_offset + state->range[i].start;
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
175bf215546Sopenharmony_ci       * used in the binning variant.
176bf215546Sopenharmony_ci       */
177bf215546Sopenharmony_ci      if (16 * v->constlen <= state->range[i].offset)
178bf215546Sopenharmony_ci         continue;
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci      /* and even if the start of the const buffer is before
181bf215546Sopenharmony_ci       * first_immediate, the end may not be:
182bf215546Sopenharmony_ci       */
183bf215546Sopenharmony_ci      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci      if (size == 0)
186bf215546Sopenharmony_ci         continue;
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci      /* things should be aligned to vec4: */
189bf215546Sopenharmony_ci      assert((state->range[i].offset % 16) == 0);
190bf215546Sopenharmony_ci      assert((size % 16) == 0);
191bf215546Sopenharmony_ci      assert((offset % 16) == 0);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci      if (cb->user_buffer) {
194bf215546Sopenharmony_ci         emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
195bf215546Sopenharmony_ci                         cb->user_buffer + state->range[i].start);
196bf215546Sopenharmony_ci      } else {
197bf215546Sopenharmony_ci         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
198bf215546Sopenharmony_ci                         cb->buffer);
199bf215546Sopenharmony_ci      }
200bf215546Sopenharmony_ci   }
201bf215546Sopenharmony_ci}
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_cistatic inline void
204bf215546Sopenharmony_ciir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
205bf215546Sopenharmony_ci              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
208bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.ubo;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
211bf215546Sopenharmony_ci    * const state and ldg:
212bf215546Sopenharmony_ci    */
213bf215546Sopenharmony_ci   if (ctx->screen->gen >= 6)
214bf215546Sopenharmony_ci      return;
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   if (v->constlen > offset) {
217bf215546Sopenharmony_ci      uint32_t params = const_state->num_ubos;
218bf215546Sopenharmony_ci      uint32_t offsets[params];
219bf215546Sopenharmony_ci      struct fd_bo *bos[params];
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci      for (uint32_t i = 0; i < params; i++) {
222bf215546Sopenharmony_ci         if (i == const_state->constant_data_ubo) {
223bf215546Sopenharmony_ci            bos[i] = v->bo;
224bf215546Sopenharmony_ci            offsets[i] = v->info.constant_data_offset;
225bf215546Sopenharmony_ci            continue;
226bf215546Sopenharmony_ci         }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci         struct pipe_constant_buffer *cb = &constbuf->cb[i];
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
231bf215546Sopenharmony_ci          * them to a buffer now, and save it in the constbuf so that we
232bf215546Sopenharmony_ci          * don't have to reupload until they get changed.
233bf215546Sopenharmony_ci          */
234bf215546Sopenharmony_ci         if (cb->user_buffer) {
235bf215546Sopenharmony_ci            struct pipe_context *pctx = &ctx->base;
236bf215546Sopenharmony_ci            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
237bf215546Sopenharmony_ci                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
238bf215546Sopenharmony_ci            cb->user_buffer = NULL;
239bf215546Sopenharmony_ci         }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
242bf215546Sopenharmony_ci            offsets[i] = cb->buffer_offset;
243bf215546Sopenharmony_ci            bos[i] = fd_resource(cb->buffer)->bo;
244bf215546Sopenharmony_ci         } else {
245bf215546Sopenharmony_ci            offsets[i] = 0;
246bf215546Sopenharmony_ci            bos[i] = NULL;
247bf215546Sopenharmony_ci         }
248bf215546Sopenharmony_ci      }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci      assert(offset * 4 + params <= v->constlen * 4);
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
253bf215546Sopenharmony_ci   }
254bf215546Sopenharmony_ci}
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_cistatic inline void
257bf215546Sopenharmony_ciir3_emit_image_dims(struct fd_screen *screen,
258bf215546Sopenharmony_ci                    const struct ir3_shader_variant *v,
259bf215546Sopenharmony_ci                    struct fd_ringbuffer *ring,
260bf215546Sopenharmony_ci                    struct fd_shaderimg_stateobj *si)
261bf215546Sopenharmony_ci{
262bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
263bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.image_dims;
264bf215546Sopenharmony_ci   if (v->constlen > offset) {
265bf215546Sopenharmony_ci      uint32_t dims[align(const_state->image_dims.count, 4)];
266bf215546Sopenharmony_ci      unsigned mask = const_state->image_dims.mask;
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci      while (mask) {
269bf215546Sopenharmony_ci         struct pipe_image_view *img;
270bf215546Sopenharmony_ci         struct fd_resource *rsc;
271bf215546Sopenharmony_ci         unsigned index = u_bit_scan(&mask);
272bf215546Sopenharmony_ci         unsigned off = const_state->image_dims.off[index];
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci         img = &si->si[index];
275bf215546Sopenharmony_ci         rsc = fd_resource(img->resource);
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci         dims[off + 0] = util_format_get_blocksize(img->format);
278bf215546Sopenharmony_ci         if (img->resource->target != PIPE_BUFFER) {
279bf215546Sopenharmony_ci            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
280bf215546Sopenharmony_ci            /* note for 2d/cube/etc images, even if re-interpreted
281bf215546Sopenharmony_ci             * as a different color format, the pixel size should
282bf215546Sopenharmony_ci             * be the same, so use original dimensions for y and z
283bf215546Sopenharmony_ci             * stride:
284bf215546Sopenharmony_ci             */
285bf215546Sopenharmony_ci            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
286bf215546Sopenharmony_ci            /* see corresponding logic in fd_resource_offset(): */
287bf215546Sopenharmony_ci            if (rsc->layout.layer_first) {
288bf215546Sopenharmony_ci               dims[off + 2] = rsc->layout.layer_size;
289bf215546Sopenharmony_ci            } else {
290bf215546Sopenharmony_ci               dims[off + 2] = slice->size0;
291bf215546Sopenharmony_ci            }
292bf215546Sopenharmony_ci         } else {
293bf215546Sopenharmony_ci            /* For buffer-backed images, the log2 of the format's
294bf215546Sopenharmony_ci             * bytes-per-pixel is placed on the 2nd slot. This is useful
295bf215546Sopenharmony_ci             * when emitting image_size instructions, for which we need
296bf215546Sopenharmony_ci             * to divide by bpp for image buffers. Since the bpp
297bf215546Sopenharmony_ci             * can only be power-of-two, the division is implemented
298bf215546Sopenharmony_ci             * as a SHR, and for that it is handy to have the log2 of
299bf215546Sopenharmony_ci             * bpp as a constant. (log2 = first-set-bit - 1)
300bf215546Sopenharmony_ci             */
301bf215546Sopenharmony_ci            dims[off + 1] = ffs(dims[off + 0]) - 1;
302bf215546Sopenharmony_ci         }
303bf215546Sopenharmony_ci      }
304bf215546Sopenharmony_ci      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      emit_const_user(ring, v, offset * 4, size, dims);
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci}
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_cistatic inline void
311bf215546Sopenharmony_ciir3_emit_immediates(struct fd_screen *screen,
312bf215546Sopenharmony_ci                    const struct ir3_shader_variant *v,
313bf215546Sopenharmony_ci                    struct fd_ringbuffer *ring)
314bf215546Sopenharmony_ci{
315bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
316bf215546Sopenharmony_ci   uint32_t base = const_state->offsets.immediate;
317bf215546Sopenharmony_ci   int size = DIV_ROUND_UP(const_state->immediates_count, 4);
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   /* truncate size to avoid writing constants that shader
320bf215546Sopenharmony_ci    * does not use:
321bf215546Sopenharmony_ci    */
322bf215546Sopenharmony_ci   size = MIN2(size + base, v->constlen) - base;
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   /* convert out of vec4: */
325bf215546Sopenharmony_ci   base *= 4;
326bf215546Sopenharmony_ci   size *= 4;
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   if (size > 0)
329bf215546Sopenharmony_ci      emit_const_user(ring, v, base, size, const_state->immediates);
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   /* NIR constant data has the same lifetime as immediates, so upload it
332bf215546Sopenharmony_ci    * now, too.
333bf215546Sopenharmony_ci    */
334bf215546Sopenharmony_ci   ir3_emit_constant_data(screen, v, ring);
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cistatic inline void
338bf215546Sopenharmony_ciir3_emit_link_map(struct fd_screen *screen,
339bf215546Sopenharmony_ci                  const struct ir3_shader_variant *producer,
340bf215546Sopenharmony_ci                  const struct ir3_shader_variant *v,
341bf215546Sopenharmony_ci                  struct fd_ringbuffer *ring)
342bf215546Sopenharmony_ci{
343bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
344bf215546Sopenharmony_ci   uint32_t base = const_state->offsets.primitive_map;
345bf215546Sopenharmony_ci   int size = DIV_ROUND_UP(v->input_size, 4);
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   /* truncate size to avoid writing constants that shader
348bf215546Sopenharmony_ci    * does not use:
349bf215546Sopenharmony_ci    */
350bf215546Sopenharmony_ci   size = MIN2(size + base, v->constlen) - base;
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci   /* convert out of vec4: */
353bf215546Sopenharmony_ci   base *= 4;
354bf215546Sopenharmony_ci   size *= 4;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   if (size > 0)
357bf215546Sopenharmony_ci      emit_const_user(ring, v, base, size, producer->output_loc);
358bf215546Sopenharmony_ci}
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci/* emit stream-out buffers: */
361bf215546Sopenharmony_cistatic inline void
362bf215546Sopenharmony_ciemit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
363bf215546Sopenharmony_ci           struct fd_ringbuffer *ring)
364bf215546Sopenharmony_ci{
365bf215546Sopenharmony_ci   /* streamout addresses after driver-params: */
366bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
367bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.tfbo;
368bf215546Sopenharmony_ci   if (v->constlen > offset) {
369bf215546Sopenharmony_ci      struct fd_streamout_stateobj *so = &ctx->streamout;
370bf215546Sopenharmony_ci      const struct ir3_stream_output_info *info = &v->stream_output;
371bf215546Sopenharmony_ci      uint32_t params = 4;
372bf215546Sopenharmony_ci      uint32_t offsets[params];
373bf215546Sopenharmony_ci      struct fd_bo *bos[params];
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci      for (uint32_t i = 0; i < params; i++) {
376bf215546Sopenharmony_ci         struct pipe_stream_output_target *target = so->targets[i];
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci         if (target) {
379bf215546Sopenharmony_ci            offsets[i] =
380bf215546Sopenharmony_ci               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
381bf215546Sopenharmony_ci            bos[i] = fd_resource(target->buffer)->bo;
382bf215546Sopenharmony_ci         } else {
383bf215546Sopenharmony_ci            offsets[i] = 0;
384bf215546Sopenharmony_ci            bos[i] = NULL;
385bf215546Sopenharmony_ci         }
386bf215546Sopenharmony_ci      }
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci      assert(offset * 4 + params <= v->constlen * 4);
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
391bf215546Sopenharmony_ci   }
392bf215546Sopenharmony_ci}
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_cistatic inline void
395bf215546Sopenharmony_ciemit_common_consts(const struct ir3_shader_variant *v,
396bf215546Sopenharmony_ci                   struct fd_ringbuffer *ring, struct fd_context *ctx,
397bf215546Sopenharmony_ci                   enum pipe_shader_type t) assert_dt
398bf215546Sopenharmony_ci{
399bf215546Sopenharmony_ci   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
402bf215546Sopenharmony_ci    * if we emit any of it we need to emit all.  This is because
403bf215546Sopenharmony_ci    * we are using the same state-group-id each time for uniform
404bf215546Sopenharmony_ci    * state, and if previous update is never evaluated (due to no
405bf215546Sopenharmony_ci    * visible primitives in the current tile) then the new stateobj
406bf215546Sopenharmony_ci    * completely replaces the old one.
407bf215546Sopenharmony_ci    *
408bf215546Sopenharmony_ci    * Possibly if we split up different parts of the const state to
409bf215546Sopenharmony_ci    * different state-objects we could avoid this.
410bf215546Sopenharmony_ci    */
411bf215546Sopenharmony_ci   if (dirty && is_stateobj(ring))
412bf215546Sopenharmony_ci      dirty = ~0;
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
415bf215546Sopenharmony_ci      struct fd_constbuf_stateobj *constbuf;
416bf215546Sopenharmony_ci      bool shader_dirty;
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci      constbuf = &ctx->constbuf[t];
419bf215546Sopenharmony_ci      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci      ring_wfi(ctx->batch, ring);
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci      ir3_emit_user_consts(ctx->screen, v, ring, constbuf);
424bf215546Sopenharmony_ci      ir3_emit_ubos(ctx, v, ring, constbuf);
425bf215546Sopenharmony_ci      if (shader_dirty)
426bf215546Sopenharmony_ci         ir3_emit_immediates(ctx->screen, v, ring);
427bf215546Sopenharmony_ci   }
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
430bf215546Sopenharmony_ci      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
431bf215546Sopenharmony_ci      ring_wfi(ctx->batch, ring);
432bf215546Sopenharmony_ci      ir3_emit_image_dims(ctx->screen, v, ring, si);
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci/* emit kernel params */
437bf215546Sopenharmony_cistatic inline void
438bf215546Sopenharmony_ciemit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
439bf215546Sopenharmony_ci                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
440bf215546Sopenharmony_ci   assert_dt
441bf215546Sopenharmony_ci{
442bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
443bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.kernel_params;
444bf215546Sopenharmony_ci   if (v->constlen > offset) {
445bf215546Sopenharmony_ci      ring_wfi(ctx->batch, ring);
446bf215546Sopenharmony_ci      emit_const_user(ring, v, offset * 4,
447bf215546Sopenharmony_ci                      align(v->cs.req_input_mem, 4),
448bf215546Sopenharmony_ci                      info->input);
449bf215546Sopenharmony_ci   }
450bf215546Sopenharmony_ci}
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_cistatic inline void
453bf215546Sopenharmony_ciir3_emit_driver_params(const struct ir3_shader_variant *v,
454bf215546Sopenharmony_ci                       struct fd_ringbuffer *ring, struct fd_context *ctx,
455bf215546Sopenharmony_ci                       const struct pipe_draw_info *info,
456bf215546Sopenharmony_ci                       const struct pipe_draw_indirect_info *indirect,
457bf215546Sopenharmony_ci                       const struct pipe_draw_start_count_bias *draw) assert_dt
458bf215546Sopenharmony_ci{
459bf215546Sopenharmony_ci   assert(v->need_driver_params);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
462bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.driver_param;
463bf215546Sopenharmony_ci   uint32_t vertex_params[IR3_DP_VS_COUNT] = {
464bf215546Sopenharmony_ci      [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
465bf215546Sopenharmony_ci      [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
466bf215546Sopenharmony_ci      [IR3_DP_INSTID_BASE] = info->start_instance,
467bf215546Sopenharmony_ci      [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
468bf215546Sopenharmony_ci   };
469bf215546Sopenharmony_ci   if (v->key.ucp_enables) {
470bf215546Sopenharmony_ci      struct pipe_clip_state *ucp = &ctx->ucp;
471bf215546Sopenharmony_ci      unsigned pos = IR3_DP_UCP0_X;
472bf215546Sopenharmony_ci      for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
473bf215546Sopenharmony_ci         for (unsigned j = 0; j < 4; j++) {
474bf215546Sopenharmony_ci            vertex_params[pos] = fui(ucp->ucp[i][j]);
475bf215546Sopenharmony_ci            pos++;
476bf215546Sopenharmony_ci         }
477bf215546Sopenharmony_ci      }
478bf215546Sopenharmony_ci   }
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
481bf215546Sopenharmony_ci    * plane. However a binning pass may drop even some of these, so limit to
482bf215546Sopenharmony_ci    * program max.
483bf215546Sopenharmony_ci    */
484bf215546Sopenharmony_ci   const uint32_t vertex_params_size =
485bf215546Sopenharmony_ci      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
486bf215546Sopenharmony_ci   assert(vertex_params_size <= IR3_DP_VS_COUNT);
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   bool needs_vtxid_base =
489bf215546Sopenharmony_ci      ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) !=
490bf215546Sopenharmony_ci      regid(63, 0);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   /* for indirect draw, we need to copy VTXID_BASE from
493bf215546Sopenharmony_ci    * indirect-draw parameters buffer.. which is annoying
494bf215546Sopenharmony_ci    * and means we can't easily emit these consts in cmd
495bf215546Sopenharmony_ci    * stream so need to copy them to bo.
496bf215546Sopenharmony_ci    */
497bf215546Sopenharmony_ci   if (indirect && needs_vtxid_base) {
498bf215546Sopenharmony_ci      uint32_t vertex_params_area = align(vertex_params_size, 16);
499bf215546Sopenharmony_ci      struct pipe_resource *vertex_params_rsc =
500bf215546Sopenharmony_ci         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
501bf215546Sopenharmony_ci                            PIPE_USAGE_STREAM, vertex_params_area * 4);
502bf215546Sopenharmony_ci      unsigned src_off = indirect->offset;
503bf215546Sopenharmony_ci      void *ptr;
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
506bf215546Sopenharmony_ci      memcpy(ptr, vertex_params, vertex_params_size * 4);
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci      if (info->index_size) {
509bf215546Sopenharmony_ci         /* indexed draw, index_bias is 4th field: */
510bf215546Sopenharmony_ci         src_off += 3 * 4;
511bf215546Sopenharmony_ci      } else {
512bf215546Sopenharmony_ci         /* non-indexed draw, start is 3rd field: */
513bf215546Sopenharmony_ci         src_off += 2 * 4;
514bf215546Sopenharmony_ci      }
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_ci      /* copy index_bias or start from draw params: */
517bf215546Sopenharmony_ci      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
518bf215546Sopenharmony_ci                              src_off, 1);
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
521bf215546Sopenharmony_ci                      vertex_params_rsc);
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci      pipe_resource_reference(&vertex_params_rsc, NULL);
524bf215546Sopenharmony_ci   } else {
525bf215546Sopenharmony_ci      emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
526bf215546Sopenharmony_ci   }
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   /* if needed, emit stream-out buffer addresses: */
529bf215546Sopenharmony_ci   if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
530bf215546Sopenharmony_ci      emit_tfbos(ctx, v, ring);
531bf215546Sopenharmony_ci   }
532bf215546Sopenharmony_ci}
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_cistatic inline void
535bf215546Sopenharmony_ciir3_emit_vs_consts(const struct ir3_shader_variant *v,
536bf215546Sopenharmony_ci                   struct fd_ringbuffer *ring, struct fd_context *ctx,
537bf215546Sopenharmony_ci                   const struct pipe_draw_info *info,
538bf215546Sopenharmony_ci                   const struct pipe_draw_indirect_info *indirect,
539bf215546Sopenharmony_ci                   const struct pipe_draw_start_count_bias *draw) assert_dt
540bf215546Sopenharmony_ci{
541bf215546Sopenharmony_ci   assert(v->type == MESA_SHADER_VERTEX);
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   /* emit driver params every time: */
546bf215546Sopenharmony_ci   if (info && v->need_driver_params) {
547bf215546Sopenharmony_ci      ring_wfi(ctx->batch, ring);
548bf215546Sopenharmony_ci      ir3_emit_driver_params(v, ring, ctx, info, indirect, draw);
549bf215546Sopenharmony_ci   }
550bf215546Sopenharmony_ci}
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_cistatic inline void
553bf215546Sopenharmony_ciir3_emit_fs_consts(const struct ir3_shader_variant *v,
554bf215546Sopenharmony_ci                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
555bf215546Sopenharmony_ci{
556bf215546Sopenharmony_ci   assert(v->type == MESA_SHADER_FRAGMENT);
557bf215546Sopenharmony_ci
558bf215546Sopenharmony_ci   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
559bf215546Sopenharmony_ci}
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci/* emit compute-shader consts: */
562bf215546Sopenharmony_cistatic inline void
563bf215546Sopenharmony_ciir3_emit_cs_consts(const struct ir3_shader_variant *v,
564bf215546Sopenharmony_ci                   struct fd_ringbuffer *ring, struct fd_context *ctx,
565bf215546Sopenharmony_ci                   const struct pipe_grid_info *info) assert_dt
566bf215546Sopenharmony_ci{
567bf215546Sopenharmony_ci   assert(gl_shader_stage_is_compute(v->type));
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
570bf215546Sopenharmony_ci   emit_kernel_params(ctx, v, ring, info);
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci   /* a3xx/a4xx can inject these directly */
573bf215546Sopenharmony_ci   if (ctx->screen->gen <= 4)
574bf215546Sopenharmony_ci      return;
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci   /* emit compute-shader driver-params: */
577bf215546Sopenharmony_ci   const struct ir3_const_state *const_state = ir3_const_state(v);
578bf215546Sopenharmony_ci   uint32_t offset = const_state->offsets.driver_param;
579bf215546Sopenharmony_ci   if (v->constlen > offset) {
580bf215546Sopenharmony_ci      ring_wfi(ctx->batch, ring);
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci      if (info->indirect) {
583bf215546Sopenharmony_ci         struct pipe_resource *indirect = NULL;
584bf215546Sopenharmony_ci         unsigned indirect_offset;
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci         /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
587bf215546Sopenharmony_ci          * to be aligned more strongly than 4 bytes.  So in this case
588bf215546Sopenharmony_ci          * we need a temporary buffer to copy NumWorkGroups.xyz to.
589bf215546Sopenharmony_ci          *
590bf215546Sopenharmony_ci          * TODO if previous compute job is writing to info->indirect,
591bf215546Sopenharmony_ci          * we might need a WFI.. but since we currently flush for each
592bf215546Sopenharmony_ci          * compute job, we are probably ok for now.
593bf215546Sopenharmony_ci          */
594bf215546Sopenharmony_ci         if (info->indirect_offset & 0xf) {
595bf215546Sopenharmony_ci            indirect = pipe_buffer_create(&ctx->screen->base,
596bf215546Sopenharmony_ci                                          PIPE_BIND_COMMAND_ARGS_BUFFER,
597bf215546Sopenharmony_ci                                          PIPE_USAGE_STREAM, 0x1000);
598bf215546Sopenharmony_ci            indirect_offset = 0;
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci            ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
601bf215546Sopenharmony_ci                                    info->indirect_offset, 3);
602bf215546Sopenharmony_ci         } else {
603bf215546Sopenharmony_ci            pipe_resource_reference(&indirect, info->indirect);
604bf215546Sopenharmony_ci            indirect_offset = info->indirect_offset;
605bf215546Sopenharmony_ci         }
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci         emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci         pipe_resource_reference(&indirect, NULL);
610bf215546Sopenharmony_ci      } else {
611bf215546Sopenharmony_ci         uint32_t compute_params[IR3_DP_CS_COUNT] = {
612bf215546Sopenharmony_ci            [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
613bf215546Sopenharmony_ci            [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
614bf215546Sopenharmony_ci            [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
615bf215546Sopenharmony_ci            [IR3_DP_WORK_DIM]          = info->work_dim,
616bf215546Sopenharmony_ci            [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
617bf215546Sopenharmony_ci            [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
618bf215546Sopenharmony_ci            [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
619bf215546Sopenharmony_ci         };
620bf215546Sopenharmony_ci         uint32_t size =
621bf215546Sopenharmony_ci            MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci         emit_const_user(ring, v, offset * 4, size, compute_params);
624bf215546Sopenharmony_ci      }
625bf215546Sopenharmony_ci   }
626bf215546Sopenharmony_ci}
627