1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci * Copyright © 2018 Google, Inc.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22bf215546Sopenharmony_ci * SOFTWARE.
23bf215546Sopenharmony_ci *
24bf215546Sopenharmony_ci * Authors:
25bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "pipe/p_state.h"
29bf215546Sopenharmony_ci#include "util/format/u_format.h"
30bf215546Sopenharmony_ci#include "util/u_helpers.h"
31bf215546Sopenharmony_ci#include "util/u_memory.h"
32bf215546Sopenharmony_ci#include "util/u_string.h"
33bf215546Sopenharmony_ci#include "util/u_viewport.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include "common/freedreno_guardband.h"
36bf215546Sopenharmony_ci#include "freedreno_query_hw.h"
37bf215546Sopenharmony_ci#include "freedreno_resource.h"
38bf215546Sopenharmony_ci#include "freedreno_state.h"
39bf215546Sopenharmony_ci#include "freedreno_tracepoints.h"
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci#include "fd6_blend.h"
42bf215546Sopenharmony_ci#include "fd6_const.h"
43bf215546Sopenharmony_ci#include "fd6_context.h"
44bf215546Sopenharmony_ci#include "fd6_emit.h"
45bf215546Sopenharmony_ci#include "fd6_image.h"
46bf215546Sopenharmony_ci#include "fd6_pack.h"
47bf215546Sopenharmony_ci#include "fd6_program.h"
48bf215546Sopenharmony_ci#include "fd6_rasterizer.h"
49bf215546Sopenharmony_ci#include "fd6_texture.h"
50bf215546Sopenharmony_ci#include "fd6_zsa.h"
51bf215546Sopenharmony_ci
/* Border color layout is different from a4xx/a5xx.  If another generation
 * turns out to share this layout, move this somewhere common ;-)
 *
 * Each entry is 0x80 bytes in total (the sum of the bcolor_entry fields
 * below, padding included):
 */
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_cistruct PACKED bcolor_entry {
59bf215546Sopenharmony_ci   uint32_t fp32[4];
60bf215546Sopenharmony_ci   uint16_t ui16[4];
61bf215546Sopenharmony_ci   int16_t si16[4];
62bf215546Sopenharmony_ci   uint16_t fp16[4];
63bf215546Sopenharmony_ci   uint16_t rgb565;
64bf215546Sopenharmony_ci   uint16_t rgb5a1;
65bf215546Sopenharmony_ci   uint16_t rgba4;
66bf215546Sopenharmony_ci   uint8_t __pad0[2];
67bf215546Sopenharmony_ci   uint8_t ui8[4];
68bf215546Sopenharmony_ci   int8_t si8[4];
69bf215546Sopenharmony_ci   uint32_t rgb10a2;
70bf215546Sopenharmony_ci   uint32_t z24;
71bf215546Sopenharmony_ci   uint16_t
72bf215546Sopenharmony_ci      srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
73bf215546Sopenharmony_ci   uint8_t __pad1[56];
74bf215546Sopenharmony_ci};
75bf215546Sopenharmony_ci
/* Size in bytes of a single border-color table entry. */
#define FD6_BORDER_COLOR_SIZE (sizeof(struct bcolor_entry))

/* Bytes reserved for one uploaded table: room for 2 * PIPE_MAX_SAMPLERS
 * entries (emit_border_color() stores the vertex-stage entries followed by
 * the fragment-stage entries).
 */
#define FD6_BORDER_COLOR_UPLOAD_SIZE                                           \
   (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)

80bf215546Sopenharmony_cistatic void
81bf215546Sopenharmony_cisetup_border_colors(struct fd_texture_stateobj *tex,
82bf215546Sopenharmony_ci                    struct bcolor_entry *entries,
83bf215546Sopenharmony_ci                    struct fd_screen *screen)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   unsigned i, j;
86bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
87bf215546Sopenharmony_ci   const bool has_z24uint_s8uint = screen->info->a6xx.has_z24uint_s8uint;
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   for (i = 0; i < tex->num_samplers; i++) {
90bf215546Sopenharmony_ci      struct bcolor_entry *e = &entries[i];
91bf215546Sopenharmony_ci      struct pipe_sampler_state *sampler = tex->samplers[i];
92bf215546Sopenharmony_ci      union pipe_color_union *bc;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci      if (!sampler)
95bf215546Sopenharmony_ci         continue;
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci      bc = &sampler->border_color;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci      /*
100bf215546Sopenharmony_ci       * XXX HACK ALERT XXX
101bf215546Sopenharmony_ci       *
102bf215546Sopenharmony_ci       * The border colors need to be swizzled in a particular
103bf215546Sopenharmony_ci       * format-dependent order. Even though samplers don't know about
104bf215546Sopenharmony_ci       * formats, we can assume that with a GL state tracker, there's a
105bf215546Sopenharmony_ci       * 1:1 correspondence between sampler and texture. Take advantage
106bf215546Sopenharmony_ci       * of that knowledge.
107bf215546Sopenharmony_ci       */
108bf215546Sopenharmony_ci      if ((i >= tex->num_textures) || !tex->textures[i])
109bf215546Sopenharmony_ci         continue;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci      struct pipe_sampler_view *view = tex->textures[i];
112bf215546Sopenharmony_ci      enum pipe_format format = view->format;
113bf215546Sopenharmony_ci      const struct util_format_description *desc =
114bf215546Sopenharmony_ci         util_format_description(format);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci      e->rgb565 = 0;
117bf215546Sopenharmony_ci      e->rgb5a1 = 0;
118bf215546Sopenharmony_ci      e->rgba4 = 0;
119bf215546Sopenharmony_ci      e->rgb10a2 = 0;
120bf215546Sopenharmony_ci      e->z24 = 0;
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci      unsigned char swiz[4];
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci      fdl6_format_swiz(format, false, swiz);
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      for (j = 0; j < 4; j++) {
127bf215546Sopenharmony_ci         int c = swiz[j];
128bf215546Sopenharmony_ci         int cd = c;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci         /*
131bf215546Sopenharmony_ci          * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
132bf215546Sopenharmony_ci          * stencil border color value in bc->ui[0] but according
133bf215546Sopenharmony_ci          * to desc->swizzle and desc->channel, the .x/.w component
134bf215546Sopenharmony_ci          * is NONE and the stencil value is in the y component.
135bf215546Sopenharmony_ci          * Meanwhile the hardware wants this in the .x component
136bf215546Sopenharmony_ci          * for x24s8 and x32_s8x24, or the .y component for x24s8 with the
137bf215546Sopenharmony_ci          * special Z24UINT_S8UINT format.
138bf215546Sopenharmony_ci          */
139bf215546Sopenharmony_ci         if ((format == PIPE_FORMAT_X24S8_UINT) ||
140bf215546Sopenharmony_ci             (format == PIPE_FORMAT_X32_S8X24_UINT)) {
141bf215546Sopenharmony_ci            if (j == 0) {
142bf215546Sopenharmony_ci               c = 1;
143bf215546Sopenharmony_ci               cd = (format == PIPE_FORMAT_X24S8_UINT && has_z24uint_s8uint) ? 1 : 0;
144bf215546Sopenharmony_ci            } else {
145bf215546Sopenharmony_ci               continue;
146bf215546Sopenharmony_ci            }
147bf215546Sopenharmony_ci         }
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci         if (c >= 4)
150bf215546Sopenharmony_ci            continue;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci         if (desc->channel[c].pure_integer) {
153bf215546Sopenharmony_ci            uint16_t clamped;
154bf215546Sopenharmony_ci            switch (desc->channel[c].size) {
155bf215546Sopenharmony_ci            case 2:
156bf215546Sopenharmony_ci               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
157bf215546Sopenharmony_ci               clamped = CLAMP(bc->ui[j], 0, 0x3);
158bf215546Sopenharmony_ci               break;
159bf215546Sopenharmony_ci            case 8:
160bf215546Sopenharmony_ci               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
161bf215546Sopenharmony_ci                  clamped = CLAMP(bc->i[j], -128, 127);
162bf215546Sopenharmony_ci               else
163bf215546Sopenharmony_ci                  clamped = CLAMP(bc->ui[j], 0, 255);
164bf215546Sopenharmony_ci               break;
165bf215546Sopenharmony_ci            case 10:
166bf215546Sopenharmony_ci               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
167bf215546Sopenharmony_ci               clamped = CLAMP(bc->ui[j], 0, 0x3ff);
168bf215546Sopenharmony_ci               break;
169bf215546Sopenharmony_ci            case 16:
170bf215546Sopenharmony_ci               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
171bf215546Sopenharmony_ci                  clamped = CLAMP(bc->i[j], -32768, 32767);
172bf215546Sopenharmony_ci               else
173bf215546Sopenharmony_ci                  clamped = CLAMP(bc->ui[j], 0, 65535);
174bf215546Sopenharmony_ci               break;
175bf215546Sopenharmony_ci            default:
176bf215546Sopenharmony_ci               assert(!"Unexpected bit size");
177bf215546Sopenharmony_ci            case 32:
178bf215546Sopenharmony_ci               clamped = 0;
179bf215546Sopenharmony_ci               break;
180bf215546Sopenharmony_ci            }
181bf215546Sopenharmony_ci            e->fp32[cd] = bc->ui[j];
182bf215546Sopenharmony_ci            e->fp16[cd] = clamped;
183bf215546Sopenharmony_ci         } else {
184bf215546Sopenharmony_ci            float f = bc->f[j];
185bf215546Sopenharmony_ci            float f_u = CLAMP(f, 0, 1);
186bf215546Sopenharmony_ci            float f_s = CLAMP(f, -1, 1);
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci            e->fp32[c] = fui(f);
189bf215546Sopenharmony_ci            e->fp16[c] = _mesa_float_to_half(f);
190bf215546Sopenharmony_ci            e->srgb[c] = _mesa_float_to_half(f_u);
191bf215546Sopenharmony_ci            e->ui16[c] = f_u * 0xffff;
192bf215546Sopenharmony_ci            e->si16[c] = f_s * 0x7fff;
193bf215546Sopenharmony_ci            e->ui8[c] = f_u * 0xff;
194bf215546Sopenharmony_ci            e->si8[c] = f_s * 0x7f;
195bf215546Sopenharmony_ci            if (c == 1)
196bf215546Sopenharmony_ci               e->rgb565 |= (int)(f_u * 0x3f) << 5;
197bf215546Sopenharmony_ci            else if (c < 3)
198bf215546Sopenharmony_ci               e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
199bf215546Sopenharmony_ci            if (c == 3)
200bf215546Sopenharmony_ci               e->rgb5a1 |= (f_u > 0.5f) ? 0x8000 : 0;
201bf215546Sopenharmony_ci            else
202bf215546Sopenharmony_ci               e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
203bf215546Sopenharmony_ci            if (c == 3)
204bf215546Sopenharmony_ci               e->rgb10a2 |= (int)(f_u * 0x3) << 30;
205bf215546Sopenharmony_ci            else
206bf215546Sopenharmony_ci               e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
207bf215546Sopenharmony_ci            e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
208bf215546Sopenharmony_ci            if (c == 0)
209bf215546Sopenharmony_ci               e->z24 = f_u * 0xffffff;
210bf215546Sopenharmony_ci         }
211bf215546Sopenharmony_ci      }
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci#ifdef DEBUG
214bf215546Sopenharmony_ci      memset(&e->__pad0, 0, sizeof(e->__pad0));
215bf215546Sopenharmony_ci      memset(&e->__pad1, 0, sizeof(e->__pad1));
216bf215546Sopenharmony_ci#endif
217bf215546Sopenharmony_ci   }
218bf215546Sopenharmony_ci}
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_cistatic void
221bf215546Sopenharmony_ciemit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   struct fd6_context *fd6_ctx = fd6_context(ctx);
224bf215546Sopenharmony_ci   struct bcolor_entry *entries;
225bf215546Sopenharmony_ci   unsigned off;
226bf215546Sopenharmony_ci   void *ptr;
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   u_upload_alloc(fd6_ctx->border_color_uploader, 0,
231bf215546Sopenharmony_ci                  FD6_BORDER_COLOR_UPLOAD_SIZE, FD6_BORDER_COLOR_UPLOAD_SIZE,
232bf215546Sopenharmony_ci                  &off, &fd6_ctx->border_color_buf, &ptr);
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci   entries = ptr;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0], ctx->screen);
237bf215546Sopenharmony_ci   setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
238bf215546Sopenharmony_ci                       &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers],
239bf215546Sopenharmony_ci                       ctx->screen);
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
242bf215546Sopenharmony_ci   OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0);
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci   u_upload_unmap(fd6_ctx->border_color_uploader);
245bf215546Sopenharmony_ci}
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_cistatic void
248bf215546Sopenharmony_cifd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt
249bf215546Sopenharmony_ci{
250bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
251bf215546Sopenharmony_ci   struct pipe_surface *psurf = pfb->cbufs[0];
252bf215546Sopenharmony_ci   struct fd_resource *rsc = fd_resource(psurf->texture);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   OUT_RINGP(state, 0, &ctx->batch->fb_read_patches); /* texconst0, patched in gmem emit */
255bf215546Sopenharmony_ci   OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
256bf215546Sopenharmony_ci                      A6XX_TEX_CONST_1_HEIGHT(pfb->height));
257bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst2, patched in gmem emit */
258bf215546Sopenharmony_ci   OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size));
259bf215546Sopenharmony_ci   OUT_RING(state, 0); /* BASE_LO, patched in gmem emit */
260bf215546Sopenharmony_ci   OUT_RING(state, 0); /* BASE_HI, patched in gmem emit */
261bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst6 */
262bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst7 */
263bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst8 */
264bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst9 */
265bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst10 */
266bf215546Sopenharmony_ci   OUT_RING(state, 0); /* texconst11 */
267bf215546Sopenharmony_ci   OUT_RING(state, 0);
268bf215546Sopenharmony_ci   OUT_RING(state, 0);
269bf215546Sopenharmony_ci   OUT_RING(state, 0);
270bf215546Sopenharmony_ci   OUT_RING(state, 0);
271bf215546Sopenharmony_ci}
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_cibool
274bf215546Sopenharmony_cifd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
275bf215546Sopenharmony_ci                  enum pipe_shader_type type, struct fd_texture_stateobj *tex,
276bf215546Sopenharmony_ci                  unsigned bcolor_offset,
277bf215546Sopenharmony_ci                  /* can be NULL if no image/SSBO/fb state to merge in: */
278bf215546Sopenharmony_ci                  const struct ir3_shader_variant *v)
279bf215546Sopenharmony_ci{
280bf215546Sopenharmony_ci   bool needs_border = false;
281bf215546Sopenharmony_ci   unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
282bf215546Sopenharmony_ci   enum a6xx_state_block sb;
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci   switch (type) {
285bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
286bf215546Sopenharmony_ci      sb = SB6_VS_TEX;
287bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_GEOM;
288bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP;
289bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_VS_TEX_CONST;
290bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
291bf215546Sopenharmony_ci      break;
292bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
293bf215546Sopenharmony_ci      sb = SB6_HS_TEX;
294bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_GEOM;
295bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_HS_TEX_SAMP;
296bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_HS_TEX_CONST;
297bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_HS_TEX_COUNT;
298bf215546Sopenharmony_ci      break;
299bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
300bf215546Sopenharmony_ci      sb = SB6_DS_TEX;
301bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_GEOM;
302bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_DS_TEX_SAMP;
303bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_DS_TEX_CONST;
304bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_DS_TEX_COUNT;
305bf215546Sopenharmony_ci      break;
306bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
307bf215546Sopenharmony_ci      sb = SB6_GS_TEX;
308bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_GEOM;
309bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_GS_TEX_SAMP;
310bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_GS_TEX_CONST;
311bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_GS_TEX_COUNT;
312bf215546Sopenharmony_ci      break;
313bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
314bf215546Sopenharmony_ci      sb = SB6_FS_TEX;
315bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_FRAG;
316bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP;
317bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_FS_TEX_CONST;
318bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
319bf215546Sopenharmony_ci      break;
320bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:
321bf215546Sopenharmony_ci      sb = SB6_CS_TEX;
322bf215546Sopenharmony_ci      opcode = CP_LOAD_STATE6_FRAG;
323bf215546Sopenharmony_ci      tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP;
324bf215546Sopenharmony_ci      tex_const_reg = REG_A6XX_SP_CS_TEX_CONST;
325bf215546Sopenharmony_ci      tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
326bf215546Sopenharmony_ci      break;
327bf215546Sopenharmony_ci   default:
328bf215546Sopenharmony_ci      unreachable("bad state block");
329bf215546Sopenharmony_ci   }
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   if (tex->num_samplers > 0) {
332bf215546Sopenharmony_ci      struct fd_ringbuffer *state =
333bf215546Sopenharmony_ci         fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4 * 4);
334bf215546Sopenharmony_ci      for (unsigned i = 0; i < tex->num_samplers; i++) {
335bf215546Sopenharmony_ci         static const struct fd6_sampler_stateobj dummy_sampler = {};
336bf215546Sopenharmony_ci         const struct fd6_sampler_stateobj *sampler =
337bf215546Sopenharmony_ci            tex->samplers[i] ? fd6_sampler_stateobj(tex->samplers[i])
338bf215546Sopenharmony_ci                             : &dummy_sampler;
339bf215546Sopenharmony_ci         OUT_RING(state, sampler->texsamp0);
340bf215546Sopenharmony_ci         OUT_RING(state, sampler->texsamp1);
341bf215546Sopenharmony_ci         OUT_RING(state, sampler->texsamp2 |
342bf215546Sopenharmony_ci                            A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset));
343bf215546Sopenharmony_ci         OUT_RING(state, sampler->texsamp3);
344bf215546Sopenharmony_ci         needs_border |= sampler->needs_border;
345bf215546Sopenharmony_ci      }
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci      /* output sampler state: */
348bf215546Sopenharmony_ci      OUT_PKT7(ring, opcode, 3);
349bf215546Sopenharmony_ci      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
350bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
351bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
352bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
353bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
354bf215546Sopenharmony_ci      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci      OUT_PKT4(ring, tex_samp_reg, 2);
357bf215546Sopenharmony_ci      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      fd_ringbuffer_del(state);
360bf215546Sopenharmony_ci   }
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   unsigned num_merged_textures = tex->num_textures;
363bf215546Sopenharmony_ci   unsigned num_textures = tex->num_textures;
364bf215546Sopenharmony_ci   if (v) {
365bf215546Sopenharmony_ci      num_merged_textures += v->image_mapping.num_tex;
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci      if (v->fb_read)
368bf215546Sopenharmony_ci         num_merged_textures++;
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci      /* There could be more bound textures than what the shader uses.
371bf215546Sopenharmony_ci       * Which isn't known at shader compile time.  So in the case we
372bf215546Sopenharmony_ci       * are merging tex state, only emit the textures that the shader
373bf215546Sopenharmony_ci       * uses (since the image/SSBO related tex state comes immediately
374bf215546Sopenharmony_ci       * after)
375bf215546Sopenharmony_ci       */
376bf215546Sopenharmony_ci      num_textures = v->image_mapping.tex_base;
377bf215546Sopenharmony_ci   }
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   if (num_merged_textures > 0) {
380bf215546Sopenharmony_ci      struct fd_ringbuffer *state =
381bf215546Sopenharmony_ci         fd_ringbuffer_new_object(ctx->pipe, num_merged_textures * 16 * 4);
382bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_textures; i++) {
383bf215546Sopenharmony_ci         const struct fd6_pipe_sampler_view *view;
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci         if (tex->textures[i]) {
386bf215546Sopenharmony_ci            view = fd6_pipe_sampler_view(tex->textures[i]);
387bf215546Sopenharmony_ci            if (unlikely(view->rsc_seqno !=
388bf215546Sopenharmony_ci                         fd_resource(view->base.texture)->seqno)) {
389bf215546Sopenharmony_ci               fd6_sampler_view_update(ctx,
390bf215546Sopenharmony_ci                                       fd6_pipe_sampler_view(tex->textures[i]));
391bf215546Sopenharmony_ci            }
392bf215546Sopenharmony_ci         } else {
393bf215546Sopenharmony_ci            static const struct fd6_pipe_sampler_view dummy_view = {};
394bf215546Sopenharmony_ci            view = &dummy_view;
395bf215546Sopenharmony_ci         }
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[0]);
398bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[1]);
399bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[2]);
400bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[3]);
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci         if (view->ptr1) {
403bf215546Sopenharmony_ci            OUT_RELOC(state, view->ptr1->bo, view->descriptor[4],
404bf215546Sopenharmony_ci                      (uint64_t)view->descriptor[5] << 32, 0);
405bf215546Sopenharmony_ci         } else {
406bf215546Sopenharmony_ci            OUT_RING(state, view->descriptor[4]);
407bf215546Sopenharmony_ci            OUT_RING(state, view->descriptor[5]);
408bf215546Sopenharmony_ci         }
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[6]);
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci         if (view->ptr2) {
413bf215546Sopenharmony_ci            OUT_RELOC(state, view->ptr2->bo, view->descriptor[7], 0, 0);
414bf215546Sopenharmony_ci         } else {
415bf215546Sopenharmony_ci            OUT_RING(state, view->descriptor[7]);
416bf215546Sopenharmony_ci            OUT_RING(state, view->descriptor[8]);
417bf215546Sopenharmony_ci         }
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[9]);
420bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[10]);
421bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[11]);
422bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[12]);
423bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[13]);
424bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[14]);
425bf215546Sopenharmony_ci         OUT_RING(state, view->descriptor[15]);
426bf215546Sopenharmony_ci      }
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci      if (v) {
429bf215546Sopenharmony_ci         const struct ir3_ibo_mapping *mapping = &v->image_mapping;
430bf215546Sopenharmony_ci         struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
431bf215546Sopenharmony_ci         struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci         for (unsigned i = 0; i < mapping->num_tex; i++) {
434bf215546Sopenharmony_ci            unsigned idx = mapping->tex_to_image[i];
435bf215546Sopenharmony_ci            if (idx & IBO_SSBO) {
436bf215546Sopenharmony_ci               fd6_emit_ssbo_tex(ctx, state, &buf->sb[idx & ~IBO_SSBO]);
437bf215546Sopenharmony_ci            } else {
438bf215546Sopenharmony_ci               fd6_emit_image_tex(ctx, state, &img->si[idx]);
439bf215546Sopenharmony_ci            }
440bf215546Sopenharmony_ci         }
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci         if (v->fb_read) {
443bf215546Sopenharmony_ci            fd6_emit_fb_tex(state, ctx);
444bf215546Sopenharmony_ci         }
445bf215546Sopenharmony_ci      }
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci      /* emit texture state: */
448bf215546Sopenharmony_ci      OUT_PKT7(ring, opcode, 3);
449bf215546Sopenharmony_ci      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
450bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
451bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
452bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
453bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
454bf215546Sopenharmony_ci      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci      OUT_PKT4(ring, tex_const_reg, 2);
457bf215546Sopenharmony_ci      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci      fd_ringbuffer_del(state);
460bf215546Sopenharmony_ci   }
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   OUT_PKT4(ring, tex_count_reg, 1);
463bf215546Sopenharmony_ci   OUT_RING(ring, num_merged_textures);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   return needs_border;
466bf215546Sopenharmony_ci}
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci/* Emits combined texture state, which also includes any Image/SSBO
469bf215546Sopenharmony_ci * related texture state merged in (because we must have all texture
470bf215546Sopenharmony_ci * state for a given stage in a single buffer).  In the fast-path, if
471bf215546Sopenharmony_ci * we don't need to merge in any image/ssbo related texture state, we
472bf215546Sopenharmony_ci * just use cached texture stateobj.  Otherwise we generate a single-
473bf215546Sopenharmony_ci * use stateobj.
474bf215546Sopenharmony_ci *
475bf215546Sopenharmony_ci * TODO Is there some sane way we can still use cached texture stateobj
476bf215546Sopenharmony_ci * with image/ssbo in use?
477bf215546Sopenharmony_ci *
478bf215546Sopenharmony_ci * returns whether border_color is required:
479bf215546Sopenharmony_ci */
480bf215546Sopenharmony_cistatic bool
481bf215546Sopenharmony_cifd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
482bf215546Sopenharmony_ci                           enum pipe_shader_type type,
483bf215546Sopenharmony_ci                           const struct ir3_shader_variant *v) assert_dt
484bf215546Sopenharmony_ci{
485bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
486bf215546Sopenharmony_ci   bool needs_border = false;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   static const struct {
489bf215546Sopenharmony_ci      enum fd6_state_id state_id;
490bf215546Sopenharmony_ci      unsigned enable_mask;
491bf215546Sopenharmony_ci   } s[PIPE_SHADER_TYPES] = {
492bf215546Sopenharmony_ci      [PIPE_SHADER_VERTEX] = {FD6_GROUP_VS_TEX, ENABLE_ALL},
493bf215546Sopenharmony_ci      [PIPE_SHADER_TESS_CTRL] = {FD6_GROUP_HS_TEX, ENABLE_ALL},
494bf215546Sopenharmony_ci      [PIPE_SHADER_TESS_EVAL] = {FD6_GROUP_DS_TEX, ENABLE_ALL},
495bf215546Sopenharmony_ci      [PIPE_SHADER_GEOMETRY] = {FD6_GROUP_GS_TEX, ENABLE_ALL},
496bf215546Sopenharmony_ci      [PIPE_SHADER_FRAGMENT] = {FD6_GROUP_FS_TEX, ENABLE_DRAW},
497bf215546Sopenharmony_ci   };
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   assert(s[type].state_id);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   if (!v->image_mapping.num_tex && !v->fb_read) {
502bf215546Sopenharmony_ci      /* in the fast-path, when we don't have to mix in any image/SSBO
503bf215546Sopenharmony_ci       * related texture state, we can just lookup the stateobj and
504bf215546Sopenharmony_ci       * re-emit that:
505bf215546Sopenharmony_ci       *
506bf215546Sopenharmony_ci       * Also, framebuffer-read is a slow-path because an extra
507bf215546Sopenharmony_ci       * texture needs to be inserted.
508bf215546Sopenharmony_ci       *
509bf215546Sopenharmony_ci       * TODO we can probably simmplify things if we also treated
510bf215546Sopenharmony_ci       * border_color as a slow-path.. this way the tex state key
511bf215546Sopenharmony_ci       * wouldn't depend on bcolor_offset.. but fb_read might rather
512bf215546Sopenharmony_ci       * be *somehow* a fast-path if we eventually used it for PLS.
513bf215546Sopenharmony_ci       * I suppose there would be no harm in just *always* inserting
514bf215546Sopenharmony_ci       * an fb_read texture?
515bf215546Sopenharmony_ci       */
516bf215546Sopenharmony_ci      if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
517bf215546Sopenharmony_ci          ctx->tex[type].num_textures > 0) {
518bf215546Sopenharmony_ci         struct fd6_texture_state *tex =
519bf215546Sopenharmony_ci            fd6_texture_state(ctx, type, &ctx->tex[type]);
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci         needs_border |= tex->needs_border;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci         fd6_emit_add_group(emit, tex->stateobj, s[type].state_id,
524bf215546Sopenharmony_ci                            s[type].enable_mask);
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci         fd6_texture_state_reference(&tex, NULL);
527bf215546Sopenharmony_ci      }
528bf215546Sopenharmony_ci   } else {
529bf215546Sopenharmony_ci      /* In the slow-path, create a one-shot texture state object
530bf215546Sopenharmony_ci       * if either TEX|PROG|SSBO|IMAGE state is dirty:
531bf215546Sopenharmony_ci       */
532bf215546Sopenharmony_ci      if ((ctx->dirty_shader[type] &
533bf215546Sopenharmony_ci           (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE |
534bf215546Sopenharmony_ci            FD_DIRTY_SHADER_SSBO)) ||
535bf215546Sopenharmony_ci          v->fb_read) {
536bf215546Sopenharmony_ci         struct fd_texture_stateobj *tex = &ctx->tex[type];
537bf215546Sopenharmony_ci         struct fd_ringbuffer *stateobj = fd_submit_new_ringbuffer(
538bf215546Sopenharmony_ci            ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
539bf215546Sopenharmony_ci         unsigned bcolor_offset = fd6_border_color_offset(ctx, type, tex);
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_ci         needs_border |=
542bf215546Sopenharmony_ci            fd6_emit_textures(ctx, stateobj, type, tex, bcolor_offset, v);
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci         fd6_emit_take_group(emit, stateobj, s[type].state_id,
545bf215546Sopenharmony_ci                             s[type].enable_mask);
546bf215546Sopenharmony_ci      }
547bf215546Sopenharmony_ci   }
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   return needs_border;
550bf215546Sopenharmony_ci}
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_cistatic struct fd_ringbuffer *
553bf215546Sopenharmony_cibuild_vbo_state(struct fd6_emit *emit) assert_dt
554bf215546Sopenharmony_ci{
555bf215546Sopenharmony_ci   const struct fd_vertex_state *vtx = emit->vtx;
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   /* Limit PKT4 size, because at max count (32) we would overflow the
558bf215546Sopenharmony_ci    * size of the PKT4 size field:
559bf215546Sopenharmony_ci    */
560bf215546Sopenharmony_ci   const unsigned maxcnt = 16;
561bf215546Sopenharmony_ci   const unsigned cnt = vtx->vertexbuf.count;
562bf215546Sopenharmony_ci   const unsigned dwords = (cnt * 4) /* per vbo: reg64 + two reg32 */
563bf215546Sopenharmony_ci               + (1 + cnt / maxcnt); /* PKT4 hdr every 16 vbo's */
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
566bf215546Sopenharmony_ci      emit->ctx->batch->submit, 4 * dwords, FD_RINGBUFFER_STREAMING);
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   for (int32_t j = 0; j < cnt; j++) {
569bf215546Sopenharmony_ci      if ((j % maxcnt) == 0) {
570bf215546Sopenharmony_ci         unsigned sz = MIN2(maxcnt, cnt - j);
571bf215546Sopenharmony_ci         OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4 * sz);
572bf215546Sopenharmony_ci      }
573bf215546Sopenharmony_ci      const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j];
574bf215546Sopenharmony_ci      struct fd_resource *rsc = fd_resource(vb->buffer.resource);
575bf215546Sopenharmony_ci      if (rsc == NULL) {
576bf215546Sopenharmony_ci         OUT_RING(ring, 0);
577bf215546Sopenharmony_ci         OUT_RING(ring, 0);
578bf215546Sopenharmony_ci         OUT_RING(ring, 0);
579bf215546Sopenharmony_ci         OUT_RING(ring, 0);
580bf215546Sopenharmony_ci      } else {
581bf215546Sopenharmony_ci         uint32_t off = vb->buffer_offset;
582bf215546Sopenharmony_ci         uint32_t size = vb->buffer.resource->width0 - off;
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci         OUT_RELOC(ring, rsc->bo, off, 0, 0);
585bf215546Sopenharmony_ci         OUT_RING(ring, size);       /* VFD_FETCH[j].SIZE */
586bf215546Sopenharmony_ci         OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
587bf215546Sopenharmony_ci      }
588bf215546Sopenharmony_ci   }
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   return ring;
591bf215546Sopenharmony_ci}
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_cistatic enum a6xx_ztest_mode
594bf215546Sopenharmony_cicompute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
595bf215546Sopenharmony_ci{
596bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
597bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
598bf215546Sopenharmony_ci   struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
599bf215546Sopenharmony_ci   const struct ir3_shader_variant *fs = emit->fs;
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   if (fs->fs.early_fragment_tests)
602bf215546Sopenharmony_ci      return A6XX_EARLY_Z;
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled ||
605bf215546Sopenharmony_ci       fs->writes_stencilref) {
606bf215546Sopenharmony_ci      return A6XX_LATE_Z;
607bf215546Sopenharmony_ci   } else if ((fs->has_kill || zsa->alpha_test) &&
608bf215546Sopenharmony_ci              (zsa->writes_zs || !pfb->zsbuf)) {
609bf215546Sopenharmony_ci      /* Slightly odd, but seems like the hw wants us to select
610bf215546Sopenharmony_ci       * LATE_Z mode if there is no depth buffer + discard.  Either
611bf215546Sopenharmony_ci       * that, or when occlusion query is enabled.  See:
612bf215546Sopenharmony_ci       *
613bf215546Sopenharmony_ci       * dEQP-GLES31.functional.fbo.no_attachments.*
614bf215546Sopenharmony_ci       */
615bf215546Sopenharmony_ci      return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
616bf215546Sopenharmony_ci   } else {
617bf215546Sopenharmony_ci      return A6XX_EARLY_Z;
618bf215546Sopenharmony_ci   }
619bf215546Sopenharmony_ci}
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci/**
622bf215546Sopenharmony_ci * Calculate normalized LRZ state based on zsa/prog/blend state, updating
623bf215546Sopenharmony_ci * the zsbuf's lrz state as necessary to detect the cases where we need
624bf215546Sopenharmony_ci * to invalidate lrz.
625bf215546Sopenharmony_ci */
626bf215546Sopenharmony_cistatic struct fd6_lrz_state
627bf215546Sopenharmony_cicompute_lrz_state(struct fd6_emit *emit, bool binning_pass) assert_dt
628bf215546Sopenharmony_ci{
629bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
630bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
631bf215546Sopenharmony_ci   const struct ir3_shader_variant *fs = emit->fs;
632bf215546Sopenharmony_ci   struct fd6_lrz_state lrz;
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   if (!pfb->zsbuf) {
635bf215546Sopenharmony_ci      memset(&lrz, 0, sizeof(lrz));
636bf215546Sopenharmony_ci      if (!binning_pass) {
637bf215546Sopenharmony_ci         lrz.z_mode = compute_ztest_mode(emit, false);
638bf215546Sopenharmony_ci      }
639bf215546Sopenharmony_ci      return lrz;
640bf215546Sopenharmony_ci   }
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
643bf215546Sopenharmony_ci   struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
644bf215546Sopenharmony_ci   struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   lrz = zsa->lrz;
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci   /* normalize lrz state: */
649bf215546Sopenharmony_ci   if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill ||
650bf215546Sopenharmony_ci       blend->base.alpha_to_coverage) {
651bf215546Sopenharmony_ci      lrz.write = false;
652bf215546Sopenharmony_ci      if (binning_pass)
653bf215546Sopenharmony_ci         lrz.enable = false;
654bf215546Sopenharmony_ci   }
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   /* if we change depthfunc direction, bail out on using LRZ.  The
657bf215546Sopenharmony_ci    * LRZ buffer encodes a min/max depth value per block, but if
658bf215546Sopenharmony_ci    * we switch from GT/GE <-> LT/LE, those values cannot be
659bf215546Sopenharmony_ci    * interpreted properly.
660bf215546Sopenharmony_ci    */
661bf215546Sopenharmony_ci   if (zsa->base.depth_enabled && (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
662bf215546Sopenharmony_ci       (rsc->lrz_direction != lrz.direction)) {
663bf215546Sopenharmony_ci      rsc->lrz_valid = false;
664bf215546Sopenharmony_ci   }
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci   if (zsa->invalidate_lrz || !rsc->lrz_valid) {
667bf215546Sopenharmony_ci      rsc->lrz_valid = false;
668bf215546Sopenharmony_ci      memset(&lrz, 0, sizeof(lrz));
669bf215546Sopenharmony_ci   }
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci   if (fs->no_earlyz || fs->writes_pos) {
672bf215546Sopenharmony_ci      lrz.enable = false;
673bf215546Sopenharmony_ci      lrz.write = false;
674bf215546Sopenharmony_ci      lrz.test = false;
675bf215546Sopenharmony_ci   }
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci   if (!binning_pass) {
678bf215546Sopenharmony_ci      lrz.z_mode = compute_ztest_mode(emit, rsc->lrz_valid);
679bf215546Sopenharmony_ci   }
680bf215546Sopenharmony_ci
681bf215546Sopenharmony_ci   /* Once we start writing to the real depth buffer, we lock in the
682bf215546Sopenharmony_ci    * direction for LRZ.. if we have to skip a LRZ write for any
683bf215546Sopenharmony_ci    * reason, it is still safe to have LRZ until there is a direction
684bf215546Sopenharmony_ci    * reversal.  Prior to the reversal, since we disabled LRZ writes
685bf215546Sopenharmony_ci    * in the "unsafe" cases, this just means that the LRZ test may
686bf215546Sopenharmony_ci    * not early-discard some things that end up not passing a later
687bf215546Sopenharmony_ci    * test (ie. be overly concervative).  But once you have a reversal
688bf215546Sopenharmony_ci    * of direction, it is possible to increase/decrease the z value
689bf215546Sopenharmony_ci    * to the point where the overly-conservative test is incorrect.
690bf215546Sopenharmony_ci    */
691bf215546Sopenharmony_ci   if (zsa->base.depth_writemask) {
692bf215546Sopenharmony_ci      rsc->lrz_direction = lrz.direction;
693bf215546Sopenharmony_ci   }
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci   return lrz;
696bf215546Sopenharmony_ci}
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_cistatic struct fd_ringbuffer *
699bf215546Sopenharmony_cibuild_lrz(struct fd6_emit *emit, bool binning_pass) assert_dt
700bf215546Sopenharmony_ci{
701bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
702bf215546Sopenharmony_ci   struct fd6_context *fd6_ctx = fd6_context(ctx);
703bf215546Sopenharmony_ci   struct fd6_lrz_state lrz = compute_lrz_state(emit, binning_pass);
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   /* If the LRZ state has not changed, we can skip the emit: */
706bf215546Sopenharmony_ci   if (!ctx->last.dirty &&
707bf215546Sopenharmony_ci       !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
708bf215546Sopenharmony_ci      return NULL;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   fd6_ctx->last.lrz[binning_pass] = lrz;
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
713bf215546Sopenharmony_ci      ctx->batch->submit, 8 * 4, FD_RINGBUFFER_STREAMING);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   OUT_REG(ring,
716bf215546Sopenharmony_ci           A6XX_GRAS_LRZ_CNTL(.enable = lrz.enable, .lrz_write = lrz.write,
717bf215546Sopenharmony_ci                              .greater = lrz.direction == FD_LRZ_GREATER,
718bf215546Sopenharmony_ci                              .z_test_enable = lrz.test, ));
719bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_RB_LRZ_CNTL(.enable = lrz.enable, ));
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci   return ring;
726bf215546Sopenharmony_ci}
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_cistatic struct fd_ringbuffer *
729bf215546Sopenharmony_cibuild_scissor(struct fd6_emit *emit) assert_dt
730bf215546Sopenharmony_ci{
731bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
732bf215546Sopenharmony_ci   struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
735bf215546Sopenharmony_ci      emit->ctx->batch->submit, 3 * 4, FD_RINGBUFFER_STREAMING);
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   OUT_REG(
738bf215546Sopenharmony_ci      ring,
739bf215546Sopenharmony_ci      A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = scissor->minx, .y = scissor->miny),
740bf215546Sopenharmony_ci      A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
741bf215546Sopenharmony_ci                                     .y = MAX2(scissor->maxy, 1) - 1));
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci   ctx->batch->max_scissor.minx =
744bf215546Sopenharmony_ci      MIN2(ctx->batch->max_scissor.minx, scissor->minx);
745bf215546Sopenharmony_ci   ctx->batch->max_scissor.miny =
746bf215546Sopenharmony_ci      MIN2(ctx->batch->max_scissor.miny, scissor->miny);
747bf215546Sopenharmony_ci   ctx->batch->max_scissor.maxx =
748bf215546Sopenharmony_ci      MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
749bf215546Sopenharmony_ci   ctx->batch->max_scissor.maxy =
750bf215546Sopenharmony_ci      MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   return ring;
753bf215546Sopenharmony_ci}
754bf215546Sopenharmony_ci
755bf215546Sopenharmony_ci/* Combination of FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
756bf215546Sopenharmony_ci * FD_DIRTY_PROG | FD_DIRTY_DUAL_BLEND
757bf215546Sopenharmony_ci */
758bf215546Sopenharmony_cistatic struct fd_ringbuffer *
759bf215546Sopenharmony_cibuild_prog_fb_rast(struct fd6_emit *emit) assert_dt
760bf215546Sopenharmony_ci{
761bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
762bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
763bf215546Sopenharmony_ci   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
764bf215546Sopenharmony_ci   const struct ir3_shader_variant *fs = emit->fs;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
767bf215546Sopenharmony_ci      ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   unsigned nr = pfb->nr_cbufs;
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci   if (ctx->rasterizer->rasterizer_discard)
772bf215546Sopenharmony_ci      nr = 0;
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci   struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   if (blend->use_dual_src_blend)
777bf215546Sopenharmony_ci      nr++;
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci   OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
780bf215546Sopenharmony_ci   OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
781bf215546Sopenharmony_ci                     COND(fs->writes_smask && pfb->samples > 1,
782bf215546Sopenharmony_ci                          A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
783bf215546Sopenharmony_ci                     COND(fs->writes_stencilref,
784bf215546Sopenharmony_ci                          A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
785bf215546Sopenharmony_ci                     COND(blend->use_dual_src_blend,
786bf215546Sopenharmony_ci                          A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
787bf215546Sopenharmony_ci   OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci   OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
790bf215546Sopenharmony_ci   OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci   unsigned mrt_components = 0;
793bf215546Sopenharmony_ci   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
794bf215546Sopenharmony_ci      if (!pfb->cbufs[i])
795bf215546Sopenharmony_ci         continue;
796bf215546Sopenharmony_ci      mrt_components |= 0xf << (i * 4);
797bf215546Sopenharmony_ci   }
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci   /* dual source blending has an extra fs output in the 2nd slot */
800bf215546Sopenharmony_ci   if (blend->use_dual_src_blend)
801bf215546Sopenharmony_ci      mrt_components |= 0xf << 4;
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   mrt_components &= prog->mrt_components;
804bf215546Sopenharmony_ci
805bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
806bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci   return ring;
809bf215546Sopenharmony_ci}
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_cistatic struct fd_ringbuffer *
812bf215546Sopenharmony_cibuild_blend_color(struct fd6_emit *emit) assert_dt
813bf215546Sopenharmony_ci{
814bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
815bf215546Sopenharmony_ci   struct pipe_blend_color *bcolor = &ctx->blend_color;
816bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
817bf215546Sopenharmony_ci      ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   OUT_REG(ring, A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
820bf215546Sopenharmony_ci           A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
821bf215546Sopenharmony_ci           A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
822bf215546Sopenharmony_ci           A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci   return ring;
825bf215546Sopenharmony_ci}
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_cistatic struct fd_ringbuffer *
828bf215546Sopenharmony_cibuild_ibo(struct fd6_emit *emit) assert_dt
829bf215546Sopenharmony_ci{
830bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   if (emit->hs) {
833bf215546Sopenharmony_ci      assert(ir3_shader_nibo(emit->hs) == 0);
834bf215546Sopenharmony_ci      assert(ir3_shader_nibo(emit->ds) == 0);
835bf215546Sopenharmony_ci   }
836bf215546Sopenharmony_ci   if (emit->gs) {
837bf215546Sopenharmony_ci      assert(ir3_shader_nibo(emit->gs) == 0);
838bf215546Sopenharmony_ci   }
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci   struct fd_ringbuffer *ibo_state =
841bf215546Sopenharmony_ci      fd6_build_ibo_state(ctx, emit->fs, PIPE_SHADER_FRAGMENT);
842bf215546Sopenharmony_ci   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
843bf215546Sopenharmony_ci      ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci   OUT_PKT7(ring, CP_LOAD_STATE6, 3);
846bf215546Sopenharmony_ci   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
847bf215546Sopenharmony_ci                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
848bf215546Sopenharmony_ci                     CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
849bf215546Sopenharmony_ci                     CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
850bf215546Sopenharmony_ci                     CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(emit->fs)));
851bf215546Sopenharmony_ci   OUT_RB(ring, ibo_state);
852bf215546Sopenharmony_ci
853bf215546Sopenharmony_ci   OUT_PKT4(ring, REG_A6XX_SP_IBO, 2);
854bf215546Sopenharmony_ci   OUT_RB(ring, ibo_state);
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
857bf215546Sopenharmony_ci    * de-duplicate this from program->config_stateobj
858bf215546Sopenharmony_ci    */
859bf215546Sopenharmony_ci   OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
860bf215546Sopenharmony_ci   OUT_RING(ring, ir3_shader_nibo(emit->fs));
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   fd_ringbuffer_del(ibo_state);
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci   return ring;
865bf215546Sopenharmony_ci}
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_cistatic void
868bf215546Sopenharmony_cifd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
869bf215546Sopenharmony_ci{
870bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
871bf215546Sopenharmony_ci   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
872bf215546Sopenharmony_ci   const struct ir3_stream_output_info *info = prog->stream_output;
873bf215546Sopenharmony_ci   struct fd_streamout_stateobj *so = &ctx->streamout;
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci   emit->streamout_mask = 0;
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   if (!info)
878bf215546Sopenharmony_ci      return;
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   for (unsigned i = 0; i < so->num_targets; i++) {
881bf215546Sopenharmony_ci      struct fd_stream_output_target *target =
882bf215546Sopenharmony_ci         fd_stream_output_target(so->targets[i]);
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci      if (!target)
885bf215546Sopenharmony_ci         continue;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci      target->stride = info->stride[i];
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE(i), 3);
890bf215546Sopenharmony_ci      /* VPC_SO[i].BUFFER_BASE_LO: */
891bf215546Sopenharmony_ci      OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
892bf215546Sopenharmony_ci      OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci      struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci      if (so->reset & (1 << i)) {
897bf215546Sopenharmony_ci         assert(so->offsets[i] == 0);
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci         OUT_PKT7(ring, CP_MEM_WRITE, 3);
900bf215546Sopenharmony_ci         OUT_RELOC(ring, offset_bo, 0, 0, 0);
901bf215546Sopenharmony_ci         OUT_RING(ring, target->base.buffer_offset);
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci         OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
904bf215546Sopenharmony_ci         OUT_RING(ring, target->base.buffer_offset);
905bf215546Sopenharmony_ci      } else {
906bf215546Sopenharmony_ci         OUT_PKT7(ring, CP_MEM_TO_REG, 3);
907bf215546Sopenharmony_ci         OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
908bf215546Sopenharmony_ci                           CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
909bf215546Sopenharmony_ci                           CP_MEM_TO_REG_0_CNT(0));
910bf215546Sopenharmony_ci         OUT_RELOC(ring, offset_bo, 0, 0, 0);
911bf215546Sopenharmony_ci      }
912bf215546Sopenharmony_ci
913bf215546Sopenharmony_ci      // After a draw HW would write the new offset to offset_bo
914bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
915bf215546Sopenharmony_ci      OUT_RELOC(ring, offset_bo, 0, 0, 0);
916bf215546Sopenharmony_ci
917bf215546Sopenharmony_ci      so->reset &= ~(1 << i);
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci      emit->streamout_mask |= (1 << i);
920bf215546Sopenharmony_ci   }
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   if (emit->streamout_mask) {
923bf215546Sopenharmony_ci      fd6_emit_add_group(emit, prog->streamout_stateobj, FD6_GROUP_SO,
924bf215546Sopenharmony_ci                         ENABLE_ALL);
925bf215546Sopenharmony_ci   } else if (ctx->last.streamout_mask != 0) {
926bf215546Sopenharmony_ci      /* If we transition from a draw with streamout to one without, turn
927bf215546Sopenharmony_ci       * off streamout.
928bf215546Sopenharmony_ci       */
929bf215546Sopenharmony_ci      fd6_emit_add_group(emit, fd6_context(ctx)->streamout_disable_stateobj,
930bf215546Sopenharmony_ci                         FD6_GROUP_SO, ENABLE_ALL);
931bf215546Sopenharmony_ci   }
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   /* Make sure that any use of our TFB outputs (indirect draw source or shader
934bf215546Sopenharmony_ci    * UBO reads) comes after the TFB output is written.  From the GL 4.6 core
935bf215546Sopenharmony_ci    * spec:
936bf215546Sopenharmony_ci    *
937bf215546Sopenharmony_ci    *     "Buffers should not be bound or in use for both transform feedback and
938bf215546Sopenharmony_ci    *      other purposes in the GL.  Specifically, if a buffer object is
939bf215546Sopenharmony_ci    *      simultaneously bound to a transform feedback buffer binding point
940bf215546Sopenharmony_ci    *      and elsewhere in the GL, any writes to or reads from the buffer
941bf215546Sopenharmony_ci    *      generate undefined values."
942bf215546Sopenharmony_ci    *
943bf215546Sopenharmony_ci    * So we idle whenever SO buffers change.  Note that this function is called
944bf215546Sopenharmony_ci    * on every draw with TFB enabled, so check the dirty flag for the buffers
945bf215546Sopenharmony_ci    * themselves.
946bf215546Sopenharmony_ci    */
947bf215546Sopenharmony_ci   if (ctx->dirty & FD_DIRTY_STREAMOUT)
948bf215546Sopenharmony_ci      fd_wfi(ctx->batch, ring);
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci   ctx->last.streamout_mask = emit->streamout_mask;
951bf215546Sopenharmony_ci}
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci/**
954bf215546Sopenharmony_ci * Stuff that less frequently changes and isn't (yet) moved into stategroups
955bf215546Sopenharmony_ci */
956bf215546Sopenharmony_cistatic void
957bf215546Sopenharmony_cifd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
958bf215546Sopenharmony_ci{
959bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
960bf215546Sopenharmony_ci   const enum fd_dirty_3d_state dirty = emit->dirty;
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci   if (dirty & FD_DIRTY_STENCIL_REF) {
963bf215546Sopenharmony_ci      struct pipe_stencil_ref *sr = &ctx->stencil_ref;
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 1);
966bf215546Sopenharmony_ci      OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0]) |
967bf215546Sopenharmony_ci                        A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
968bf215546Sopenharmony_ci   }
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   if (dirty & FD_DIRTY_VIEWPORT) {
971bf215546Sopenharmony_ci      struct pipe_scissor_state *scissor = &ctx->viewport_scissor;
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci      OUT_REG(ring, A6XX_GRAS_CL_VPORT_XOFFSET(0, ctx->viewport.translate[0]),
974bf215546Sopenharmony_ci              A6XX_GRAS_CL_VPORT_XSCALE(0, ctx->viewport.scale[0]),
975bf215546Sopenharmony_ci              A6XX_GRAS_CL_VPORT_YOFFSET(0, ctx->viewport.translate[1]),
976bf215546Sopenharmony_ci              A6XX_GRAS_CL_VPORT_YSCALE(0, ctx->viewport.scale[1]),
977bf215546Sopenharmony_ci              A6XX_GRAS_CL_VPORT_ZOFFSET(0, ctx->viewport.translate[2]),
978bf215546Sopenharmony_ci              A6XX_GRAS_CL_VPORT_ZSCALE(0, ctx->viewport.scale[2]));
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci      OUT_REG(
981bf215546Sopenharmony_ci         ring,
982bf215546Sopenharmony_ci         A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = scissor->minx,
983bf215546Sopenharmony_ci                                          .y = scissor->miny),
984bf215546Sopenharmony_ci         A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
985bf215546Sopenharmony_ci                                          .y = MAX2(scissor->maxy, 1) - 1));
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci      unsigned guardband_x = fd_calc_guardband(ctx->viewport.translate[0],
988bf215546Sopenharmony_ci                                               ctx->viewport.scale[0], false);
989bf215546Sopenharmony_ci      unsigned guardband_y = fd_calc_guardband(ctx->viewport.translate[1],
990bf215546Sopenharmony_ci                                               ctx->viewport.scale[1], false);
991bf215546Sopenharmony_ci
992bf215546Sopenharmony_ci      OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = guardband_x,
993bf215546Sopenharmony_ci                                                    .vert = guardband_y));
994bf215546Sopenharmony_ci   }
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   /* The clamp ranges are only used when the rasterizer disables
997bf215546Sopenharmony_ci    * depth clip.
998bf215546Sopenharmony_ci    */
999bf215546Sopenharmony_ci   if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER)) &&
1000bf215546Sopenharmony_ci       fd_depth_clip_disabled(ctx)) {
1001bf215546Sopenharmony_ci      float zmin, zmax;
1002bf215546Sopenharmony_ci      util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
1003bf215546Sopenharmony_ci                              &zmin, &zmax);
1004bf215546Sopenharmony_ci
1005bf215546Sopenharmony_ci      OUT_REG(ring, A6XX_GRAS_CL_Z_CLAMP_MIN(0, zmin),
1006bf215546Sopenharmony_ci              A6XX_GRAS_CL_Z_CLAMP_MAX(0, zmax));
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_ci      OUT_REG(ring, A6XX_RB_Z_CLAMP_MIN(zmin), A6XX_RB_Z_CLAMP_MAX(zmax));
1009bf215546Sopenharmony_ci   }
1010bf215546Sopenharmony_ci}
1011bf215546Sopenharmony_ci
1012bf215546Sopenharmony_civoid
1013bf215546Sopenharmony_cifd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
1014bf215546Sopenharmony_ci{
1015bf215546Sopenharmony_ci   struct fd_context *ctx = emit->ctx;
1016bf215546Sopenharmony_ci   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
1017bf215546Sopenharmony_ci   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
1018bf215546Sopenharmony_ci   const struct ir3_shader_variant *vs = emit->vs;
1019bf215546Sopenharmony_ci   const struct ir3_shader_variant *hs = emit->hs;
1020bf215546Sopenharmony_ci   const struct ir3_shader_variant *ds = emit->ds;
1021bf215546Sopenharmony_ci   const struct ir3_shader_variant *gs = emit->gs;
1022bf215546Sopenharmony_ci   const struct ir3_shader_variant *fs = emit->fs;
1023bf215546Sopenharmony_ci   bool needs_border = false;
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   emit_marker6(ring, 5);
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci   /* NOTE: we track fb_read differently than _BLEND_ENABLED since we
1028bf215546Sopenharmony_ci    * might decide to do sysmem in some cases when blend is enabled:
1029bf215546Sopenharmony_ci    */
1030bf215546Sopenharmony_ci   if (fs->fb_read)
1031bf215546Sopenharmony_ci      ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   u_foreach_bit (b, emit->dirty_groups) {
1034bf215546Sopenharmony_ci      enum fd6_state_id group = b;
1035bf215546Sopenharmony_ci      struct fd_ringbuffer *state = NULL;
1036bf215546Sopenharmony_ci      uint32_t enable_mask = ENABLE_ALL;
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci      switch (group) {
1039bf215546Sopenharmony_ci      case FD6_GROUP_VTXSTATE:
1040bf215546Sopenharmony_ci         state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj;
1041bf215546Sopenharmony_ci         fd_ringbuffer_ref(state);
1042bf215546Sopenharmony_ci         break;
1043bf215546Sopenharmony_ci      case FD6_GROUP_VBO:
1044bf215546Sopenharmony_ci         state = build_vbo_state(emit);
1045bf215546Sopenharmony_ci         break;
1046bf215546Sopenharmony_ci      case FD6_GROUP_ZSA:
1047bf215546Sopenharmony_ci         state = fd6_zsa_state(
1048bf215546Sopenharmony_ci            ctx,
1049bf215546Sopenharmony_ci            util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
1050bf215546Sopenharmony_ci            fd_depth_clip_disabled(ctx));
1051bf215546Sopenharmony_ci         fd_ringbuffer_ref(state);
1052bf215546Sopenharmony_ci         break;
1053bf215546Sopenharmony_ci      case FD6_GROUP_LRZ:
1054bf215546Sopenharmony_ci         state = build_lrz(emit, false);
1055bf215546Sopenharmony_ci         if (!state)
1056bf215546Sopenharmony_ci            continue;
1057bf215546Sopenharmony_ci         enable_mask = ENABLE_DRAW;
1058bf215546Sopenharmony_ci         break;
1059bf215546Sopenharmony_ci      case FD6_GROUP_LRZ_BINNING:
1060bf215546Sopenharmony_ci         state = build_lrz(emit, true);
1061bf215546Sopenharmony_ci         if (!state)
1062bf215546Sopenharmony_ci            continue;
1063bf215546Sopenharmony_ci         enable_mask = CP_SET_DRAW_STATE__0_BINNING;
1064bf215546Sopenharmony_ci         break;
1065bf215546Sopenharmony_ci      case FD6_GROUP_SCISSOR:
1066bf215546Sopenharmony_ci         state = build_scissor(emit);
1067bf215546Sopenharmony_ci         break;
1068bf215546Sopenharmony_ci      case FD6_GROUP_PROG:
1069bf215546Sopenharmony_ci         fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG,
1070bf215546Sopenharmony_ci                            ENABLE_ALL);
1071bf215546Sopenharmony_ci         fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
1072bf215546Sopenharmony_ci         fd6_emit_add_group(emit, prog->binning_stateobj,
1073bf215546Sopenharmony_ci                            FD6_GROUP_PROG_BINNING,
1074bf215546Sopenharmony_ci                            CP_SET_DRAW_STATE__0_BINNING);
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci         /* emit remaining streaming program state, ie. what depends on
1077bf215546Sopenharmony_ci          * other emit state, so cannot be pre-baked.
1078bf215546Sopenharmony_ci          */
1079bf215546Sopenharmony_ci         fd6_emit_take_group(emit, fd6_program_interp_state(emit),
1080bf215546Sopenharmony_ci                             FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
1081bf215546Sopenharmony_ci         continue;
1082bf215546Sopenharmony_ci      case FD6_GROUP_RASTERIZER:
1083bf215546Sopenharmony_ci         state = fd6_rasterizer_state(ctx, emit->primitive_restart);
1084bf215546Sopenharmony_ci         fd_ringbuffer_ref(state);
1085bf215546Sopenharmony_ci         break;
1086bf215546Sopenharmony_ci      case FD6_GROUP_PROG_FB_RAST:
1087bf215546Sopenharmony_ci         state = build_prog_fb_rast(emit);
1088bf215546Sopenharmony_ci         break;
1089bf215546Sopenharmony_ci      case FD6_GROUP_BLEND:
1090bf215546Sopenharmony_ci         state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask)
1091bf215546Sopenharmony_ci                    ->stateobj;
1092bf215546Sopenharmony_ci         fd_ringbuffer_ref(state);
1093bf215546Sopenharmony_ci         break;
1094bf215546Sopenharmony_ci      case FD6_GROUP_BLEND_COLOR:
1095bf215546Sopenharmony_ci         state = build_blend_color(emit);
1096bf215546Sopenharmony_ci         break;
1097bf215546Sopenharmony_ci      case FD6_GROUP_IBO:
1098bf215546Sopenharmony_ci         state = build_ibo(emit);
1099bf215546Sopenharmony_ci         break;
1100bf215546Sopenharmony_ci      case FD6_GROUP_CONST:
1101bf215546Sopenharmony_ci         state = fd6_build_user_consts(emit);
1102bf215546Sopenharmony_ci         break;
1103bf215546Sopenharmony_ci      case FD6_GROUP_DRIVER_PARAMS:
1104bf215546Sopenharmony_ci         state = fd6_build_driver_params(emit);
1105bf215546Sopenharmony_ci         break;
1106bf215546Sopenharmony_ci      case FD6_GROUP_PRIMITIVE_PARAMS:
1107bf215546Sopenharmony_ci         state = fd6_build_tess_consts(emit);
1108bf215546Sopenharmony_ci         break;
1109bf215546Sopenharmony_ci      case FD6_GROUP_VS_TEX:
1110bf215546Sopenharmony_ci         needs_border |=
1111bf215546Sopenharmony_ci            fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
1112bf215546Sopenharmony_ci         continue;
1113bf215546Sopenharmony_ci      case FD6_GROUP_HS_TEX:
1114bf215546Sopenharmony_ci         if (hs) {
1115bf215546Sopenharmony_ci            needs_border |= fd6_emit_combined_textures(
1116bf215546Sopenharmony_ci               ring, emit, PIPE_SHADER_TESS_CTRL, hs);
1117bf215546Sopenharmony_ci         }
1118bf215546Sopenharmony_ci         continue;
1119bf215546Sopenharmony_ci      case FD6_GROUP_DS_TEX:
1120bf215546Sopenharmony_ci         if (ds) {
1121bf215546Sopenharmony_ci            needs_border |= fd6_emit_combined_textures(
1122bf215546Sopenharmony_ci               ring, emit, PIPE_SHADER_TESS_EVAL, ds);
1123bf215546Sopenharmony_ci         }
1124bf215546Sopenharmony_ci         continue;
1125bf215546Sopenharmony_ci      case FD6_GROUP_GS_TEX:
1126bf215546Sopenharmony_ci         if (gs) {
1127bf215546Sopenharmony_ci            needs_border |=
1128bf215546Sopenharmony_ci               fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
1129bf215546Sopenharmony_ci         }
1130bf215546Sopenharmony_ci         continue;
1131bf215546Sopenharmony_ci      case FD6_GROUP_FS_TEX:
1132bf215546Sopenharmony_ci         needs_border |=
1133bf215546Sopenharmony_ci            fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
1134bf215546Sopenharmony_ci         continue;
1135bf215546Sopenharmony_ci      case FD6_GROUP_SO:
1136bf215546Sopenharmony_ci         fd6_emit_streamout(ring, emit);
1137bf215546Sopenharmony_ci         continue;
1138bf215546Sopenharmony_ci      case FD6_GROUP_NON_GROUP:
1139bf215546Sopenharmony_ci         fd6_emit_non_ring(ring, emit);
1140bf215546Sopenharmony_ci         continue;
1141bf215546Sopenharmony_ci      default:
1142bf215546Sopenharmony_ci         unreachable("bad state group");
1143bf215546Sopenharmony_ci      }
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ci      fd6_emit_take_group(emit, state, group, enable_mask);
1146bf215546Sopenharmony_ci   }
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   if (needs_border)
1149bf215546Sopenharmony_ci      emit_border_color(ctx, ring);
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_ci   if (emit->num_groups > 0) {
1152bf215546Sopenharmony_ci      OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
1153bf215546Sopenharmony_ci      for (unsigned i = 0; i < emit->num_groups; i++) {
1154bf215546Sopenharmony_ci         struct fd6_state_group *g = &emit->groups[i];
1155bf215546Sopenharmony_ci         unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_ci         assert((g->enable_mask & ~ENABLE_ALL) == 0);
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci         if (n == 0) {
1160bf215546Sopenharmony_ci            OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
1161bf215546Sopenharmony_ci                              CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
1162bf215546Sopenharmony_ci                              CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
1163bf215546Sopenharmony_ci            OUT_RING(ring, 0x00000000);
1164bf215546Sopenharmony_ci            OUT_RING(ring, 0x00000000);
1165bf215546Sopenharmony_ci         } else {
1166bf215546Sopenharmony_ci            OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
1167bf215546Sopenharmony_ci                              CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
1168bf215546Sopenharmony_ci            OUT_RB(ring, g->stateobj);
1169bf215546Sopenharmony_ci         }
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci         if (g->stateobj)
1172bf215546Sopenharmony_ci            fd_ringbuffer_del(g->stateobj);
1173bf215546Sopenharmony_ci      }
1174bf215546Sopenharmony_ci      emit->num_groups = 0;
1175bf215546Sopenharmony_ci   }
1176bf215546Sopenharmony_ci}
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_civoid
1179bf215546Sopenharmony_cifd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
1180bf215546Sopenharmony_ci                  struct ir3_shader_variant *cp)
1181bf215546Sopenharmony_ci{
1182bf215546Sopenharmony_ci   enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
1183bf215546Sopenharmony_ci
1184bf215546Sopenharmony_ci   if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
1185bf215546Sopenharmony_ci                FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
1186bf215546Sopenharmony_ci      struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
1187bf215546Sopenharmony_ci      unsigned bcolor_offset =
1188bf215546Sopenharmony_ci         fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci      bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex,
1191bf215546Sopenharmony_ci                                            bcolor_offset, cp);
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_ci      if (needs_border)
1194bf215546Sopenharmony_ci         emit_border_color(ctx, ring);
1195bf215546Sopenharmony_ci
1196bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
1197bf215546Sopenharmony_ci      OUT_RING(ring, 0);
1198bf215546Sopenharmony_ci
1199bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_HS_TEX_COUNT, 1);
1200bf215546Sopenharmony_ci      OUT_RING(ring, 0);
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_DS_TEX_COUNT, 1);
1203bf215546Sopenharmony_ci      OUT_RING(ring, 0);
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_GS_TEX_COUNT, 1);
1206bf215546Sopenharmony_ci      OUT_RING(ring, 0);
1207bf215546Sopenharmony_ci
1208bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
1209bf215546Sopenharmony_ci      OUT_RING(ring, 0);
1210bf215546Sopenharmony_ci   }
1211bf215546Sopenharmony_ci
1212bf215546Sopenharmony_ci   if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
1213bf215546Sopenharmony_ci      struct fd_ringbuffer *state =
1214bf215546Sopenharmony_ci         fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_ci      OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
1217bf215546Sopenharmony_ci      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
1218bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
1219bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
1220bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
1221bf215546Sopenharmony_ci                        CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp)));
1222bf215546Sopenharmony_ci      OUT_RB(ring, state);
1223bf215546Sopenharmony_ci
1224bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2);
1225bf215546Sopenharmony_ci      OUT_RB(ring, state);
1226bf215546Sopenharmony_ci
1227bf215546Sopenharmony_ci      OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
1228bf215546Sopenharmony_ci      OUT_RING(ring, ir3_shader_nibo(cp));
1229bf215546Sopenharmony_ci
1230bf215546Sopenharmony_ci      fd_ringbuffer_del(state);
1231bf215546Sopenharmony_ci   }
1232bf215546Sopenharmony_ci}
1233bf215546Sopenharmony_ci
/* Emit setup at the beginning of a new cmdstream buffer.  Nothing here may
 * rely on previously programmed state, since there could have been a
 * context switch between ioctls.
 *
 * batch: batch being set up; supplies the screen (via batch->ctx), the
 *        trace object, and the nondraw / tessellation flags read below.
 * ring:  ringbuffer that all packets are emitted into.
 *
 * The body is a fixed sequence of register writes; the order of the
 * statements is preserved exactly as written.
 *
 * NOTE(review): WRITE() is a macro defined outside this excerpt.  It is
 * invoked without a ring argument, so it presumably emits to the local
 * `ring` -- confirm against its definition before renaming anything here.
 */
void
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_screen *screen = batch->ctx->screen;

   /* The state-restore tracepoints are only emitted for draw batches. */
   if (!batch->nondraw) {
      trace_start_state_restore(&batch->trace, ring);
   }

   fd6_cache_inv(batch, ring);

   /* Request invalidation of every state class named in the initializer:
    * all graphics stages and compute, both IBO sets, both shared-const
    * sets, and the bindless fields (0x1f each).
    */
   OUT_REG(ring,
           A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
                                    .ds_state = true, .gs_state = true,
                                    .fs_state = true, .cs_state = true,
                                    .gfx_ibo = true, .cs_ibo = true,
                                    .gfx_shared_const = true,
                                    .cs_shared_const = true,
                                    .gfx_bindless = 0x1f, .cs_bindless = 0x1f));

   OUT_WFI5(ring);

   /* Fixed register initialization.  Apart from TPL1_DBG_ECO_CNTL, which is
    * taken from the per-GPU screen->info table, every value is a hard-coded
    * constant.
    */
   WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x0);
   WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF);
   WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
   WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
   WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
   WRITE(REG_A6XX_TPL1_DBG_ECO_CNTL, screen->info->a6xx.magic.TPL1_DBG_ECO_CNTL);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);

   WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0);
   WRITE(REG_A6XX_GRAS_DBG_ECO_CNTL, 0x880);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000);
   WRITE(REG_A6XX_SP_CHICKEN_BITS, 0x1430);
   WRITE(REG_A6XX_SP_IBO_COUNT, 0);
   WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
   WRITE(REG_A6XX_HLSQ_SHARED_CONSTS, 0);
   WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
   WRITE(REG_A6XX_UCHE_CLIENT_PF, 4);
   WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1);
   WRITE(REG_A6XX_SP_MODE_CONTROL,
         A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
   WRITE(REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
   WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
   WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f);

   WRITE(REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 0);
   WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0);
   WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);

   WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);

   WRITE(REG_A6XX_VPC_POINT_COORD_INVERT, A6XX_VPC_POINT_COORD_INVERT(0).value);
   WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);

   WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value);

   WRITE(REG_A6XX_PC_RASTER_CNTL, 0);

   WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);

   WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);

   WRITE(REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0);
   WRITE(REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
   WRITE(REG_A6XX_GRAS_SC_CNTL, A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
   WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
   WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
   WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
   /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_MODE_CNTL
    * but this seems to kill texture gather offsets.
    */
   WRITE(REG_A6XX_SP_TP_MODE_CNTL, 0xa0 |
         A6XX_SP_TP_MODE_CNTL_ISAMMODE(ISAMMODE_GL));
   WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
   WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
   WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
   WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
   WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);

   emit_marker6(ring, 7);

   OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */

   WRITE(REG_A6XX_VFD_MULTIVIEW_CNTL, 0);

   /* NOTE(review): PC_MODE_CNTL was already written with the same value
    * (0x1f) via WRITE() earlier in this function; this second write looks
    * redundant -- confirm before removing.
    */
   OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */

   /* Clear any potential pending state groups to be safe: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VPC_SO_STREAM_CNTL */

   /* Zero both LRZ control registers (GRAS and RB side). */
   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
   OUT_RING(ring, 0x00000000);

   /* Initialize VFD_FETCH[n].SIZE to zero to avoid iova faults trying
    * to fetch from a VFD_FETCH[n].BASE which we've potentially inherited
    * from another process:
    */
   for (int32_t i = 0; i < 32; i++) {
      OUT_PKT4(ring, REG_A6XX_VFD_FETCH_SIZE(i), 1);
      OUT_RING(ring, 0);
   }

   /* This happens after all drawing has been emitted to the draw CS, so we know
    * whether we need the tess BO pointers.
    */
   if (batch->tessellation) {
      assert(screen->tess_bo);
      /* Two-dword write: the reloc supplies the address of tess_bo. */
      OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
      OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
      /* Updating PC_TESSFACTOR_ADDR could race with the next draw which uses it. */
      OUT_WFI5(ring);
   }

   /* Matches the trace_start_state_restore() call at the top. */
   if (!batch->nondraw) {
      trace_end_state_restore(&batch->trace, ring);
   }
}
1379bf215546Sopenharmony_ci
1380bf215546Sopenharmony_cistatic void
1381bf215546Sopenharmony_cifd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
1382bf215546Sopenharmony_ci               unsigned dst_off, struct pipe_resource *src, unsigned src_off,
1383bf215546Sopenharmony_ci               unsigned sizedwords)
1384bf215546Sopenharmony_ci{
1385bf215546Sopenharmony_ci   struct fd_bo *src_bo = fd_resource(src)->bo;
1386bf215546Sopenharmony_ci   struct fd_bo *dst_bo = fd_resource(dst)->bo;
1387bf215546Sopenharmony_ci   unsigned i;
1388bf215546Sopenharmony_ci
1389bf215546Sopenharmony_ci   for (i = 0; i < sizedwords; i++) {
1390bf215546Sopenharmony_ci      OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
1391bf215546Sopenharmony_ci      OUT_RING(ring, 0x00000000);
1392bf215546Sopenharmony_ci      OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
1393bf215546Sopenharmony_ci      OUT_RELOC(ring, src_bo, src_off, 0, 0);
1394bf215546Sopenharmony_ci
1395bf215546Sopenharmony_ci      dst_off += 4;
1396bf215546Sopenharmony_ci      src_off += 4;
1397bf215546Sopenharmony_ci   }
1398bf215546Sopenharmony_ci}
1399bf215546Sopenharmony_ci
/* this is *almost* the same as fd6_cache_flush().. which I guess
 * could be re-worked to be something a bit more generic w/ param
 * indicating what needs to be flushed..  although that would mean
 * figuring out which events trigger what state to flush..
 *
 * Emits a fixed event/wait sequence into the draw ring of the context's
 * current batch: an RB_DONE_TS event followed by a wait on its sequence
 * number, color and depth CCU flush events, a CACHE_FLUSH_TS event, a
 * wait-for-idle, a CACHE_INVALIDATE event, and finally a wait on the
 * CACHE_FLUSH_TS sequence number.
 *
 * Installed as ctx->framebuffer_barrier by fd6_emit_init().
 */
static void
fd6_framebuffer_barrier(struct fd_context *ctx) assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   /* Acquired here; unlocked and unreferenced at the end of the function. */
   struct fd_batch *batch = fd_context_batch_locked(ctx);
   struct fd_ringbuffer *ring = batch->draw;
   unsigned seqno;

   fd_batch_needs_flush(batch);

   /* NOTE(review): the final fd6_event_write() argument presumably selects
    * whether a timestamp/seqno is written for the event -- confirm against
    * its definition.
    */
   seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);

   /* Poll memory until the location named by control_ptr() matches the
    * RB_DONE_TS sequence number (all mask bits set).
    *
    * NOTE(review): control_ptr() supplies the remaining OUT_RELOC arguments,
    * so it must be a macro; its `seqno` argument may be a member name rather
    * than the local variable -- check its definition before renaming the
    * local.
    */
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
                     CP_WAIT_REG_MEM_0_POLL_MEMORY);
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
   OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);

   /* seqno is reused: from here on it holds the CACHE_FLUSH_TS value. */
   seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
   fd_wfi(batch, ring);

   fd6_event_write(batch, ring, CACHE_INVALIDATE, false);

   /* Wait until the memory location is >= the CACHE_FLUSH_TS seqno. */
   OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
   OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}
1441bf215546Sopenharmony_ci
1442bf215546Sopenharmony_civoid
1443bf215546Sopenharmony_cifd6_emit_init_screen(struct pipe_screen *pscreen)
1444bf215546Sopenharmony_ci{
1445bf215546Sopenharmony_ci   struct fd_screen *screen = fd_screen(pscreen);
1446bf215546Sopenharmony_ci   screen->emit_ib = fd6_emit_ib;
1447bf215546Sopenharmony_ci   screen->mem_to_mem = fd6_mem_to_mem;
1448bf215546Sopenharmony_ci}
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_civoid
1451bf215546Sopenharmony_cifd6_emit_init(struct pipe_context *pctx) disable_thread_safety_analysis
1452bf215546Sopenharmony_ci{
1453bf215546Sopenharmony_ci   struct fd_context *ctx = fd_context(pctx);
1454bf215546Sopenharmony_ci   ctx->framebuffer_barrier = fd6_framebuffer_barrier;
1455bf215546Sopenharmony_ci}
1456