1/*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/format/u_format.h"
29#include "util/u_inlines.h"
30#include "util/u_memory.h"
31#include "util/u_string.h"
32
33#include "freedreno_draw.h"
34#include "freedreno_resource.h"
35#include "freedreno_state.h"
36
37#include "fd4_context.h"
38#include "fd4_draw.h"
39#include "fd4_emit.h"
40#include "fd4_format.h"
41#include "fd4_gmem.h"
42#include "fd4_program.h"
43#include "fd4_zsa.h"
44
45static void
46fd4_gmem_emit_set_prog(struct fd_context *ctx, struct fd4_emit *emit,
47                       struct fd_program_stateobj *prog)
48{
49   emit->skip_consts = true;
50   emit->key.vs = prog->vs;
51   emit->key.fs = prog->fs;
52   emit->prog = fd4_program_state(
53      ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));
54   /* reset the fd4_emit_get_*p cache */
55   emit->vs = NULL;
56   emit->fs = NULL;
57}
58
59static void
60emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
61         struct pipe_surface **bufs, const uint32_t *bases, uint32_t bin_w,
62         bool decode_srgb)
63{
64   enum a4xx_tile_mode tile_mode;
65   unsigned i;
66
67   if (bin_w) {
68      tile_mode = 2;
69   } else {
70      tile_mode = TILE4_LINEAR;
71   }
72
73   for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
74      enum a4xx_color_fmt format = 0;
75      enum a3xx_color_swap swap = WZYX;
76      bool srgb = false;
77      struct fd_resource *rsc = NULL;
78      uint32_t stride = 0;
79      uint32_t base = 0;
80      uint32_t offset = 0;
81
82      if ((i < nr_bufs) && bufs[i]) {
83         struct pipe_surface *psurf = bufs[i];
84         enum pipe_format pformat = psurf->format;
85
86         rsc = fd_resource(psurf->texture);
87
88         /* In case we're drawing to Z32F_S8, the "color" actually goes to
89          * the stencil
90          */
91         if (rsc->stencil) {
92            rsc = rsc->stencil;
93            pformat = rsc->b.b.format;
94            if (bases)
95               bases++;
96         }
97
98         format = fd4_pipe2color(pformat);
99         swap = fd4_pipe2swap(pformat);
100
101         if (decode_srgb)
102            srgb = util_format_is_srgb(pformat);
103         else
104            pformat = util_format_linear(pformat);
105
106         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
107
108         offset = fd_resource_offset(rsc, psurf->u.tex.level,
109                                     psurf->u.tex.first_layer);
110
111         if (bin_w) {
112            stride = bin_w << fdl_cpp_shift(&rsc->layout);
113
114            if (bases) {
115               base = bases[i];
116            }
117         } else {
118            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
119         }
120      } else if ((i < nr_bufs) && bases) {
121         base = bases[i];
122      }
123
124      OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
125      OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
126                        A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
127                        A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
128                        A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
129                        COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
130      if (bin_w || (i >= nr_bufs) || !bufs[i]) {
131         OUT_RING(ring, base);
132         OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
133      } else {
134         OUT_RELOC(ring, rsc->bo, offset, 0, 0);
135         /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
136          * not sure if we need to skip it for bypass or
137          * not.
138          */
139         OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
140      }
141   }
142}
143
144static bool
145use_hw_binning(struct fd_batch *batch)
146{
147   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
148
149   /* workaround: Like on a3xx, hw binning and scissor optimization
150    * don't play nice together.
151    *
152    * Disable binning if scissor optimization is used.
153    */
154   if (gmem->minx || gmem->miny)
155      return false;
156
157   if ((gmem->maxpw * gmem->maxph) > 32)
158      return false;
159
160   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
161      return false;
162
163   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
164}
165
166/* transfer from gmem to system memory (ie. normal RAM) */
167
168static void
169emit_gmem2mem_surf(struct fd_batch *batch, bool stencil, uint32_t base,
170                   struct pipe_surface *psurf)
171{
172   struct fd_ringbuffer *ring = batch->gmem;
173   struct fd_resource *rsc = fd_resource(psurf->texture);
174   enum pipe_format pformat = psurf->format;
175   uint32_t offset, pitch;
176
177   if (!rsc->valid)
178      return;
179
180   if (stencil) {
181      assert(rsc->stencil);
182      rsc = rsc->stencil;
183      pformat = rsc->b.b.format;
184   }
185
186   offset =
187      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
188   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
189
190   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
191
192   OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
193   OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
194                     A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
195                     A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
196   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
197   OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(pitch));
198   OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
199                     A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
200                     A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
201                     A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
202                     A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
203
204   fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
205            DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
206}
207
208static void
209fd4_emit_tile_gmem2mem(struct fd_batch *batch,
210                       const struct fd_tile *tile) assert_dt
211{
212   struct fd_context *ctx = batch->ctx;
213   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
214   struct fd_ringbuffer *ring = batch->gmem;
215   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
216   struct fd4_emit emit = {
217      .debug = &ctx->debug,
218      .vtx = &ctx->solid_vbuf_state,
219   };
220   fd4_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);
221
222   OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
223   OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
224
225   OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
226   OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
227                     A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
228                     A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
229                     A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
230                     A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
231                     A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
232                     A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
233                     A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
234   OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
235
236   OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
237   OUT_RING(ring, 0xff000000 | A4XX_RB_STENCILREFMASK_STENCILREF(0) |
238                     A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
239                     A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
240   OUT_RING(ring, 0xff000000 | A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
241                     A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
242                     A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
243
244   OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
245   OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
246
247   fd_wfi(batch, ring);
248
249   OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
250   OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
251
252   OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
253   OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width / 2.0f));
254   OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width / 2.0f));
255   OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height / 2.0f));
256   OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height / 2.0f));
257   OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0f));
258   OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0f));
259
260   OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
261   OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 0xa); /* XXX */
262
263   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
264   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
265                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
266                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
267                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
268
269   OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
270   OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
271
272   OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
273   OUT_RING(ring, 0x00000002);
274
275   OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
276   OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
277                     A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
278   OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
279                     A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
280
281   OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
282   OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
283   OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
284
285   fd4_program_emit(ring, &emit, 0, NULL);
286   fd4_emit_vertex_bufs(ring, &emit);
287
288   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
289      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
290      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
291         emit_gmem2mem_surf(batch, false, gmem->zsbuf_base[0], pfb->zsbuf);
292      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
293         emit_gmem2mem_surf(batch, true, gmem->zsbuf_base[1], pfb->zsbuf);
294   }
295
296   if (batch->resolve & FD_BUFFER_COLOR) {
297      unsigned i;
298      for (i = 0; i < pfb->nr_cbufs; i++) {
299         if (!pfb->cbufs[i])
300            continue;
301         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
302            continue;
303         emit_gmem2mem_surf(batch, false, gmem->cbuf_base[i], pfb->cbufs[i]);
304      }
305   }
306
307   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
308   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
309                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
310                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
311                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
312}
313
314/* transfer from system memory to gmem */
315
316static void
317emit_mem2gmem_surf(struct fd_batch *batch, const uint32_t *bases,
318                   struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
319{
320   struct fd_ringbuffer *ring = batch->gmem;
321   struct pipe_surface *zsbufs[2];
322
323   emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
324
325   if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
326      /* The gmem_restore_tex logic will put the first buffer's stencil
327       * as color. Supply it with the proper information to make that
328       * happen.
329       */
330      zsbufs[0] = zsbufs[1] = bufs[0];
331      bufs = zsbufs;
332      nr_bufs = 2;
333   }
334
335   fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
336
337   fd4_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
338            DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX4_SIZE_8_BIT, 0, 0, NULL);
339}
340
341static void
342fd4_emit_tile_mem2gmem(struct fd_batch *batch,
343                       const struct fd_tile *tile) assert_dt
344{
345   struct fd_context *ctx = batch->ctx;
346   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
347   struct fd_ringbuffer *ring = batch->gmem;
348   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
349   struct fd4_emit emit = {
350      .debug = &ctx->debug,
351      .vtx = &ctx->blit_vbuf_state,
352      .sprite_coord_enable = 1,
353      .no_decode_srgb = true,
354   };
355   /* NOTE: They all use the same VP, this is for vtx bufs. */
356   fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
357
358   unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
359   float x0, y0, x1, y1;
360   unsigned bin_w = tile->bin_w;
361   unsigned bin_h = tile->bin_h;
362   unsigned i;
363
364   /* write texture coordinates to vertexbuf: */
365   x0 = ((float)tile->xoff) / ((float)pfb->width);
366   x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
367   y0 = ((float)tile->yoff) / ((float)pfb->height);
368   y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
369
370   OUT_PKT3(ring, CP_MEM_WRITE, 5);
371   OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
372   OUT_RING(ring, fui(x0));
373   OUT_RING(ring, fui(y0));
374   OUT_RING(ring, fui(x1));
375   OUT_RING(ring, fui(y1));
376
377   for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
378      mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
379
380      OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
381      OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
382                        A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
383
384      OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
385      OUT_RING(
386         ring,
387         A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
388            A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
389            A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
390            A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
391            A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
392            A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
393   }
394
395   OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
396   OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
397                     A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
398                     A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
399                     A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
400                     A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
401                     A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
402                     A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
403                     A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
404
405   OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
406   OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
407
408   OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
409   OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
410
411   OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
412   OUT_RING(ring, 0x280000); /* XXX GRAS_CL_CLIP_CNTL */
413
414   OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
415   OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
416                     A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
417
418   OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
419   OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w / 2.0f));
420   OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w / 2.0f));
421   OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h / 2.0f));
422   OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h / 2.0f));
423   OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0f));
424   OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0f));
425
426   OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
427   OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
428                     A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
429   OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
430                     A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
431
432   OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
433   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
434                     A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
435   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
436                     A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
437
438   OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
439   OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
440                     A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
441
442   OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
443   OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
444                     A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
445                     A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
446                     A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
447                     A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
448                     A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
449                     A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
450                     A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
451   OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
452
453   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
454   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
455                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
456                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
457                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
458
459   OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
460   OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
461                     A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));
462
463   OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
464   OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
465   OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
466
467   fd4_emit_vertex_bufs(ring, &emit);
468
469   /* for gmem pitch/base calculations, we need to use the non-
470    * truncated tile sizes:
471    */
472   bin_w = gmem->bin_w;
473   bin_h = gmem->bin_h;
474
475   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
476      fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);
477      fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
478      emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs,
479                         bin_w);
480   }
481
482   if (fd_gmem_needs_restore(batch, tile,
483                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
484      switch (pfb->zsbuf->format) {
485      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
486      case PIPE_FORMAT_Z32_FLOAT:
487         if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
488            fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);
489         else
490            fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);
491
492         OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
493         OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE |
494                           A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
495                           A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
496                           A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
497
498         OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
499         OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
500
501         OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
502         OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
503
504         break;
505      default:
506         /* Non-float can use a regular color write. It's split over 8-bit
507          * components, so half precision is always sufficient.
508          */
509         fd4_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);
510         break;
511      }
512      fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
513      emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
514   }
515
516   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
517   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
518                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
519                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
520
521   OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
522   OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
523                     A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
524                     0x00010000); /* XXX */
525}
526
527static void
528patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
529{
530   unsigned i;
531   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
532      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
533      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
534   }
535   util_dynarray_clear(&batch->draw_patches);
536}
537
538/* for rendering directly to system memory: */
539static void
540fd4_emit_sysmem_prep(struct fd_batch *batch) assert_dt
541{
542   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
543   struct fd_ringbuffer *ring = batch->gmem;
544
545   fd4_emit_restore(batch, ring);
546
547   OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
548   OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
549                     A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
550
551   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
552
553   /* setup scissor/offset for current tile: */
554   OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
555   OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) | A4XX_RB_BIN_OFFSET_Y(0));
556
557   OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
558   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
559                     A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
560   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
561                     A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
562
563   OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
564   OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
565                     A4XX_RB_MODE_CONTROL_HEIGHT(0) | 0x00c00000); /* XXX */
566
567   OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
568   OUT_RING(ring, 0x8);
569
570   patch_draws(batch, IGNORE_VISIBILITY);
571}
572
573static void
574update_vsc_pipe(struct fd_batch *batch) assert_dt
575{
576   struct fd_context *ctx = batch->ctx;
577   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
578   struct fd4_context *fd4_ctx = fd4_context(ctx);
579   struct fd_ringbuffer *ring = batch->gmem;
580   int i;
581
582   OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
583   OUT_RELOC(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
584
585   OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
586   for (i = 0; i < 8; i++) {
587      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
588      OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
589                        A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
590                        A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
591                        A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
592   }
593
594   OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
595   for (i = 0; i < 8; i++) {
596      if (!ctx->vsc_pipe_bo[i]) {
597         ctx->vsc_pipe_bo[i] = fd_bo_new(
598            ctx->dev, 0x40000, 0, "vsc_pipe[%u]", i);
599      }
600      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
601                0); /* VSC_PIPE_DATA_ADDRESS[i] */
602   }
603
604   OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
605   for (i = 0; i < 8; i++) {
606      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
607                        32); /* VSC_PIPE_DATA_LENGTH[i] */
608   }
609}
610
611static void
612emit_binning_pass(struct fd_batch *batch) assert_dt
613{
614   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
615   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
616   struct fd_ringbuffer *ring = batch->gmem;
617   int i;
618
619   uint32_t x1 = gmem->minx;
620   uint32_t y1 = gmem->miny;
621   uint32_t x2 = gmem->minx + gmem->width - 1;
622   uint32_t y2 = gmem->miny + gmem->height - 1;
623
624   OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
625   OUT_RING(ring, A4XX_PC_BINNING_COMMAND_BINNING_ENABLE);
626
627   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
628   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
629                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
630                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
631                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
632
633   OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
634   OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
635                     A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
636
637   /* setup scissor/offset for whole screen: */
638   OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
639   OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(x1) | A4XX_RB_BIN_OFFSET_Y(y1));
640
641   OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
642   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
643                     A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
644   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
645                     A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
646
647   for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
648      OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
649      OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
650                        A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
651   }
652
653   /* emit IB to binning drawcmds: */
654   fd4_emit_ib(ring, batch->binning);
655
656   fd_reset_wfi(batch);
657   fd_wfi(batch, ring);
658
659   /* and then put stuff back the way it was: */
660
661   OUT_PKT0(ring, REG_A4XX_PC_BINNING_COMMAND, 1);
662   OUT_RING(ring, 0x00000000);
663
664   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
665   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
666                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
667                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
668                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
669
670   fd_event_write(batch, ring, CACHE_FLUSH);
671   fd_wfi(batch, ring);
672}
673
674/* before first tile */
675static void
676fd4_emit_tile_init(struct fd_batch *batch) assert_dt
677{
678   struct fd_ringbuffer *ring = batch->gmem;
679   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
680   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
681
682   fd4_emit_restore(batch, ring);
683
684   OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
685   OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
686                     A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
687
688   update_vsc_pipe(batch);
689
690   fd_wfi(batch, ring);
691   OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
692   OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
693                     A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
694
695   if (use_hw_binning(batch)) {
696      OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
697      OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
698                        A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
699
700      OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
701      OUT_RING(ring, A4XX_RB_RENDER_CONTROL_BINNING_PASS |
702                        A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | 0x8);
703
704      /* emit hw binning pass: */
705      emit_binning_pass(batch);
706
707      patch_draws(batch, USE_VISIBILITY);
708   } else {
709      patch_draws(batch, IGNORE_VISIBILITY);
710   }
711
712   OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
713   OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
714                     A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
715                     A4XX_RB_MODE_CONTROL_ENABLE_GMEM);
716}
717
718/* before mem2gmem */
719static void
720fd4_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
721{
722   struct fd_ringbuffer *ring = batch->gmem;
723   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
724   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
725
726   if (pfb->zsbuf) {
727      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
728      uint32_t cpp = rsc->layout.cpp;
729
730      OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
731      OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
732                        A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(
733                           fd4_pipe2depth(pfb->zsbuf->format)));
734      OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
735      OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
736
737      OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
738      if (rsc->stencil) {
739         OUT_RING(ring,
740                  A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
741                     A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
742         OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->layout.cpp *
743                                              gmem->bin_w));
744      } else {
745         OUT_RING(ring, 0x00000000);
746         OUT_RING(ring, 0x00000000);
747      }
748   } else {
749      OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
750      OUT_RING(ring, 0x00000000);
751      OUT_RING(ring, 0x00000000);
752      OUT_RING(ring, 0x00000000);
753
754      OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
755      OUT_RING(ring, 0); /* RB_STENCIL_INFO */
756      OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
757   }
758
759   OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
760   if (pfb->zsbuf) {
761      OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
762                        fd4_pipe2depth(pfb->zsbuf->format)));
763   } else {
764      OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
765   }
766}
767
768/* before IB to rendering cmds: */
769static void
770fd4_emit_tile_renderprep(struct fd_batch *batch,
771                         const struct fd_tile *tile) assert_dt
772{
773   struct fd_context *ctx = batch->ctx;
774   struct fd4_context *fd4_ctx = fd4_context(ctx);
775   struct fd_ringbuffer *ring = batch->gmem;
776   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
777   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
778
779   uint32_t x1 = tile->xoff;
780   uint32_t y1 = tile->yoff;
781   uint32_t x2 = tile->xoff + tile->bin_w - 1;
782   uint32_t y2 = tile->yoff + tile->bin_h - 1;
783
784   if (use_hw_binning(batch)) {
785      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
786      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
787
788      assert(pipe->w && pipe->h);
789
790      fd_event_write(batch, ring, HLSQ_FLUSH);
791      fd_wfi(batch, ring);
792
793      OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
794      OUT_RING(ring, A4XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
795                        A4XX_PC_VSTREAM_CONTROL_N(tile->n));
796
797      OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
798      OUT_RELOC(ring, pipe_bo, 0, 0,
799                0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
800      OUT_RELOC(ring, fd4_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <-
801                                                VSC_SIZE_ADDRESS + (p * 4) */
802                (tile->p * 4), 0, 0);
803   } else {
804      OUT_PKT0(ring, REG_A4XX_PC_VSTREAM_CONTROL, 1);
805      OUT_RING(ring, 0x00000000);
806   }
807
808   OUT_PKT3(ring, CP_SET_BIN, 3);
809   OUT_RING(ring, 0x00000000);
810   OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
811   OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
812
813   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w,
814            true);
815
816   /* setup scissor/offset for current tile: */
817   OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
818   OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
819                     A4XX_RB_BIN_OFFSET_Y(tile->yoff));
820
821   OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
822   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
823                     A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
824   OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
825                     A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
826
827   OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
828   OUT_RING(ring, 0x8);
829}
830
831void
832fd4_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
833{
834   struct fd_context *ctx = fd_context(pctx);
835
836   ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
837   ctx->emit_tile_init = fd4_emit_tile_init;
838   ctx->emit_tile_prep = fd4_emit_tile_prep;
839   ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
840   ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
841   ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
842}
843