1/*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/format/u_format.h"
29#include "util/u_inlines.h"
30#include "util/u_memory.h"
31#include "util/u_string.h"
32
33#include "freedreno_draw.h"
34#include "freedreno_resource.h"
35#include "freedreno_state.h"
36
37#include "fd5_context.h"
38#include "fd5_draw.h"
39#include "fd5_emit.h"
40#include "fd5_format.h"
41#include "fd5_gmem.h"
42#include "fd5_program.h"
43#include "fd5_zsa.h"
44
/*
 * Emit color render target (MRT) state for all A5XX_MAX_RENDER_TARGETS
 * slots.  With a non-NULL 'gmem' the targets are addressed inside the
 * on-chip tile buffer (bases from gmem->cbuf_base[]); with gmem==NULL
 * the system-memory resources are programmed directly.  Slots with no
 * bound surface are programmed with format 0 and zero addresses.
 */
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      /* gmem contents are always tiled; for sysmem the tile mode is
       * refined below from the resource layout once the surface is known:
       */
      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         /* only single-layer render targets are handled here: */
         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            /* in gmem, one bin's worth of the surface with bin-width pitch: */
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      /* gmem offsets (and empty slots) are plain values, sysmem addresses
       * need a reloc against the backing bo:
       */
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}
132
133static void
134emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
135        const struct fd_gmem_stateobj *gmem)
136{
137   if (zsbuf) {
138      struct fd_resource *rsc = fd_resource(zsbuf->texture);
139      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
140      uint32_t cpp = rsc->layout.cpp;
141      uint32_t stride = 0;
142      uint32_t size = 0;
143
144      if (gmem) {
145         stride = cpp * gmem->bin_w;
146         size = stride * gmem->bin_h;
147      } else {
148         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
149         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
150      }
151
152      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
153      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
154      if (gmem) {
155         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
156         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
157      } else {
158         OUT_RELOC(ring, rsc->bo,
159            fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
160            0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
161      }
162      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
163      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
164
165      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
166      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
167
168      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
169      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
170      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
171      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
172
173      if (rsc->lrz) {
174         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
175         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
176         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
177
178         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
179         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
180      } else {
181         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
182         OUT_RING(ring, 0x00000000);
183         OUT_RING(ring, 0x00000000);
184         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
185
186         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
187         OUT_RING(ring, 0x00000000);
188         OUT_RING(ring, 0x00000000);
189      }
190
191      if (rsc->stencil) {
192         if (gmem) {
193            stride = 1 * gmem->bin_w;
194            size = stride * gmem->bin_h;
195         } else {
196            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
197            size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
198         }
199
200         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
201         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
202         if (gmem) {
203            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
204            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
205         } else {
206            OUT_RELOC(ring, rsc->stencil->bo,
207               fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
208                      0, 0); /* RB_STENCIL_BASE_LO/HI */
209         }
210         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
211         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
212      } else {
213         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
214         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
215      }
216   } else {
217      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
218      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
219      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
220      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
221      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
222      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
223
224      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
225      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
226
227      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
228      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
229      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
230      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
231
232      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
233      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
234   }
235}
236
237static void
238emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
239{
240   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);
241
242   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
243   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
244   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
245                     COND(samples == MSAA_ONE,
246                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
247
248   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
249   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
250   OUT_RING(ring,
251            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
252               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
253
254   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
255   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
256   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
257                     COND(samples == MSAA_ONE,
258                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
259}
260
261static bool
262use_hw_binning(struct fd_batch *batch)
263{
264   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
265
266   /* workaround: Like on a3xx, hw binning and scissor optimization
267    * don't play nice together.
268    *
269    * Disable binning if scissor optimization is used.
270    */
271   if (gmem->minx || gmem->miny)
272      return false;
273
274   if ((gmem->maxpw * gmem->maxph) > 32)
275      return false;
276
277   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
278      return false;
279
280   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
281          (batch->num_draws > 0);
282}
283
284static void
285patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
286{
287   unsigned i;
288   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
289      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
290      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
291   }
292   util_dynarray_clear(&batch->draw_patches);
293}
294
/*
 * Program the VSC (visibility stream compressor) state for the binning
 * pass: bin size, the 16 per-pipe configs, and the bo's that the hw
 * writes per-pipe visibility streams into.
 */
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   /* the hw has a fixed set of 16 visibility stream pipes: */
   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   /* lazily allocate the per-pipe stream bo's; one lo/hi address pair
    * per pipe, hence 32 dwords:
    */
   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      /* NOTE(review): length is bo size minus 32 — presumably reserving
       * a small tail for the hw; confirm against a5xx register docs.
       */
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}
338
/*
 * Emit the binning pass: replay the batch's binning IB with the hw in
 * BINNING mode so the VSC fills the per-pipe visibility streams that
 * the per-tile rendering passes consume via CP_SET_BIN_DATA5.
 */
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* full render area, inclusive coordinates: */
   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   /* wait for the visibility stream writes to land before they are read: */
   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   /* leave binning-pass mode again: */
   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}
392
/* before first tile:
 * one-time per-batch setup for gmem rendering — restore state, configure
 * the CCU for gmem, emit sysmem-addressed zs/mrt state, and (optionally)
 * run the hw binning pass before switching to GMEM render mode.
 */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}
452
/* before mem2gmem:
 * per-tile setup — program the tile's scissor/resolve window and, when
 * hw binning is used, point the CP at this tile's visibility stream.
 */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   /* tile bounds, inclusive coordinates: */
   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      /* use visibility stream (override disabled): */
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      /* point the CP at this tile's slice of the visibility stream: */
      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);     /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      /* no binning data; draw everything in every tile: */
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}
500
501/*
502 * transfer from system memory to gmem
503 */
504
/*
 * Emit a blit that restores one surface from system memory into gmem at
 * the given gmem base offset.  'buf' selects which gmem buffer the blit
 * targets (color MRT slot, depth, or separate stencil).
 */
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      /* temporarily bind the z/s resource as MRT0 so the blit path can
       * read it as color:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer),
         0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   /* destination in gmem: one bin's worth at bin-width pitch: */
   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}
563
/*
 * Restore (mem2gmem) all buffers this tile needs from system memory
 * into gmem, per the batch's restore flags.
 */
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   //	emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         /* only restore cbufs that were marked for restore: */
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      /* with no separate stencil, BLIT_ZS restores both depth+stencil: */
      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}
604
605/* before IB to rendering cmds: */
606static void
607fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
608{
609   struct fd_ringbuffer *ring = batch->gmem;
610   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
611   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
612
613   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
614   OUT_RING(ring,
615            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
616
617   emit_zs(ring, pfb->zsbuf, gmem);
618   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
619   emit_msaa(ring, pfb->samples);
620}
621
622/*
623 * transfer from gmem to system memory (ie. normal RAM)
624 */
625
/*
 * Emit a resolve blit for one surface from its gmem base offset back to
 * system memory.  Skipped entirely if the resource never became valid
 * (nothing was ever rendered to it).
 */
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   /* resolve destination may be tiled in sysmem: */
   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   /* MSAA resolve-on-store is not wired up yet: */
   //	bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}
672
673static void
674fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
675{
676   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
677   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
678
679   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
680      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
681
682      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
683         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
684      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
685         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
686   }
687
688   if (batch->resolve & FD_BUFFER_COLOR) {
689      unsigned i;
690      for (i = 0; i < pfb->nr_cbufs; i++) {
691         if (!pfb->cbufs[i])
692            continue;
693         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
694            continue;
695         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
696                            BLIT_MRT0 + i);
697      }
698   }
699}
700
701static void
702fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
703{
704   struct fd_ringbuffer *ring = batch->gmem;
705
706   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
707   OUT_RING(ring, 0x0);
708
709   fd5_emit_lrz_flush(batch, ring);
710
711   fd5_cache_flush(batch, ring);
712   fd5_set_render_mode(batch->ctx, ring, BYPASS);
713}
714
/*
 * Per-batch setup for sysmem (bypass) rendering: restore state, program
 * the CCU for bypass, and point the scissor/resolve window and zs/mrt
 * state at the full framebuffer in system memory.
 */
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* scissor/resolve window covers the whole framebuffer: */
   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   /* no visibility stream in sysmem mode; draw everything: */
   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   /* zs/mrt programmed with sysmem addresses (gmem == NULL): */
   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}
780
781static void
782fd5_emit_sysmem_fini(struct fd_batch *batch)
783{
784   struct fd_ringbuffer *ring = batch->gmem;
785
786   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
787   OUT_RING(ring, 0x0);
788
789   fd5_emit_lrz_flush(batch, ring);
790
791   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
792   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
793}
794
795void
796fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
797{
798   struct fd_context *ctx = fd_context(pctx);
799
800   ctx->emit_tile_init = fd5_emit_tile_init;
801   ctx->emit_tile_prep = fd5_emit_tile_prep;
802   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
803   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
804   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
805   ctx->emit_tile_fini = fd5_emit_tile_fini;
806   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
807   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
808}
809