1/*
2 * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_memory.h"
29#include "util/u_prim.h"
30#include "util/u_string.h"
31
32#include "freedreno_resource.h"
33#include "freedreno_state.h"
34
35#include "fd2_context.h"
36#include "fd2_draw.h"
37#include "fd2_emit.h"
38#include "fd2_program.h"
39#include "fd2_util.h"
40#include "fd2_zsa.h"
41
42static void
43emit_cacheflush(struct fd_ringbuffer *ring)
44{
45   unsigned i;
46
47   for (i = 0; i < 12; i++) {
48      OUT_PKT3(ring, CP_EVENT_WRITE, 1);
49      OUT_RING(ring, CACHE_FLUSH);
50   }
51}
52
53static void
54emit_vertexbufs(struct fd_context *ctx) assert_dt
55{
56   struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
57   struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
58   struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
59   unsigned i;
60
61   if (!vtx->num_elements)
62      return;
63
64   for (i = 0; i < vtx->num_elements; i++) {
65      struct pipe_vertex_element *elem = &vtx->pipe[i];
66      struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
67      bufs[i].offset = vb->buffer_offset;
68      bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
69      bufs[i].prsc = vb->buffer.resource;
70   }
71
72   // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
73   // CONST(20,0) (or CONST(26,0) in soliv_vp)
74
75   fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
76   fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
77}
78
/*
 * Emit a single draw into one ring (the draw ring or the binning ring).
 *
 * ctx:          context whose screen and current batch are consulted
 * info:         draw info (index size, index bounds, primitive mode)
 * draw:         start/count of this draw
 * ring:         ringbuffer the packets are written to
 * index_offset: passed through unchanged to fd_draw_emit()
 * binning:      true when emitting for the binning ring; forces
 *               IGNORE_VISIBILITY and, on a20x, uploads the batch's
 *               num_vertices as a constant
 *
 * The packet order is significant.  a20x and non-a20x parts take different
 * paths both before and after the actual draw packet.
 */
static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
          const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring,
          unsigned index_offset, bool binning) assert_dt
{
   /* non-indexed draws apply the start vertex through VGT_INDX_OFFSET,
    * indexed draws program zero
    */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, info->index_size ? 0 : draw->start);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   if (is_a20x(ctx->screen)) {
      /* wait for DMA to finish and
       * dummy draw one triangle with indexes 0,0,0.
       * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
       *
       * this workaround is for a HW bug related to DMA alignment:
       * it is necessary for indexed draws and possibly also
       * draws that read binning data
       */
      OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
      OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
      OUT_RING(ring, 0x00000001);

      /* the dummy draw reads from the solid vertex buffer at offset 64 */
      OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x0003c004);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00000003);
      OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,
                0);
      OUT_RING(ring, 0x00000006);
   } else {
      OUT_WFI(ring);

      /* without valid index bounds, fall back to the full range [0, ~0] */
      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, info->index_bounds_valid ? info->max_index
                                              : ~0); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, info->index_bounds_valid ? info->min_index
                                              : 0); /* VGT_MIN_VTX_INDX */
   }

   /* binning shader will take offset from C64 */
   if (binning && is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000180);
      OUT_RING(ring, fui(ctx->batch->num_vertices));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
   }

   /* binning passes and point draws ignore visibility */
   enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
   if (binning || info->mode == PIPE_PRIM_POINTS)
      vismode = IGNORE_VISIBILITY;

   fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode],
                vismode, info, draw, index_offset);

   if (is_a20x(ctx->screen)) {
      /* not sure why this is required, but it fixes some hangs */
      OUT_WFI(ring);
   } else {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
      OUT_RING(ring, 0x00000000);
   }

   emit_cacheflush(ring);
}
153
154static bool
155fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
156			 unsigned drawid_offset,
157             const struct pipe_draw_indirect_info *indirect,
158             const struct pipe_draw_start_count_bias *pdraw,
159             unsigned index_offset) assert_dt
160{
161   if (!ctx->prog.fs || !ctx->prog.vs)
162      return false;
163
164   if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart &&
165       !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
166      return false;
167
168   if (ctx->dirty & FD_DIRTY_VTXBUF)
169      emit_vertexbufs(ctx);
170
171   if (fd_binning_enabled)
172      fd2_emit_state_binning(ctx, ctx->dirty);
173
174   fd2_emit_state(ctx, ctx->dirty);
175
176   /* a2xx can draw only 65535 vertices at once
177    * on a22x the field in the draw command is 32bits but seems limited too
178    * using a limit of 32k because it fixes an unexplained hang
179    * 32766 works for all primitives (multiple of 2 and 3)
180    */
181   if (pdraw->count > 32766) {
182      /* clang-format off */
183      static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
184         [0 ... PIPE_PRIM_MAX - 1]  = 32766,
185         [PIPE_PRIM_LINE_STRIP]     = 32765,
186         [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
187
188         /* needs more work */
189         [PIPE_PRIM_TRIANGLE_FAN]   = 0,
190         [PIPE_PRIM_LINE_LOOP]      = 0,
191      };
192      /* clang-format on */
193
194		struct pipe_draw_start_count_bias draw = *pdraw;
195      unsigned count = draw.count;
196      unsigned step = step_tbl[pinfo->mode];
197      unsigned num_vertices = ctx->batch->num_vertices;
198
199      if (!step)
200         return false;
201
202      for (; count + step > 32766; count -= step) {
203         draw.count = MIN2(count, 32766);
204         draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
205         draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
206         draw.start += step;
207         ctx->batch->num_vertices += step;
208      }
209      /* changing this value is a hack, restore it */
210      ctx->batch->num_vertices = num_vertices;
211   } else {
212      draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
213      draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
214   }
215
216   fd_context_all_clean(ctx);
217
218   return true;
219}
220
/*
 * Emit the state shared by the clear paths: the solid vertex buffer and
 * solid program, depth/stencil control, color control and write mask,
 * clip/rasterizer mode, AA configuration and blend control.
 *
 * batch:      batch whose context supplies the solid program and the solid
 *             vertex buffer
 * ring:       ringbuffer the packets are written to
 * buffers:    PIPE_CLEAR_* mask selecting which buffers get written
 * fast_clear: true for the fast-clear path; additionally sets MSAA_ENABLE in
 *             PA_SU_SC_MODE_CNTL and programs PA_SC_AA_CONFIG
 *
 * On a20x the function returns before the trailing group of register writes.
 */
static void
clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
            unsigned buffers, bool fast_clear) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   uint32_t reg;

   fd2_emit_vertex_bufs(ring, 0x9c,
                        (struct fd2_vertex_buf[]){
                           {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
                        },
                        1);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   fd2_program_emit(ctx, ring, &ctx->solid_prog);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   /* RB_DEPTHCONTROL is only written when depth and/or stencil is cleared */
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
      reg = 0;
      if (buffers & PIPE_CLEAR_DEPTH) {
         reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
                A2XX_RB_DEPTHCONTROL_Z_ENABLE |
                A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
                A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
      }
      if (buffers & PIPE_CLEAR_STENCIL) {
         reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
                A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
                A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
      }
      OUT_RING(ring, reg);
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
   OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
                     A2XX_RB_COLORCONTROL_BLEND_DISABLE |
                     A2XX_RB_COLORCONTROL_ROP_CODE(12) |
                     A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
                     A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

   /* two consecutive registers: PA_CL_CLIP_CNTL, then PA_SU_SC_MODE_CNTL */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
   OUT_RING(
      ring,
      A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
         A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
         A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
         (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));

   if (fast_clear) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
      OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   /* color writes are enabled only when the color buffer is being cleared */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
   if (buffers & PIPE_CLEAR_COLOR) {
      OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
                        A2XX_RB_COLOR_MASK_WRITE_GREEN |
                        A2XX_RB_COLOR_MASK_WRITE_BLUE |
                        A2XX_RB_COLOR_MASK_WRITE_ALPHA);
   } else {
      OUT_RING(ring, 0x0);
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
   OUT_RING(ring, 0);

   /* the remaining registers are only programmed on non-a20x parts */
   if (is_a20x(batch->ctx->screen))
      return;

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
   OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
   OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
   OUT_RING(ring,
            0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
   OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
   OUT_RING(ring, 0x00000084);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
   OUT_RING(ring, 0x0000028f);
}
327
328static void
329clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
330{
331   if (is_a20x(ctx->screen))
332      return;
333
334   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
335   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
336   OUT_RING(ring, 0x00000000);
337
338   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
339   OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
340   OUT_RING(ring, 0x00000000);
341
342   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
343   OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
344   OUT_RING(ring, 0x0000003b);
345}
346
/*
 * Emit one fast-clear pass: the patchable scissor/surface setup, the fill
 * values, and a 3-vertex RECTLIST draw.
 *
 * batch:       batch that collects the gmem patch points
 * ring:        ringbuffer the packets are written to
 * color_clear: packed color fill value
 * depth_clear: packed depth/stencil fill value; the a20x path takes stencil
 *              from the low 8 bits and depth from the upper 24 bits
 * patch_type:  GMEM_PATCH_* kind recorded for the SCREEN_SCISSOR_BR dword
 */
static void
clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
           uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
{
   BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */

   /* zero values are patched in */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
   OUT_RINGP(ring, patch_type, &batch->gmem_patches);

   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, 0x8000 | 32);
   OUT_RING(ring, 0);
   OUT_RING(ring, 0);

   /* set fill values */
   if (!is_a20x(batch->ctx->screen)) {
      /* non-a20x: fill values go into dedicated clear registers */
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
      OUT_RING(ring, color_clear);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
      OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
                        A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
      OUT_RING(ring, depth_clear);
   } else {
      /* a20x: color is unpacked into four floats in [0, 1], depth is passed
       * through the viewport and stencil through the stencil reference
       */
      const float sc = 1.0f / 255.0f;

      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000480);
      OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));

      // XXX if using float the rounding error breaks it..
      float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
      /* the float must convert back to the exact 24-bit depth value */
      assert((unsigned)(((double)depth * (double)0xffffff)) ==
             (depth_clear >> 8));

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(depth));

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
      OUT_RING(ring,
               0xff000000 |
                  A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
                  A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
      OUT_RING(ring, 0xff000000 |
                        A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
                        A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
   }

   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
412
/*
 * Try to clear through the MSAA-based fast-clear path.
 *
 * ctx:     context; the clear is emitted to the draw ring of its batch
 * buffers: PIPE_CLEAR_* mask of the buffers to clear
 * color:   clear color (its float values are packed to the format of cbufs[0])
 * depth:   clear depth, scaled to 16 or 24 bits depending on the zs format
 * stencil: clear stencil; only the low 8 bits are used
 *
 * Returns false, without emitting anything, when the fast path is not used:
 * on non-a20x parts, for a stencil-only clear, or for a depth-only clear of
 * a packed depth24/stencil8 buffer.  Returns true once the clear has been
 * emitted.
 */
static bool
fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
               const union pipe_color_union *color, double depth,
               unsigned stencil) assert_dt
{
   /* using 4x MSAA allows clearing ~2x faster
    * then we can use higher bpp clearing to clear lower bpp
    * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
    * note: it's possible to clear with 32_32_32_32 format but it's not faster
    * note: fast clear doesn't work with sysmem rendering
    * (sysmem rendering is disabled when clear is used)
    *
    * we only have 16-bit / 32-bit color formats
    * and 16-bit / 32-bit depth formats
    * so there are only a few possible combinations
    *
    * if the bpp of the color/depth doesn't match
    * we clear with depth/color individually
    */
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   struct fd_batch *batch = ctx->batch;
   struct fd_ringbuffer *ring = batch->draw;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   uint32_t color_clear = 0, depth_clear = 0;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
   int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
   int color_size = -1;

   /* TODO: need to test performance on a22x */
   if (!is_a20x(ctx->screen))
      return false;

   if (buffers & PIPE_CLEAR_COLOR)
      color_size = util_format_get_blocksizebits(format) == 32;

   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      /* no fast clear when clearing only one component of depth+stencil buffer */
      if (!(buffers & PIPE_CLEAR_DEPTH))
         return false;

      if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
           pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
          !(buffers & PIPE_CLEAR_STENCIL))
         return false;

      depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
   }

   assert(color_size >= 0 || depth_size >= 0);

   if (color_size == 0) {
      /* 16-bit color: replicate the packed value into both halves */
      color_clear = pack_rgba(format, color->f);
      color_clear = (color_clear << 16) | (color_clear & 0xffff);
   } else if (color_size == 1) {
      color_clear = pack_rgba(format, color->f);
   }

   if (depth_size == 0) {
      /* 16-bit depth: replicate the value into both halves */
      depth_clear = (uint32_t)(0xffff * depth);
      depth_clear |= depth_clear << 16;
   } else if (depth_size == 1) {
      /* 24-bit depth in the upper bits, stencil in the low byte */
      depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
      depth_clear |= (stencil & 0xff);
   }

   /* disable "window" scissor.. */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));
   OUT_RING(ring, xy2d(0x7fff, 0x7fff));

   /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));

   /* all buffer bits are set here, independent of what is being cleared */
   clear_state(batch, ring, ~0u, true);

   /* differing sizes (or only one buffer requested): one pass per buffer,
    * each pass using the same fill value for both arguments
    */
   if (color_size >= 0 && depth_size != color_size)
      clear_fast(batch, ring, color_clear, color_clear,
                 GMEM_PATCH_FASTCLEAR_COLOR);

   if (depth_size >= 0 && depth_size != color_size)
      clear_fast(batch, ring, depth_clear, depth_clear,
                 GMEM_PATCH_FASTCLEAR_DEPTH);

   /* matching sizes: color and depth are cleared in a single pass */
   if (depth_size == color_size)
      clear_fast(batch, ring, color_clear, depth_clear,
                 GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);

   clear_state_restore(ctx, ring);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
   OUT_RING(ring, 0);

   /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
    * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
    * the value is read from byte offset 60 in the given bo
    */
   OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
   OUT_RING(ring, 1);

   /* the first RB_SURFACE_INFO dword is recorded as a gmem patch point */
   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
   OUT_RING(ring, 0);
   OUT_RING(ring, 0);
   return true;
}
528
/*
 * clear hook for a2xx.
 *
 * ctx:     context; commands go to the draw ring of its current batch
 * buffers: PIPE_CLEAR_* mask of the buffers to clear
 * color:   clear color
 * depth:   clear depth
 * stencil: clear stencil
 *
 * Tries fd2_clear_fast() first.  Otherwise it sets the clear values (shader
 * constant / viewport / stencil reference on a20x; CLEAR_COLOR,
 * RB_COPY_CONTROL and RB_DEPTH_CLEAR elsewhere), programs a scissor and
 * viewport covering the framebuffer, and draws a RECTLIST.
 *
 * On both paths the state groups listed at the end are marked dirty.
 * Always returns true.
 */
static bool
fd2_clear(struct fd_context *ctx, unsigned buffers,
          const union pipe_color_union *color, double depth,
          unsigned stencil) assert_dt
{
   struct fd_ringbuffer *ring = ctx->batch->draw;
   struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;

   if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
      goto dirty;

   /* set clear value */
   if (is_a20x(ctx->screen)) {
      if (buffers & PIPE_CLEAR_COLOR) {
         /* C0 used by fragment shader */
         OUT_PKT3(ring, CP_SET_CONSTANT, 5);
         OUT_RING(ring, 0x00000480);
         OUT_RING(ring, color->ui[0]);
         OUT_RING(ring, color->ui[1]);
         OUT_RING(ring, color->ui[2]);
         OUT_RING(ring, color->ui[3]);
      }

      if (buffers & PIPE_CLEAR_DEPTH) {
         /* use viewport to set depth value */
         OUT_PKT3(ring, CP_SET_CONSTANT, 3);
         OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
         OUT_RING(ring, fui(0.0f));
         OUT_RING(ring, fui(depth));
      }

      if (buffers & PIPE_CLEAR_STENCIL) {
         /* stencil value goes in as the reference for both faces */
         OUT_PKT3(ring, CP_SET_CONSTANT, 3);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
         OUT_RING(ring, 0xff000000 |
                           A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
                           A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
         OUT_RING(ring, 0xff000000 |
                           A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
                           A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
      }
   } else {
      if (buffers & PIPE_CLEAR_COLOR) {
         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
         OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
      }

      if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
         uint32_t clear_mask, depth_clear;
         switch (fd_pipe2depth(fb->zsbuf->format)) {
         case DEPTHX_24_8:
            /* mask 0xe selects depth, 0x1 selects stencil */
            clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
                         ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
            depth_clear =
               (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
            break;
         case DEPTHX_16:
            clear_mask = 0xf;
            depth_clear = (uint32_t)(0xffffffff * depth);
            break;
         default:
            unreachable("invalid depth");
            break;
         }

         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
         OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
                           A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));

         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
         OUT_RING(ring, depth_clear);
      }
   }

   /* scissor state */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));
   OUT_RING(ring, xy2d(fb->width, fb->height));

   /* viewport state */
   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)fb->width / 2.0f));
   OUT_RING(ring, fui((float)fb->width / 2.0f));
   OUT_RING(ring, fui((float)fb->height / 2.0f));
   OUT_RING(ring, fui((float)fb->height / 2.0f));

   /* common state */
   clear_state(ctx->batch, ring, buffers, false);

   fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);

   clear_state_restore(ctx, ring);

dirty:
   /* reached from both the fast path and the regular path */
   ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
                 FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
                 FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;

   ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
   ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
      FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;

   return true;
}
639
640void
641fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
642{
643   struct fd_context *ctx = fd_context(pctx);
644   ctx->draw_vbo = fd2_draw_vbo;
645   ctx->clear = fd2_clear;
646}
647