/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_draw.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_string.h"

#include "freedreno_blitter.h"
#include "freedreno_context.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
#include "freedreno_query_acc.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_util.h"

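/*
 * Small helpers to add a read or write dependency on a resource to the
 * current batch, tolerating NULL (ie. unbound) resources:
 */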
static void
resource_read(struct fd_batch *batch, struct pipe_resource *prsc) assert_dt
{
   if (!prsc)
      return;
   fd_batch_resource_read(batch, fd_resource(prsc));
}

static void
resource_written(struct fd_batch *batch, struct pipe_resource *prsc) assert_dt
{
   if (!prsc)
      return;
   fd_batch_resource_write(batch, fd_resource(prsc));
}

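/*
 * Based on the ctx->dirty bits, figure out which framebuffer buffers need
 * to be restored (mem2gmem) or resolved (gmem2mem), and add read/write
 * dependencies on the resources referenced by the current draw state:
 */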
static void
batch_draw_tracking_for_dirty_bits(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned buffers = 0, restore_buffers = 0;

   if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
      if (fd_depth_enabled(ctx)) {
         if (fd_resource(pfb->zsbuf->texture)->valid) {
            restore_buffers |= FD_BUFFER_DEPTH;
            /* storing packed d/s depth also stores stencil, so we need
             * the stencil restored too to avoid invalidating it.
             */
            if (pfb->zsbuf->texture->format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
               restore_buffers |= FD_BUFFER_STENCIL;
         } else {
            batch->invalidated |= FD_BUFFER_DEPTH;
         }
         batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
         if (fd_depth_write_enabled(ctx)) {
            buffers |= FD_BUFFER_DEPTH;
            resource_written(batch, pfb->zsbuf->texture);
         } else {
            resource_read(batch, pfb->zsbuf->texture);
         }
      }

      if (fd_stencil_enabled(ctx)) {
         if (fd_resource(pfb->zsbuf->texture)->valid) {
            restore_buffers |= FD_BUFFER_STENCIL;
            /* storing packed d/s stencil also stores depth, so we need
             * the depth restored too to avoid invalidating it.
             */
            if (pfb->zsbuf->texture->format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
               restore_buffers |= FD_BUFFER_DEPTH;
         } else {
            batch->invalidated |= FD_BUFFER_STENCIL;
         }
         batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
         buffers |= FD_BUFFER_STENCIL;
         resource_written(batch, pfb->zsbuf->texture);
      }
   }

   if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) {
      for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
         struct pipe_resource *surf;

         if (!pfb->cbufs[i])
            continue;

         surf = pfb->cbufs[i]->texture;

         if (fd_resource(surf)->valid) {
            restore_buffers |= PIPE_CLEAR_COLOR0 << i;
         } else {
            batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
         }

         buffers |= PIPE_CLEAR_COLOR0 << i;

         resource_written(batch, surf);
      }
   }

   if (ctx->dirty & FD_DIRTY_BLEND) {
      if (ctx->blend->logicop_enable)
         batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
      for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
         if (ctx->blend->rt[i].blend_enable)
            batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;
      }
   }

   /* Mark SSBOs */
   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
      const struct fd_shaderbuf_stateobj *so =
         &ctx->shaderbuf[PIPE_SHADER_FRAGMENT];

      u_foreach_bit (i, so->enabled_mask & so->writable_mask)
         resource_written(batch, so->sb[i].buffer);

      u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
         resource_read(batch, so->sb[i].buffer);
   }

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
      u_foreach_bit (i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
         struct pipe_image_view *img =
            &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
         if (img->access & PIPE_IMAGE_ACCESS_WRITE)
            resource_written(batch, img->resource);
         else
            resource_read(batch, img->resource);
      }
   }

   u_foreach_bit (s, ctx->bound_shader_stages) {
      /* Mark constbuf as being read: */
      if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_CONST) {
         u_foreach_bit (i, ctx->constbuf[s].enabled_mask)
            resource_read(batch, ctx->constbuf[s].cb[i].buffer);
      }

      /* Mark textures as being read */
      if (ctx->dirty_shader[s] & FD_DIRTY_SHADER_TEX) {
         u_foreach_bit (i, ctx->tex[s].valid_textures)
            resource_read(batch, ctx->tex[s].textures[i]->texture);
      }
   }

   /* Mark VBOs as being read */
   if (ctx->dirty & FD_DIRTY_VTXBUF) {
      u_foreach_bit (i, ctx->vtx.vertexbuf.enabled_mask) {
         assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
         resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
      }
   }

   /* Mark streamout buffers as being written.. */
   if (ctx->dirty & FD_DIRTY_STREAMOUT) {
      for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
         if (ctx->streamout.targets[i])
            resource_written(batch, ctx->streamout.targets[i]->buffer);
   }

   /* any buffers that haven't been cleared yet, we need to restore: */
   batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
   /* and any buffers used, need to be resolved: */
   batch->resolve |= buffers;
}

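/*
 * Per-draw dependency tracking: in addition to the dirty-state tracking
 * above, mark the index buffer, indirect draw buffer, and query buffers
 * as read/written by this batch:
 */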
static void
batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
                    const struct pipe_draw_indirect_info *indirect) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
    * query_buf may not be created yet.
    */
   fd_batch_update_queries(batch);

   /*
    * Figure out the buffers/features we need:
    */

   fd_screen_lock(ctx->screen);

   if (ctx->dirty & FD_DIRTY_RESOURCE)
      batch_draw_tracking_for_dirty_bits(batch);

   /* Mark index buffer as being read */
   if (info->index_size)
      resource_read(batch, info->index.resource);

   /* Mark indirect draw buffer as being read */
   if (indirect) {
      if (indirect->buffer)
         resource_read(batch, indirect->buffer);
      if (indirect->count_from_stream_output)
         resource_read(
            batch, fd_stream_output_target(indirect->count_from_stream_output)
                      ->offset_buf);
   }

   resource_written(batch, batch->query_buf);

   list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
      resource_written(batch, aq->prsc);

   fd_screen_unlock(ctx->screen);
}

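/*
 * Update sw draw-call/primitive stats.  Prims are only counted in sw on
 * older gens, where the hw counters aren't enabled:
 */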
static void
update_draw_stats(struct fd_context *ctx, const struct pipe_draw_info *info,
                  const struct pipe_draw_start_count_bias *draws,
                  unsigned num_draws) assert_dt
{
   ctx->stats.draw_calls++;

   if (ctx->screen->gen < 6) {
      /* Counting prims in sw doesn't work for GS and tessellation. For older
       * gens we don't have those stages and don't have the hw counters enabled,
       * so keep the count accurate for non-patch geometry.
       */
      unsigned prims = 0;
      if ((info->mode != PIPE_PRIM_PATCHES) && (info->mode != PIPE_PRIM_MAX)) {
         for (unsigned i = 0; i < num_draws; i++) {
            prims += u_reduced_prims_for_vertices(info->mode, draws[i].count);
         }
      }

      ctx->stats.prims_generated += prims;

      if (ctx->streamout.num_targets > 0) {
         /* Clip the prims we're writing to the size of the SO buffers. */
         enum pipe_prim_type tf_prim = u_decomposed_prim(info->mode);
         unsigned verts_written = u_vertices_for_prims(tf_prim, prims);
         unsigned remaining_vert_space =
            ctx->streamout.max_tf_vtx - ctx->streamout.verts_written;
         if (verts_written > remaining_vert_space) {
            verts_written = remaining_vert_space;
            u_trim_pipe_prim(tf_prim, &remaining_vert_space);
         }
         ctx->streamout.verts_written += verts_written;

         ctx->stats.prims_emitted +=
            u_reduced_prims_for_vertices(tf_prim, verts_written);
      }
   }
}

static void
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
            unsigned drawid_offset,
            const struct pipe_draw_indirect_info *indirect,
            const struct pipe_draw_start_count_bias *draws, unsigned num_draws) in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   /* for debugging problems with indirect draw, it is convenient to be
    * able to emulate it, to determine if the game is feeding us bogus
    * data:
    */
   if (indirect && indirect->buffer && FD_DBG(NOINDR)) {
      /* num_draws is only applicable for direct draws: */
      assert(num_draws == 1);
      util_draw_indirect(pctx, info, indirect);
      return;
   }

   /* TODO: push down the region versions into the tiles */
   if (!fd_render_condition_check(pctx))
      return;

   /* Upload a user index buffer. */
   struct pipe_resource *indexbuf = NULL;
   unsigned index_offset = 0;
   struct pipe_draw_info new_info;
   if (info->index_size) {
      if (info->has_user_indices) {
         if (num_draws > 1) {
            util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws);
            return;
         }
         if (!util_upload_index_buffer(pctx, info, &draws[0], &indexbuf,
                                       &index_offset, 4))
            return;
         new_info = *info;
         new_info.index.resource = indexbuf;
         new_info.has_user_indices = false;
         info = &new_info;
      } else {
         indexbuf = info->index.resource;
      }
   }

   if ((ctx->streamout.num_targets > 0) && (num_draws > 1)) {
      util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws);
      return;
   }

   struct fd_batch *batch = fd_context_batch(ctx);

   batch_draw_tracking(batch, info, indirect);

   while (unlikely(!fd_batch_lock_submit(batch))) {
      /* The current batch was flushed in batch_draw_tracking()
       * so start anew.  We know this won't happen a second time
       * since we are dealing with a fresh batch:
       */
      fd_batch_reference(&batch, NULL);
      batch = fd_context_batch(ctx);
      batch_draw_tracking(batch, info, indirect);
      assert(ctx->batch == batch);
   }

   batch->num_draws++;

   /* Marking the batch as needing flush must come after the batch
    * dependency tracking (resource_read()/resource_written()), as that
    * can trigger a flush.
    */
   fd_batch_needs_flush(batch);

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   DBG("%p: %ux%u num_draws=%u (%s/%s)", batch, pfb->width, pfb->height,
       batch->num_draws,
       util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
       util_format_short_name(pipe_surface_format(pfb->zsbuf)));

   batch->cost += ctx->draw_cost;

   for (unsigned i = 0; i < num_draws; i++) {
      ctx->draw_vbo(ctx, info, drawid_offset, indirect, &draws[i], index_offset);

      batch->num_vertices += draws[i].count * info->instance_count;
   }

   if (unlikely(ctx->stats_users > 0))
      update_draw_stats(ctx, info, draws, num_draws);

   for (unsigned i = 0; i < ctx->streamout.num_targets; i++) {
      assert(num_draws == 1);
      ctx->streamout.offsets[i] += draws[0].count;
   }

   if (FD_DBG(DDRAW))
      fd_context_all_dirty(ctx);

   assert(!batch->flushed);

   fd_batch_unlock_submit(batch);
   fd_batch_check_size(batch);
   fd_batch_reference(&batch, NULL);

   if (info == &new_info)
      pipe_resource_reference(&indexbuf, NULL);
}

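/*
 * Like batch_draw_tracking(), but for clears: track which buffers are
 * cleared/invalidated and add write dependencies for the cleared surfaces
 * and query buffers:
 */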
static void
batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned cleared_buffers;

   /* pctx->clear() is only for full-surface clears, so scissor is
    * equivalent to having GL_SCISSOR_TEST disabled:
    */
   batch->max_scissor.minx = 0;
   batch->max_scissor.miny = 0;
   batch->max_scissor.maxx = pfb->width;
   batch->max_scissor.maxy = pfb->height;

   /* for bookkeeping about which buffers have been cleared (and thus
    * can fully or partially skip mem2gmem) we need to ignore buffers
    * that have already had a draw, in case apps do silly things like
    * clear after draw (ie. if you only clear the color buffer, but
    * something like alpha-test causes side effects from the draw in
    * the depth buffer, etc)
    */
   cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
   batch->cleared |= buffers;
   batch->invalidated |= cleared_buffers;

   batch->resolve |= buffers;

   fd_screen_lock(ctx->screen);

   if (buffers & PIPE_CLEAR_COLOR)
      for (unsigned i = 0; i < pfb->nr_cbufs; i++)
         if (buffers & (PIPE_CLEAR_COLOR0 << i))
            resource_written(batch, pfb->cbufs[i]->texture);

   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      resource_written(batch, pfb->zsbuf->texture);
      batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
   }

   resource_written(batch, batch->query_buf);

   list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
      resource_written(batch, aq->prsc);

   fd_screen_unlock(ctx->screen);
}

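/*
 * pctx->clear() entry point, for full-surface clears of the currently
 * bound framebuffer:
 */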
static void
fd_clear(struct pipe_context *pctx, unsigned buffers,
         const struct pipe_scissor_state *scissor_state,
         const union pipe_color_union *color, double depth,
         unsigned stencil) in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   /* TODO: push down the region versions into the tiles */
   if (!fd_render_condition_check(pctx))
      return;

   struct fd_batch *batch = fd_context_batch(ctx);

   batch_clear_tracking(batch, buffers);

   while (unlikely(!fd_batch_lock_submit(batch))) {
      /* The current batch was flushed in batch_clear_tracking()
       * so start anew.  We know this won't happen a second time
       * since we are dealing with a fresh batch:
       */
      fd_batch_reference(&batch, NULL);
      batch = fd_context_batch(ctx);
      batch_clear_tracking(batch, buffers);
      assert(ctx->batch == batch);
   }

   /* Marking the batch as needing flush must come after the batch
    * dependency tracking (resource_read()/resource_written()), as that
    * can trigger a flush.
    */
   fd_batch_needs_flush(batch);

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers, pfb->width,
       pfb->height, depth, stencil,
       util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
       util_format_short_name(pipe_surface_format(pfb->zsbuf)));

   /* If the per-gen backend doesn't implement ctx->clear(), fall back
    * to the generic blitter clear:
    */
   bool fallback = true;

   if (ctx->clear) {
      fd_batch_update_queries(batch);

      if (ctx->clear(ctx, buffers, color, depth, stencil)) {
         if (FD_DBG(DCLEAR))
            fd_context_all_dirty(ctx);

         fallback = false;
      }
   }

   assert(!batch->flushed);

   fd_batch_unlock_submit(batch);

   if (fallback) {
      fd_blitter_clear(pctx, buffers, color, depth, stencil);
   }

   fd_batch_check_size(batch);

   fd_batch_reference(&batch, NULL);
}

static void
fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                       const union pipe_color_union *color, unsigned x,
                       unsigned y, unsigned w, unsigned h,
                       bool render_condition_enabled)
{
   DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
}

static void
fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                       unsigned buffers, double depth, unsigned stencil,
                       unsigned x, unsigned y, unsigned w, unsigned h,
                       bool render_condition_enabled)
{
   DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
       buffers, depth, stencil, x, y, w, h);
}

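/*
 * Compute dispatches run on their own batch; mark all resources referenced
 * by the bound compute state as read/written before launching the grid:
 */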
static void
fd_launch_grid(struct pipe_context *pctx,
               const struct pipe_grid_info *info) in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   const struct fd_shaderbuf_stateobj *so =
      &ctx->shaderbuf[PIPE_SHADER_COMPUTE];
   struct fd_batch *batch, *save_batch = NULL;

   batch = fd_bc_alloc_batch(ctx, true);
   fd_batch_reference(&save_batch, ctx->batch);
   fd_batch_reference(&ctx->batch, batch);
   fd_context_all_dirty(ctx);

   fd_screen_lock(ctx->screen);

   /* Mark SSBOs */
   u_foreach_bit (i, so->enabled_mask & so->writable_mask)
      resource_written(batch, so->sb[i].buffer);

   u_foreach_bit (i, so->enabled_mask & ~so->writable_mask)
      resource_read(batch, so->sb[i].buffer);

   u_foreach_bit (i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
      struct pipe_image_view *img = &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
      if (img->access & PIPE_IMAGE_ACCESS_WRITE)
         resource_written(batch, img->resource);
      else
         resource_read(batch, img->resource);
   }

   /* UBOs are read */
   u_foreach_bit (i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
      resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);

   /* Mark textures as being read */
   u_foreach_bit (i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
      resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);

   /* For global buffers, we don't really know if read or written, so assume
    * the worst:
    */
   u_foreach_bit (i, ctx->global_bindings.enabled_mask)
      resource_written(batch, ctx->global_bindings.buf[i]);

   if (info->indirect)
      resource_read(batch, info->indirect);

   fd_screen_unlock(ctx->screen);

   DBG("%p: work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u",
       batch, info->work_dim,
       info->block[0], info->block[1], info->block[2],
       info->grid[0], info->grid[1], info->grid[2]);

   fd_batch_needs_flush(batch);
   ctx->launch_grid(ctx, info);

   fd_batch_flush(batch);

   fd_batch_reference(&ctx->batch, save_batch);
   fd_context_all_dirty(ctx);
   fd_batch_reference(&save_batch, NULL);
   fd_batch_reference(&batch, NULL);
}

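/* Hook up the draw/clear (and, where supported, compute) entry points: */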
void
fd_draw_init(struct pipe_context *pctx)
{
   pctx->draw_vbo = fd_draw_vbo;
   pctx->clear = fd_clear;
   pctx->clear_render_target = fd_clear_render_target;
   pctx->clear_depth_stencil = fd_clear_depth_stencil;

   if (has_compute(fd_screen(pctx->screen))) {
      pctx->launch_grid = fd_launch_grid;
   }
}