/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/perf/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip cmdstream to record timestamp: */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* submit_lock serializes cmdstream emission and flush.  Acquire before
    * screen->lock.
    */
   simple_mtx_t submit_lock;

   /* Do we need to mem2gmem before rendering?  We don't, if for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents.  Keep track of which buffer(s) are cleared, or need
    * restore.  Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared, and 'partial_cleared' bits will be set if you must
    * check cleared_scissor.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
   enum {
      /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
      FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
      FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
      FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
      FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
   } invalidated, cleared, fast_cleared, restore, resolve;
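   /* Illustrative sketch (not actual driver code): per the comment above, a
    * full-surface glClear() of the color buffer would be tracked roughly as:
    *
    *    batch->cleared |= FD_BUFFER_COLOR;      // entirely cleared
    *    batch->invalidated |= FD_BUFFER_COLOR;  // no need to restore to gmem
    *
    * while a draw that depends on the existing color contents would instead
    * mark FD_BUFFER_COLOR in 'restore' (and in 'resolve' once it is written).
    */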

   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */

   /* Keep track if WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when
    * bypassing GMEM).
    */
   enum fd_gmem_reason gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT.  If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled).  If depth-test is enabled, the hw will additionally do
    * a depth buffer read.  If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
   unsigned cost;
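   /* Worked example (illustrative): a single draw with two MRTs, blending
    * enabled, and both depth-test and depth-write enabled contributes
    * 2 + 2 + 1 + 1 = 6 to the cost estimate above.
    */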

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether we are using binning or not:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of the pointer to the start of MEM exports for a20x binning
    * shaders, so the end of the shader can be cut off at the right point
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_setup;
   struct fd_ringbuffer *tile_fini;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch..  so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in case the batch being ref'd can disappear out from under
 * you.
 */

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;
   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

   if (ctx)
      fd_screen_lock(ctx->screen);

   fd_batch_reference_locked(ptr, batch);

   if (ctx)
      fd_screen_unlock(ctx->screen);
}
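/* Usage sketch (illustrative only; 'ctx->batch' here just stands for whichever
 * batch is being referenced): taking and dropping a reference from code that
 * does not already hold screen->lock:
 *
 *    struct fd_batch *batch = NULL;
 *    fd_batch_reference(&batch, ctx->batch);   // takes screen->lock internally
 *    ...
 *    fd_batch_reference(&batch, NULL);         // drop the ref when done
 *
 * Code that already holds screen->lock would use fd_batch_reference_locked()
 * instead, per the NOTE above.
 */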

static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
   simple_mtx_unlock(&batch->submit_lock);
}

/**
 * Returns true if the submit-lock was acquired; false if the batch has
 * already been flushed (in which case the lock is not held on return).
 */
static inline bool MUST_CHECK
fd_batch_lock_submit(struct fd_batch *batch)
{
   simple_mtx_lock(&batch->submit_lock);
   bool ret = !batch->flushed;
   if (!ret)
      fd_batch_unlock_submit(batch);
   return ret;
}
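/* Usage sketch (illustrative only): the typical pattern is to try to take the
 * submit-lock, bail if the batch was already flushed, and unlock when done:
 *
 *    if (fd_batch_lock_submit(batch)) {
 *       ... emit cmdstream / flush work ...
 *       fd_batch_unlock_submit(batch);
 *    }
 */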

/**
 * Mark the batch as having something worth flushing (rendering, blit, query,
 * etc)
 */
static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
   batch->needs_flush = true;
   fd_fence_ref(&batch->ctx->last_fence, NULL);
}

/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
 * the batch before each draw.
 */
static inline void
fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, false);
}

static inline void
fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, true);
}

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
   batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
               enum vgt_event_type evt)
{
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, evt);
   fd_reset_wfi(batch);
}
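/* Usage sketch (illustrative only; the event value is gen-specific and shown
 * here just as an example):
 *
 *    fd_event_write(batch, batch->draw, CACHE_FLUSH_TS);
 *
 * This emits the CP_EVENT_WRITE packet into the given ring and marks that a
 * WAIT_FOR_IDLE will be needed before the next RMW register update.
 */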

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
   if (batch->epilogue == NULL) {
      batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                 FD_RINGBUFFER_GROWABLE);
   }

   return batch->epilogue;
}
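/* Usage sketch (illustrative only): gen-specific code that needs work to run
 * after each tile lazily grabs the epilogue ring and emits packets into it:
 *
 *    struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
 *    ... OUT_PKT*()/OUT_RING() into epilogue ...
 */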

struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_BATCH_H_ */