/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining), plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control: a worst case of 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* Number of batches; must match the number of entries in enum iris_batch_name.
 * Used by other_batches[] and iris_foreach_batch().
 */
#define IRIS_BATCH_COUNT 3

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   uint32_t ctx_id;
   uint32_t exec_flags;
   bool has_engines_context;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e., the
    * first instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded.
    */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqno's will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   struct {
      /**
       * Set of struct iris_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Whether this batch contains a fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue *ds;
};

void iris_init_batches(struct iris_context *ice, int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
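
/*
 * For example (purely illustrative, not part of this header's API): a caller
 * that has already packed a command into a local array could emit it with
 *
 *    uint32_t cmd[4] = { ... };
 *    iris_batch_emit(batch, cmd, sizeof(cmd));
 */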

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
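
/*
 * For example (illustrative only): to wait for all work queued so far, a
 * caller could take a reference to the signal syncobj, flush the batch, and
 * then wait on it using the syncobj helpers from iris_fence.h:
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    ... wait on syncobj, then drop the reference, e.g. with
 *    iris_syncobj_reference(bufmgr, &syncobj, NULL) ...
 */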

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
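
/*
 * Typical usage (illustrative): wrap the BO references for a single draw or
 * blit in a sync region, so each BO's seqno only needs to be bumped once
 * within the region:
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, writable, access);
 *    ... emit the commands that access the BO ...
 *    iris_batch_sync_region_end(batch);
 */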

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

#define iris_foreach_batch(ice, batch)                \
   for (struct iris_batch *batch = &ice->batches[0];  \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo.ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
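
/*
 * For example (illustrative):
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 */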

#endif