/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef CROCUS_BATCH_DOT_H
#define CROCUS_BATCH_DOT_H

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/u_dynarray.h"

#include "common/intel_decoder.h"
#include "drm-uapi/i915_drm.h"

#include "crocus_fence.h"
#include "crocus_fine_fence.h"

#include "crocus_bufmgr.h"

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
 * Address, which means that we can't put binding tables beyond 64kB.  This
 * effectively limits the maximum statebuffer size to 64kB.
 */
#define MAX_STATE_SIZE (64 * 1024)

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)

enum crocus_batch_name {
   CROCUS_BATCH_RENDER,
   CROCUS_BATCH_COMPUTE,
};

#define CROCUS_BATCH_COUNT 2

struct crocus_address {
   struct crocus_bo *bo;
   int32_t offset;
   uint32_t reloc_flags;
};
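
/* Illustrative sketch (names hypothetical, not an API requirement): a
 * crocus_address is usually built by pairing a BO with a byte offset into
 * it, plus RELOC_* flags (defined further down) describing how the
 * relocation should be emitted, e.g. for a buffer the GPU will write:
 *
 *    struct crocus_address addr = {
 *       .bo = some_bo,
 *       .offset = 0,
 *       .reloc_flags = RELOC_WRITE,
 *    };
 */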

struct crocus_reloc_list {
   struct drm_i915_gem_relocation_entry *relocs;
   int reloc_count;
   int reloc_array_size;
};

/**
 * A command or state stream living in a BO that can be replaced with a
 * larger one when it fills up (see crocus_grow_buffer()).
 */
struct crocus_growing_bo {
   struct crocus_bo *bo;
   /** CPU mapping of the BO (or a shadow copy of it). */
   void *map;
   /** Next free byte in the mapping. */
   void *map_next;
   /** Bookkeeping for carrying contents over from a BO we outgrew. */
   struct crocus_bo *partial_bo;
   void *partial_bo_map;
   unsigned partial_bytes;
   /** Relocation entries for addresses written into this buffer. */
   struct crocus_reloc_list relocs;
   /** Bytes of the buffer currently in use. */
   unsigned used;
};

struct crocus_batch {
   struct crocus_context *ice;
   struct crocus_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
   enum crocus_batch_name name;

   /** The command and state streams owned by this batch. */
   struct crocus_growing_bo command, state;

   /** Size of the primary batch if we've moved on to a secondary. */
   unsigned primary_batch_size;

   /** Whether STATE_BASE_ADDRESS has been emitted in this batch. */
   bool state_base_address_emitted;

   /** Number of PIPE_CONTROLs emitted since the last CS stall. */
   uint8_t pipe_controls_since_last_cs_stall;

   /** GEM hardware context ID used when submitting this batch. */
   uint32_t hw_ctx_id;

   /** Mask of RELOC_* flags that may be passed through to the kernel. */
   uint32_t valid_reloc_flags;

   /** Write to a malloc'd shadow copy instead of mapping the BOs directly. */
   bool use_shadow_copy;

   /** Don't flush at the target size; grow the command buffer instead
    * (see crocus_require_command_space()).
    */
   bool no_wrap;

   /** The validation list */
   struct drm_i915_gem_exec_object2 *validation_list;
   struct crocus_bo **exec_bos;
   int exec_count;
   int exec_array_size;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the first
    * instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /**
    * A list of crocus_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct crocus_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct crocus_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];

   struct {
      /**
       * Set of struct crocus_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;

      /**
       * Set of struct crocus_bo * that have been used as a depth buffer
       * within this batchbuffer and would need flushing before being used
       * from another cache domain that isn't coherent with it (i.e. the
       * sampler).
       */
      struct set *depth;
   } cache;

   /** Decoder context for INTEL_DEBUG=bat batch disassembly. */
   struct intel_batch_decode_ctx decoder;

   /** Sizes of pieces of state, keyed by offset (see crocus_record_state_size()). */
   struct hash_table_u64 *state_sizes;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Whether the batch contains a fence signal operation. */
   bool contains_fence_signal;
};

static inline bool
batch_has_fine_fence(struct crocus_batch *batch)
{
   return !!batch->fine_fences.uploader;
}

#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)

void crocus_init_batch(struct crocus_context *ctx,
                       enum crocus_batch_name name,
                       int priority);
void crocus_batch_free(struct crocus_batch *batch);
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);

void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)
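
/* Illustrative note: callers use the crocus_batch_flush() macro rather than
 * the underscored function, so the flushing call site (__FILE__/__LINE__) is
 * available for debug output, e.g.:
 *
 *    if (too_much_work_queued)        // hypothetical condition
 *       crocus_batch_flush(batch);
 */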

bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);

bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit... emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS

void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
                          bool writable);
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                              struct crocus_bo *target, uint32_t target_offset,
                              unsigned int reloc_flags);
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                            struct crocus_bo *target, uint32_t target_offset,
                            unsigned int reloc_flags);
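
/* Illustrative sketch (not how most code emits relocations - the generated
 * state-packing helpers normally take care of this): record a relocation at
 * a known command-buffer offset and store the presumed address it returns;
 * target_bo is a hypothetical BO the command should point at:
 *
 *    uint32_t *out = crocus_get_command_space(batch, sizeof(uint32_t));
 *    uint32_t offset = (char *)out - (char *)batch->command.map;
 *    *out = (uint32_t) crocus_command_reloc(batch, offset, target_bo, 0,
 *                                           RELOC_WRITE);
 *
 * crocus_state_reloc() is the analogous call for the state buffer.
 */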

enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);

void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
                        unsigned used, unsigned new_size);

static inline unsigned
crocus_batch_bytes_used(struct crocus_batch *batch)
{
   return batch->command.map_next - batch->command.map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this flushes the batch or, when flushing isn't
 * allowed (batch->no_wrap), grows the underlying buffer instead.
 *
 * Most callers want crocus_get_command_space() instead.
 */
static inline void
crocus_require_command_space(struct crocus_batch *batch, unsigned size)
{
   const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
   unsigned used = crocus_batch_bytes_used(batch);
   if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
      crocus_batch_flush(batch);
   } else if (used + size >= batch->command.bo->size) {
      const unsigned new_size =
         MIN2(batch->command.bo->size + batch->command.bo->size / 2,
              MAX_BATCH_SIZE);

      crocus_grow_buffer(batch, false, used, new_size);
      batch->command.map_next = (void *)batch->command.map + used;
      assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
   }
}
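
/* Worked example of the growth path above (illustrative, assuming the
 * command BO starts at BATCH_SZ): each grow adds half the current size,
 * so 20kB becomes 30kB, 45kB, 67.5kB, and so on until the MAX_BATCH_SIZE
 * (256kB) clamp kicks in.
 */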

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
crocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
{
   crocus_require_command_space(batch, bytes);
   void *map = batch->command.map_next;
   batch->command.map_next += bytes;
   return map;
}
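
/* Illustrative usage sketch (DWord names are hypothetical): reserve space
 * for a packed command and fill it in directly:
 *
 *    uint32_t *dw = crocus_get_command_space(batch, 4 * sizeof(uint32_t));
 *    dw[0] = header_dword;
 *    dw[1] = payload0;
 *    dw[2] = payload1;
 *    dw[3] = payload2;
 */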

/**
 * Helper to emit GPU commands: allocates space and copies the data there.
 */
static inline void
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
{
   void *map = crocus_get_command_space(batch, size);
   memcpy(map, data, size);
}
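
/* Illustrative sketch: copy already-packed DWords into the batch.  MI_NOOP
 * encodes as an all-zero DWord, which makes a harmless example:
 *
 *    const uint32_t mi_noop = 0;
 *    crocus_batch_emit(batch, &mi_noop, sizeof(mi_noop));
 */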

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct crocus_syncobj *
crocus_batch_get_signal_syncobj(struct crocus_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct crocus_syncobj *syncobj =
      ((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
crocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
                                      struct crocus_syncobj **out_syncobj)
{
   struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
   crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
}
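
/* Illustrative sketch: hold a reference to the signalling syncobj so the
 * caller can wait for this batch after submitting it (the reference is
 * presumably dropped by passing NULL to crocus_syncobj_reference):
 *
 *    struct crocus_syncobj *done = NULL;
 *    crocus_batch_reference_signal_syncobj(batch, &done);
 *    crocus_batch_flush(batch);
 *    ... wait on or export `done`, then release it ...
 *    crocus_syncobj_reference(batch->screen, &done, NULL);
 */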

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
                         uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t)size);
   }
}
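
/* Illustrative sketch (names hypothetical): record a state object's size
 * right after uploading it, keyed by its offset from the state base:
 *
 *    crocus_record_state_size(batch->state_sizes, surf_offset, surf_size);
 */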

static inline bool
crocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
{
   return (char *)p >= (char *)batch->state.map &&
          (char *)p < (char *)batch->state.map + batch->state.bo->size;
}

static inline void
crocus_require_statebuffer_space(struct crocus_batch *batch, int size)
{
   if (batch->state.used + size >= STATE_SZ)
      crocus_batch_flush(batch);
}
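
/* Illustrative sketch (size name hypothetical): reserve state space before
 * uploading a block of dynamic state:
 *
 *    crocus_require_statebuffer_space(batch, aligned_size);
 *    ... then write aligned_size bytes of state into batch->state ...
 */
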
#endif