/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   uint32_t ctx_id;
   uint32_t exec_flags;
   bool has_engines_context;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the first
    * instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded. */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   struct {
      /**
       * Set of struct brw_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
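
   /* A concrete (illustrative) reading of the matrix: if a buffer was last
    * written by cache domain j at seqno 42 and coherent_seqnos[i][j] >= 42,
    * that write is already visible to domain i and no synchronization is
    * needed; if coherent_seqnos[i][j] < 42, a flush/invalidation must be
    * emitted before domain i can safely read the data.
    */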

   /**
    * A vector representing the cache coherency status of the L3.  For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue *ds;
};

void iris_init_batches(struct iris_context *ice, int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands: allocates space and copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
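
/* Illustrative usage sketch (the packet contents below are placeholders,
 * not a real command):
 *
 *    uint32_t cmd[2] = { 0, 0 };
 *    iris_batch_emit(batch, cmd, sizeof(cmd));
 *
 * or, to build a packet in place:
 *
 *    uint32_t *dw = iris_get_command_space(batch, 4 * sizeof(uint32_t));
 *    dw[0] = ...;
 */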

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
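
/* Typical pattern (sketch; IRIS_DOMAIN_RENDER_WRITE merely stands in for
 * whatever domain the caller actually needs):
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_RENDER_WRITE);
 *    ... emit the commands that access bo ...
 *    iris_batch_sync_region_end(batch);
 */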

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines as well.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating L3-coherent write domains does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

#define iris_foreach_batch(ice, batch)                \
   for (struct iris_batch *batch = &ice->batches[0];  \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo.ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
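
/* Example usage (sketch), flushing every batch owned by a context:
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 */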

#endif