Lines Matching refs:batch

29 * collect into a "batch buffer".  Typically, many draw calls are grouped
30 * into a single batch to amortize command submission overhead.
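
The amortization described above is the core of the design: record many draws CPU-side, pay the kernel-submission cost once. A toy sketch of the idea, with hypothetical names (not the driver's API):

#include <stdint.h>
#include <string.h>

/* Toy sketch: append per-draw commands to one CPU-side buffer, then submit
 * the whole buffer with a single ioctl instead of one per draw. All names
 * here are illustrative; bounds checking is omitted. */
struct toy_batch {
   uint32_t cmds[8192];
   unsigned len; /* in dwords */
};

static void toy_emit(struct toy_batch *b, const uint32_t *dw, unsigned n)
{
   memcpy(&b->cmds[b->len], dw, n * sizeof(uint32_t));
   b->len += n; /* no kernel round-trip per draw */
}
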
36 * virtual memory address before executing our batch. If a BO is not in
65 /* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
71 static void crocus_batch_reset(struct crocus_batch *batch);
74 num_fences(struct crocus_batch *batch)
76 return util_dynarray_num_elements(&batch->exec_fences,
84 dump_fence_list(struct crocus_batch *batch)
86 fprintf(stderr, "Fence list (length %u): ", num_fences(batch));
88 util_dynarray_foreach(&batch->exec_fences,
103 dump_validation_list(struct crocus_batch *batch)
105 fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
107 for (int i = 0; i < batch->exec_count; i++) {
108 uint64_t flags = batch->validation_list[i].flags;
109 assert(batch->validation_list[i].handle ==
110 batch->exec_bos[i]->gem_handle);
113 batch->validation_list[i].handle, batch->exec_bos[i]->name,
114 (uint64_t)batch->validation_list[i].offset, batch->exec_bos[i]->size,
115 batch->exec_bos[i]->refcount,
121 * Return BO information to the batch decoder (for debugging).
126 struct crocus_batch *batch = v_batch;
128 for (int i = 0; i < batch->exec_count; i++) {
129 struct crocus_bo *bo = batch->exec_bos[i];
137 .map = crocus_bo_map(batch->dbg, bo, MAP_READ) +
150 struct crocus_batch *batch = v_batch;
161 _mesa_hash_table_u64_search(batch->state_sizes, address - base_address);
167 * Decode the current batch.
170 decode_batch(struct crocus_batch *batch)
172 void *map = crocus_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
173 intel_print_batch(&batch->decoder, map, batch->primary_batch_size,
174 batch->exec_bos[0]->gtt_offset, false);
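
The fragments above show decode_get_bo handing buffer mappings back to intel_print_batch. A sketch of the lookup they imply, assuming the field names visible elsewhere in this listing (exec_bos, gtt_offset, size) and the decoder's struct intel_batch_decode_bo; the real function also masks high address bits, which is omitted here:

static struct intel_batch_decode_bo
decode_get_bo_sketch(void *v_batch, bool ppgtt, uint64_t address)
{
   struct crocus_batch *batch = v_batch;
   (void)ppgtt;

   /* Find the exec BO whose GTT range contains the requested address and
    * give the decoder a CPU mapping offset to that address. */
   for (int i = 0; i < batch->exec_count; i++) {
      struct crocus_bo *bo = batch->exec_bos[i];
      if (address >= bo->gtt_offset && address < bo->gtt_offset + bo->size) {
         return (struct intel_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = (char *)crocus_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo->gtt_offset),
         };
      }
   }
   return (struct intel_batch_decode_bo) { 0 };
}
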
191 struct crocus_batch *batch = &ice->batches[name];
195 batch->ice = ice;
196 batch->screen = screen;
197 batch->dbg = &ice->dbg;
198 batch->reset = &ice->reset;
199 batch->name = name;
200 batch->contains_fence_signal = false;
203 batch->fine_fences.uploader =
207 crocus_fine_fence_init(batch);
209 batch->hw_ctx_id = crocus_create_hw_context(screen->bufmgr);
210 assert(batch->hw_ctx_id);
212 crocus_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);
214 batch->valid_reloc_flags = EXEC_OBJECT_WRITE;
216 batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT;
220 batch->use_shadow_copy = false;
222 batch->use_shadow_copy = !devinfo->has_llc;
224 util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
225 util_dynarray_init(&batch->syncobjs, ralloc_context(NULL));
227 init_reloc_list(&batch->command.relocs, 250);
228 init_reloc_list(&batch->state.relocs, 250);
230 batch->exec_count = 0;
231 batch->exec_array_size = 100;
232 batch->exec_bos =
233 malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
234 batch->validation_list =
235 malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
237 batch->cache.render = _mesa_hash_table_create(NULL, NULL,
239 batch->cache.depth = _mesa_set_create(NULL, NULL,
242 memset(batch->other_batches, 0, sizeof(batch->other_batches));
246 batch->other_batches[j++] = &ice->batches[i];
251 batch->state_sizes = _mesa_hash_table_u64_create(NULL);
257 intel_batch_decode_ctx_init(&batch->decoder, &screen->compiler->isa,
260 decode_get_state_size, batch);
261 batch->decoder.max_vbo_decoded_lines = 32;
264 crocus_batch_reset(batch);
268 find_exec_index(struct crocus_batch *batch, struct crocus_bo *bo)
272 if (index < batch->exec_count && batch->exec_bos[index] == bo)
276 for (index = 0; index < batch->exec_count; index++) {
277 if (batch->exec_bos[index] == bo)
284 find_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo)
286 int index = find_exec_index(batch, bo);
290 return &batch->validation_list[index];
294 ensure_exec_obj_space(struct crocus_batch *batch, uint32_t count)
296 while (batch->exec_count + count > batch->exec_array_size) {
297 batch->exec_array_size *= 2;
298 batch->exec_bos = realloc(
299 batch->exec_bos, batch->exec_array_size * sizeof(batch->exec_bos[0]));
300 batch->validation_list =
301 realloc(batch->validation_list,
302 batch->exec_array_size * sizeof(batch->validation_list[0]));
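
ensure_exec_obj_space doubles exec_array_size until the request fits: classic geometric growth, giving amortized O(1) appends and only O(log N) reallocs for N insertions. A standalone sketch of the same pattern (the error check is added for illustration; the fragment above shows none):

#include <stdbool.h>
#include <stdlib.h>

/* Sketch: grow an array geometrically. Assumes a nonzero starting capacity
 * (the fragments above start exec_array_size at 100). Illustrative only. */
static bool grow_array(void **array, size_t elem_size,
                       unsigned *capacity, unsigned needed)
{
   unsigned cap = *capacity;
   while (cap < needed)
      cap *= 2;
   if (cap != *capacity) {
      void *mem = realloc(*array, (size_t)cap * elem_size);
      if (!mem)
         return false;
      *array = mem;
      *capacity = cap;
   }
   return true;
}
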
307 crocus_use_bo(struct crocus_batch *batch, struct crocus_bo *bo, bool writable)
309 assert(bo->bufmgr == batch->command.bo->bufmgr);
312 find_validation_entry(batch, bo);
321 if (bo != batch->command.bo && bo != batch->state.bo) {
322 /* This is the first time our batch has seen this BO. Before we use it,
325 for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
327 if (!batch->other_batches[b])
330 find_validation_entry(batch->other_batches[b], bo);
332 /* If the buffer is referenced by another batch, and either batch
333 * intends to write it, then flush the other batch and synchronize.
348 crocus_batch_flush(batch->other_batches[b]);
349 crocus_batch_add_syncobj(batch,
350 batch->other_batches[b]->last_fence->syncobj,
356 /* Bump the ref count since the batch is now using this bo. */
359 ensure_exec_obj_space(batch, 1);
361 batch->validation_list[batch->exec_count] =
368 bo->index = batch->exec_count;
369 batch->exec_bos[batch->exec_count] = bo;
370 batch->aperture_space += bo->size;
372 batch->exec_count++;
374 return &batch->validation_list[batch->exec_count - 1];
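
The comment above ("if the buffer is referenced by another batch, and either batch intends to write it, then flush") is a plain read/write hazard rule: read-read sharing is safe, anything involving a write is not. Restated as a predicate (illustrative helper, not driver code; the writable flags would come from the validation-list entries' EXEC_OBJECT_WRITE bits):

#include <stdbool.h>

static bool needs_cross_batch_flush(bool referenced_by_other,
                                    bool this_batch_writes,
                                    bool other_batch_writes)
{
   /* Only a write on either side forces the other batch to be flushed
    * and synchronized against. */
   return referenced_by_other && (this_batch_writes || other_batch_writes);
}
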
378 emit_reloc(struct crocus_batch *batch,
385 if (target == batch->ice->workaround_bo)
391 crocus_use_bo(batch, target, writable);
403 * Altering the validation list flags restricts it for this batch,
416 entry->flags |= reloc_flags & batch->valid_reloc_flags;
422 .target_handle = find_exec_index(batch, target),
434 crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
438 assert(batch_offset <= batch->command.bo->size - sizeof(uint32_t));
440 return emit_reloc(batch, &batch->command.relocs, batch_offset,
445 crocus_state_reloc(struct crocus_batch *batch, uint32_t state_offset,
449 assert(state_offset <= batch->state.bo->size - sizeof(uint32_t));
451 return emit_reloc(batch, &batch->state.relocs, state_offset,
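
Both wrappers funnel into emit_reloc, which appends a kernel-visible relocation record telling i915 where a GPU address was written and what address was presumed, so the kernel can patch the dword if the target BO moves. A sketch of one record using the i915 uapi type (values illustrative; the include path varies by build):

#include <stdint.h>
#include <drm/i915_drm.h>

static struct drm_i915_gem_relocation_entry
make_reloc_sketch(uint32_t target_handle, uint32_t delta,
                  uint64_t offset_in_buffer, uint64_t presumed)
{
   return (struct drm_i915_gem_relocation_entry) {
      .target_handle = target_handle,    /* handle/index of the target BO */
      .delta = delta,                    /* offset within the target BO */
      .offset = offset_in_buffer,        /* where the address dword lives */
      .presumed_offset = presumed,       /* the address we actually wrote */
   };
}
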
456 recreate_growing_buffer(struct crocus_batch *batch,
460 struct crocus_screen *screen = batch->screen;
467 if (batch->use_shadow_copy)
475 create_batch(struct crocus_batch *batch)
477 struct crocus_screen *screen = batch->screen;
479 recreate_growing_buffer(batch, &batch->command,
483 crocus_use_bo(batch, batch->command.bo, false);
488 crocus_use_bo(batch, batch->ice->workaround_bo, false);
490 recreate_growing_buffer(batch, &batch->state,
494 batch->state.used = 1;
495 crocus_use_bo(batch, batch->state.bo, false);
499 crocus_batch_maybe_noop(struct crocus_batch *batch)
501 /* We only insert the NOOP at the beginning of the batch. */
502 assert(crocus_batch_bytes_used(batch) == 0);
504 if (batch->noop_enabled) {
508 uint32_t *map = batch->command.map_next;
512 batch->command.map_next += 4;
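
The single dword written here is what turns the whole batch into a no-op: in the analogous iris code it is MI_BATCH_BUFFER_END, so the GPU terminates the batch before executing anything that follows. A sketch under that assumption:

#include <stdint.h>

/* Assumed from the analogous iris code: MI opcode 0x0A at bit 23 is
 * MI_BATCH_BUFFER_END; written as the first command, it ends the batch
 * immediately, so everything emitted after it is never executed. */
#define MI_BATCH_BUFFER_END_DW (0xAu << 23)

static void noop_out_sketch(uint32_t *map)
{
   map[0] = MI_BATCH_BUFFER_END_DW;
}
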
517 crocus_batch_reset(struct crocus_batch *batch)
519 struct crocus_screen *screen = batch->screen;
521 crocus_bo_unreference(batch->command.bo);
522 crocus_bo_unreference(batch->state.bo);
523 batch->primary_batch_size = 0;
524 batch->contains_draw = false;
525 batch->contains_fence_signal = false;
526 batch->state_base_address_emitted = false;
527 batch->screen->vtbl.batch_reset_dirty(batch);
529 create_batch(batch);
530 assert(batch->command.bo->index == 0);
532 if (batch->state_sizes)
533 _mesa_hash_table_u64_clear(batch->state_sizes);
535 crocus_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL);
538 crocus_cache_sets_clear(batch);
542 crocus_batch_free(struct crocus_batch *batch)
544 struct crocus_screen *screen = batch->screen;
547 if (batch->use_shadow_copy) {
548 free(batch->command.map);
549 free(batch->state.map);
552 for (int i = 0; i < batch->exec_count; i++) {
553 crocus_bo_unreference(batch->exec_bos[i]);
556 pipe_resource_reference(&batch->fine_fences.ref.res, NULL);
558 free(batch->command.relocs.relocs);
559 free(batch->state.relocs.relocs);
560 free(batch->exec_bos);
561 free(batch->validation_list);
563 ralloc_free(batch->exec_fences.mem_ctx);
565 util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
567 ralloc_free(batch->syncobjs.mem_ctx);
569 crocus_fine_fence_reference(batch->screen, &batch->last_fence, NULL);
570 if (batch_has_fine_fence(batch))
571 u_upload_destroy(batch->fine_fences.uploader);
573 crocus_bo_unreference(batch->command.bo);
574 crocus_bo_unreference(batch->state.bo);
575 batch->command.bo = NULL;
576 batch->command.map = NULL;
577 batch->command.map_next = NULL;
579 crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);
581 _mesa_hash_table_destroy(batch->cache.render, NULL);
582 _mesa_set_destroy(batch->cache.depth, NULL);
584 if (batch->state_sizes) {
585 _mesa_hash_table_u64_destroy(batch->state_sizes);
586 intel_batch_decode_ctx_finish(&batch->decoder);
591 * If we've chained to a secondary batch, or are getting near the end,
595 crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate)
597 if (batch->command.bo != batch->exec_bos[0] ||
598 crocus_batch_bytes_used(batch) + estimate >= BATCH_SZ) {
599 crocus_batch_flush(batch);
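
Callers invoke this with a worst-case byte estimate before emitting a group of commands, guaranteeing the group never straddles the end of the buffer. A hypothetical call pattern (the helper name and the 200-byte bound are illustrative):

static void emit_some_commands(struct crocus_batch *batch)
{
   /* Flush first if fewer than ~200 bytes remain, so the sequence below
    * cannot overflow mid-emission. */
   crocus_batch_maybe_flush(batch, 200);
   emit_hypothetical_commands(batch); /* guaranteed to fit */
}
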
604 * Finish copying the old batch/state buffer's contents to the new one
624 crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
628 struct crocus_screen *screen = batch->screen;
630 struct crocus_growing_bo *grow = grow_state ? &batch->state : &batch->command;
646 if (batch->use_shadow_copy) {
674 assert(bo->index < batch->exec_count);
675 assert(batch->exec_bos[bo->index] == bo);
678 batch->validation_list[bo->index].handle = new_bo->gem_handle;
684 * and then creates a brw_address pointing to brw->batch.state.bo.
689 * If we replace the brw->batch.state.bo pointer at step 2, we would
698 * brw->batch.batch.bo. If we replaced the batch pointer when growing,
700 * new BO. Otherwise, it would refer to a "batch" that never actually
708 * We also defer the memcpy of the existing batch's contents. Callers
711 * we finally submit the batch, at which point we've finished uploading
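
The resolution the comment describes is to keep the published struct crocus_bo pointer stable: swap the two structs' contents so existing pointers now denote the larger storage, keep the retired buffer around as a "partial" BO, and perform the deferred memcpy in finish_growing_bos() at submit time. A simplified sketch of that swap (field names illustrative):

/* Inside the grow path, after allocating new_bo (sketch): */
struct crocus_bo tmp = *bo;   /* swap guts: old pointers now see new storage */
*bo = *new_bo;
*new_bo = tmp;

grow->partial_bo = new_bo;             /* this struct now holds the OLD buffer */
grow->partial_bytes = existing_bytes;  /* copied over later, at submit */
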
733 finish_seqno(struct crocus_batch *batch)
735 struct crocus_fine_fence *sq = crocus_fine_fence_new(batch, CROCUS_FENCE_END);
739 crocus_fine_fence_reference(batch->screen, &batch->last_fence, sq);
740 crocus_fine_fence_reference(batch->screen, &sq, NULL);
744 * Terminate a batch with MI_BATCH_BUFFER_END.
747 crocus_finish_batch(struct crocus_batch *batch)
750 batch->no_wrap = true;
751 if (batch->screen->vtbl.finish_batch)
752 batch->screen->vtbl.finish_batch(batch);
754 finish_seqno(batch);
756 /* Emit MI_BATCH_BUFFER_END to finish our batch. */
757 uint32_t *map = batch->command.map_next;
761 batch->command.map_next += 4;
762 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->command.map, crocus_batch_bytes_used(batch)));
764 if (batch->command.bo == batch->exec_bos[0])
765 batch->primary_batch_size = crocus_batch_bytes_used(batch);
766 batch->no_wrap = false;
773 replace_hw_ctx(struct crocus_batch *batch)
775 struct crocus_screen *screen = batch->screen;
778 uint32_t new_ctx = crocus_clone_hw_context(bufmgr, batch->hw_ctx_id);
782 crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);
783 batch->hw_ctx_id = new_ctx;
786 crocus_lost_context_state(batch);
792 crocus_batch_check_for_reset(struct crocus_batch *batch)
794 struct crocus_screen *screen = batch->screen;
796 struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id };
802 /* A reset was observed while a batch from this hardware context was
807 /* A reset was observed while a batch from this context was in progress,
808 * but the batch was not executing. In this case, assume that the
819 replace_hw_ctx(batch);
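
The stats structure above is filled by the i915 GET_RESET_STATS ioctl: batch_active counts resets that caught this context executing (guilty), batch_pending counts resets while it was merely queued (innocent). A sketch of the query the surrounding fragments imply (include path varies by build):

#include <drm/i915_drm.h>

static enum pipe_reset_status
check_reset_sketch(int fd, uint32_t ctx_id)
{
   struct drm_i915_reset_stats stats = { .ctx_id = ctx_id };

   /* Query failure is treated as "no reset" purely for this sketch. */
   if (intel_ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      return PIPE_NO_RESET;
   if (stats.batch_active)
      return PIPE_GUILTY_CONTEXT_RESET;   /* hung while executing */
   if (stats.batch_pending)
      return PIPE_INNOCENT_CONTEXT_RESET; /* collateral damage while queued */
   return PIPE_NO_RESET;
}
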
826 * Submit the batch to the GPU via execbuffer2.
829 submit_batch(struct crocus_batch *batch)
832 if (batch->use_shadow_copy) {
833 void *bo_map = crocus_bo_map(batch->dbg, batch->command.bo, MAP_WRITE);
834 memcpy(bo_map, batch->command.map, crocus_batch_bytes_used(batch));
836 bo_map = crocus_bo_map(batch->dbg, batch->state.bo, MAP_WRITE);
837 memcpy(bo_map, batch->state.map, batch->state.used);
840 crocus_bo_unmap(batch->command.bo);
841 crocus_bo_unmap(batch->state.bo);
849 * Any render targets written to in the batch must be flagged with
856 const unsigned state_index = batch->state.bo->index;
857 if (state_index < batch->exec_count &&
858 batch->exec_bos[state_index] == batch->state.bo) {
860 &batch->validation_list[state_index];
861 assert(entry->handle == batch->state.bo->gem_handle);
862 entry->relocation_count = batch->state.relocs.reloc_count;
863 entry->relocs_ptr = (uintptr_t)batch->state.relocs.relocs;
867 struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
868 assert(entry->handle == batch->command.bo->gem_handle);
869 entry->relocation_count = batch->command.relocs.reloc_count;
870 entry->relocs_ptr = (uintptr_t)batch->command.relocs.relocs;
873 .buffers_ptr = (uintptr_t)batch->validation_list,
874 .buffer_count = batch->exec_count,
877 .batch_len = ALIGN(batch->primary_batch_size, 8),
882 .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
885 if (num_fences(batch)) {
887 execbuf.num_cliprects = num_fences(batch);
889 (uintptr_t)util_dynarray_begin(&batch->exec_fences);
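
execbuffer2 has no dedicated fence field, so the legacy cliprects fields are repurposed, gated by the I915_EXEC_FENCE_ARRAY flag; that is why the fragments above store the fence count and pointer in num_cliprects/cliprects_ptr. Each element is a drm_i915_gem_exec_fence; a sketch of one entry (handle illustrative):

#include <drm/i915_drm.h>

/* One fence-array entry, passed via cliprects_ptr when execbuf.flags
 * includes I915_EXEC_FENCE_ARRAY. WAIT gates execution on the syncobj;
 * SIGNAL fires it when the batch completes. */
struct drm_i915_gem_exec_fence fence = {
   .handle = syncobj_handle,
   .flags  = I915_EXEC_FENCE_SIGNAL, /* or I915_EXEC_FENCE_WAIT */
};
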
893 if (!batch->screen->devinfo.no_hw &&
894 intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
897 for (int i = 0; i < batch->exec_count; i++) {
898 struct crocus_bo *bo = batch->exec_bos[i];
904 if (batch->validation_list[i].offset != bo->gtt_offset) {
907 (uint64_t)batch->validation_list[i].offset);
909 bo->gtt_offset = batch->validation_list[i].offset;
927 * Flush the batch buffer, submitting it to the GPU and resetting it so
928 * we're ready to emit the next batch.
937 _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line)
939 struct crocus_screen *screen = batch->screen;
942 if (crocus_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal)
945 assert(!batch->no_wrap);
946 crocus_finish_batch(batch);
948 finish_growing_bos(&batch->command);
949 finish_growing_bos(&batch->state);
950 int ret = submit_batch(batch);
953 int bytes_for_commands = crocus_batch_bytes_used(batch);
955 if (batch->command.bo != batch->exec_bos[0]) {
957 bytes_for_commands += batch->primary_batch_size;
959 fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) "
962 file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
963 batch->primary_batch_size, second_bytes,
965 batch->exec_count,
966 (float) batch->aperture_space / (1024 * 1024),
967 batch->command.relocs.reloc_count,
968 batch->state.relocs.reloc_count);
971 dump_fence_list(batch);
972 dump_validation_list(batch);
976 decode_batch(batch);
980 for (int i = 0; i < batch->exec_count; i++) {
981 struct crocus_bo *bo = batch->exec_bos[i];
985 batch->command.relocs.reloc_count = 0;
986 batch->state.relocs.reloc_count = 0;
987 batch->exec_count = 0;
988 batch->aperture_space = 0;
990 util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
992 util_dynarray_clear(&batch->syncobjs);
994 util_dynarray_clear(&batch->exec_fences);
998 crocus_bo_wait_rendering(batch->command.bo); /* if execbuf failed, this is a nop */
1001 /* Start a new batch buffer. */
1002 crocus_batch_reset(batch);
1009 if (ret == -EIO && replace_hw_ctx(batch)) {
1010 if (batch->reset->reset) {
1012 batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET);
1029 * Does the current batch refer to the given BO?
1031 * (In other words, is the BO in the current batch's validation list?)
1034 crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo)
1036 return find_validation_entry(batch, bo) != NULL;
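
A hypothetical caller, mirroring how mapping code typically uses this check (illustrative, not a quote from the driver):

/* Before CPU-mapping a BO: if the current batch still references it, flush
 * so the pending GPU work is submitted and can be waited on. */
if (crocus_batch_references(batch, bo))
   crocus_batch_flush(batch);
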
1044 crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable)
1046 if (batch->noop_enabled == noop_enable)
1049 batch->noop_enabled = noop_enable;
1051 crocus_batch_flush(batch);
1053 /* If the batch was empty, flush had no effect, so insert our noop. */
1054 if (crocus_batch_bytes_used(batch) == 0)
1055 crocus_batch_maybe_noop(batch);
1060 return !batch->noop_enabled;