Lines Matching refs:job
29 v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
34 if (job->bo_handle_mask & bo->handle_bit) {
35 if (_mesa_set_search(job->bos, bo))
39 _mesa_set_add(job->bos, bo);
40 job->bo_count++;
41 job->bo_handle_mask |= bo->handle_bit;
45 v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
48 _mesa_set_add(job->bos, bo);
49 job->bo_count++;
50 job->bo_handle_mask |= bo->handle_bit;
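
The v3dv_job_add_bo()/v3dv_job_add_bo_unchecked() fragments above use a per-job bitmask of BO handle bits as a cheap pre-filter: the full set lookup only runs when the mask says the BO might already be tracked, and the _unchecked variant skips both steps for BOs known to be new. A minimal standalone sketch of that pattern, using illustrative stand-in types rather than the driver's structs:

#include <stdbool.h>
#include <stdint.h>

struct sketch_bo {
   uint32_t handle;
   uint64_t handle_bit;             /* e.g. 1ull << (handle % 64) */
};

struct sketch_bo_set {
   uint64_t handle_mask;            /* union of handle_bit for tracked BOs */
   struct sketch_bo *entries[256];  /* toy stand-in for the driver's hash set */
   uint32_t count;
};

static bool
sketch_set_contains(const struct sketch_bo_set *set, const struct sketch_bo *bo)
{
   for (uint32_t i = 0; i < set->count; i++) {
      if (set->entries[i] == bo)
         return true;
   }
   return false;
}

/* Mirrors the v3dv_job_add_bo() flow: the set lookup only runs when the mask
 * says this BO might already be tracked; otherwise insert directly, as the
 * _unchecked variant does unconditionally. */
static void
sketch_add_bo(struct sketch_bo_set *set, struct sketch_bo *bo)
{
   if (set->handle_mask & bo->handle_bit) {
      if (sketch_set_contains(set, bo))
         return;
   }

   set->entries[set->count++] = bo;
   set->handle_mask |= bo->handle_bit;
}
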
109 job_destroy_gpu_cl_resources(struct v3dv_job *job)
111 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
112 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
114 v3dv_cl_destroy(&job->bcl);
115 v3dv_cl_destroy(&job->rcl);
116 v3dv_cl_destroy(&job->indirect);
122 _mesa_set_destroy(job->bos, NULL);
124 v3dv_bo_free(job->device, job->tile_alloc);
125 v3dv_bo_free(job->device, job->tile_state);
129 job_destroy_cloned_gpu_cl_resources(struct v3dv_job *job)
131 assert(job->type == V3DV_JOB_TYPE_GPU_CL);
133 list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) {
135 vk_free(&job->device->vk.alloc, bo);
138 list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) {
140 vk_free(&job->device->vk.alloc, bo);
143 list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) {
145 vk_free(&job->device->vk.alloc, bo);
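
job_destroy_cloned_gpu_cl_resources() frees the shallow BO wrappers created for a cloned job, and the _safe list iterator is what makes freeing during traversal valid. A standalone illustration of that idea with a plain singly linked list (the driver uses its own intrusive list helpers; these types are stand-ins):

#include <stdlib.h>

struct sketch_node {
   struct sketch_node *next;
   /* ... cloned BO payload ... */
};

/* Cache the next pointer before freeing each node, which is exactly what the
 * _safe iterator variant above provides for the driver's intrusive lists. */
static void
sketch_destroy_list(struct sketch_node *head)
{
   struct sketch_node *node = head;
   while (node) {
      struct sketch_node *next = node->next;
      free(node);
      node = next;
   }
}
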
150 job_destroy_gpu_csd_resources(struct v3dv_job *job)
152 assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
153 assert(job->cmd_buffer);
155 v3dv_cl_destroy(&job->indirect);
157 _mesa_set_destroy(job->bos, NULL);
159 if (job->csd.shared_memory)
160 v3dv_bo_free(job->device, job->csd.shared_memory);
164 job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
166 assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
167 assert(job->cmd_buffer);
168 vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
172 v3dv_job_destroy(struct v3dv_job *job)
174 assert(job);
176 list_del(&job->list_link);
182 if (!job->is_clone) {
183 switch (job->type) {
186 job_destroy_gpu_cl_resources(job);
189 job_destroy_gpu_csd_resources(job);
192 job_destroy_cpu_wait_events_resources(job);
199 if (job->type == V3DV_JOB_TYPE_GPU_CL)
200 job_destroy_cloned_gpu_cl_resources(job);
203 vk_free(&job->device->vk.alloc, job);
240 list_for_each_entry_safe(struct v3dv_job, job,
242 v3dv_job_destroy(job);
245 if (cmd_buffer->state.job)
246 v3dv_job_destroy(cmd_buffer->state.job);
292 if (!cmd_buffer->state.job)
295 if (cmd_buffer->state.job->always_flush)
301 /* Each render pass starts a new job */
305 /* Two subpasses can be merged in the same job if we can emit a single RCL
307 * triggers the "render job finished" interrupt). We can do this so long
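
The checks at 292/295 and the comment at 301-307 describe when the driver can keep recording a new subpass into the current job instead of starting a fresh one. A hedged sketch of what such a merge check might look like; the helper name, the struct, and the exact condition list are assumptions for illustration, not the driver's actual code:

#include <stdbool.h>

/* Hypothetical stand-in for the recording state the check consults. */
struct sketch_subpass_state {
   bool has_active_job;        /* a job is currently being recorded */
   bool job_always_flush;      /* current job must flush after every draw */
   bool compatible_rt_setup;   /* both subpasses can share one RT/RCL config */
};

static bool
sketch_can_merge_subpass(const struct sketch_subpass_state *state)
{
   /* Something (e.g. a pipeline barrier) may have finished the job already,
    * so there is nothing to merge into. */
   if (!state->has_active_job)
      return false;

   /* A job that flushes on every draw can never span two subpasses. */
   if (state->job_always_flush)
      return false;

   /* Both subpasses must be coverable by a single RCL, i.e. a single
    * "render job finished" interrupt. */
   if (!state->compatible_rt_setup)
      return false;

   return true;
}
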
350 * Computes and sets the job frame tiling information required to set up frame
354 job_compute_frame_tiling(struct v3dv_job *job,
362 assert(job);
363 struct v3dv_frame_tiling *tiling = &job->frame_tiling;
416 v3dv_job_start_frame(struct v3dv_job *job,
425 assert(job);
427 /* Start by computing frame tiling spec for this job */
429 job_compute_frame_tiling(job,
433 v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
434 v3dv_return_if_oom(NULL, job);
468 job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
470 if (!job->tile_alloc) {
471 v3dv_flag_oom(NULL, job);
475 v3dv_job_add_bo_unchecked(job, job->tile_alloc);
482 job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
483 if (!job->tile_state) {
484 v3dv_flag_oom(NULL, job);
488 v3dv_job_add_bo_unchecked(job, job->tile_state);
490 v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);
492 job->ez_state = V3D_EZ_UNDECIDED;
493 job->first_ez_state = V3D_EZ_UNDECIDED;
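
v3dv_job_start_frame() allocates the per-frame tile allocation and tile state ("TSDA") BOs and flags out-of-memory via v3dv_flag_oom() instead of returning an error directly, so recording can continue and the error can be reported later. A minimal standalone sketch of that allocate-and-flag pattern, with stand-in types and calloc in place of the driver's BO allocator:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Illustrative stand-ins for the driver's BO and job types. */
struct sketch_bo { uint32_t size; };

struct sketch_job {
   struct sketch_bo *tile_alloc;    /* per-frame tile allocation memory */
   struct sketch_bo *tile_state;    /* per-frame tile state ("TSDA") */
   bool oom;                        /* deferred OOM flag, checked by the caller */
};

static struct sketch_bo *
sketch_bo_alloc(uint32_t size)
{
   struct sketch_bo *bo = calloc(1, sizeof(*bo));
   if (bo)
      bo->size = size;
   return bo;
}

/* On failure the error is flagged on the job rather than returned; on success
 * the real code also adds each BO with the _unchecked helper, since a freshly
 * allocated BO cannot already be tracked by the job. */
static void
sketch_start_frame(struct sketch_job *job,
                   uint32_t tile_alloc_size, uint32_t tile_state_size)
{
   job->tile_alloc = sketch_bo_alloc(tile_alloc_size);
   if (!job->tile_alloc) {
      job->oom = true;
      return;
   }

   job->tile_state = sketch_bo_alloc(tile_state_size);
   if (!job->tile_state) {
      job->oom = true;
      return;
   }
}
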
499 assert(cmd_buffer->state.job);
501 /* Typically, we have a single job for each subpass and we emit the job's RCL
503 * such as vkCmdClearAttachments need to run in their own separate job and
506 * those jobs, so we only emit the subpass RCL if the job has not recorded
509 if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
512 v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
521 struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
524 if (!job) {
529 v3dv_job_init(job, type, device, cmd_buffer, subpass_idx);
530 return job;
542 struct v3dv_job *job =
548 job->cpu.query_end = state->query.end.states[i];
549 list_addtail(&job->list_link, &cmd_buffer->jobs);
557 struct v3dv_job *job = cmd_buffer->state.job;
558 if (!job)
561 /* Always clear BCL state after a job has been finished if we don't have
565 * job actually required a binning sync.
573 v3dv_job_destroy(job);
574 cmd_buffer->state.job = NULL;
578 /* If we have created a job for a command buffer then we should have
579 * recorded something into it: if the job was started in a render pass, it
585 v3dv_cl_offset(&job->bcl) > 0);
587 /* When we merge multiple subpasses into the same job we must only emit one
588 * RCL, so we do that here, when we have decided that we need to finish the job.
592 assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass);
594 /* If we are finishing a job inside a render pass we have two scenarios:
596 * 1. It is a regular CL, in which case we will submit the job to the GPU,
603 * will be the primary job that branches to this CL.
606 if (job->type == V3DV_JOB_TYPE_GPU_CL) {
609 assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
614 list_addtail(&job->list_link, &cmd_buffer->jobs);
615 cmd_buffer->state.job = NULL;
617 /* If we have recorded any state with this last GPU job that requires us to
618 * emit CPU jobs after the job is completed, add them now. The only
621 * job into which we execute the secondary.
630 v3dv_job_type_is_gpu(struct v3dv_job *job)
632 switch (job->type) {
645 struct v3dv_job *job)
647 assert(cmd_buffer && job);
652 if (!v3dv_job_type_is_gpu(job))
661 if (job->type == V3DV_JOB_TYPE_GPU_CSD) {
662 assert(!job->is_transfer);
665 } else if (job->is_transfer) {
666 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
667 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY ||
668 job->type == V3DV_JOB_TYPE_GPU_TFU);
672 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
673 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
679 job->serialize = *src_mask;
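
cmd_buffer_serialize_job_if_needed() picks which pending-barrier source mask a new job consumes based on its type: the compute mask for CSD jobs, the transfer mask for transfer CL/TFU jobs, and the graphics mask otherwise. A sketch of that selection with stand-in type and field names (only src_mask_graphics is visible in the fragments, at 1702; the other field names are assumptions):

#include <stdbool.h>
#include <stdint.h>

enum sketch_job_type {
   SKETCH_JOB_GPU_CL,
   SKETCH_JOB_GPU_CL_SECONDARY,
   SKETCH_JOB_GPU_TFU,
   SKETCH_JOB_GPU_CSD,
   SKETCH_JOB_CPU_OTHER,
};

struct sketch_barrier_state {
   uint8_t src_mask_graphics;
   uint8_t src_mask_transfer;
   uint8_t src_mask_compute;
};

struct sketch_job {
   enum sketch_job_type type;
   bool is_transfer;
   uint8_t serialize;
};

static bool
sketch_job_is_gpu(const struct sketch_job *job)
{
   return job->type != SKETCH_JOB_CPU_OTHER;
}

/* A non-zero mask makes the job serialize against earlier work; whether and
 * where the consumed mask is cleared afterwards is omitted from this sketch. */
static void
sketch_serialize_job_if_needed(struct sketch_barrier_state *barrier,
                               struct sketch_job *job)
{
   uint8_t *src_mask;

   if (!sketch_job_is_gpu(job))
      return;

   if (job->type == SKETCH_JOB_GPU_CSD)
      src_mask = &barrier->src_mask_compute;
   else if (job->is_transfer)
      src_mask = &barrier->src_mask_transfer;
   else
      src_mask = &barrier->src_mask_graphics;

   job->serialize = *src_mask;
}
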
686 v3dv_job_init(struct v3dv_job *job,
692 assert(job);
694 /* Make sure we haven't made this new job current before calling here */
695 assert(!cmd_buffer || cmd_buffer->state.job != job);
697 job->type = type;
699 job->device = device;
700 job->cmd_buffer = cmd_buffer;
702 list_inithead(&job->list_link);
707 job->bos =
709 job->bo_count = 0;
711 v3dv_cl_init(job, &job->indirect);
714 job->always_flush = true;
719 v3dv_cl_init(job, &job->bcl);
720 v3dv_cl_init(job, &job->rcl);
725 * new job.
739 /* Keep track of the first subpass that we are recording in this new job.
744 job->first_subpass = subpass_idx;
746 job->is_transfer = cmd_buffer->state.is_transfer;
748 cmd_buffer_serialize_job_if_needed(cmd_buffer, job);
750 job->perf = cmd_buffer->state.query.active_query.perf;
759 /* Don't create a new job if we can merge the current subpass into
760 * the current job.
765 cmd_buffer->state.job->is_subpass_finish = false;
766 return cmd_buffer->state.job;
769 /* Ensure we are not starting a new job without finishing a previous one */
770 if (cmd_buffer->state.job != NULL)
773 assert(cmd_buffer->state.job == NULL);
774 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
778 if (!job) {
779 fprintf(stderr, "Error: failed to allocate CPU memory for job\n");
784 v3dv_job_init(job, type, cmd_buffer->device, cmd_buffer, subpass_idx);
785 cmd_buffer->state.job = job;
787 return job;
858 * that we are no longer in a subpass by finishing the current job and
862 if (cmd_buffer->state.job)
956 * so we want to create a job for them here.
958 struct v3dv_job *job =
961 if (!job) {
1413 /* Starting a new job can trigger a finish of the current one, so don't
1414 * change the command buffer state for the new job until we are done creating
1415 * the new job.
1417 struct v3dv_job *job =
1419 if (!job)
1424 /* If we are starting a new job we need to set up binning. We only do this
1430 job->first_subpass == state->subpass_idx) {
1438 v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
1457 v3dv_job_start_frame(job,
1467 return job;
1477 struct v3dv_job *job =
1480 if (!job)
1505 return job;
1515 struct v3dv_job *job;
1517 job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
1521 job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
1525 if (!job)
1528 job->is_subpass_continue = true;
1530 return job;
1536 /* We can end up here without a job if the last command recorded into the
1537 * subpass already finished the job (for example a pipeline barrier). In
1541 struct v3dv_job *job = cmd_buffer->state.job;
1542 if (job)
1543 job->is_subpass_finish = true;
1578 * finishing a recording job is when we are recording a secondary
1581 if (cmd_buffer->state.job) {
1614 /* Clones a job for inclusion in the given command buffer. Note that this
1615 * doesn't make a deep copy, so the cloned job doesn't own any resources.
1616 * Useful when we need to have a job in more than one list, which happens
1621 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1624 struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc,
1633 *clone_job = *job;
1639 * cloned job. Otherwise functions like list_length() will loop forever.
1641 if (job->type == V3DV_JOB_TYPE_GPU_CL) {
1642 clone_bo_list(cmd_buffer, &clone_job->bcl.bo_list, &job->bcl.bo_list);
1643 clone_bo_list(cmd_buffer, &clone_job->rcl.bo_list, &job->rcl.bo_list);
1645 &job->indirect.bo_list);
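
The comments around v3dv_job_clone_in_cmd_buffer() explain why a clone can't simply reuse the original BO lists: an intrusive list_link can sit on only one list at a time, so clone_bo_list has to create per-entry copies with fresh links (otherwise list_length() would loop forever, as line 1639 notes). A standalone sketch of that constraint with illustrative types; both list heads are assumed to be initialized circular dummy heads:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

struct sketch_link {
   struct sketch_link *prev, *next;
};

/* The link is the first member so the cast in the loop below is valid. */
struct sketch_bo_entry {
   struct sketch_link link;
   uint32_t handle;
};

static void
sketch_list_addtail(struct sketch_link *head, struct sketch_link *item)
{
   item->prev = head->prev;
   item->next = head;
   head->prev->next = item;
   head->prev = item;
}

/* Copy every entry from src into dst, giving each copy its own link so the
 * source list is left untouched. */
static bool
sketch_clone_bo_list(struct sketch_link *dst, const struct sketch_link *src)
{
   for (const struct sketch_link *it = src->next; it != src; it = it->next) {
      const struct sketch_bo_entry *bo = (const struct sketch_bo_entry *) it;
      struct sketch_bo_entry *copy = malloc(sizeof(*copy));
      if (!copy)
         return false;
      *copy = *bo;                  /* shallow copy of the wrapper only */
      sketch_list_addtail(dst, &copy->link);
   }
   return true;
}
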
1679 * their job list into the primary's. However, because they may be
1681 * single list_link in each job, we can't just add them to the primary's
1682 * job list and we instead have to clone them first.
1684 * Alternatively, we could create an "execute secondary" CPU job that
1694 struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
1695 if (!job)
1702 job->serialize = pending_barrier.src_mask_graphics |
1707 job->needs_bcl_sync = true;
1714 * barrier state consumed with whatever comes after it (first job in
2063 (cmd_buffer->state.job, &cmd_buffer->state.clip_window);
2266 * operation in its own job (possibly with an RT config that is
2268 * after it requires that we create a new job with the subpass RT setup.
2323 struct v3dv_job *job = cmd_buffer->state.job;
2324 assert(job);
2326 /* If the job has been flagged with 'always_flush' and it has already
2327 * recorded any draw calls then we need to start a new job for it.
2329 if (job->always_flush && job->draw_count > 0) {
2331 /* First, flag the current job as not being the last in the
2334 job->is_subpass_finish = false;
2336 /* Now start a new job in the same subpass and flag it as continuing
2339 job = v3dv_cmd_buffer_subpass_resume(cmd_buffer,
2341 assert(job->draw_count == 0);
2344 job->always_flush = true;
2347 assert(job->draw_count == 0 || !job->always_flush);
2348 return job;
2366 * the job won't be valid in the scenario described by the spec.
2369 * we are in that scenario, in which case, it will restart the current job
2375 assert(cmd_buffer->state.job);
2381 if (cmd_buffer->state.job->draw_count > 0)
2388 cmd_buffer->state.job->frame_tiling.msaa) {
2397 * restart the primary job into which they are being recorded.
2402 /* Drop the current job and restart it with MSAA enabled */
2403 struct v3dv_job *old_job = cmd_buffer->state.job;
2404 cmd_buffer->state.job = NULL;
2406 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
2409 if (!job) {
2414 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer,
2416 cmd_buffer->state.job = job;
2418 v3dv_job_start_frame(job,
2509 consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2511 job->needs_bcl_sync = true;
2524 * an active job. In that case, create a new job continuing the current
2527 if (!cmd_buffer->state.job) {
2532 /* Restart single sample job for MSAA pipeline if needed */
2535 /* If the job is configured to flush on every draw call we need to create
2536 * a new job now.
2538 struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
2539 job->draw_count++;
2541 /* Track VK_KHR_buffer_device_address usage in the job */
2543 job->uses_buffer_device_address |= pipeline->uses_buffer_device_address;
2545 /* If this job is serialized (has consumed a barrier) then check if we need
2549 if (job->serialize && (cmd_buffer->state.barrier.bcl_buffer_access ||
2551 assert(!job->needs_bcl_sync);
2554 consume_bcl_sync(cmd_buffer, job);
2804 /* If we have a recording job, finish it here */
2805 struct v3dv_job *job = cmd_buffer->state.job;
2806 if (job)
3156 * be in the middle of job recording.
3159 assert(cmd_buffer->state.job == NULL);
3164 struct v3dv_job *job =
3170 job->cpu.query_reset.pool = pool;
3171 job->cpu.query_reset.first = first;
3172 job->cpu.query_reset.count = count;
3174 list_addtail(&job->list_link, &cmd_buffer->jobs);
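
The query-reset path here becomes a CPU job: outside a render pass there is no active GPU job (hence the assert at 3159), so the driver allocates a job of a CPU type, fills the type-specific payload under job->cpu, and appends it to the command buffer's job list. The same shape repeats below for query end/copy, events, and timestamps. A minimal standalone sketch of the pattern, with stand-in types and a hand-rolled tail append instead of list_addtail:

#include <stdint.h>
#include <stdlib.h>

struct sketch_query_pool;

enum sketch_job_type { SKETCH_JOB_CPU_QUERY_RESET /* , ... */ };

struct sketch_job {
   enum sketch_job_type type;
   struct sketch_job *next;                 /* stand-in for list_link */
   union {
      struct {
         struct sketch_query_pool *pool;
         uint32_t first;
         uint32_t count;
      } query_reset;
   } cpu;
};

struct sketch_cmd_buffer {
   struct sketch_job *jobs_head, *jobs_tail;
};

static struct sketch_job *
sketch_queue_query_reset(struct sketch_cmd_buffer *cmd_buffer,
                         struct sketch_query_pool *pool,
                         uint32_t first, uint32_t count)
{
   struct sketch_job *job = calloc(1, sizeof(*job));
   if (!job)
      return NULL;

   job->type = SKETCH_JOB_CPU_QUERY_RESET;
   job->cpu.query_reset.pool = pool;
   job->cpu.query_reset.first = first;
   job->cpu.query_reset.count = count;

   /* Append to the command buffer's job list (list_addtail in the driver). */
   if (cmd_buffer->jobs_tail)
      cmd_buffer->jobs_tail->next = job;
   else
      cmd_buffer->jobs_head = job;
   cmd_buffer->jobs_tail = job;

   return job;
}
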
3249 * job to flag all of these queries as possibly available right after the
3250 * render pass job in which they have been recorded.
3287 /* Otherwise, schedule the CPU job immediately */
3288 struct v3dv_job *job =
3294 job->cpu.query_end.pool = pool;
3295 job->cpu.query_end.query = query;
3298 job->cpu.query_end.count = 1;
3300 list_addtail(&job->list_link, &cmd_buffer->jobs);
3364 * be in the middle of job recording.
3367 assert(cmd_buffer->state.job == NULL);
3372 struct v3dv_job *job =
3378 job->cpu.query_copy_results.pool = pool;
3379 job->cpu.query_copy_results.first = first;
3380 job->cpu.query_copy_results.count = count;
3381 job->cpu.query_copy_results.dst = dst;
3382 job->cpu.query_copy_results.offset = offset;
3383 job->cpu.query_copy_results.stride = stride;
3384 job->cpu.query_copy_results.flags = flags;
3386 list_addtail(&job->list_link, &cmd_buffer->jobs);
3394 struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
3397 if (!job) {
3402 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1);
3403 job->tfu = *tfu;
3404 list_addtail(&job->list_link, &cmd_buffer->jobs);
3416 * should not be in the middle of job recording.
3419 assert(cmd_buffer->state.job == NULL);
3421 struct v3dv_job *job =
3427 job->cpu.event_set.event = event;
3428 job->cpu.event_set.state = 1;
3430 list_addtail(&job->list_link, &cmd_buffer->jobs);
3442 * should not be in the middle of job recording.
3445 assert(cmd_buffer->state.job == NULL);
3447 struct v3dv_job *job =
3453 job->cpu.event_set.event = event;
3454 job->cpu.event_set.state = 0;
3456 list_addtail(&job->list_link, &cmd_buffer->jobs);
3476 struct v3dv_job *job =
3484 job->cpu.event_wait.events =
3487 if (!job->cpu.event_wait.events) {
3491 job->cpu.event_wait.event_count = eventCount;
3494 job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
3497 * an active job.
3500 * inside a render pass, it is safe to move the wait job so it happens right
3501 * before the job we are currently recording for the subpass, if any
3505 * If we are outside a render pass then we should not have any on-going job
3506 * and we are free to just add the wait job without restrictions.
3508 assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
3509 list_addtail(&job->list_link, &cmd_buffer->jobs);
3522 * job here...
3528 struct v3dv_job *job =
3534 job->cpu.query_timestamp.pool = query_pool;
3535 job->cpu.query_timestamp.query = query;
3538 job->cpu.query_timestamp.count = 1;
3542 job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
3545 list_addtail(&job->list_link, &cmd_buffer->jobs);
3546 cmd_buffer->state.job = NULL;
3588 struct v3dv_job *job = info->csd_job;
3590 assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
3593 struct drm_v3d_submit_csd *submit = &job->csd.submit;
3595 job->csd.wg_count[0] = wg_counts[0];
3596 job->csd.wg_count[1] = wg_counts[1];
3597 job->csd.wg_count[2] = wg_counts[2];
3609 * job, since we are about to overwrite some of the uniform data.
3611 v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE);
3618 assert(info->wg_uniform_offsets[i] >= (uint32_t *) job->indirect.base);
3619 assert(info->wg_uniform_offsets[i] < (uint32_t *) job->indirect.next);
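
At submit time the indirect-dispatch CPU job (cpu.csd_indirect below) knows the real workgroup counts and patches them into the CSD job and, where the shader reads them, into the already-recorded uniforms through the saved CPU pointers that the asserts above validate, after waiting for the indirect BO to go idle. A simplified sketch that folds the two job structs into one stand-in and omits the submit-config update:

#include <stdint.h>

/* Stand-in for the state involved in the rewrite; in the driver the saved
 * pointers come from job->cpu.csd_indirect.wg_uniform_offsets[] and point
 * into the CSD job's indirect uniform stream. */
struct sketch_csd_job {
   uint32_t wg_count[3];
   uint32_t *wg_uniform_offsets[3];  /* NULL if the shader never reads that count */
};

/* Stub standing in for the v3dv_bo_wait() call above: the uniform BO must be
 * idle before its contents are overwritten. */
static void
sketch_wait_indirect_bo(struct sketch_csd_job *job)
{
   (void) job;
}

/* Patch the real dispatch size into the previously recorded uniforms through
 * the saved CPU pointers. */
static void
sketch_rewrite_wg_counts(struct sketch_csd_job *job, const uint32_t wg_counts[3])
{
   sketch_wait_indirect_bo(job);

   for (int i = 0; i < 3; i++) {
      job->wg_count[i] = wg_counts[i];
      if (job->wg_uniform_offsets[i])
         *job->wg_uniform_offsets[i] = wg_counts[i];
   }
}
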
3642 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
3645 if (!job) {
3650 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1);
3651 cmd_buffer->state.job = job;
3653 struct drm_v3d_submit_csd *submit = &job->csd.submit;
3655 job->csd.wg_count[0] = group_count_x;
3656 job->csd.wg_count[1] = group_count_y;
3657 job->csd.wg_count[2] = group_count_z;
3659 job->csd.wg_base[0] = base_offset_x;
3660 job->csd.wg_base[1] = base_offset_y;
3661 job->csd.wg_base[2] = base_offset_z;
3709 job->csd.shared_memory =
3713 if (!job->csd.shared_memory) {
3715 return job;
3719 v3dv_job_add_bo_unchecked(job, cs_assembly_bo);
3727 /* Track VK_KHR_buffer_device_address usage in the job */
3728 job->uses_buffer_device_address |= pipeline->uses_buffer_device_address;
3730 v3dv_job_add_bo(job, uniforms.bo);
3732 return job;
3747 struct v3dv_job *job =
3757 list_addtail(&job->list_link, &cmd_buffer->jobs);
3758 cmd_buffer->state.job = NULL;
3797 /* We can't do indirect dispatches, so instead we record a CPU job that,
3801 struct v3dv_job *job =
3807 /* We need to create a CSD job now, even if we still don't know the actual
3808 * dispatch parameters, because the job setup needs to be done using the
3810 * constants, etc.). So we create the job with default dispatch parameters
3812 * parameters don't match the ones we used to set up the job.
3818 &job->cpu.csd_indirect.wg_uniform_offsets[0],
3819 &job->cpu.csd_indirect.wg_size);
3823 job->cpu.csd_indirect.buffer = buffer;
3824 job->cpu.csd_indirect.offset = offset;
3825 job->cpu.csd_indirect.csd_job = csd_job;
3830 job->cpu.csd_indirect.needs_wg_uniform_rewrite =
3831 job->cpu.csd_indirect.wg_uniform_offsets[0] ||
3832 job->cpu.csd_indirect.wg_uniform_offsets[1] ||
3833 job->cpu.csd_indirect.wg_uniform_offsets[2];
3835 list_addtail(&job->list_link, &cmd_buffer->jobs);
3837 cmd_buffer->state.job = NULL;