Lines Matching refs:job
29 v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
34 if (job->bo_handle_mask & bo->handle_bit) {
35 if (_mesa_set_search(job->bos, bo))
39 _mesa_set_add(job->bos, bo);
40 job->bo_count++;
41 job->bo_handle_mask |= bo->handle_bit;
45 v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
48 _mesa_set_add(job->bos, bo);
49 job->bo_count++;
50 job->bo_handle_mask |= bo->handle_bit;
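
The v3dv_job_add_bo()/v3dv_job_add_bo_unchecked() fragments above use a per-job bitmask of BO handle bits as a cheap pre-filter: the full set lookup only runs when the mask says the BO might already be tracked, and the _unchecked variant skips both steps for BOs known to be new. A minimal standalone sketch of that pattern, using illustrative stand-in types rather than the driver's structs:

#include <stdbool.h>
#include <stdint.h>

struct sketch_bo {
   uint32_t handle;
   uint64_t handle_bit;             /* e.g. 1ull << (handle % 64) */
};

struct sketch_bo_set {
   uint64_t handle_mask;            /* union of handle_bit for tracked BOs */
   struct sketch_bo *entries[256];  /* toy stand-in for the driver's hash set */
   uint32_t count;
};

static bool
sketch_set_contains(const struct sketch_bo_set *set, const struct sketch_bo *bo)
{
   for (uint32_t i = 0; i < set->count; i++) {
      if (set->entries[i] == bo)
         return true;
   }
   return false;
}

/* Mirrors the v3dv_job_add_bo() flow: the set lookup only runs when the mask
 * says this BO might already be tracked; otherwise insert directly, as the
 * _unchecked variant does unconditionally. */
static void
sketch_add_bo(struct sketch_bo_set *set, struct sketch_bo *bo)
{
   if (set->handle_mask & bo->handle_bit) {
      if (sketch_set_contains(set, bo))
         return;
   }

   set->entries[set->count++] = bo;
   set->handle_mask |= bo->handle_bit;
}
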
109 job_destroy_gpu_cl_resources(struct v3dv_job *job)
111 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
112 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
114 v3dv_cl_destroy(&job->bcl);
115 v3dv_cl_destroy(&job->rcl);
116 v3dv_cl_destroy(&job->indirect);
122 _mesa_set_destroy(job->bos, NULL);
124 v3dv_bo_free(job->device, job->tile_alloc);
125 v3dv_bo_free(job->device, job->tile_state);
129 job_destroy_cloned_gpu_cl_resources(struct v3dv_job *job)
131 assert(job->type == V3DV_JOB_TYPE_GPU_CL);
133 list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) {
135 vk_free(&job->device->vk.alloc, bo);
138 list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) {
140 vk_free(&job->device->vk.alloc, bo);
143 list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) {
145 vk_free(&job->device->vk.alloc, bo);
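
job_destroy_cloned_gpu_cl_resources() frees the shallow BO wrappers created for a cloned job, and the _safe list iterator is what makes freeing during traversal valid. A standalone illustration of that idea with a plain singly linked list (the driver uses its own intrusive list helpers; these types are stand-ins):

#include <stdlib.h>

struct sketch_node {
   struct sketch_node *next;
   /* ... cloned BO payload ... */
};

/* Cache the next pointer before freeing each node, which is exactly what the
 * _safe iterator variant above provides for the driver's intrusive lists. */
static void
sketch_destroy_list(struct sketch_node *head)
{
   struct sketch_node *node = head;
   while (node) {
      struct sketch_node *next = node->next;
      free(node);
      node = next;
   }
}
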
150 job_destroy_gpu_csd_resources(struct v3dv_job *job)
152 assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
153 assert(job->cmd_buffer);
155 v3dv_cl_destroy(&job->indirect);
157 _mesa_set_destroy(job->bos, NULL);
159 if (job->csd.shared_memory)
160 v3dv_bo_free(job->device, job->csd.shared_memory);
164 job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
166 assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
167 assert(job->cmd_buffer);
168 vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
172 v3dv_job_destroy(struct v3dv_job *job)
174 assert(job);
176 list_del(&job->list_link);
182 if (!job->is_clone) {
183 switch (job->type) {
186 job_destroy_gpu_cl_resources(job);
189 job_destroy_gpu_csd_resources(job);
192 job_destroy_cpu_wait_events_resources(job);
199 if (job->type == V3DV_JOB_TYPE_GPU_CL)
200 job_destroy_cloned_gpu_cl_resources(job);
203 vk_free(&job->device->vk.alloc, job);
240 list_for_each_entry_safe(struct v3dv_job, job,
242 v3dv_job_destroy(job);
245 if (cmd_buffer->state.job)
246 v3dv_job_destroy(cmd_buffer->state.job);
292 if (!cmd_buffer->state.job)
295 if (cmd_buffer->state.job->always_flush)
301 /* Each render pass starts a new job */
305 /* Two subpasses can be merged in the same job if we can emit a single RCL
307 * triggers the "render job finished" interrupt). We can do this so long
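
The checks at 292/295 and the comment at 301-307 describe when the driver can keep recording a new subpass into the current job instead of starting a fresh one. A hedged sketch of what such a merge check might look like; the helper name, the struct, and the exact condition list are assumptions for illustration, not the driver's actual code:

#include <stdbool.h>

/* Hypothetical stand-in for the recording state the check consults. */
struct sketch_subpass_state {
   bool has_active_job;        /* a job is currently being recorded */
   bool job_always_flush;      /* current job must flush after every draw */
   bool compatible_rt_setup;   /* both subpasses can share one RT/RCL config */
};

static bool
sketch_can_merge_subpass(const struct sketch_subpass_state *state)
{
   /* Something (e.g. a pipeline barrier) may have finished the job already,
    * so there is nothing to merge into. */
   if (!state->has_active_job)
      return false;

   /* A job that flushes on every draw can never span two subpasses. */
   if (state->job_always_flush)
      return false;

   /* Both subpasses must be coverable by a single RCL, i.e. a single
    * "render job finished" interrupt. */
   if (!state->compatible_rt_setup)
      return false;

   return true;
}
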
350 * Computes and sets the job frame tiling information required to set up frame
354 job_compute_frame_tiling(struct v3dv_job *job,
362 assert(job);
363 struct v3dv_frame_tiling *tiling = &job->frame_tiling;
416 v3dv_job_start_frame(struct v3dv_job *job,
425 assert(job);
427 /* Start by computing frame tiling spec for this job */
429 job_compute_frame_tiling(job,
433 v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
434 v3dv_return_if_oom(NULL, job);
468 job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
470 if (!job->tile_alloc) {
471 v3dv_flag_oom(NULL, job);
475 v3dv_job_add_bo_unchecked(job, job->tile_alloc);
482 job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
483 if (!job->tile_state) {
484 v3dv_flag_oom(NULL, job);
488 v3dv_job_add_bo_unchecked(job, job->tile_state);
490 v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);
492 job->ez_state = V3D_EZ_UNDECIDED;
493 job->first_ez_state = V3D_EZ_UNDECIDED;
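
v3dv_job_start_frame() allocates the per-frame tile allocation and tile state ("TSDA") BOs and flags out-of-memory via v3dv_flag_oom() instead of returning an error directly, so recording can continue and the error can be reported later. A minimal standalone sketch of that allocate-and-flag pattern, with stand-in types and calloc in place of the driver's BO allocator:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Illustrative stand-ins for the driver's BO and job types. */
struct sketch_bo { uint32_t size; };

struct sketch_job {
   struct sketch_bo *tile_alloc;    /* per-frame tile allocation memory */
   struct sketch_bo *tile_state;    /* per-frame tile state ("TSDA") */
   bool oom;                        /* deferred OOM flag, checked by the caller */
};

static struct sketch_bo *
sketch_bo_alloc(uint32_t size)
{
   struct sketch_bo *bo = calloc(1, sizeof(*bo));
   if (bo)
      bo->size = size;
   return bo;
}

/* On failure the error is flagged on the job rather than returned; on success
 * the real code also adds each BO with the _unchecked helper, since a freshly
 * allocated BO cannot already be tracked by the job. */
static void
sketch_start_frame(struct sketch_job *job,
                   uint32_t tile_alloc_size, uint32_t tile_state_size)
{
   job->tile_alloc = sketch_bo_alloc(tile_alloc_size);
   if (!job->tile_alloc) {
      job->oom = true;
      return;
   }

   job->tile_state = sketch_bo_alloc(tile_state_size);
   if (!job->tile_state) {
      job->oom = true;
      return;
   }
}
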
499 assert(cmd_buffer->state.job);
501 /* Typically, we have a single job for each subpass and we emit the job's RCL
503 * such as vkCmdClearAttachments need to run in their own separate job and
506 * those jobs, so we only emit the subpass RCL if the job has not recorded
509 if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
512 v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
521 struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
524 if (!job) {
529 v3dv_job_init(job, type, device, cmd_buffer, subpass_idx);
530 return job;
542 struct v3dv_job *job =
548 job->cpu.query_end = state->query.end.states[i];
549 list_addtail(&job->list_link, &cmd_buffer->jobs);
557 struct v3dv_job *job = cmd_buffer->state.job;
558 if (!job)
561 /* Always clear BCL state after a job has been finished if we don't have
565 * job actually required a binning sync.
573 v3dv_job_destroy(job);
574 cmd_buffer->state.job = NULL;
578 /* If we have created a job for a command buffer then we should have
579 * recorded something into it: if the job was started in a render pass, it
585 v3dv_cl_offset(&job->bcl) > 0);
587 /* When we merge multiple subpasses into the same job we must only emit one
588 * RCL, so we do that here, when we have decided that we need to finish the job.
592 assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass);
594 /* If we are finishing a job inside a render pass we have two scenarios:
596 * 1. It is a regular CL, in which case we will submit the job to the GPU,
603 * will be the primary job that branches to this CL.
606 if (job->type == V3DV_JOB_TYPE_GPU_CL) {
609 assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
614 list_addtail(&job->list_link, &cmd_buffer->jobs);
615 cmd_buffer->state.job = NULL;
617 /* If we have recorded any state with this last GPU job that requires us to
618 * emit CPU jobs after the job is completed, add them now. The only
621 * job into which we execute the secondary.
630 v3dv_job_type_is_gpu(struct v3dv_job *job)
632 switch (job->type) {
645 struct v3dv_job *job)
647 assert(cmd_buffer && job);
652 if (!v3dv_job_type_is_gpu(job))
661 if (job->type == V3DV_JOB_TYPE_GPU_CSD) {
662 assert(!job->is_transfer);
665 } else if (job->is_transfer) {
666 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
667 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY ||
668 job->type == V3DV_JOB_TYPE_GPU_TFU);
672 assert(job->type == V3DV_JOB_TYPE_GPU_CL ||
673 job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
679 job->serialize = *src_mask;
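
cmd_buffer_serialize_job_if_needed() picks which pending-barrier source mask a new job consumes based on its type: the compute mask for CSD jobs, the transfer mask for transfer CL/TFU jobs, and the graphics mask otherwise. A sketch of that selection with stand-in type and field names (only src_mask_graphics is visible in the fragments, at 1702; the other field names are assumptions):

#include <stdbool.h>
#include <stdint.h>

enum sketch_job_type {
   SKETCH_JOB_GPU_CL,
   SKETCH_JOB_GPU_CL_SECONDARY,
   SKETCH_JOB_GPU_TFU,
   SKETCH_JOB_GPU_CSD,
   SKETCH_JOB_CPU_OTHER,
};

struct sketch_barrier_state {
   uint8_t src_mask_graphics;
   uint8_t src_mask_transfer;
   uint8_t src_mask_compute;
};

struct sketch_job {
   enum sketch_job_type type;
   bool is_transfer;
   uint8_t serialize;
};

static bool
sketch_job_is_gpu(const struct sketch_job *job)
{
   return job->type != SKETCH_JOB_CPU_OTHER;
}

/* A non-zero mask makes the job serialize against earlier work; whether and
 * where the consumed mask is cleared afterwards is omitted from this sketch. */
static void
sketch_serialize_job_if_needed(struct sketch_barrier_state *barrier,
                               struct sketch_job *job)
{
   uint8_t *src_mask;

   if (!sketch_job_is_gpu(job))
      return;

   if (job->type == SKETCH_JOB_GPU_CSD)
      src_mask = &barrier->src_mask_compute;
   else if (job->is_transfer)
      src_mask = &barrier->src_mask_transfer;
   else
      src_mask = &barrier->src_mask_graphics;

   job->serialize = *src_mask;
}
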
686 v3dv_job_init(struct v3dv_job *job,
692 assert(job);
694 /* Make sure we haven't made this new job current before calling here */
695 assert(!cmd_buffer || cmd_buffer->state.job != job);
697 job->type = type;
699 job->device = device;
700 job->cmd_buffer = cmd_buffer;
702 list_inithead(&job->list_link);
707 job->bos =
709 job->bo_count = 0;
711 v3dv_cl_init(job, &job->indirect);
714 job->always_flush = true;
719 v3dv_cl_init(job, &job->bcl);
720 v3dv_cl_init(job, &job->rcl);
725 * new job.
739 /* Keep track of the first subpass that we are recording in this new job.
744 job->first_subpass = subpass_idx;
746 job->is_transfer = cmd_buffer->state.is_transfer;
748 cmd_buffer_serialize_job_if_needed(cmd_buffer, job);
750 job->perf = cmd_buffer->state.query.active_query.perf;
759 /* Don't create a new job if we can merge the current subpass into
760 * the current job.
765 cmd_buffer->state.job->is_subpass_finish = false;
766 return cmd_buffer->state.job;
769 /* Ensure we are not starting a new job without finishing a previous one */
770 if (cmd_buffer->state.job != NULL)
773 assert(cmd_buffer->state.job == NULL);
774 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
778 if (!job) {
779 fprintf(stderr, "Error: failed to allocate CPU memory for job\n");
784 v3dv_job_init(job, type, cmd_buffer->device, cmd_buffer, subpass_idx);
785 cmd_buffer->state.job = job;
787 return job;
858 * that we are no longer in a subpass by finishing the current job and
862 if (cmd_buffer->state.job)
956 * so we want to create a job for them here.
958 struct v3dv_job *job =
961 if (!job) {
1413 /* Starting a new job can trigger a finish of the current one, so don't
1414 * change the command buffer state for the new job until we are done creating
1415 * the new job.
1417 struct v3dv_job *job =
1419 if (!job)
1424 /* If we are starting a new job we need to set up binning. We only do this
1430 job->first_subpass == state->subpass_idx) {
1438 v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
1457 v3dv_job_start_frame(job,
1467 return job;
1477 struct v3dv_job *job =
1480 if (!job)
1505 return job;
1515 struct v3dv_job *job;
1517 job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
1521 job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx,
1525 if (!job)
1528 job->is_subpass_continue = true;
1530 return job;
1536 /* We can end up here without a job if the last command recorded into the
1537 * subpass already finished the job (for example a pipeline barrier). In
1541 struct v3dv_job *job = cmd_buffer->state.job;
1542 if (job)
1543 job->is_subpass_finish = true;
1578 * finishing a recording job is when we are recording a secondary
1581 if (cmd_buffer->state.job) {
1614 /* Clones a job for inclusion in the given command buffer. Note that this
1615 * doesn't make a deep copy, so the cloned job doesn't own any resources.
1616 * Useful when we need to have a job in more than one list, which happens
1621 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1624 struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc,
1633 *clone_job = *job;
1639 * cloned job. Otherwise functions like list_length() will loop forever.
1641 if (job->type == V3DV_JOB_TYPE_GPU_CL) {
1642 clone_bo_list(cmd_buffer, &clone_job->bcl.bo_list, &job->bcl.bo_list);
1643 clone_bo_list(cmd_buffer, &clone_job->rcl.bo_list, &job->rcl.bo_list);
1645 &job->indirect.bo_list);
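
The comments around v3dv_job_clone_in_cmd_buffer() explain why a clone can't simply reuse the original BO lists: an intrusive list_link can sit on only one list at a time, so clone_bo_list has to create per-entry copies with fresh links (otherwise list_length() would loop forever, as line 1639 notes). A standalone sketch of that constraint with illustrative types; both list heads are assumed to be initialized circular dummy heads:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

struct sketch_link {
   struct sketch_link *prev, *next;
};

/* The link is the first member so the cast in the loop below is valid. */
struct sketch_bo_entry {
   struct sketch_link link;
   uint32_t handle;
};

static void
sketch_list_addtail(struct sketch_link *head, struct sketch_link *item)
{
   item->prev = head->prev;
   item->next = head;
   head->prev->next = item;
   head->prev = item;
}

/* Copy every entry from src into dst, giving each copy its own link so the
 * source list is left untouched. */
static bool
sketch_clone_bo_list(struct sketch_link *dst, const struct sketch_link *src)
{
   for (const struct sketch_link *it = src->next; it != src; it = it->next) {
      const struct sketch_bo_entry *bo = (const struct sketch_bo_entry *) it;
      struct sketch_bo_entry *copy = malloc(sizeof(*copy));
      if (!copy)
         return false;
      *copy = *bo;                  /* shallow copy of the wrapper only */
      sketch_list_addtail(dst, &copy->link);
   }
   return true;
}
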
1679 * their job list into the primary's. However, because they may be
1681 * single list_link in each job, we can't just add them to the primary's
1682 * job list and we instead have to clone them first.
1684 * Alternatively, we could create an "execute secondary" CPU job that
1694 struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
1695 if (!job)
1702 job->serialize = pending_barrier.src_mask_graphics |
1707 job->needs_bcl_sync = true;
1714 * barrier state consumed with whatever comes after it (first job in
2063 (cmd_buffer->state.job, &cmd_buffer->state.clip_window);
2266 * operation in its own job (possibly with an RT config that is
2268 * after it requires that we create a new job with the subpass RT setup.
2323 struct v3dv_job *job = cmd_buffer->state.job;
2324 assert(job);
2326 /* If the job has been flagged with 'always_flush' and it has already
2327 * recorded any draw calls then we need to start a new job for it.
2329 if (job->always_flush && job->draw_count > 0) {
2331 /* First, flag the current job as not being the last in the
2334 job->is_subpass_finish = false;
2336 /* Now start a new job in the same subpass and flag it as continuing
2339 job = v3dv_cmd_buffer_subpass_resume(cmd_buffer,
2341 assert(job->draw_count == 0);
2344 job->always_flush = true;
2347 assert(job->draw_count == 0 || !job->always_flush);
2348 return job;
2366 * the job won't be valid in the scenario described by the spec.
2369 * we are in that scenario, in which case, it will restart the current job
2375 assert(cmd_buffer->state.job);
2381 if (cmd_buffer->state.job->draw_count > 0)
2388 cmd_buffer->state.job->frame_tiling.msaa) {
2397 * restart the primary job into which they are being recorded.
2402 /* Drop the current job and restart it with MSAA enabled */
2403 struct v3dv_job *old_job = cmd_buffer->state.job;
2404 cmd_buffer->state.job = NULL;
2406 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
2409 if (!job) {
2414 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer,
2416 cmd_buffer->state.job = job;
2418 v3dv_job_start_frame(job,
2509 consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2511 job->needs_bcl_sync = true;
2524 * an active job. In that case, create a new job continuing the current
2527 if (!cmd_buffer->state.job) {
2532 /* Restart single sample job for MSAA pipeline if needed */
2535 /* If the job is configured to flush on every draw call we need to create
2536 * a new job now.
2538 struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer);
2539 job->draw_count++;
2541 /* Track VK_KHR_buffer_device_address usage in the job */
2543 job->uses_buffer_device_address |= pipeline->uses_buffer_device_address;
2545 /* If this job is serialized (has consumed a barrier) then check if we need
2549 if (job->serialize && (cmd_buffer->state.barrier.bcl_buffer_access ||
2551 assert(!job->needs_bcl_sync);
2554 consume_bcl_sync(cmd_buffer, job);
2804 /* If we have a recording job, finish it here */
2805 struct v3dv_job *job = cmd_buffer->state.job;
2806 if (job)
3156 * be in the middle of job recording.
3159 assert(cmd_buffer->state.job == NULL);
3164 struct v3dv_job *job =
3170 job->cpu.query_reset.pool = pool;
3171 job->cpu.query_reset.first = first;
3172 job->cpu.query_reset.count = count;
3174 list_addtail(&job->list_link, &cmd_buffer->jobs);
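
The query-reset path here becomes a CPU job: outside a render pass there is no active GPU job (hence the assert at 3159), so the driver allocates a job of a CPU type, fills the type-specific payload under job->cpu, and appends it to the command buffer's job list. The same shape repeats below for query end/copy, events, and timestamps. A minimal standalone sketch of the pattern, with stand-in types and a hand-rolled tail append instead of list_addtail:

#include <stdint.h>
#include <stdlib.h>

struct sketch_query_pool;

enum sketch_job_type { SKETCH_JOB_CPU_QUERY_RESET /* , ... */ };

struct sketch_job {
   enum sketch_job_type type;
   struct sketch_job *next;                 /* stand-in for list_link */
   union {
      struct {
         struct sketch_query_pool *pool;
         uint32_t first;
         uint32_t count;
      } query_reset;
   } cpu;
};

struct sketch_cmd_buffer {
   struct sketch_job *jobs_head, *jobs_tail;
};

static struct sketch_job *
sketch_queue_query_reset(struct sketch_cmd_buffer *cmd_buffer,
                         struct sketch_query_pool *pool,
                         uint32_t first, uint32_t count)
{
   struct sketch_job *job = calloc(1, sizeof(*job));
   if (!job)
      return NULL;

   job->type = SKETCH_JOB_CPU_QUERY_RESET;
   job->cpu.query_reset.pool = pool;
   job->cpu.query_reset.first = first;
   job->cpu.query_reset.count = count;

   /* Append to the command buffer's job list (list_addtail in the driver). */
   if (cmd_buffer->jobs_tail)
      cmd_buffer->jobs_tail->next = job;
   else
      cmd_buffer->jobs_head = job;
   cmd_buffer->jobs_tail = job;

   return job;
}
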
3249 * job to flag all of these queries as possibly available right after the
3250 * render pass job in which they have been recorded.
3287 /* Otherwise, schedule the CPU job immediately */
3288 struct v3dv_job *job =
3294 job->cpu.query_end.pool = pool;
3295 job->cpu.query_end.query = query;
3298 job->cpu.query_end.count = 1;
3300 list_addtail(&job->list_link, &cmd_buffer->jobs);
3364 * be in the middle of job recording.
3367 assert(cmd_buffer->state.job == NULL);
3372 struct v3dv_job *job =
3378 job->cpu.query_copy_results.pool = pool;
3379 job->cpu.query_copy_results.first = first;
3380 job->cpu.query_copy_results.count = count;
3381 job->cpu.query_copy_results.dst = dst;
3382 job->cpu.query_copy_results.offset = offset;
3383 job->cpu.query_copy_results.stride = stride;
3384 job->cpu.query_copy_results.flags = flags;
3386 list_addtail(&job->list_link, &cmd_buffer->jobs);
3394 struct v3dv_job *job = vk_zalloc(&device->vk.alloc,
3397 if (!job) {
3402 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1);
3403 job->tfu = *tfu;
3404 list_addtail(&job->list_link, &cmd_buffer->jobs);
3416 * should not be in the middle of job recording.
3419 assert(cmd_buffer->state.job == NULL);
3421 struct v3dv_job *job =
3427 job->cpu.event_set.event = event;
3428 job->cpu.event_set.state = 1;
3430 list_addtail(&job->list_link, &cmd_buffer->jobs);
3442 * should not be in the middle of job recording.
3445 assert(cmd_buffer->state.job == NULL);
3447 struct v3dv_job *job =
3453 job->cpu.event_set.event = event;
3454 job->cpu.event_set.state = 0;
3456 list_addtail(&job->list_link, &cmd_buffer->jobs);
3476 struct v3dv_job *job =
3484 job->cpu.event_wait.events =
3487 if (!job->cpu.event_wait.events) {
3491 job->cpu.event_wait.event_count = eventCount;
3494 job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
3497 * an active job.
3500 * inside a render pass, it is safe to move the wait job so it happens right
3501 * before the job we are currently recording for the subpass, if any
3505 * If we are outside a render pass then we should not have any on-going job
3506 * and we are free to just add the wait job without restrictions.
3508 assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
3509 list_addtail(&job->list_link, &cmd_buffer->jobs);
3522 * job here...
3528 struct v3dv_job *job =
3534 job->cpu.query_timestamp.pool = query_pool;
3535 job->cpu.query_timestamp.query = query;
3538 job->cpu.query_timestamp.count = 1;
3542 job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
3545 list_addtail(&job->list_link, &cmd_buffer->jobs);
3546 cmd_buffer->state.job = NULL;
3588 struct v3dv_job *job = info->csd_job;
3590 assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
3593 struct drm_v3d_submit_csd *submit = &job->csd.submit;
3595 job->csd.wg_count[0] = wg_counts[0];
3596 job->csd.wg_count[1] = wg_counts[1];
3597 job->csd.wg_count[2] = wg_counts[2];
3609 * job, since we are about to overwrite some of the uniform data.
3611 v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE);
3618 assert(info->wg_uniform_offsets[i] >= (uint32_t *) job->indirect.base);
3619 assert(info->wg_uniform_offsets[i] < (uint32_t *) job->indirect.next);
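
At submit time the indirect-dispatch CPU job (cpu.csd_indirect below) knows the real workgroup counts and patches them into the CSD job and, where the shader reads them, into the already-recorded uniforms through the saved CPU pointers that the asserts above validate, after waiting for the indirect BO to go idle. A simplified sketch that folds the two job structs into one stand-in and omits the submit-config update:

#include <stdint.h>

/* Stand-in for the state involved in the rewrite; in the driver the saved
 * pointers come from job->cpu.csd_indirect.wg_uniform_offsets[] and point
 * into the CSD job's indirect uniform stream. */
struct sketch_csd_job {
   uint32_t wg_count[3];
   uint32_t *wg_uniform_offsets[3];  /* NULL if the shader never reads that count */
};

/* Stub standing in for the v3dv_bo_wait() call above: the uniform BO must be
 * idle before its contents are overwritten. */
static void
sketch_wait_indirect_bo(struct sketch_csd_job *job)
{
   (void) job;
}

/* Patch the real dispatch size into the previously recorded uniforms through
 * the saved CPU pointers. */
static void
sketch_rewrite_wg_counts(struct sketch_csd_job *job, const uint32_t wg_counts[3])
{
   sketch_wait_indirect_bo(job);

   for (int i = 0; i < 3; i++) {
      job->wg_count[i] = wg_counts[i];
      if (job->wg_uniform_offsets[i])
         *job->wg_uniform_offsets[i] = wg_counts[i];
   }
}
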
3642 struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc,
3645 if (!job) {
3650 v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1);
3651 cmd_buffer->state.job = job;
3653 struct drm_v3d_submit_csd *submit = &job->csd.submit;
3655 job->csd.wg_count[0] = group_count_x;
3656 job->csd.wg_count[1] = group_count_y;
3657 job->csd.wg_count[2] = group_count_z;
3659 job->csd.wg_base[0] = base_offset_x;
3660 job->csd.wg_base[1] = base_offset_y;
3661 job->csd.wg_base[2] = base_offset_z;
3709 job->csd.shared_memory =
3713 if (!job->csd.shared_memory) {
3715 return job;
3719 v3dv_job_add_bo_unchecked(job, cs_assembly_bo);
3727 /* Track VK_KHR_buffer_device_address usage in the job */
3728 job->uses_buffer_device_address |= pipeline->uses_buffer_device_address;
3730 v3dv_job_add_bo(job, uniforms.bo);
3732 return job;
3747 struct v3dv_job *job =
3757 list_addtail(&job->list_link, &cmd_buffer->jobs);
3758 cmd_buffer->state.job = NULL;
3797 /* We can't do indirect dispatches, so instead we record a CPU job that,
3801 struct v3dv_job *job =
3807 /* We need to create a CSD job now, even if we still don't know the actual
3808 * dispatch parameters, because the job setup needs to be done using the
3810 * constants, etc.). So we create the job with default dispatch parameters
3812 * parameters don't match the ones we used to set up the job.
3818 &job->cpu.csd_indirect.wg_uniform_offsets[0],
3819 &job->cpu.csd_indirect.wg_size);
3823 job->cpu.csd_indirect.buffer = buffer;
3824 job->cpu.csd_indirect.offset = offset;
3825 job->cpu.csd_indirect.csd_job = csd_job;
3830 job->cpu.csd_indirect.needs_wg_uniform_rewrite =
3831 job->cpu.csd_indirect.wg_uniform_offsets[0] ||
3832 job->cpu.csd_indirect.wg_uniform_offsets[1] ||
3833 job->cpu.csd_indirect.wg_uniform_offsets[2];
3835 list_addtail(&job->list_link, &cmd_buffer->jobs);
3837 cmd_buffer->state.job = NULL;