1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Raspberry Pi Ltd 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "v3dv_private.h" 25bf215546Sopenharmony_ci#include "drm-uapi/v3d_drm.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "broadcom/clif/clif_dump.h" 28bf215546Sopenharmony_ci#include "util/libsync.h" 29bf215546Sopenharmony_ci#include "util/os_time.h" 30bf215546Sopenharmony_ci#include "vk_drm_syncobj.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include <errno.h> 33bf215546Sopenharmony_ci#include <time.h> 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic void 36bf215546Sopenharmony_civ3dv_clif_dump(struct v3dv_device *device, 37bf215546Sopenharmony_ci struct v3dv_job *job, 38bf215546Sopenharmony_ci struct drm_v3d_submit_cl *submit) 39bf215546Sopenharmony_ci{ 40bf215546Sopenharmony_ci if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL | 41bf215546Sopenharmony_ci V3D_DEBUG_CL_NO_BIN | 42bf215546Sopenharmony_ci V3D_DEBUG_CLIF)))) 43bf215546Sopenharmony_ci return; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci struct clif_dump *clif = clif_dump_init(&device->devinfo, 46bf215546Sopenharmony_ci stderr, 47bf215546Sopenharmony_ci V3D_DEBUG & (V3D_DEBUG_CL | 48bf215546Sopenharmony_ci V3D_DEBUG_CL_NO_BIN), 49bf215546Sopenharmony_ci V3D_DEBUG & V3D_DEBUG_CL_NO_BIN); 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci set_foreach(job->bos, entry) { 52bf215546Sopenharmony_ci struct v3dv_bo *bo = (void *)entry->key; 53bf215546Sopenharmony_ci char *name = ralloc_asprintf(NULL, "%s_0x%x", 54bf215546Sopenharmony_ci bo->name, bo->offset); 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci bool ok = v3dv_bo_map(device, bo, bo->size); 57bf215546Sopenharmony_ci if (!ok) { 58bf215546Sopenharmony_ci fprintf(stderr, "failed to map BO for clif_dump.\n"); 59bf215546Sopenharmony_ci ralloc_free(name); 60bf215546Sopenharmony_ci goto free_clif; 61bf215546Sopenharmony_ci } 62bf215546Sopenharmony_ci clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci ralloc_free(name); 65bf215546Sopenharmony_ci } 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci clif_dump(clif, submit); 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci free_clif: 70bf215546Sopenharmony_ci clif_dump_destroy(clif); 71bf215546Sopenharmony_ci} 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cistatic VkResult 74bf215546Sopenharmony_ciqueue_wait_idle(struct v3dv_queue *queue, 75bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci if (queue->device->pdevice->caps.multisync) { 78bf215546Sopenharmony_ci int ret = drmSyncobjWait(queue->device->pdevice->render_fd, 79bf215546Sopenharmony_ci queue->last_job_syncs.syncs, 3, 80bf215546Sopenharmony_ci INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, 81bf215546Sopenharmony_ci NULL); 82bf215546Sopenharmony_ci if (ret) { 83bf215546Sopenharmony_ci return vk_errorf(queue, VK_ERROR_DEVICE_LOST, 84bf215546Sopenharmony_ci "syncobj wait failed: %m"); 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci bool first = true; 88bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 89bf215546Sopenharmony_ci if (!queue->last_job_syncs.first[i]) 90bf215546Sopenharmony_ci first = false; 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci /* If we're not the first job, that means we're waiting on some 94bf215546Sopenharmony_ci * per-queue-type syncobj which transitively waited on the semaphores 95bf215546Sopenharmony_ci * so we can skip the semaphore wait. 96bf215546Sopenharmony_ci */ 97bf215546Sopenharmony_ci if (first) { 98bf215546Sopenharmony_ci VkResult result = vk_sync_wait_many(&queue->device->vk, 99bf215546Sopenharmony_ci sync_info->wait_count, 100bf215546Sopenharmony_ci sync_info->waits, 101bf215546Sopenharmony_ci VK_SYNC_WAIT_COMPLETE, 102bf215546Sopenharmony_ci UINT64_MAX); 103bf215546Sopenharmony_ci if (result != VK_SUCCESS) 104bf215546Sopenharmony_ci return result; 105bf215546Sopenharmony_ci } 106bf215546Sopenharmony_ci } else { 107bf215546Sopenharmony_ci /* Without multisync, all the semaphores are baked into the one syncobj 108bf215546Sopenharmony_ci * at the start of each submit so we only need to wait on the one. 109bf215546Sopenharmony_ci */ 110bf215546Sopenharmony_ci int ret = drmSyncobjWait(queue->device->pdevice->render_fd, 111bf215546Sopenharmony_ci &queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 1, 112bf215546Sopenharmony_ci INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, 113bf215546Sopenharmony_ci NULL); 114bf215546Sopenharmony_ci if (ret) { 115bf215546Sopenharmony_ci return vk_errorf(queue, VK_ERROR_DEVICE_LOST, 116bf215546Sopenharmony_ci "syncobj wait failed: %m"); 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) 121bf215546Sopenharmony_ci queue->last_job_syncs.first[i] = false; 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci return VK_SUCCESS; 124bf215546Sopenharmony_ci} 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_cistatic VkResult 127bf215546Sopenharmony_cihandle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, 128bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset; 131bf215546Sopenharmony_ci assert(info->pool); 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci /* We are about to reset query counters so we need to make sure that 134bf215546Sopenharmony_ci * The GPU is not using them. The exception is timestamp queries, since 135bf215546Sopenharmony_ci * we handle those in the CPU. 136bf215546Sopenharmony_ci */ 137bf215546Sopenharmony_ci if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) 138bf215546Sopenharmony_ci v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { 141bf215546Sopenharmony_ci struct vk_sync_wait waits[info->count]; 142bf215546Sopenharmony_ci unsigned wait_count = 0; 143bf215546Sopenharmony_ci for (int i = 0; i < info->count; i++) { 144bf215546Sopenharmony_ci struct v3dv_query *query = &info->pool->queries[i]; 145bf215546Sopenharmony_ci /* Only wait for a query if we've used it otherwise we will be 146bf215546Sopenharmony_ci * waiting forever for the fence to become signaled. 147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_ci if (query->maybe_available) { 149bf215546Sopenharmony_ci waits[wait_count] = (struct vk_sync_wait){ 150bf215546Sopenharmony_ci .sync = info->pool->queries[i].perf.last_job_sync 151bf215546Sopenharmony_ci }; 152bf215546Sopenharmony_ci wait_count++; 153bf215546Sopenharmony_ci }; 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci VkResult result = vk_sync_wait_many(&job->device->vk, wait_count, waits, 157bf215546Sopenharmony_ci VK_SYNC_WAIT_COMPLETE, UINT64_MAX); 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci if (result != VK_SUCCESS) 160bf215546Sopenharmony_ci return result; 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci v3dv_reset_query_pools(job->device, info->pool, info->first, info->count); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci return VK_SUCCESS; 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cistatic VkResult 169bf215546Sopenharmony_ciexport_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int *fd) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci int err; 172bf215546Sopenharmony_ci if (job->device->pdevice->caps.multisync) { 173bf215546Sopenharmony_ci static const enum v3dv_queue_type queues_to_sync[] = { 174bf215546Sopenharmony_ci V3DV_QUEUE_CL, 175bf215546Sopenharmony_ci V3DV_QUEUE_CSD, 176bf215546Sopenharmony_ci }; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(queues_to_sync); i++) { 179bf215546Sopenharmony_ci enum v3dv_queue_type queue_type = queues_to_sync[i]; 180bf215546Sopenharmony_ci int tmp_fd = -1; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd, 183bf215546Sopenharmony_ci queue->last_job_syncs.syncs[queue_type], 184bf215546Sopenharmony_ci &tmp_fd); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (err) { 187bf215546Sopenharmony_ci close(*fd); 188bf215546Sopenharmony_ci return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN, 189bf215546Sopenharmony_ci "sync file export failed: %m"); 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci err = sync_accumulate("v3dv", fd, tmp_fd); 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci if (err) { 195bf215546Sopenharmony_ci close(tmp_fd); 196bf215546Sopenharmony_ci close(*fd); 197bf215546Sopenharmony_ci return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN, 198bf215546Sopenharmony_ci "failed to accumulate sync files: %m"); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci } else { 202bf215546Sopenharmony_ci err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd, 203bf215546Sopenharmony_ci queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 204bf215546Sopenharmony_ci fd); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (err) { 207bf215546Sopenharmony_ci return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN, 208bf215546Sopenharmony_ci "sync file export failed: %m"); 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci return VK_SUCCESS; 212bf215546Sopenharmony_ci} 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_cistatic VkResult 215bf215546Sopenharmony_cihandle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci mtx_lock(&job->device->query_mutex); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end; 222bf215546Sopenharmony_ci struct v3dv_queue *queue = &job->device->queue; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci int err = 0; 225bf215546Sopenharmony_ci int fd = -1; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { 228bf215546Sopenharmony_ci result = export_perfmon_last_job_sync(queue, job, &fd); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (result != VK_SUCCESS) 231bf215546Sopenharmony_ci goto fail; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci assert(fd >= 0); 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->count; i++) { 237bf215546Sopenharmony_ci assert(info->query + i < info->pool->query_count); 238bf215546Sopenharmony_ci struct v3dv_query *query = &info->pool->queries[info->query + i]; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { 241bf215546Sopenharmony_ci uint32_t syncobj = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj; 242bf215546Sopenharmony_ci err = drmSyncobjImportSyncFile(job->device->pdevice->render_fd, 243bf215546Sopenharmony_ci syncobj, fd); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci if (err) { 246bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 247bf215546Sopenharmony_ci "sync file import failed: %m"); 248bf215546Sopenharmony_ci goto fail; 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci query->maybe_available = true; 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_cifail: 256bf215546Sopenharmony_ci if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) 257bf215546Sopenharmony_ci close(fd); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci cnd_broadcast(&job->device->query_ended); 260bf215546Sopenharmony_ci mtx_unlock(&job->device->query_mutex); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci return result; 263bf215546Sopenharmony_ci} 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_cistatic VkResult 266bf215546Sopenharmony_cihandle_copy_query_results_cpu_job(struct v3dv_job *job) 267bf215546Sopenharmony_ci{ 268bf215546Sopenharmony_ci struct v3dv_copy_query_results_cpu_job_info *info = 269bf215546Sopenharmony_ci &job->cpu.query_copy_results; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci assert(info->dst && info->dst->mem && info->dst->mem->bo); 272bf215546Sopenharmony_ci struct v3dv_bo *bo = info->dst->mem->bo; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci /* Map the entire dst buffer for the CPU copy if needed */ 275bf215546Sopenharmony_ci assert(!bo->map || bo->map_size == bo->size); 276bf215546Sopenharmony_ci if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) 277bf215546Sopenharmony_ci return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci uint8_t *offset = ((uint8_t *) bo->map) + 280bf215546Sopenharmony_ci info->offset + info->dst->mem_offset; 281bf215546Sopenharmony_ci v3dv_get_query_pool_results(job->device, 282bf215546Sopenharmony_ci info->pool, 283bf215546Sopenharmony_ci info->first, 284bf215546Sopenharmony_ci info->count, 285bf215546Sopenharmony_ci offset, 286bf215546Sopenharmony_ci info->stride, 287bf215546Sopenharmony_ci info->flags); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci return VK_SUCCESS; 290bf215546Sopenharmony_ci} 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_cistatic VkResult 293bf215546Sopenharmony_cihandle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, 294bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 295bf215546Sopenharmony_ci{ 296bf215546Sopenharmony_ci /* From the Vulkan 1.0 spec: 297bf215546Sopenharmony_ci * 298bf215546Sopenharmony_ci * "When vkCmdSetEvent is submitted to a queue, it defines an execution 299bf215546Sopenharmony_ci * dependency on commands that were submitted before it, and defines an 300bf215546Sopenharmony_ci * event signal operation which sets the event to the signaled state. 301bf215546Sopenharmony_ci * The first synchronization scope includes every command previously 302bf215546Sopenharmony_ci * submitted to the same queue, including those in the same command 303bf215546Sopenharmony_ci * buffer and batch". 304bf215546Sopenharmony_ci * 305bf215546Sopenharmony_ci * So we should wait for all prior work to be completed before signaling 306bf215546Sopenharmony_ci * the event, this includes all active CPU wait threads spawned for any 307bf215546Sopenharmony_ci * command buffer submitted *before* this. 308bf215546Sopenharmony_ci */ 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci VkResult result = queue_wait_idle(queue, sync_info); 311bf215546Sopenharmony_ci if (result != VK_SUCCESS) 312bf215546Sopenharmony_ci return result; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set; 315bf215546Sopenharmony_ci p_atomic_set(&info->event->state, info->state); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci return VK_SUCCESS; 318bf215546Sopenharmony_ci} 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_cistatic bool 321bf215546Sopenharmony_cicheck_wait_events_complete(struct v3dv_job *job) 322bf215546Sopenharmony_ci{ 323bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait; 326bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->event_count; i++) { 327bf215546Sopenharmony_ci if (!p_atomic_read(&info->events[i]->state)) 328bf215546Sopenharmony_ci return false; 329bf215546Sopenharmony_ci } 330bf215546Sopenharmony_ci return true; 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_cistatic VkResult 334bf215546Sopenharmony_cihandle_wait_events_cpu_job(struct v3dv_job *job) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* Wait for events to be signaled */ 339bf215546Sopenharmony_ci const useconds_t wait_interval_ms = 1; 340bf215546Sopenharmony_ci while (!check_wait_events_complete(job)) 341bf215546Sopenharmony_ci usleep(wait_interval_ms * 1000); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci return VK_SUCCESS; 344bf215546Sopenharmony_ci} 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_cistatic VkResult 347bf215546Sopenharmony_cihandle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue, 348bf215546Sopenharmony_ci struct v3dv_job *job, 349bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 350bf215546Sopenharmony_ci{ 351bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE); 352bf215546Sopenharmony_ci struct v3dv_copy_buffer_to_image_cpu_job_info *info = 353bf215546Sopenharmony_ci &job->cpu.copy_buffer_to_image; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci /* Wait for all GPU work to finish first, since we may be accessing 356bf215546Sopenharmony_ci * the BOs involved in the operation. 357bf215546Sopenharmony_ci */ 358bf215546Sopenharmony_ci VkResult result = queue_wait_idle(queue, sync_info); 359bf215546Sopenharmony_ci if (result != VK_SUCCESS) 360bf215546Sopenharmony_ci return result; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci /* Map BOs */ 363bf215546Sopenharmony_ci struct v3dv_bo *dst_bo = info->image->mem->bo; 364bf215546Sopenharmony_ci assert(!dst_bo->map || dst_bo->map_size == dst_bo->size); 365bf215546Sopenharmony_ci if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size)) 366bf215546Sopenharmony_ci return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); 367bf215546Sopenharmony_ci void *dst_ptr = dst_bo->map; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci struct v3dv_bo *src_bo = info->buffer->mem->bo; 370bf215546Sopenharmony_ci assert(!src_bo->map || src_bo->map_size == src_bo->size); 371bf215546Sopenharmony_ci if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size)) 372bf215546Sopenharmony_ci return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); 373bf215546Sopenharmony_ci void *src_ptr = src_bo->map; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci const struct v3d_resource_slice *slice = 376bf215546Sopenharmony_ci &info->image->slices[info->mip_level]; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci const struct pipe_box box = { 379bf215546Sopenharmony_ci info->image_offset.x, info->image_offset.y, info->base_layer, 380bf215546Sopenharmony_ci info->image_extent.width, info->image_extent.height, info->layer_count, 381bf215546Sopenharmony_ci }; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* Copy each layer */ 384bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->layer_count; i++) { 385bf215546Sopenharmony_ci const uint32_t dst_offset = 386bf215546Sopenharmony_ci v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i); 387bf215546Sopenharmony_ci const uint32_t src_offset = 388bf215546Sopenharmony_ci info->buffer->mem_offset + info->buffer_offset + 389bf215546Sopenharmony_ci info->buffer_layer_stride * i; 390bf215546Sopenharmony_ci v3d_store_tiled_image( 391bf215546Sopenharmony_ci dst_ptr + dst_offset, slice->stride, 392bf215546Sopenharmony_ci src_ptr + src_offset, info->buffer_stride, 393bf215546Sopenharmony_ci slice->tiling, info->image->cpp, slice->padded_height, &box); 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci return VK_SUCCESS; 397bf215546Sopenharmony_ci} 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_cistatic VkResult 400bf215546Sopenharmony_cihandle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job, 401bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 402bf215546Sopenharmony_ci{ 403bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY); 404bf215546Sopenharmony_ci struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci /* Wait for completion of all work queued before the timestamp query */ 407bf215546Sopenharmony_ci VkResult result = queue_wait_idle(queue, sync_info); 408bf215546Sopenharmony_ci if (result != VK_SUCCESS) 409bf215546Sopenharmony_ci return result; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci mtx_lock(&job->device->query_mutex); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci /* Compute timestamp */ 414bf215546Sopenharmony_ci struct timespec t; 415bf215546Sopenharmony_ci clock_gettime(CLOCK_MONOTONIC, &t); 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->count; i++) { 418bf215546Sopenharmony_ci assert(info->query + i < info->pool->query_count); 419bf215546Sopenharmony_ci struct v3dv_query *query = &info->pool->queries[info->query + i]; 420bf215546Sopenharmony_ci query->maybe_available = true; 421bf215546Sopenharmony_ci if (i == 0) 422bf215546Sopenharmony_ci query->value = t.tv_sec * 1000000000ull + t.tv_nsec; 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci cnd_broadcast(&job->device->query_ended); 426bf215546Sopenharmony_ci mtx_unlock(&job->device->query_mutex); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci return VK_SUCCESS; 429bf215546Sopenharmony_ci} 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_cistatic VkResult 432bf215546Sopenharmony_cihandle_csd_indirect_cpu_job(struct v3dv_queue *queue, 433bf215546Sopenharmony_ci struct v3dv_job *job, 434bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 435bf215546Sopenharmony_ci{ 436bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT); 437bf215546Sopenharmony_ci struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect; 438bf215546Sopenharmony_ci assert(info->csd_job); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci /* Make sure the GPU is no longer using the indirect buffer*/ 441bf215546Sopenharmony_ci assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); 442bf215546Sopenharmony_ci v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci /* Map the indirect buffer and read the dispatch parameters */ 445bf215546Sopenharmony_ci assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); 446bf215546Sopenharmony_ci struct v3dv_bo *bo = info->buffer->mem->bo; 447bf215546Sopenharmony_ci if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) 448bf215546Sopenharmony_ci return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); 449bf215546Sopenharmony_ci assert(bo->map); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci const uint32_t offset = info->buffer->mem_offset + info->offset; 452bf215546Sopenharmony_ci const uint32_t *group_counts = (uint32_t *) (bo->map + offset); 453bf215546Sopenharmony_ci if (group_counts[0] == 0 || group_counts[1] == 0|| group_counts[2] == 0) 454bf215546Sopenharmony_ci return VK_SUCCESS; 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci if (memcmp(group_counts, info->csd_job->csd.wg_count, 457bf215546Sopenharmony_ci sizeof(info->csd_job->csd.wg_count)) != 0) { 458bf215546Sopenharmony_ci v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts); 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci return VK_SUCCESS; 462bf215546Sopenharmony_ci} 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_cistatic VkResult 465bf215546Sopenharmony_ciprocess_waits(struct v3dv_queue *queue, 466bf215546Sopenharmony_ci uint32_t count, struct vk_sync_wait *waits) 467bf215546Sopenharmony_ci{ 468bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 469bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 470bf215546Sopenharmony_ci int err = 0; 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci if (count == 0) 473bf215546Sopenharmony_ci return VK_SUCCESS; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci /* If multisync is supported, we wait on semaphores in the first job 476bf215546Sopenharmony_ci * submitted to each of the individual queues. We don't need to 477bf215546Sopenharmony_ci * pre-populate the syncobjs. 478bf215546Sopenharmony_ci */ 479bf215546Sopenharmony_ci if (queue->device->pdevice->caps.multisync) 480bf215546Sopenharmony_ci return VK_SUCCESS; 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci int fd = -1; 483bf215546Sopenharmony_ci err = drmSyncobjExportSyncFile(device->pdevice->render_fd, 484bf215546Sopenharmony_ci queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 485bf215546Sopenharmony_ci &fd); 486bf215546Sopenharmony_ci if (err) { 487bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 488bf215546Sopenharmony_ci "sync file export failed: %m"); 489bf215546Sopenharmony_ci goto fail; 490bf215546Sopenharmony_ci } 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci for (uint32_t i = 0; i < count; i++) { 493bf215546Sopenharmony_ci uint32_t syncobj = vk_sync_as_drm_syncobj(waits[i].sync)->syncobj; 494bf215546Sopenharmony_ci int wait_fd = -1; 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci err = drmSyncobjExportSyncFile(device->pdevice->render_fd, 497bf215546Sopenharmony_ci syncobj, &wait_fd); 498bf215546Sopenharmony_ci if (err) { 499bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 500bf215546Sopenharmony_ci "sync file export failed: %m"); 501bf215546Sopenharmony_ci goto fail; 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci err = sync_accumulate("v3dv", &fd, wait_fd); 505bf215546Sopenharmony_ci close(wait_fd); 506bf215546Sopenharmony_ci if (err) { 507bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 508bf215546Sopenharmony_ci "sync file merge failed: %m"); 509bf215546Sopenharmony_ci goto fail; 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci err = drmSyncobjImportSyncFile(device->pdevice->render_fd, 514bf215546Sopenharmony_ci queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 515bf215546Sopenharmony_ci fd); 516bf215546Sopenharmony_ci if (err) { 517bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 518bf215546Sopenharmony_ci "sync file import failed: %m"); 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cifail: 522bf215546Sopenharmony_ci close(fd); 523bf215546Sopenharmony_ci return result; 524bf215546Sopenharmony_ci} 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_cistatic VkResult 527bf215546Sopenharmony_ciprocess_signals(struct v3dv_queue *queue, 528bf215546Sopenharmony_ci uint32_t count, struct vk_sync_signal *signals) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci if (count == 0) 533bf215546Sopenharmony_ci return VK_SUCCESS; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci /* If multisync is supported, we are signalling semaphores in the last job 536bf215546Sopenharmony_ci * of the last command buffer and, therefore, we do not need to process any 537bf215546Sopenharmony_ci * semaphores here. 538bf215546Sopenharmony_ci */ 539bf215546Sopenharmony_ci if (device->pdevice->caps.multisync) 540bf215546Sopenharmony_ci return VK_SUCCESS; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci int fd; 543bf215546Sopenharmony_ci drmSyncobjExportSyncFile(device->pdevice->render_fd, 544bf215546Sopenharmony_ci queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 545bf215546Sopenharmony_ci &fd); 546bf215546Sopenharmony_ci if (fd == -1) { 547bf215546Sopenharmony_ci return vk_errorf(queue, VK_ERROR_UNKNOWN, 548bf215546Sopenharmony_ci "sync file export failed: %m"); 549bf215546Sopenharmony_ci } 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 552bf215546Sopenharmony_ci for (uint32_t i = 0; i < count; i++) { 553bf215546Sopenharmony_ci uint32_t syncobj = vk_sync_as_drm_syncobj(signals[i].sync)->syncobj; 554bf215546Sopenharmony_ci int err = drmSyncobjImportSyncFile(device->pdevice->render_fd, 555bf215546Sopenharmony_ci syncobj, fd); 556bf215546Sopenharmony_ci if (err) { 557bf215546Sopenharmony_ci result = vk_errorf(queue, VK_ERROR_UNKNOWN, 558bf215546Sopenharmony_ci "sync file import failed: %m"); 559bf215546Sopenharmony_ci break; 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci assert(fd >= 0); 564bf215546Sopenharmony_ci close(fd); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci return result; 567bf215546Sopenharmony_ci} 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_cistatic void 570bf215546Sopenharmony_cimultisync_free(struct v3dv_device *device, 571bf215546Sopenharmony_ci struct drm_v3d_multi_sync *ms) 572bf215546Sopenharmony_ci{ 573bf215546Sopenharmony_ci vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->out_syncs); 574bf215546Sopenharmony_ci vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->in_syncs); 575bf215546Sopenharmony_ci} 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_cistatic struct drm_v3d_sem * 578bf215546Sopenharmony_ciset_in_syncs(struct v3dv_queue *queue, 579bf215546Sopenharmony_ci struct v3dv_job *job, 580bf215546Sopenharmony_ci enum v3dv_queue_type queue_sync, 581bf215546Sopenharmony_ci uint32_t *count, 582bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info) 583bf215546Sopenharmony_ci{ 584bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 585bf215546Sopenharmony_ci uint32_t n_syncs = 0; 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci /* If this is the first job submitted to a given GPU queue in this cmd buf 588bf215546Sopenharmony_ci * batch, it has to wait on wait semaphores (if any) before running. 589bf215546Sopenharmony_ci */ 590bf215546Sopenharmony_ci if (queue->last_job_syncs.first[queue_sync]) 591bf215546Sopenharmony_ci n_syncs = sync_info->wait_count; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci /* If the serialize flag is set the job needs to be serialized in the 594bf215546Sopenharmony_ci * corresponding queues. Notice that we may implement transfer operations 595bf215546Sopenharmony_ci * as both CL or TFU jobs. 596bf215546Sopenharmony_ci * 597bf215546Sopenharmony_ci * FIXME: maybe we could track more precisely if the source of a transfer 598bf215546Sopenharmony_ci * barrier is a CL and/or a TFU job. 599bf215546Sopenharmony_ci */ 600bf215546Sopenharmony_ci bool sync_csd = job->serialize & V3DV_BARRIER_COMPUTE_BIT; 601bf215546Sopenharmony_ci bool sync_tfu = job->serialize & V3DV_BARRIER_TRANSFER_BIT; 602bf215546Sopenharmony_ci bool sync_cl = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT | 603bf215546Sopenharmony_ci V3DV_BARRIER_TRANSFER_BIT); 604bf215546Sopenharmony_ci *count = n_syncs; 605bf215546Sopenharmony_ci if (sync_cl) 606bf215546Sopenharmony_ci (*count)++; 607bf215546Sopenharmony_ci if (sync_tfu) 608bf215546Sopenharmony_ci (*count)++; 609bf215546Sopenharmony_ci if (sync_csd) 610bf215546Sopenharmony_ci (*count)++; 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci if (!*count) 613bf215546Sopenharmony_ci return NULL; 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci struct drm_v3d_sem *syncs = 616bf215546Sopenharmony_ci vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem), 617bf215546Sopenharmony_ci 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci if (!syncs) 620bf215546Sopenharmony_ci return NULL; 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci for (int i = 0; i < n_syncs; i++) { 623bf215546Sopenharmony_ci syncs[i].handle = 624bf215546Sopenharmony_ci vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj; 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci if (sync_cl) 628bf215546Sopenharmony_ci syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL]; 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci if (sync_csd) 631bf215546Sopenharmony_ci syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD]; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci if (sync_tfu) 634bf215546Sopenharmony_ci syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU]; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci assert(n_syncs == *count); 637bf215546Sopenharmony_ci return syncs; 638bf215546Sopenharmony_ci} 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_cistatic struct drm_v3d_sem * 641bf215546Sopenharmony_ciset_out_syncs(struct v3dv_queue *queue, 642bf215546Sopenharmony_ci struct v3dv_job *job, 643bf215546Sopenharmony_ci enum v3dv_queue_type queue_sync, 644bf215546Sopenharmony_ci uint32_t *count, 645bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 646bf215546Sopenharmony_ci bool signal_syncs) 647bf215546Sopenharmony_ci{ 648bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci uint32_t n_vk_syncs = signal_syncs ? sync_info->signal_count : 0; 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci /* We always signal the syncobj from `device->last_job_syncs` related to 653bf215546Sopenharmony_ci * this v3dv_queue_type to track the last job submitted to this queue. 654bf215546Sopenharmony_ci */ 655bf215546Sopenharmony_ci (*count) = n_vk_syncs + 1; 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci struct drm_v3d_sem *syncs = 658bf215546Sopenharmony_ci vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem), 659bf215546Sopenharmony_ci 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci if (!syncs) 662bf215546Sopenharmony_ci return NULL; 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci if (n_vk_syncs) { 665bf215546Sopenharmony_ci for (unsigned i = 0; i < n_vk_syncs; i++) { 666bf215546Sopenharmony_ci syncs[i].handle = 667bf215546Sopenharmony_ci vk_sync_as_drm_syncobj(sync_info->signals[i].sync)->syncobj; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci syncs[n_vk_syncs].handle = queue->last_job_syncs.syncs[queue_sync]; 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci return syncs; 674bf215546Sopenharmony_ci} 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_cistatic void 677bf215546Sopenharmony_ciset_ext(struct drm_v3d_extension *ext, 678bf215546Sopenharmony_ci struct drm_v3d_extension *next, 679bf215546Sopenharmony_ci uint32_t id, 680bf215546Sopenharmony_ci uintptr_t flags) 681bf215546Sopenharmony_ci{ 682bf215546Sopenharmony_ci ext->next = (uintptr_t)(void *)next; 683bf215546Sopenharmony_ci ext->id = id; 684bf215546Sopenharmony_ci ext->flags = flags; 685bf215546Sopenharmony_ci} 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci/* This function sets the extension for multiple in/out syncobjs. When it is 688bf215546Sopenharmony_ci * successful, it sets the extension id to DRM_V3D_EXT_ID_MULTI_SYNC. 689bf215546Sopenharmony_ci * Otherwise, the extension id is 0, which means an out-of-memory error. 690bf215546Sopenharmony_ci */ 691bf215546Sopenharmony_cistatic void 692bf215546Sopenharmony_ciset_multisync(struct drm_v3d_multi_sync *ms, 693bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 694bf215546Sopenharmony_ci struct drm_v3d_extension *next, 695bf215546Sopenharmony_ci struct v3dv_device *device, 696bf215546Sopenharmony_ci struct v3dv_job *job, 697bf215546Sopenharmony_ci enum v3dv_queue_type queue_sync, 698bf215546Sopenharmony_ci enum v3d_queue wait_stage, 699bf215546Sopenharmony_ci bool signal_syncs) 700bf215546Sopenharmony_ci{ 701bf215546Sopenharmony_ci struct v3dv_queue *queue = &device->queue; 702bf215546Sopenharmony_ci uint32_t out_sync_count = 0, in_sync_count = 0; 703bf215546Sopenharmony_ci struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci in_syncs = set_in_syncs(queue, job, queue_sync, 706bf215546Sopenharmony_ci &in_sync_count, sync_info); 707bf215546Sopenharmony_ci if (!in_syncs && in_sync_count) 708bf215546Sopenharmony_ci goto fail; 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci out_syncs = set_out_syncs(queue, job, queue_sync, 711bf215546Sopenharmony_ci &out_sync_count, sync_info, signal_syncs); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci assert(out_sync_count > 0); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci if (!out_syncs) 716bf215546Sopenharmony_ci goto fail; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci set_ext(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0); 719bf215546Sopenharmony_ci ms->wait_stage = wait_stage; 720bf215546Sopenharmony_ci ms->out_sync_count = out_sync_count; 721bf215546Sopenharmony_ci ms->out_syncs = (uintptr_t)(void *)out_syncs; 722bf215546Sopenharmony_ci ms->in_sync_count = in_sync_count; 723bf215546Sopenharmony_ci ms->in_syncs = (uintptr_t)(void *)in_syncs; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci return; 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_cifail: 728bf215546Sopenharmony_ci if (in_syncs) 729bf215546Sopenharmony_ci vk_free(&device->vk.alloc, in_syncs); 730bf215546Sopenharmony_ci assert(!out_syncs); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci return; 733bf215546Sopenharmony_ci} 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_cistatic VkResult 736bf215546Sopenharmony_cihandle_cl_job(struct v3dv_queue *queue, 737bf215546Sopenharmony_ci struct v3dv_job *job, 738bf215546Sopenharmony_ci uint32_t counter_pass_idx, 739bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 740bf215546Sopenharmony_ci bool signal_syncs) 741bf215546Sopenharmony_ci{ 742bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci struct drm_v3d_submit_cl submit = { 0 }; 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci /* Sanity check: we should only flag a bcl sync on a job that needs to be 747bf215546Sopenharmony_ci * serialized. 748bf215546Sopenharmony_ci */ 749bf215546Sopenharmony_ci assert(job->serialize || !job->needs_bcl_sync); 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci /* We expect to have just one RCL per job which should fit in just one BO. 752bf215546Sopenharmony_ci * Our BCL, could chain multiple BOS together though. 753bf215546Sopenharmony_ci */ 754bf215546Sopenharmony_ci assert(list_length(&job->rcl.bo_list) == 1); 755bf215546Sopenharmony_ci assert(list_length(&job->bcl.bo_list) >= 1); 756bf215546Sopenharmony_ci struct v3dv_bo *bcl_fist_bo = 757bf215546Sopenharmony_ci list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link); 758bf215546Sopenharmony_ci submit.bcl_start = bcl_fist_bo->offset; 759bf215546Sopenharmony_ci submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl); 760bf215546Sopenharmony_ci submit.rcl_start = job->rcl.bo->offset; 761bf215546Sopenharmony_ci submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci submit.qma = job->tile_alloc->offset; 764bf215546Sopenharmony_ci submit.qms = job->tile_alloc->size; 765bf215546Sopenharmony_ci submit.qts = job->tile_state->offset; 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci submit.flags = 0; 768bf215546Sopenharmony_ci if (job->tmu_dirty_rcl) 769bf215546Sopenharmony_ci submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE; 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci /* If the job uses VK_KHR_buffer_device_addess we need to ensure all 772bf215546Sopenharmony_ci * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR 773bf215546Sopenharmony_ci * are included. 774bf215546Sopenharmony_ci */ 775bf215546Sopenharmony_ci if (job->uses_buffer_device_address) { 776bf215546Sopenharmony_ci util_dynarray_foreach(&queue->device->device_address_bo_list, 777bf215546Sopenharmony_ci struct v3dv_bo *, bo) { 778bf215546Sopenharmony_ci v3dv_job_add_bo(job, *bo); 779bf215546Sopenharmony_ci } 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci submit.bo_handle_count = job->bo_count; 783bf215546Sopenharmony_ci uint32_t *bo_handles = 784bf215546Sopenharmony_ci (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count); 785bf215546Sopenharmony_ci uint32_t bo_idx = 0; 786bf215546Sopenharmony_ci set_foreach(job->bos, entry) { 787bf215546Sopenharmony_ci struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; 788bf215546Sopenharmony_ci bo_handles[bo_idx++] = bo->handle; 789bf215546Sopenharmony_ci } 790bf215546Sopenharmony_ci assert(bo_idx == submit.bo_handle_count); 791bf215546Sopenharmony_ci submit.bo_handles = (uintptr_t)(void *)bo_handles; 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci submit.perfmon_id = job->perf ? 794bf215546Sopenharmony_ci job->perf->kperfmon_ids[counter_pass_idx] : 0; 795bf215546Sopenharmony_ci const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id; 796bf215546Sopenharmony_ci queue->last_perfmon_id = submit.perfmon_id; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci /* We need a binning sync if we are the first CL job waiting on a semaphore 799bf215546Sopenharmony_ci * with a wait stage that involves the geometry pipeline, or if the job 800bf215546Sopenharmony_ci * comes after a pipeline barrier that involves geometry stages 801bf215546Sopenharmony_ci * (needs_bcl_sync) or when performance queries are in use. 802bf215546Sopenharmony_ci * 803bf215546Sopenharmony_ci * We need a render sync if the job doesn't need a binning sync but has 804bf215546Sopenharmony_ci * still been flagged for serialization. It should be noted that RCL jobs 805bf215546Sopenharmony_ci * don't start until the previous RCL job has finished so we don't really 806bf215546Sopenharmony_ci * need to add a fence for those, however, we might need to wait on a CSD or 807bf215546Sopenharmony_ci * TFU job, which are not automatically serialized with CL jobs. 808bf215546Sopenharmony_ci */ 809bf215546Sopenharmony_ci bool needs_bcl_sync = job->needs_bcl_sync || needs_perf_sync; 810bf215546Sopenharmony_ci if (queue->last_job_syncs.first[V3DV_QUEUE_CL]) { 811bf215546Sopenharmony_ci for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) { 812bf215546Sopenharmony_ci needs_bcl_sync = sync_info->waits[i].stage_mask & 813bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | 814bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | 815bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | 816bf215546Sopenharmony_ci VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | 817bf215546Sopenharmony_ci VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 818bf215546Sopenharmony_ci VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 819bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 820bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | 821bf215546Sopenharmony_ci VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT); 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci } 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci bool needs_rcl_sync = job->serialize && !needs_bcl_sync; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci /* Replace single semaphore settings whenever our kernel-driver supports 828bf215546Sopenharmony_ci * multiple semaphores extension. 829bf215546Sopenharmony_ci */ 830bf215546Sopenharmony_ci struct drm_v3d_multi_sync ms = { 0 }; 831bf215546Sopenharmony_ci if (device->pdevice->caps.multisync) { 832bf215546Sopenharmony_ci enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN; 833bf215546Sopenharmony_ci set_multisync(&ms, sync_info, NULL, device, job, 834bf215546Sopenharmony_ci V3DV_QUEUE_CL, wait_stage, signal_syncs); 835bf215546Sopenharmony_ci if (!ms.base.id) 836bf215546Sopenharmony_ci return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci submit.flags |= DRM_V3D_SUBMIT_EXTENSION; 839bf215546Sopenharmony_ci submit.extensions = (uintptr_t)(void *)&ms; 840bf215546Sopenharmony_ci /* Disable legacy sync interface when multisync extension is used */ 841bf215546Sopenharmony_ci submit.in_sync_rcl = 0; 842bf215546Sopenharmony_ci submit.in_sync_bcl = 0; 843bf215546Sopenharmony_ci submit.out_sync = 0; 844bf215546Sopenharmony_ci } else { 845bf215546Sopenharmony_ci uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY]; 846bf215546Sopenharmony_ci submit.in_sync_bcl = needs_bcl_sync ? last_job_sync : 0; 847bf215546Sopenharmony_ci submit.in_sync_rcl = needs_rcl_sync ? last_job_sync : 0; 848bf215546Sopenharmony_ci submit.out_sync = last_job_sync; 849bf215546Sopenharmony_ci } 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci v3dv_clif_dump(device, job, &submit); 852bf215546Sopenharmony_ci int ret = v3dv_ioctl(device->pdevice->render_fd, 853bf215546Sopenharmony_ci DRM_IOCTL_V3D_SUBMIT_CL, &submit); 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci static bool warned = false; 856bf215546Sopenharmony_ci if (ret && !warned) { 857bf215546Sopenharmony_ci fprintf(stderr, "Draw call returned %s. Expect corruption.\n", 858bf215546Sopenharmony_ci strerror(errno)); 859bf215546Sopenharmony_ci warned = true; 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci free(bo_handles); 863bf215546Sopenharmony_ci multisync_free(device, &ms); 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci queue->last_job_syncs.first[V3DV_QUEUE_CL] = false; 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci if (ret) 868bf215546Sopenharmony_ci return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CL failed: %m"); 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci return VK_SUCCESS; 871bf215546Sopenharmony_ci} 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_cistatic VkResult 874bf215546Sopenharmony_cihandle_tfu_job(struct v3dv_queue *queue, 875bf215546Sopenharmony_ci struct v3dv_job *job, 876bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 877bf215546Sopenharmony_ci bool signal_syncs) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci const bool needs_sync = sync_info->wait_count || job->serialize; 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci /* Replace single semaphore settings whenever our kernel-driver supports 884bf215546Sopenharmony_ci * multiple semaphore extension. 885bf215546Sopenharmony_ci */ 886bf215546Sopenharmony_ci struct drm_v3d_multi_sync ms = { 0 }; 887bf215546Sopenharmony_ci if (device->pdevice->caps.multisync) { 888bf215546Sopenharmony_ci set_multisync(&ms, sync_info, NULL, device, job, 889bf215546Sopenharmony_ci V3DV_QUEUE_TFU, V3D_TFU, signal_syncs); 890bf215546Sopenharmony_ci if (!ms.base.id) 891bf215546Sopenharmony_ci return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci job->tfu.flags |= DRM_V3D_SUBMIT_EXTENSION; 894bf215546Sopenharmony_ci job->tfu.extensions = (uintptr_t)(void *)&ms; 895bf215546Sopenharmony_ci /* Disable legacy sync interface when multisync extension is used */ 896bf215546Sopenharmony_ci job->tfu.in_sync = 0; 897bf215546Sopenharmony_ci job->tfu.out_sync = 0; 898bf215546Sopenharmony_ci } else { 899bf215546Sopenharmony_ci uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY]; 900bf215546Sopenharmony_ci job->tfu.in_sync = needs_sync ? last_job_sync : 0; 901bf215546Sopenharmony_ci job->tfu.out_sync = last_job_sync; 902bf215546Sopenharmony_ci } 903bf215546Sopenharmony_ci int ret = v3dv_ioctl(device->pdevice->render_fd, 904bf215546Sopenharmony_ci DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu); 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci multisync_free(device, &ms); 907bf215546Sopenharmony_ci queue->last_job_syncs.first[V3DV_QUEUE_TFU] = false; 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci if (ret != 0) 910bf215546Sopenharmony_ci return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_TFU failed: %m"); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci return VK_SUCCESS; 913bf215546Sopenharmony_ci} 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_cistatic VkResult 916bf215546Sopenharmony_cihandle_csd_job(struct v3dv_queue *queue, 917bf215546Sopenharmony_ci struct v3dv_job *job, 918bf215546Sopenharmony_ci uint32_t counter_pass_idx, 919bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 920bf215546Sopenharmony_ci bool signal_syncs) 921bf215546Sopenharmony_ci{ 922bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci struct drm_v3d_submit_csd *submit = &job->csd.submit; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci /* If the job uses VK_KHR_buffer_device_addess we need to ensure all 927bf215546Sopenharmony_ci * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR 928bf215546Sopenharmony_ci * are included. 929bf215546Sopenharmony_ci */ 930bf215546Sopenharmony_ci if (job->uses_buffer_device_address) { 931bf215546Sopenharmony_ci util_dynarray_foreach(&queue->device->device_address_bo_list, 932bf215546Sopenharmony_ci struct v3dv_bo *, bo) { 933bf215546Sopenharmony_ci v3dv_job_add_bo(job, *bo); 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci submit->bo_handle_count = job->bo_count; 938bf215546Sopenharmony_ci uint32_t *bo_handles = 939bf215546Sopenharmony_ci (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2)); 940bf215546Sopenharmony_ci uint32_t bo_idx = 0; 941bf215546Sopenharmony_ci set_foreach(job->bos, entry) { 942bf215546Sopenharmony_ci struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; 943bf215546Sopenharmony_ci bo_handles[bo_idx++] = bo->handle; 944bf215546Sopenharmony_ci } 945bf215546Sopenharmony_ci assert(bo_idx == submit->bo_handle_count); 946bf215546Sopenharmony_ci submit->bo_handles = (uintptr_t)(void *)bo_handles; 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci const bool needs_sync = sync_info->wait_count || job->serialize; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci /* Replace single semaphore settings whenever our kernel-driver supports 951bf215546Sopenharmony_ci * multiple semaphore extension. 952bf215546Sopenharmony_ci */ 953bf215546Sopenharmony_ci struct drm_v3d_multi_sync ms = { 0 }; 954bf215546Sopenharmony_ci if (device->pdevice->caps.multisync) { 955bf215546Sopenharmony_ci set_multisync(&ms, sync_info, NULL, device, job, 956bf215546Sopenharmony_ci V3DV_QUEUE_CSD, V3D_CSD, signal_syncs); 957bf215546Sopenharmony_ci if (!ms.base.id) 958bf215546Sopenharmony_ci return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci submit->flags |= DRM_V3D_SUBMIT_EXTENSION; 961bf215546Sopenharmony_ci submit->extensions = (uintptr_t)(void *)&ms; 962bf215546Sopenharmony_ci /* Disable legacy sync interface when multisync extension is used */ 963bf215546Sopenharmony_ci submit->in_sync = 0; 964bf215546Sopenharmony_ci submit->out_sync = 0; 965bf215546Sopenharmony_ci } else { 966bf215546Sopenharmony_ci uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY]; 967bf215546Sopenharmony_ci submit->in_sync = needs_sync ? last_job_sync : 0; 968bf215546Sopenharmony_ci submit->out_sync = last_job_sync; 969bf215546Sopenharmony_ci } 970bf215546Sopenharmony_ci submit->perfmon_id = job->perf ? 971bf215546Sopenharmony_ci job->perf->kperfmon_ids[counter_pass_idx] : 0; 972bf215546Sopenharmony_ci queue->last_perfmon_id = submit->perfmon_id; 973bf215546Sopenharmony_ci int ret = v3dv_ioctl(device->pdevice->render_fd, 974bf215546Sopenharmony_ci DRM_IOCTL_V3D_SUBMIT_CSD, submit); 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci static bool warned = false; 977bf215546Sopenharmony_ci if (ret && !warned) { 978bf215546Sopenharmony_ci fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n", 979bf215546Sopenharmony_ci strerror(errno)); 980bf215546Sopenharmony_ci warned = true; 981bf215546Sopenharmony_ci } 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci free(bo_handles); 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci multisync_free(device, &ms); 986bf215546Sopenharmony_ci queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false; 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci if (ret) 989bf215546Sopenharmony_ci return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CSD failed: %m"); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci return VK_SUCCESS; 992bf215546Sopenharmony_ci} 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_cistatic VkResult 995bf215546Sopenharmony_ciqueue_handle_job(struct v3dv_queue *queue, 996bf215546Sopenharmony_ci struct v3dv_job *job, 997bf215546Sopenharmony_ci uint32_t counter_pass_idx, 998bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 999bf215546Sopenharmony_ci bool signal_syncs) 1000bf215546Sopenharmony_ci{ 1001bf215546Sopenharmony_ci switch (job->type) { 1002bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CL: 1003bf215546Sopenharmony_ci return handle_cl_job(queue, job, counter_pass_idx, sync_info, signal_syncs); 1004bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_TFU: 1005bf215546Sopenharmony_ci return handle_tfu_job(queue, job, sync_info, signal_syncs); 1006bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CSD: 1007bf215546Sopenharmony_ci return handle_csd_job(queue, job, counter_pass_idx, sync_info, signal_syncs); 1008bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_RESET_QUERIES: 1009bf215546Sopenharmony_ci return handle_reset_query_cpu_job(queue, job, sync_info); 1010bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_END_QUERY: 1011bf215546Sopenharmony_ci return handle_end_query_cpu_job(job, counter_pass_idx); 1012bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS: 1013bf215546Sopenharmony_ci return handle_copy_query_results_cpu_job(job); 1014bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_SET_EVENT: 1015bf215546Sopenharmony_ci return handle_set_event_cpu_job(queue, job, sync_info); 1016bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_WAIT_EVENTS: 1017bf215546Sopenharmony_ci return handle_wait_events_cpu_job(job); 1018bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE: 1019bf215546Sopenharmony_ci return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info); 1020bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: 1021bf215546Sopenharmony_ci return handle_csd_indirect_cpu_job(queue, job, sync_info); 1022bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY: 1023bf215546Sopenharmony_ci return handle_timestamp_query_cpu_job(queue, job, sync_info); 1024bf215546Sopenharmony_ci default: 1025bf215546Sopenharmony_ci unreachable("Unhandled job type"); 1026bf215546Sopenharmony_ci } 1027bf215546Sopenharmony_ci} 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_cistatic VkResult 1030bf215546Sopenharmony_ciqueue_create_noop_job(struct v3dv_queue *queue) 1031bf215546Sopenharmony_ci{ 1032bf215546Sopenharmony_ci struct v3dv_device *device = queue->device; 1033bf215546Sopenharmony_ci queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8, 1034bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1035bf215546Sopenharmony_ci if (!queue->noop_job) 1036bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1037bf215546Sopenharmony_ci v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1); 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci v3dv_X(device, job_emit_noop)(queue->noop_job); 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_ci /* We use no-op jobs to signal semaphores/fences. These jobs needs to be 1042bf215546Sopenharmony_ci * serialized across all hw queues to comply with Vulkan's signal operation 1043bf215546Sopenharmony_ci * order requirements, which basically require that signal operations occur 1044bf215546Sopenharmony_ci * in submission order. 1045bf215546Sopenharmony_ci */ 1046bf215546Sopenharmony_ci queue->noop_job->serialize = V3DV_BARRIER_ALL; 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci return VK_SUCCESS; 1049bf215546Sopenharmony_ci} 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_cistatic VkResult 1052bf215546Sopenharmony_ciqueue_submit_noop_job(struct v3dv_queue *queue, 1053bf215546Sopenharmony_ci uint32_t counter_pass_idx, 1054bf215546Sopenharmony_ci struct v3dv_submit_sync_info *sync_info, 1055bf215546Sopenharmony_ci bool signal_syncs) 1056bf215546Sopenharmony_ci{ 1057bf215546Sopenharmony_ci if (!queue->noop_job) { 1058bf215546Sopenharmony_ci VkResult result = queue_create_noop_job(queue); 1059bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1060bf215546Sopenharmony_ci return result; 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci assert(queue->noop_job); 1064bf215546Sopenharmony_ci return queue_handle_job(queue, queue->noop_job, counter_pass_idx, 1065bf215546Sopenharmony_ci sync_info, signal_syncs); 1066bf215546Sopenharmony_ci} 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ciVkResult 1069bf215546Sopenharmony_civ3dv_queue_driver_submit(struct vk_queue *vk_queue, 1070bf215546Sopenharmony_ci struct vk_queue_submit *submit) 1071bf215546Sopenharmony_ci{ 1072bf215546Sopenharmony_ci struct v3dv_queue *queue = container_of(vk_queue, struct v3dv_queue, vk); 1073bf215546Sopenharmony_ci VkResult result; 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci struct v3dv_submit_sync_info sync_info = { 1076bf215546Sopenharmony_ci .wait_count = submit->wait_count, 1077bf215546Sopenharmony_ci .waits = submit->waits, 1078bf215546Sopenharmony_ci .signal_count = submit->signal_count, 1079bf215546Sopenharmony_ci .signals = submit->signals, 1080bf215546Sopenharmony_ci }; 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci for (int i = 0; i < V3DV_QUEUE_COUNT; i++) 1083bf215546Sopenharmony_ci queue->last_job_syncs.first[i] = true; 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci result = process_waits(queue, sync_info.wait_count, sync_info.waits); 1086bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1087bf215546Sopenharmony_ci return result; 1088bf215546Sopenharmony_ci 1089bf215546Sopenharmony_ci for (uint32_t i = 0; i < submit->command_buffer_count; i++) { 1090bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer = 1091bf215546Sopenharmony_ci container_of(submit->command_buffers[i], struct v3dv_cmd_buffer, vk); 1092bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_job, job, 1093bf215546Sopenharmony_ci &cmd_buffer->jobs, list_link) { 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_ci result = queue_handle_job(queue, job, submit->perf_pass_index, 1096bf215546Sopenharmony_ci &sync_info, false); 1097bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1098bf215546Sopenharmony_ci return result; 1099bf215546Sopenharmony_ci } 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci /* If the command buffer ends with a barrier we need to consume it now. 1102bf215546Sopenharmony_ci * 1103bf215546Sopenharmony_ci * FIXME: this will drain all hw queues. Instead, we could use the pending 1104bf215546Sopenharmony_ci * barrier state to limit the queues we serialize against. 1105bf215546Sopenharmony_ci */ 1106bf215546Sopenharmony_ci if (cmd_buffer->state.barrier.dst_mask) { 1107bf215546Sopenharmony_ci result = queue_submit_noop_job(queue, submit->perf_pass_index, 1108bf215546Sopenharmony_ci &sync_info, false); 1109bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1110bf215546Sopenharmony_ci return result; 1111bf215546Sopenharmony_ci } 1112bf215546Sopenharmony_ci } 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci /* Finish by submitting a no-op job that synchronizes across all queues. 1115bf215546Sopenharmony_ci * This will ensure that the signal semaphores don't get triggered until 1116bf215546Sopenharmony_ci * all work on any queue completes. See Vulkan's signal operation order 1117bf215546Sopenharmony_ci * requirements. 1118bf215546Sopenharmony_ci */ 1119bf215546Sopenharmony_ci if (submit->signal_count > 0) { 1120bf215546Sopenharmony_ci result = queue_submit_noop_job(queue, submit->perf_pass_index, 1121bf215546Sopenharmony_ci &sync_info, true); 1122bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1123bf215546Sopenharmony_ci return result; 1124bf215546Sopenharmony_ci } 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_ci process_signals(queue, sync_info.signal_count, sync_info.signals); 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci return VK_SUCCESS; 1129bf215546Sopenharmony_ci} 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 1132bf215546Sopenharmony_civ3dv_QueueBindSparse(VkQueue _queue, 1133bf215546Sopenharmony_ci uint32_t bindInfoCount, 1134bf215546Sopenharmony_ci const VkBindSparseInfo *pBindInfo, 1135bf215546Sopenharmony_ci VkFence fence) 1136bf215546Sopenharmony_ci{ 1137bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); 1138bf215546Sopenharmony_ci return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT); 1139bf215546Sopenharmony_ci} 1140