1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file crocus_fence.c 25 * 26 * Fences for driver and IPC serialisation, scheduling and synchronisation. 27 */ 28 29#include "util/u_inlines.h" 30#include "intel/common/intel_gem.h" 31 32#include "crocus_batch.h" 33#include "crocus_bufmgr.h" 34#include "crocus_context.h" 35#include "crocus_fence.h" 36#include "crocus_screen.h" 37 38static uint32_t 39gem_syncobj_create(int fd, uint32_t flags) 40{ 41 struct drm_syncobj_create args = { 42 .flags = flags, 43 }; 44 45 intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &args); 46 47 return args.handle; 48} 49 50static void 51gem_syncobj_destroy(int fd, uint32_t handle) 52{ 53 struct drm_syncobj_destroy args = { 54 .handle = handle, 55 }; 56 57 intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &args); 58} 59 60/** 61 * Make a new sync-point. 62 */ 63struct crocus_syncobj * 64crocus_create_syncobj(struct crocus_screen *screen) 65{ 66 struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj)); 67 68 if (!syncobj) 69 return NULL; 70 71 syncobj->handle = gem_syncobj_create(screen->fd, 0); 72 assert(syncobj->handle); 73 74 pipe_reference_init(&syncobj->ref, 1); 75 76 return syncobj; 77} 78 79void 80crocus_syncobj_destroy(struct crocus_screen *screen, 81 struct crocus_syncobj *syncobj) 82{ 83 gem_syncobj_destroy(screen->fd, syncobj->handle); 84 free(syncobj); 85} 86 87/** 88 * Add a sync-point to the batch, with the given flags. 89 * 90 * \p flags One of I915_EXEC_FENCE_WAIT or I915_EXEC_FENCE_SIGNAL. 91 */ 92void 93crocus_batch_add_syncobj(struct crocus_batch *batch, 94 struct crocus_syncobj *syncobj, unsigned flags) 95{ 96 struct drm_i915_gem_exec_fence *fence = 97 util_dynarray_grow(&batch->exec_fences, struct drm_i915_gem_exec_fence, 1); 98 99 *fence = (struct drm_i915_gem_exec_fence){ 100 .handle = syncobj->handle, 101 .flags = flags, 102 }; 103 104 struct crocus_syncobj **store = 105 util_dynarray_grow(&batch->syncobjs, struct crocus_syncobj *, 1); 106 107 *store = NULL; 108 crocus_syncobj_reference(batch->screen, store, syncobj); 109} 110 111/** 112 * Walk through a batch's dependencies (any I915_EXEC_FENCE_WAIT syncobjs) 113 * and unreference any which have already passed. 114 * 115 * Sometimes the compute batch is seldom used, and accumulates references 116 * to stale render batches that are no longer of interest, so we can free 117 * those up. 118 */ 119static void 120clear_stale_syncobjs(struct crocus_batch *batch) 121{ 122 struct crocus_screen *screen = batch->screen; 123 124 int n = util_dynarray_num_elements(&batch->syncobjs, struct crocus_syncobj *); 125 126 assert(n == util_dynarray_num_elements(&batch->exec_fences, 127 struct drm_i915_gem_exec_fence)); 128 129 /* Skip the first syncobj, as it's the signalling one. */ 130 for (int i = n - 1; i > 1; i--) { 131 struct crocus_syncobj **syncobj = 132 util_dynarray_element(&batch->syncobjs, struct crocus_syncobj *, i); 133 struct drm_i915_gem_exec_fence *fence = 134 util_dynarray_element(&batch->exec_fences, 135 struct drm_i915_gem_exec_fence, i); 136 assert(fence->flags & I915_EXEC_FENCE_WAIT); 137 138 if (crocus_wait_syncobj(&screen->base, *syncobj, 0)) 139 continue; 140 141 /* This sync object has already passed, there's no need to continue 142 * marking it as a dependency; we can stop holding on to the reference. 143 */ 144 crocus_syncobj_reference(screen, syncobj, NULL); 145 146 /* Remove it from the lists; move the last element here. */ 147 struct crocus_syncobj **nth_syncobj = 148 util_dynarray_pop_ptr(&batch->syncobjs, struct crocus_syncobj *); 149 struct drm_i915_gem_exec_fence *nth_fence = 150 util_dynarray_pop_ptr(&batch->exec_fences, 151 struct drm_i915_gem_exec_fence); 152 153 if (syncobj != nth_syncobj) { 154 *syncobj = *nth_syncobj; 155 memcpy(fence, nth_fence, sizeof(*fence)); 156 } 157 } 158} 159 160/* ------------------------------------------------------------------- */ 161 162struct pipe_fence_handle { 163 struct pipe_reference ref; 164 165 struct pipe_context *unflushed_ctx; 166 167 struct crocus_fine_fence *fine[CROCUS_BATCH_COUNT]; 168}; 169 170static void 171crocus_fence_destroy(struct pipe_screen *p_screen, 172 struct pipe_fence_handle *fence) 173{ 174 struct crocus_screen *screen = (struct crocus_screen *)p_screen; 175 176 for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) 177 crocus_fine_fence_reference(screen, &fence->fine[i], NULL); 178 179 free(fence); 180} 181 182static void 183crocus_fence_reference(struct pipe_screen *p_screen, 184 struct pipe_fence_handle **dst, 185 struct pipe_fence_handle *src) 186{ 187 if (pipe_reference(&(*dst)->ref, &src->ref)) 188 crocus_fence_destroy(p_screen, *dst); 189 190 *dst = src; 191} 192 193bool 194crocus_wait_syncobj(struct pipe_screen *p_screen, 195 struct crocus_syncobj *syncobj, int64_t timeout_nsec) 196{ 197 if (!syncobj) 198 return false; 199 200 struct crocus_screen *screen = (struct crocus_screen *)p_screen; 201 struct drm_syncobj_wait args = { 202 .handles = (uintptr_t)&syncobj->handle, 203 .count_handles = 1, 204 .timeout_nsec = timeout_nsec, 205 }; 206 return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args); 207} 208 209static void 210crocus_fence_flush(struct pipe_context *ctx, 211 struct pipe_fence_handle **out_fence, unsigned flags) 212{ 213 struct crocus_screen *screen = (void *)ctx->screen; 214 struct crocus_context *ice = (struct crocus_context *)ctx; 215 216 const bool deferred = flags & PIPE_FLUSH_DEFERRED; 217 218 if (!deferred) { 219 for (unsigned i = 0; i < ice->batch_count; i++) 220 crocus_batch_flush(&ice->batches[i]); 221 } 222 223 if (!out_fence) 224 return; 225 226 struct pipe_fence_handle *fence = calloc(1, sizeof(*fence)); 227 if (!fence) 228 return; 229 230 pipe_reference_init(&fence->ref, 1); 231 232 if (deferred) 233 fence->unflushed_ctx = ctx; 234 235 for (unsigned b = 0; b < ice->batch_count; b++) { 236 struct crocus_batch *batch = &ice->batches[b]; 237 238 if (deferred && crocus_batch_bytes_used(batch) > 0) { 239 struct crocus_fine_fence *fine = 240 crocus_fine_fence_new(batch, CROCUS_FENCE_BOTTOM_OF_PIPE); 241 crocus_fine_fence_reference(screen, &fence->fine[b], fine); 242 crocus_fine_fence_reference(screen, &fine, NULL); 243 } else { 244 /* This batch has no commands queued up (perhaps we just flushed, 245 * or all the commands are on the other batch). Wait for the last 246 * syncobj on this engine - unless it's already finished by now. 247 */ 248 if (crocus_fine_fence_signaled(batch->last_fence)) 249 continue; 250 251 crocus_fine_fence_reference(screen, &fence->fine[b], 252 batch->last_fence); 253 } 254 } 255 256 crocus_fence_reference(ctx->screen, out_fence, NULL); 257 *out_fence = fence; 258} 259 260static void 261crocus_fence_await(struct pipe_context *ctx, struct pipe_fence_handle *fence) 262{ 263 struct crocus_context *ice = (struct crocus_context *)ctx; 264 265 /* Unflushed fences from the same context are no-ops. */ 266 if (ctx && ctx == fence->unflushed_ctx) 267 return; 268 269 for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) { 270 struct crocus_fine_fence *fine = fence->fine[i]; 271 272 if (crocus_fine_fence_signaled(fine)) 273 continue; 274 275 for (unsigned b = 0; b < ice->batch_count; b++) { 276 struct crocus_batch *batch = &ice->batches[b]; 277 278 /* We're going to make any future work in this batch wait for our 279 * fence to have gone by. But any currently queued work doesn't 280 * need to wait. Flush the batch now, so it can happen sooner. 281 */ 282 crocus_batch_flush(batch); 283 284 /* Before adding a new reference, clean out any stale ones. */ 285 clear_stale_syncobjs(batch); 286 287 crocus_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT); 288 } 289 } 290} 291 292#define NSEC_PER_SEC (1000 * USEC_PER_SEC) 293#define USEC_PER_SEC (1000 * MSEC_PER_SEC) 294#define MSEC_PER_SEC (1000) 295 296static uint64_t 297gettime_ns(void) 298{ 299 struct timespec current; 300 clock_gettime(CLOCK_MONOTONIC, ¤t); 301 return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec; 302} 303 304static uint64_t 305rel2abs(uint64_t timeout) 306{ 307 if (timeout == 0) 308 return 0; 309 310 uint64_t current_time = gettime_ns(); 311 uint64_t max_timeout = (uint64_t)INT64_MAX - current_time; 312 313 timeout = MIN2(max_timeout, timeout); 314 315 return current_time + timeout; 316} 317 318static bool 319crocus_fence_finish(struct pipe_screen *p_screen, struct pipe_context *ctx, 320 struct pipe_fence_handle *fence, uint64_t timeout) 321{ 322 ctx = threaded_context_unwrap_sync(ctx); 323 struct crocus_context *ice = (struct crocus_context *)ctx; 324 struct crocus_screen *screen = (struct crocus_screen *)p_screen; 325 326 /* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have 327 * flushed yet. Check if our syncobj is the current batch's signalling 328 * syncobj - if so, we haven't flushed and need to now. 329 * 330 * The Gallium docs mention that a flush will occur if \p ctx matches 331 * the context the fence was created with. It may be NULL, so we check 332 * that it matches first. 333 */ 334 if (ctx && ctx == fence->unflushed_ctx) { 335 for (unsigned i = 0; i < ice->batch_count; i++) { 336 struct crocus_fine_fence *fine = fence->fine[i]; 337 338 if (crocus_fine_fence_signaled(fine)) 339 continue; 340 341 if (fine->syncobj == crocus_batch_get_signal_syncobj(&ice->batches[i])) 342 crocus_batch_flush(&ice->batches[i]); 343 } 344 345 /* The fence is no longer deferred. */ 346 fence->unflushed_ctx = NULL; 347 } 348 349 unsigned int handle_count = 0; 350 uint32_t handles[ARRAY_SIZE(fence->fine)]; 351 for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) { 352 struct crocus_fine_fence *fine = fence->fine[i]; 353 354 if (crocus_fine_fence_signaled(fine)) 355 continue; 356 357 handles[handle_count++] = fine->syncobj->handle; 358 } 359 360 if (handle_count == 0) 361 return true; 362 363 struct drm_syncobj_wait args = { 364 .handles = (uintptr_t)handles, 365 .count_handles = handle_count, 366 .timeout_nsec = rel2abs(timeout), 367 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL 368 }; 369 if (fence->unflushed_ctx) { 370 /* This fence had a deferred flush from another context. We can't 371 * safely flush it here, because the context might be bound to a 372 * different thread, and poking at its internals wouldn't be safe. 373 * 374 * Instead, use the WAIT_FOR_SUBMIT flag to block and hope that 375 * another thread submits the work. 376 */ 377 args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; 378 } 379 return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0; 380} 381 382#ifndef SYNC_IOC_MAGIC 383/* duplicated from linux/sync_file.h to avoid build-time dependency 384 * on new (v4.7) kernel headers. Once distro's are mostly using 385 * something newer than v4.7 drop this and #include <linux/sync_file.h> 386 * instead. 387 */ 388struct sync_merge_data { 389 char name[32]; 390 __s32 fd2; 391 __s32 fence; 392 __u32 flags; 393 __u32 pad; 394}; 395 396#define SYNC_IOC_MAGIC '>' 397#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data) 398#endif 399 400static int 401sync_merge_fd(int sync_fd, int new_fd) 402{ 403 if (sync_fd == -1) 404 return new_fd; 405 406 if (new_fd == -1) 407 return sync_fd; 408 409 struct sync_merge_data args = { 410 .name = "crocus fence", 411 .fd2 = new_fd, 412 .fence = -1, 413 }; 414 415 intel_ioctl(sync_fd, SYNC_IOC_MERGE, &args); 416 close(new_fd); 417 close(sync_fd); 418 419 return args.fence; 420} 421 422static int 423crocus_fence_get_fd(struct pipe_screen *p_screen, 424 struct pipe_fence_handle *fence) 425{ 426 struct crocus_screen *screen = (struct crocus_screen *)p_screen; 427 int fd = -1; 428 429 /* Deferred fences aren't supported. */ 430 if (fence->unflushed_ctx) 431 return -1; 432 433 for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) { 434 struct crocus_fine_fence *fine = fence->fine[i]; 435 436 if (crocus_fine_fence_signaled(fine)) 437 continue; 438 439 struct drm_syncobj_handle args = { 440 .handle = fine->syncobj->handle, 441 .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE, 442 .fd = -1, 443 }; 444 445 intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args); 446 fd = sync_merge_fd(fd, args.fd); 447 } 448 449 if (fd == -1) { 450 /* Our fence has no syncobj's recorded. This means that all of the 451 * batches had already completed, their syncobj's had been signalled, 452 * and so we didn't bother to record them. But we're being asked to 453 * export such a fence. So export a dummy already-signalled syncobj. 454 */ 455 struct drm_syncobj_handle args = { 456 .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE, 457 .fd = -1, 458 }; 459 460 args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED); 461 intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args); 462 gem_syncobj_destroy(screen->fd, args.handle); 463 return args.fd; 464 } 465 466 return fd; 467} 468 469static void 470crocus_fence_create_fd(struct pipe_context *ctx, struct pipe_fence_handle **out, 471 int fd, enum pipe_fd_type type) 472{ 473 assert(type == PIPE_FD_TYPE_NATIVE_SYNC || type == PIPE_FD_TYPE_SYNCOBJ); 474 475 struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; 476 struct drm_syncobj_handle args = { 477 .fd = fd, 478 }; 479 480 if (type == PIPE_FD_TYPE_NATIVE_SYNC) { 481 args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE; 482 args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED); 483 } 484 485 if (intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args) == -1) { 486 fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n", 487 strerror(errno)); 488 if (type == PIPE_FD_TYPE_NATIVE_SYNC) 489 gem_syncobj_destroy(screen->fd, args.handle); 490 *out = NULL; 491 return; 492 } 493 494 struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj)); 495 if (!syncobj) { 496 *out = NULL; 497 return; 498 } 499 syncobj->handle = args.handle; 500 pipe_reference_init(&syncobj->ref, 1); 501 502 struct crocus_fine_fence *fine = calloc(1, sizeof(*fine)); 503 if (!fine) { 504 free(syncobj); 505 *out = NULL; 506 return; 507 } 508 509 static const uint32_t zero = 0; 510 511 /* Fences work in terms of crocus_fine_fence, but we don't actually have a 512 * seqno for an imported fence. So, create a fake one which always 513 * returns as 'not signaled' so we fall back to using the sync object. 514 */ 515 fine->seqno = UINT32_MAX; 516 fine->map = &zero; 517 fine->syncobj = syncobj; 518 fine->flags = CROCUS_FENCE_END; 519 pipe_reference_init(&fine->reference, 1); 520 521 struct pipe_fence_handle *fence = calloc(1, sizeof(*fence)); 522 if (!fence) { 523 free(fine); 524 free(syncobj); 525 *out = NULL; 526 return; 527 } 528 pipe_reference_init(&fence->ref, 1); 529 fence->fine[0] = fine; 530 531 *out = fence; 532} 533 534static void 535crocus_fence_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence) 536{ 537 struct crocus_context *ice = (struct crocus_context *)ctx; 538 539 if (ctx == fence->unflushed_ctx) 540 return; 541 542 for (unsigned b = 0; b < ice->batch_count; b++) { 543 for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) { 544 struct crocus_fine_fence *fine = fence->fine[i]; 545 546 /* already signaled fence skipped */ 547 if (crocus_fine_fence_signaled(fine)) 548 continue; 549 550 ice->batches[b].contains_fence_signal = true; 551 crocus_batch_add_syncobj(&ice->batches[b], fine->syncobj, 552 I915_EXEC_FENCE_SIGNAL); 553 } 554 if (ice->batches[b].contains_fence_signal) 555 crocus_batch_flush(&ice->batches[b]); 556 } 557} 558 559void 560crocus_init_screen_fence_functions(struct pipe_screen *screen) 561{ 562 screen->fence_reference = crocus_fence_reference; 563 screen->fence_finish = crocus_fence_finish; 564 screen->fence_get_fd = crocus_fence_get_fd; 565} 566 567void 568crocus_init_context_fence_functions(struct pipe_context *ctx) 569{ 570 ctx->flush = crocus_fence_flush; 571 ctx->create_fence_fd = crocus_fence_create_fd; 572 ctx->fence_server_sync = crocus_fence_await; 573 ctx->fence_server_signal = crocus_fence_signal; 574} 575