1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2012 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/** @file glthread.c 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci * Support functions for the glthread feature of Mesa. 27bf215546Sopenharmony_ci * 28bf215546Sopenharmony_ci * In multicore systems, many applications end up CPU-bound with about half 29bf215546Sopenharmony_ci * their time spent inside their rendering thread and half inside Mesa. To 30bf215546Sopenharmony_ci * alleviate this, we put a shim layer in Mesa at the GL dispatch level that 31bf215546Sopenharmony_ci * quickly logs the GL commands to a buffer to be processed by a worker 32bf215546Sopenharmony_ci * thread. 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "main/mtypes.h" 36bf215546Sopenharmony_ci#include "main/glthread.h" 37bf215546Sopenharmony_ci#include "main/glthread_marshal.h" 38bf215546Sopenharmony_ci#include "main/hash.h" 39bf215546Sopenharmony_ci#include "util/u_atomic.h" 40bf215546Sopenharmony_ci#include "util/u_thread.h" 41bf215546Sopenharmony_ci#include "util/u_cpu_detect.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#include "state_tracker/st_context.h" 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_cistatic void 46bf215546Sopenharmony_ciglthread_unmarshal_batch(void *job, void *gdata, int thread_index) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci struct glthread_batch *batch = (struct glthread_batch*)job; 49bf215546Sopenharmony_ci struct gl_context *ctx = batch->ctx; 50bf215546Sopenharmony_ci unsigned pos = 0; 51bf215546Sopenharmony_ci unsigned used = batch->used; 52bf215546Sopenharmony_ci uint64_t *buffer = batch->buffer; 53bf215546Sopenharmony_ci const uint64_t *last = &buffer[used]; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci _glapi_set_dispatch(ctx->CurrentServerDispatch); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci _mesa_HashLockMutex(ctx->Shared->BufferObjects); 58bf215546Sopenharmony_ci ctx->BufferObjectsLocked = true; 59bf215546Sopenharmony_ci simple_mtx_lock(&ctx->Shared->TexMutex); 60bf215546Sopenharmony_ci ctx->TexturesLocked = true; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci while (pos < used) { 63bf215546Sopenharmony_ci const struct marshal_cmd_base *cmd = 64bf215546Sopenharmony_ci (const struct marshal_cmd_base *)&buffer[pos]; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci pos += _mesa_unmarshal_dispatch[cmd->cmd_id](ctx, cmd, last); 67bf215546Sopenharmony_ci } 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci ctx->TexturesLocked = false; 70bf215546Sopenharmony_ci simple_mtx_unlock(&ctx->Shared->TexMutex); 71bf215546Sopenharmony_ci ctx->BufferObjectsLocked = false; 72bf215546Sopenharmony_ci _mesa_HashUnlockMutex(ctx->Shared->BufferObjects); 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci assert(pos == used); 75bf215546Sopenharmony_ci batch->used = 0; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci unsigned batch_index = batch - ctx->GLThread.batches; 78bf215546Sopenharmony_ci /* Atomically set this to -1 if it's equal to batch_index. */ 79bf215546Sopenharmony_ci p_atomic_cmpxchg(&ctx->GLThread.LastProgramChangeBatch, batch_index, -1); 80bf215546Sopenharmony_ci p_atomic_cmpxchg(&ctx->GLThread.LastDListChangeBatchIndex, batch_index, -1); 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic void 84bf215546Sopenharmony_ciglthread_thread_initialization(void *job, void *gdata, int thread_index) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci struct gl_context *ctx = (struct gl_context*)job; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci st_set_background_context(ctx, &ctx->GLThread.stats); 89bf215546Sopenharmony_ci _glapi_set_context(ctx); 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_civoid 93bf215546Sopenharmony_ci_mesa_glthread_init(struct gl_context *ctx) 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci struct glthread_state *glthread = &ctx->GLThread; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci assert(!glthread->enabled); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci if (!util_queue_init(&glthread->queue, "gl", MARSHAL_MAX_BATCHES - 2, 100bf215546Sopenharmony_ci 1, 0, NULL)) { 101bf215546Sopenharmony_ci return; 102bf215546Sopenharmony_ci } 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci glthread->VAOs = _mesa_NewHashTable(); 105bf215546Sopenharmony_ci if (!glthread->VAOs) { 106bf215546Sopenharmony_ci util_queue_destroy(&glthread->queue); 107bf215546Sopenharmony_ci return; 108bf215546Sopenharmony_ci } 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci _mesa_glthread_reset_vao(&glthread->DefaultVAO); 111bf215546Sopenharmony_ci glthread->CurrentVAO = &glthread->DefaultVAO; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci if (!_mesa_create_marshal_tables(ctx)) { 114bf215546Sopenharmony_ci _mesa_DeleteHashTable(glthread->VAOs); 115bf215546Sopenharmony_ci util_queue_destroy(&glthread->queue); 116bf215546Sopenharmony_ci return; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++) { 120bf215546Sopenharmony_ci glthread->batches[i].ctx = ctx; 121bf215546Sopenharmony_ci util_queue_fence_init(&glthread->batches[i].fence); 122bf215546Sopenharmony_ci } 123bf215546Sopenharmony_ci glthread->next_batch = &glthread->batches[glthread->next]; 124bf215546Sopenharmony_ci glthread->used = 0; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci glthread->enabled = true; 127bf215546Sopenharmony_ci glthread->stats.queue = &glthread->queue; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci glthread->SupportsBufferUploads = 130bf215546Sopenharmony_ci ctx->Const.BufferCreateMapUnsynchronizedThreadSafe && 131bf215546Sopenharmony_ci ctx->Const.AllowMappedBuffersDuringExecution; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci /* If the draw start index is non-zero, glthread can upload to offset 0, 134bf215546Sopenharmony_ci * which means the attrib offset has to be -(first * stride). 135bf215546Sopenharmony_ci * So require signed vertex buffer offsets. 136bf215546Sopenharmony_ci */ 137bf215546Sopenharmony_ci glthread->SupportsNonVBOUploads = glthread->SupportsBufferUploads && 138bf215546Sopenharmony_ci ctx->Const.VertexBufferOffsetIsInt32; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci ctx->CurrentClientDispatch = ctx->MarshalExec; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci glthread->LastDListChangeBatchIndex = -1; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* Execute the thread initialization function in the thread. */ 145bf215546Sopenharmony_ci struct util_queue_fence fence; 146bf215546Sopenharmony_ci util_queue_fence_init(&fence); 147bf215546Sopenharmony_ci util_queue_add_job(&glthread->queue, ctx, &fence, 148bf215546Sopenharmony_ci glthread_thread_initialization, NULL, 0); 149bf215546Sopenharmony_ci util_queue_fence_wait(&fence); 150bf215546Sopenharmony_ci util_queue_fence_destroy(&fence); 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_cistatic void 154bf215546Sopenharmony_cifree_vao(void *data, UNUSED void *userData) 155bf215546Sopenharmony_ci{ 156bf215546Sopenharmony_ci free(data); 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_civoid 160bf215546Sopenharmony_ci_mesa_glthread_destroy(struct gl_context *ctx, const char *reason) 161bf215546Sopenharmony_ci{ 162bf215546Sopenharmony_ci struct glthread_state *glthread = &ctx->GLThread; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci if (!glthread->enabled) 165bf215546Sopenharmony_ci return; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci if (reason) 168bf215546Sopenharmony_ci _mesa_debug(ctx, "glthread destroy reason: %s\n", reason); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci _mesa_glthread_finish(ctx); 171bf215546Sopenharmony_ci util_queue_destroy(&glthread->queue); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++) 174bf215546Sopenharmony_ci util_queue_fence_destroy(&glthread->batches[i].fence); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci _mesa_HashDeleteAll(glthread->VAOs, free_vao, NULL); 177bf215546Sopenharmony_ci _mesa_DeleteHashTable(glthread->VAOs); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci ctx->GLThread.enabled = false; 180bf215546Sopenharmony_ci ctx->CurrentClientDispatch = ctx->CurrentServerDispatch; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci /* Update the dispatch only if the context is current. */ 183bf215546Sopenharmony_ci if (_glapi_get_dispatch() == ctx->MarshalExec) { 184bf215546Sopenharmony_ci _glapi_set_dispatch(ctx->CurrentClientDispatch); 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_civoid 189bf215546Sopenharmony_ci_mesa_glthread_flush_batch(struct gl_context *ctx) 190bf215546Sopenharmony_ci{ 191bf215546Sopenharmony_ci struct glthread_state *glthread = &ctx->GLThread; 192bf215546Sopenharmony_ci if (!glthread->enabled) 193bf215546Sopenharmony_ci return; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci if (ctx->CurrentServerDispatch == ctx->ContextLost) { 196bf215546Sopenharmony_ci _mesa_glthread_destroy(ctx, "context lost"); 197bf215546Sopenharmony_ci return; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci if (!glthread->used) 201bf215546Sopenharmony_ci return; /* the batch is empty */ 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci /* Pin threads regularly to the same Zen CCX that the main thread is 204bf215546Sopenharmony_ci * running on. The main thread can move between CCXs. 205bf215546Sopenharmony_ci */ 206bf215546Sopenharmony_ci if (util_get_cpu_caps()->num_L3_caches > 1 && 207bf215546Sopenharmony_ci /* driver support */ 208bf215546Sopenharmony_ci ctx->pipe->set_context_param && 209bf215546Sopenharmony_ci ++glthread->pin_thread_counter % 128 == 0) { 210bf215546Sopenharmony_ci int cpu = util_get_current_cpu(); 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci if (cpu >= 0) { 213bf215546Sopenharmony_ci uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu]; 214bf215546Sopenharmony_ci if (L3_cache != U_CPU_INVALID_L3) { 215bf215546Sopenharmony_ci util_set_thread_affinity(glthread->queue.threads[0], 216bf215546Sopenharmony_ci util_get_cpu_caps()->L3_affinity_mask[L3_cache], 217bf215546Sopenharmony_ci NULL, util_get_cpu_caps()->num_cpu_mask_bits); 218bf215546Sopenharmony_ci ctx->pipe->set_context_param(ctx->pipe, 219bf215546Sopenharmony_ci PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, 220bf215546Sopenharmony_ci L3_cache); 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci struct glthread_batch *next = glthread->next_batch; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* Debug: execute the batch immediately from this thread. 228bf215546Sopenharmony_ci * 229bf215546Sopenharmony_ci * Note that glthread_unmarshal_batch() changes the dispatch table so we'll 230bf215546Sopenharmony_ci * need to restore it when it returns. 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_ci if (false) { 233bf215546Sopenharmony_ci glthread_unmarshal_batch(next, NULL, 0); 234bf215546Sopenharmony_ci _glapi_set_dispatch(ctx->CurrentClientDispatch); 235bf215546Sopenharmony_ci return; 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci p_atomic_add(&glthread->stats.num_offloaded_items, glthread->used); 239bf215546Sopenharmony_ci next->used = glthread->used; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci util_queue_add_job(&glthread->queue, next, &next->fence, 242bf215546Sopenharmony_ci glthread_unmarshal_batch, NULL, 0); 243bf215546Sopenharmony_ci glthread->last = glthread->next; 244bf215546Sopenharmony_ci glthread->next = (glthread->next + 1) % MARSHAL_MAX_BATCHES; 245bf215546Sopenharmony_ci glthread->next_batch = &glthread->batches[glthread->next]; 246bf215546Sopenharmony_ci glthread->used = 0; 247bf215546Sopenharmony_ci} 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci/** 250bf215546Sopenharmony_ci * Waits for all pending batches have been unmarshaled. 251bf215546Sopenharmony_ci * 252bf215546Sopenharmony_ci * This can be used by the main thread to synchronize access to the context, 253bf215546Sopenharmony_ci * since the worker thread will be idle after this. 254bf215546Sopenharmony_ci */ 255bf215546Sopenharmony_civoid 256bf215546Sopenharmony_ci_mesa_glthread_finish(struct gl_context *ctx) 257bf215546Sopenharmony_ci{ 258bf215546Sopenharmony_ci struct glthread_state *glthread = &ctx->GLThread; 259bf215546Sopenharmony_ci if (!glthread->enabled) 260bf215546Sopenharmony_ci return; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* If this is called from the worker thread, then we've hit a path that 263bf215546Sopenharmony_ci * might be called from either the main thread or the worker (such as some 264bf215546Sopenharmony_ci * dri interface entrypoints), in which case we don't need to actually 265bf215546Sopenharmony_ci * synchronize against ourself. 266bf215546Sopenharmony_ci */ 267bf215546Sopenharmony_ci if (u_thread_is_self(glthread->queue.threads[0])) 268bf215546Sopenharmony_ci return; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci struct glthread_batch *last = &glthread->batches[glthread->last]; 271bf215546Sopenharmony_ci struct glthread_batch *next = glthread->next_batch; 272bf215546Sopenharmony_ci bool synced = false; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci if (!util_queue_fence_is_signalled(&last->fence)) { 275bf215546Sopenharmony_ci util_queue_fence_wait(&last->fence); 276bf215546Sopenharmony_ci synced = true; 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci if (glthread->used) { 280bf215546Sopenharmony_ci p_atomic_add(&glthread->stats.num_direct_items, glthread->used); 281bf215546Sopenharmony_ci next->used = glthread->used; 282bf215546Sopenharmony_ci glthread->used = 0; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci /* Since glthread_unmarshal_batch changes the dispatch to direct, 285bf215546Sopenharmony_ci * restore it after it's done. 286bf215546Sopenharmony_ci */ 287bf215546Sopenharmony_ci struct _glapi_table *dispatch = _glapi_get_dispatch(); 288bf215546Sopenharmony_ci glthread_unmarshal_batch(next, NULL, 0); 289bf215546Sopenharmony_ci _glapi_set_dispatch(dispatch); 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci /* It's not a sync because we don't enqueue partial batches, but 292bf215546Sopenharmony_ci * it would be a sync if we did. So count it anyway. 293bf215546Sopenharmony_ci */ 294bf215546Sopenharmony_ci synced = true; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci if (synced) 298bf215546Sopenharmony_ci p_atomic_inc(&glthread->stats.num_syncs); 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_civoid 302bf215546Sopenharmony_ci_mesa_glthread_finish_before(struct gl_context *ctx, const char *func) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci _mesa_glthread_finish(ctx); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* Uncomment this if you want to know where glthread syncs. */ 307bf215546Sopenharmony_ci /*printf("fallback to sync: %s\n", func);*/ 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_civoid 311bf215546Sopenharmony_ci_mesa_error_glthread_safe(struct gl_context *ctx, GLenum error, bool glthread, 312bf215546Sopenharmony_ci const char *format, ...) 313bf215546Sopenharmony_ci{ 314bf215546Sopenharmony_ci if (glthread) { 315bf215546Sopenharmony_ci _mesa_marshal_InternalSetError(error); 316bf215546Sopenharmony_ci } else { 317bf215546Sopenharmony_ci char s[MAX_DEBUG_MESSAGE_LENGTH]; 318bf215546Sopenharmony_ci va_list args; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci va_start(args, format); 321bf215546Sopenharmony_ci ASSERTED size_t len = vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, format, args); 322bf215546Sopenharmony_ci va_end(args); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* Whoever calls _mesa_error should use shorter strings. */ 325bf215546Sopenharmony_ci assert(len < MAX_DEBUG_MESSAGE_LENGTH); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci _mesa_error(ctx, error, "%s", s); 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci} 330