1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "main/glthread_marshal.h" 25#include "main/dispatch.h" 26#include "main/bufferobj.h" 27 28/** 29 * Create an upload buffer. This is called from the app thread, so everything 30 * has to be thread-safe in the driver. 31 */ 32static struct gl_buffer_object * 33new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr) 34{ 35 assert(ctx->GLThread.SupportsBufferUploads); 36 37 struct gl_buffer_object *obj = 38 _mesa_bufferobj_alloc(ctx, -1); 39 if (!obj) 40 return NULL; 41 42 obj->Immutable = true; 43 44 if (!_mesa_bufferobj_data(ctx, GL_ARRAY_BUFFER, size, NULL, 45 GL_WRITE_ONLY, 46 GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT, 47 obj)) { 48 _mesa_delete_buffer_object(ctx, obj); 49 return NULL; 50 } 51 52 *ptr = _mesa_bufferobj_map_range(ctx, 0, size, 53 GL_MAP_WRITE_BIT | 54 GL_MAP_UNSYNCHRONIZED_BIT | 55 MESA_MAP_THREAD_SAFE_BIT, 56 obj, MAP_GLTHREAD); 57 if (!*ptr) { 58 _mesa_delete_buffer_object(ctx, obj); 59 return NULL; 60 } 61 62 return obj; 63} 64 65void 66_mesa_glthread_upload(struct gl_context *ctx, const void *data, 67 GLsizeiptr size, unsigned *out_offset, 68 struct gl_buffer_object **out_buffer, 69 uint8_t **out_ptr) 70{ 71 struct glthread_state *glthread = &ctx->GLThread; 72 const unsigned default_size = 1024 * 1024; 73 74 if (unlikely(size > INT_MAX)) 75 return; 76 77 /* The alignment was chosen arbitrarily. */ 78 unsigned offset = align(glthread->upload_offset, 8); 79 80 /* Allocate a new buffer if needed. */ 81 if (unlikely(!glthread->upload_buffer || offset + size > default_size)) { 82 /* If the size is greater than the buffer size, allocate a separate buffer 83 * just for this upload. 84 */ 85 if (unlikely(size > default_size)) { 86 uint8_t *ptr; 87 88 assert(*out_buffer == NULL); 89 *out_buffer = new_upload_buffer(ctx, size, &ptr); 90 if (!*out_buffer) 91 return; 92 93 *out_offset = 0; 94 if (data) 95 memcpy(ptr, data, size); 96 else 97 *out_ptr = ptr; 98 return; 99 } 100 101 if (glthread->upload_buffer_private_refcount > 0) { 102 p_atomic_add(&glthread->upload_buffer->RefCount, 103 -glthread->upload_buffer_private_refcount); 104 glthread->upload_buffer_private_refcount = 0; 105 } 106 _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL); 107 glthread->upload_buffer = 108 new_upload_buffer(ctx, default_size, &glthread->upload_ptr); 109 glthread->upload_offset = 0; 110 offset = 0; 111 112 /* Since atomic operations are very very slow when 2 threads are not 113 * sharing one L3 cache (which can happen on AMD Zen), prevent using 114 * atomics as follows: 115 * 116 * This function has to return a buffer reference to the caller. 117 * Instead of atomic_inc for every call, it does all possible future 118 * increments in advance when the upload buffer is allocated. 119 * The maximum number of times the function can be called per upload 120 * buffer is default_size, because the minimum allocation size is 1. 121 * Therefore the function can only return default_size number of 122 * references at most, so we will never need more. This is the number 123 * that is added to RefCount at allocation. 124 * 125 * upload_buffer_private_refcount tracks how many buffer references 126 * are left to return to callers. If the buffer is full and there are 127 * still references left, they are atomically subtracted from RefCount 128 * before the buffer is unreferenced. 129 * 130 * This can increase performance by 20%. 131 */ 132 glthread->upload_buffer->RefCount += default_size; 133 glthread->upload_buffer_private_refcount = default_size; 134 } 135 136 /* Upload data. */ 137 if (data) 138 memcpy(glthread->upload_ptr + offset, data, size); 139 else 140 *out_ptr = glthread->upload_ptr + offset; 141 142 glthread->upload_offset = offset + size; 143 *out_offset = offset; 144 145 assert(*out_buffer == NULL); 146 assert(glthread->upload_buffer_private_refcount > 0); 147 *out_buffer = glthread->upload_buffer; 148 glthread->upload_buffer_private_refcount--; 149} 150 151/** Tracks the current bindings for the vertex array and index array buffers. 152 * 153 * This is part of what we need to enable glthread on compat-GL contexts that 154 * happen to use VBOs, without also supporting the full tracking of VBO vs 155 * user vertex array bindings per attribute on each vertex array for 156 * determining what to upload at draw call time. 157 * 158 * Note that GL core makes it so that a buffer binding with an invalid handle 159 * in the "buffer" parameter will throw an error, and then a 160 * glVertexAttribPointer() that followsmight not end up pointing at a VBO. 161 * However, in GL core the draw call would throw an error as well, so we don't 162 * really care if our tracking is wrong for this case -- we never need to 163 * marshal user data for draw calls, and the unmarshal will just generate an 164 * error or not as appropriate. 165 * 166 * For compatibility GL, we do need to accurately know whether the draw call 167 * on the unmarshal side will dereference a user pointer or load data from a 168 * VBO per vertex. That would make it seem like we need to track whether a 169 * "buffer" is valid, so that we can know when an error will be generated 170 * instead of updating the binding. However, compat GL has the ridiculous 171 * feature that if you pass a bad name, it just gens a buffer object for you, 172 * so we escape without having to know if things are valid or not. 173 */ 174void 175_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer) 176{ 177 struct glthread_state *glthread = &ctx->GLThread; 178 179 switch (target) { 180 case GL_ARRAY_BUFFER: 181 glthread->CurrentArrayBufferName = buffer; 182 break; 183 case GL_ELEMENT_ARRAY_BUFFER: 184 /* The current element array buffer binding is actually tracked in the 185 * vertex array object instead of the context, so this would need to 186 * change on vertex array object updates. 187 */ 188 glthread->CurrentVAO->CurrentElementBufferName = buffer; 189 break; 190 case GL_DRAW_INDIRECT_BUFFER: 191 glthread->CurrentDrawIndirectBufferName = buffer; 192 break; 193 case GL_PIXEL_PACK_BUFFER: 194 glthread->CurrentPixelPackBufferName = buffer; 195 break; 196 case GL_PIXEL_UNPACK_BUFFER: 197 glthread->CurrentPixelUnpackBufferName = buffer; 198 break; 199 case GL_QUERY_BUFFER: 200 glthread->CurrentQueryBufferName = buffer; 201 break; 202 } 203} 204 205void 206_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n, 207 const GLuint *buffers) 208{ 209 struct glthread_state *glthread = &ctx->GLThread; 210 211 if (!buffers || n < 0) 212 return; 213 214 for (unsigned i = 0; i < n; i++) { 215 GLuint id = buffers[i]; 216 217 if (id == glthread->CurrentArrayBufferName) 218 _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0); 219 if (id == glthread->CurrentVAO->CurrentElementBufferName) 220 _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0); 221 if (id == glthread->CurrentDrawIndirectBufferName) 222 _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0); 223 if (id == glthread->CurrentPixelPackBufferName) 224 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0); 225 if (id == glthread->CurrentPixelUnpackBufferName) 226 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0); 227 } 228} 229 230/* BufferData: marshalled asynchronously */ 231struct marshal_cmd_BufferData 232{ 233 struct marshal_cmd_base cmd_base; 234 GLuint target_or_name; 235 GLsizeiptr size; 236 GLenum usage; 237 const GLvoid *data_external_mem; 238 bool data_null; /* If set, no data follows for "data" */ 239 bool named; 240 bool ext_dsa; 241 /* Next size bytes are GLubyte data[size] */ 242}; 243 244uint32_t 245_mesa_unmarshal_BufferData(struct gl_context *ctx, 246 const struct marshal_cmd_BufferData *cmd, 247 const uint64_t *last) 248{ 249 const GLuint target_or_name = cmd->target_or_name; 250 const GLsizei size = cmd->size; 251 const GLenum usage = cmd->usage; 252 const void *data; 253 254 if (cmd->data_null) 255 data = NULL; 256 else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) 257 data = cmd->data_external_mem; 258 else 259 data = (const void *) (cmd + 1); 260 261 if (cmd->ext_dsa) { 262 CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch, 263 (target_or_name, size, data, usage)); 264 } else if (cmd->named) { 265 CALL_NamedBufferData(ctx->CurrentServerDispatch, 266 (target_or_name, size, data, usage)); 267 } else { 268 CALL_BufferData(ctx->CurrentServerDispatch, 269 (target_or_name, size, data, usage)); 270 } 271 return cmd->cmd_base.cmd_size; 272} 273 274uint32_t 275_mesa_unmarshal_NamedBufferData(struct gl_context *ctx, 276 const struct marshal_cmd_NamedBufferData *cmd, 277 const uint64_t *last) 278{ 279 unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData"); 280 return 0; 281} 282 283uint32_t 284_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx, 285 const struct marshal_cmd_NamedBufferDataEXT *cmd, 286 const uint64_t *last) 287{ 288 unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData"); 289 return 0; 290} 291 292static void 293_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size, 294 const GLvoid *data, GLenum usage, bool named, 295 bool ext_dsa, const char *func) 296{ 297 GET_CURRENT_CONTEXT(ctx); 298 bool external_mem = !named && 299 target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD; 300 bool copy_data = data && !external_mem; 301 size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0); 302 303 if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE || 304 (named && target_or_name == 0))) { 305 _mesa_glthread_finish_before(ctx, func); 306 if (named) { 307 CALL_NamedBufferData(ctx->CurrentServerDispatch, 308 (target_or_name, size, data, usage)); 309 } else { 310 CALL_BufferData(ctx->CurrentServerDispatch, 311 (target_or_name, size, data, usage)); 312 } 313 return; 314 } 315 316 struct marshal_cmd_BufferData *cmd = 317 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData, 318 cmd_size); 319 320 cmd->target_or_name = target_or_name; 321 cmd->size = size; 322 cmd->usage = usage; 323 cmd->data_null = !data; 324 cmd->named = named; 325 cmd->ext_dsa = ext_dsa; 326 cmd->data_external_mem = data; 327 328 if (copy_data) { 329 char *variable_data = (char *) (cmd + 1); 330 memcpy(variable_data, data, size); 331 } 332} 333 334void GLAPIENTRY 335_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data, 336 GLenum usage) 337{ 338 _mesa_marshal_BufferData_merged(target, size, data, usage, false, false, 339 "BufferData"); 340} 341 342void GLAPIENTRY 343_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size, 344 const GLvoid * data, GLenum usage) 345{ 346 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false, 347 "NamedBufferData"); 348} 349 350void GLAPIENTRY 351_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size, 352 const GLvoid *data, GLenum usage) 353{ 354 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true, 355 "NamedBufferDataEXT"); 356} 357 358 359/* BufferSubData: marshalled asynchronously */ 360struct marshal_cmd_BufferSubData 361{ 362 struct marshal_cmd_base cmd_base; 363 GLenum target_or_name; 364 GLintptr offset; 365 GLsizeiptr size; 366 bool named; 367 bool ext_dsa; 368 /* Next size bytes are GLubyte data[size] */ 369}; 370 371uint32_t 372_mesa_unmarshal_BufferSubData(struct gl_context *ctx, 373 const struct marshal_cmd_BufferSubData *cmd, 374 const uint64_t *last) 375{ 376 const GLenum target_or_name = cmd->target_or_name; 377 const GLintptr offset = cmd->offset; 378 const GLsizeiptr size = cmd->size; 379 const void *data = (const void *) (cmd + 1); 380 381 if (cmd->ext_dsa) { 382 CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch, 383 (target_or_name, offset, size, data)); 384 } else if (cmd->named) { 385 CALL_NamedBufferSubData(ctx->CurrentServerDispatch, 386 (target_or_name, offset, size, data)); 387 } else { 388 CALL_BufferSubData(ctx->CurrentServerDispatch, 389 (target_or_name, offset, size, data)); 390 } 391 return cmd->cmd_base.cmd_size; 392} 393 394uint32_t 395_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx, 396 const struct marshal_cmd_NamedBufferSubData *cmd, 397 const uint64_t *last) 398{ 399 unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData"); 400 return 0; 401} 402 403uint32_t 404_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx, 405 const struct marshal_cmd_NamedBufferSubDataEXT *cmd, 406 const uint64_t *last) 407{ 408 unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData"); 409 return 0; 410} 411 412static void 413_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset, 414 GLsizeiptr size, const GLvoid *data, 415 bool named, bool ext_dsa, const char *func) 416{ 417 GET_CURRENT_CONTEXT(ctx); 418 size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size; 419 420 /* Fast path: Copy the data to an upload buffer, and use the GPU 421 * to copy the uploaded data to the destination buffer. 422 */ 423 /* TODO: Handle offset == 0 && size < buffer_size. 424 * If offset == 0 and size == buffer_size, it's better to discard 425 * the buffer storage, but we don't know the buffer size in glthread. 426 */ 427 if (ctx->GLThread.SupportsBufferUploads && 428 data && offset > 0 && size > 0) { 429 struct gl_buffer_object *upload_buffer = NULL; 430 unsigned upload_offset = 0; 431 432 _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer, 433 NULL); 434 435 if (upload_buffer) { 436 _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer, 437 upload_offset, 438 target_or_name, 439 offset, size, named, 440 ext_dsa); 441 return; 442 } 443 } 444 445 if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 || 446 cmd_size > MARSHAL_MAX_CMD_SIZE || !data || 447 (named && target_or_name == 0))) { 448 _mesa_glthread_finish_before(ctx, func); 449 if (named) { 450 CALL_NamedBufferSubData(ctx->CurrentServerDispatch, 451 (target_or_name, offset, size, data)); 452 } else { 453 CALL_BufferSubData(ctx->CurrentServerDispatch, 454 (target_or_name, offset, size, data)); 455 } 456 return; 457 } 458 459 struct marshal_cmd_BufferSubData *cmd = 460 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData, 461 cmd_size); 462 cmd->target_or_name = target_or_name; 463 cmd->offset = offset; 464 cmd->size = size; 465 cmd->named = named; 466 cmd->ext_dsa = ext_dsa; 467 468 char *variable_data = (char *) (cmd + 1); 469 memcpy(variable_data, data, size); 470} 471 472void GLAPIENTRY 473_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, 474 const GLvoid * data) 475{ 476 _mesa_marshal_BufferSubData_merged(target, offset, size, data, false, 477 false, "BufferSubData"); 478} 479 480void GLAPIENTRY 481_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset, 482 GLsizeiptr size, const GLvoid * data) 483{ 484 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true, 485 false, "NamedBufferSubData"); 486} 487 488void GLAPIENTRY 489_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset, 490 GLsizeiptr size, const GLvoid * data) 491{ 492 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true, 493 true, "NamedBufferSubDataEXT"); 494} 495