1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "main/glthread_marshal.h"
25#include "main/dispatch.h"
26#include "main/bufferobj.h"
27
28/**
29 * Create an upload buffer. This is called from the app thread, so everything
30 * has to be thread-safe in the driver.
31 */
32static struct gl_buffer_object *
33new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
34{
35   assert(ctx->GLThread.SupportsBufferUploads);
36
37   struct gl_buffer_object *obj =
38      _mesa_bufferobj_alloc(ctx, -1);
39   if (!obj)
40      return NULL;
41
42   obj->Immutable = true;
43
44   if (!_mesa_bufferobj_data(ctx, GL_ARRAY_BUFFER, size, NULL,
45                          GL_WRITE_ONLY,
46                          GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
47                          obj)) {
48      _mesa_delete_buffer_object(ctx, obj);
49      return NULL;
50   }
51
52   *ptr = _mesa_bufferobj_map_range(ctx, 0, size,
53                                 GL_MAP_WRITE_BIT |
54                                 GL_MAP_UNSYNCHRONIZED_BIT |
55                                 MESA_MAP_THREAD_SAFE_BIT,
56                                 obj, MAP_GLTHREAD);
57   if (!*ptr) {
58      _mesa_delete_buffer_object(ctx, obj);
59      return NULL;
60   }
61
62   return obj;
63}
64
65void
66_mesa_glthread_upload(struct gl_context *ctx, const void *data,
67                      GLsizeiptr size, unsigned *out_offset,
68                      struct gl_buffer_object **out_buffer,
69                      uint8_t **out_ptr)
70{
71   struct glthread_state *glthread = &ctx->GLThread;
72   const unsigned default_size = 1024 * 1024;
73
74   if (unlikely(size > INT_MAX))
75      return;
76
77   /* The alignment was chosen arbitrarily. */
78   unsigned offset = align(glthread->upload_offset, 8);
79
80   /* Allocate a new buffer if needed. */
81   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
82      /* If the size is greater than the buffer size, allocate a separate buffer
83       * just for this upload.
84       */
85      if (unlikely(size > default_size)) {
86         uint8_t *ptr;
87
88         assert(*out_buffer == NULL);
89         *out_buffer = new_upload_buffer(ctx, size, &ptr);
90         if (!*out_buffer)
91            return;
92
93         *out_offset = 0;
94         if (data)
95            memcpy(ptr, data, size);
96         else
97            *out_ptr = ptr;
98         return;
99      }
100
101      if (glthread->upload_buffer_private_refcount > 0) {
102         p_atomic_add(&glthread->upload_buffer->RefCount,
103                      -glthread->upload_buffer_private_refcount);
104         glthread->upload_buffer_private_refcount = 0;
105      }
106      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
107      glthread->upload_buffer =
108         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
109      glthread->upload_offset = 0;
110      offset = 0;
111
112      /* Since atomic operations are very very slow when 2 threads are not
113       * sharing one L3 cache (which can happen on AMD Zen), prevent using
114       * atomics as follows:
115       *
116       * This function has to return a buffer reference to the caller.
117       * Instead of atomic_inc for every call, it does all possible future
118       * increments in advance when the upload buffer is allocated.
119       * The maximum number of times the function can be called per upload
120       * buffer is default_size, because the minimum allocation size is 1.
121       * Therefore the function can only return default_size number of
122       * references at most, so we will never need more. This is the number
123       * that is added to RefCount at allocation.
124       *
125       * upload_buffer_private_refcount tracks how many buffer references
126       * are left to return to callers. If the buffer is full and there are
127       * still references left, they are atomically subtracted from RefCount
128       * before the buffer is unreferenced.
129       *
130       * This can increase performance by 20%.
131       */
132      glthread->upload_buffer->RefCount += default_size;
133      glthread->upload_buffer_private_refcount = default_size;
134   }
135
136   /* Upload data. */
137   if (data)
138      memcpy(glthread->upload_ptr + offset, data, size);
139   else
140      *out_ptr = glthread->upload_ptr + offset;
141
142   glthread->upload_offset = offset + size;
143   *out_offset = offset;
144
145   assert(*out_buffer == NULL);
146   assert(glthread->upload_buffer_private_refcount > 0);
147   *out_buffer = glthread->upload_buffer;
148   glthread->upload_buffer_private_refcount--;
149}
150
151/** Tracks the current bindings for the vertex array and index array buffers.
152 *
153 * This is part of what we need to enable glthread on compat-GL contexts that
154 * happen to use VBOs, without also supporting the full tracking of VBO vs
155 * user vertex array bindings per attribute on each vertex array for
156 * determining what to upload at draw call time.
157 *
158 * Note that GL core makes it so that a buffer binding with an invalid handle
159 * in the "buffer" parameter will throw an error, and then a
 * glVertexAttribPointer() that follows might not end up pointing at a VBO.
161 * However, in GL core the draw call would throw an error as well, so we don't
162 * really care if our tracking is wrong for this case -- we never need to
163 * marshal user data for draw calls, and the unmarshal will just generate an
164 * error or not as appropriate.
165 *
166 * For compatibility GL, we do need to accurately know whether the draw call
167 * on the unmarshal side will dereference a user pointer or load data from a
168 * VBO per vertex.  That would make it seem like we need to track whether a
169 * "buffer" is valid, so that we can know when an error will be generated
170 * instead of updating the binding.  However, compat GL has the ridiculous
171 * feature that if you pass a bad name, it just gens a buffer object for you,
172 * so we escape without having to know if things are valid or not.
173 */
174void
175_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
176{
177   struct glthread_state *glthread = &ctx->GLThread;
178
179   switch (target) {
180   case GL_ARRAY_BUFFER:
181      glthread->CurrentArrayBufferName = buffer;
182      break;
183   case GL_ELEMENT_ARRAY_BUFFER:
184      /* The current element array buffer binding is actually tracked in the
185       * vertex array object instead of the context, so this would need to
186       * change on vertex array object updates.
187       */
188      glthread->CurrentVAO->CurrentElementBufferName = buffer;
189      break;
190   case GL_DRAW_INDIRECT_BUFFER:
191      glthread->CurrentDrawIndirectBufferName = buffer;
192      break;
193   case GL_PIXEL_PACK_BUFFER:
194      glthread->CurrentPixelPackBufferName = buffer;
195      break;
196   case GL_PIXEL_UNPACK_BUFFER:
197      glthread->CurrentPixelUnpackBufferName = buffer;
198      break;
199   case GL_QUERY_BUFFER:
200      glthread->CurrentQueryBufferName = buffer;
201      break;
202   }
203}
204
205void
206_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
207                             const GLuint *buffers)
208{
209   struct glthread_state *glthread = &ctx->GLThread;
210
211   if (!buffers || n < 0)
212      return;
213
214   for (unsigned i = 0; i < n; i++) {
215      GLuint id = buffers[i];
216
217      if (id == glthread->CurrentArrayBufferName)
218         _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
219      if (id == glthread->CurrentVAO->CurrentElementBufferName)
220         _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
221      if (id == glthread->CurrentDrawIndirectBufferName)
222         _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
223      if (id == glthread->CurrentPixelPackBufferName)
224         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0);
225      if (id == glthread->CurrentPixelUnpackBufferName)
226         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0);
227   }
228}
229
/* BufferData: marshalled asynchronously */
struct marshal_cmd_BufferData
{
   struct marshal_cmd_base cmd_base;
   GLuint target_or_name; /* GLenum target, or buffer name when "named" is set */
   GLsizeiptr size;
   GLenum usage;
   const GLvoid *data_external_mem; /* passed through unchanged; only read for
                                     * GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD */
   bool data_null; /* If set, no data follows for "data" */
   bool named; /* set for NamedBufferData / NamedBufferDataEXT */
   bool ext_dsa; /* set for NamedBufferDataEXT (implies named) */
   /* Next size bytes are GLubyte data[size] */
};
243
244uint32_t
245_mesa_unmarshal_BufferData(struct gl_context *ctx,
246                           const struct marshal_cmd_BufferData *cmd,
247                           const uint64_t *last)
248{
249   const GLuint target_or_name = cmd->target_or_name;
250   const GLsizei size = cmd->size;
251   const GLenum usage = cmd->usage;
252   const void *data;
253
254   if (cmd->data_null)
255      data = NULL;
256   else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
257      data = cmd->data_external_mem;
258   else
259      data = (const void *) (cmd + 1);
260
261   if (cmd->ext_dsa) {
262      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
263                              (target_or_name, size, data, usage));
264   } else if (cmd->named) {
265      CALL_NamedBufferData(ctx->CurrentServerDispatch,
266                           (target_or_name, size, data, usage));
267   } else {
268      CALL_BufferData(ctx->CurrentServerDispatch,
269                      (target_or_name, size, data, usage));
270   }
271   return cmd->cmd_base.cmd_size;
272}
273
/* Stub: all BufferData variants are marshalled through
 * DISPATCH_CMD_BufferData (see _mesa_marshal_BufferData_merged), so this
 * per-entry-point unmarshal function is never executed.
 */
uint32_t
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd,
                                const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
282
/* Stub: all BufferData variants are marshalled through
 * DISPATCH_CMD_BufferData (see _mesa_marshal_BufferData_merged), so this
 * per-entry-point unmarshal function is never executed.
 */
uint32_t
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
291
292static void
293_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
294                                const GLvoid *data, GLenum usage, bool named,
295                                bool ext_dsa, const char *func)
296{
297   GET_CURRENT_CONTEXT(ctx);
298   bool external_mem = !named &&
299                       target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
300   bool copy_data = data && !external_mem;
301   size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);
302
303   if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE ||
304                (named && target_or_name == 0))) {
305      _mesa_glthread_finish_before(ctx, func);
306      if (named) {
307         CALL_NamedBufferData(ctx->CurrentServerDispatch,
308                              (target_or_name, size, data, usage));
309      } else {
310         CALL_BufferData(ctx->CurrentServerDispatch,
311                         (target_or_name, size, data, usage));
312      }
313      return;
314   }
315
316   struct marshal_cmd_BufferData *cmd =
317      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
318                                      cmd_size);
319
320   cmd->target_or_name = target_or_name;
321   cmd->size = size;
322   cmd->usage = usage;
323   cmd->data_null = !data;
324   cmd->named = named;
325   cmd->ext_dsa = ext_dsa;
326   cmd->data_external_mem = data;
327
328   if (copy_data) {
329      char *variable_data = (char *) (cmd + 1);
330      memcpy(variable_data, data, size);
331   }
332}
333
/** glBufferData entry point for the glthread marshal dispatch table. */
void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data,
                         GLenum usage)
{
   _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
                                   "BufferData");
}
341
/** glNamedBufferData entry point for the glthread marshal dispatch table. */
void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
                              const GLvoid * data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
                                   "NamedBufferData");
}
349
/** glNamedBufferDataEXT (EXT_direct_state_access) entry point for the
 * glthread marshal dispatch table.
 */
void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
                                 const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
                                   "NamedBufferDataEXT");
}
357
358
/* BufferSubData: marshalled asynchronously */
struct marshal_cmd_BufferSubData
{
   struct marshal_cmd_base cmd_base;
   GLenum target_or_name; /* GLenum target, or buffer name when "named" is set */
   GLintptr offset;
   GLsizeiptr size;
   bool named; /* set for NamedBufferSubData / NamedBufferSubDataEXT */
   bool ext_dsa; /* set for NamedBufferSubDataEXT (implies named) */
   /* Next size bytes are GLubyte data[size] */
};
370
371uint32_t
372_mesa_unmarshal_BufferSubData(struct gl_context *ctx,
373                              const struct marshal_cmd_BufferSubData *cmd,
374                              const uint64_t *last)
375{
376   const GLenum target_or_name = cmd->target_or_name;
377   const GLintptr offset = cmd->offset;
378   const GLsizeiptr size = cmd->size;
379   const void *data = (const void *) (cmd + 1);
380
381   if (cmd->ext_dsa) {
382      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
383                                 (target_or_name, offset, size, data));
384   } else if (cmd->named) {
385      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
386                              (target_or_name, offset, size, data));
387   } else {
388      CALL_BufferSubData(ctx->CurrentServerDispatch,
389                         (target_or_name, offset, size, data));
390   }
391   return cmd->cmd_base.cmd_size;
392}
393
/* Stub: all BufferSubData variants are marshalled through
 * DISPATCH_CMD_BufferSubData (see _mesa_marshal_BufferSubData_merged), so
 * this per-entry-point unmarshal function is never executed.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
402
/* Stub: all BufferSubData variants are marshalled through
 * DISPATCH_CMD_BufferSubData (see _mesa_marshal_BufferSubData_merged), so
 * this per-entry-point unmarshal function is never executed.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
                                      const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
411
412static void
413_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
414                                   GLsizeiptr size, const GLvoid *data,
415                                   bool named, bool ext_dsa, const char *func)
416{
417   GET_CURRENT_CONTEXT(ctx);
418   size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;
419
420   /* Fast path: Copy the data to an upload buffer, and use the GPU
421    * to copy the uploaded data to the destination buffer.
422    */
423   /* TODO: Handle offset == 0 && size < buffer_size.
424    *       If offset == 0 and size == buffer_size, it's better to discard
425    *       the buffer storage, but we don't know the buffer size in glthread.
426    */
427   if (ctx->GLThread.SupportsBufferUploads &&
428       data && offset > 0 && size > 0) {
429      struct gl_buffer_object *upload_buffer = NULL;
430      unsigned upload_offset = 0;
431
432      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
433                            NULL);
434
435      if (upload_buffer) {
436         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
437                                                     upload_offset,
438                                                     target_or_name,
439                                                     offset, size, named,
440                                                     ext_dsa);
441         return;
442      }
443   }
444
445   if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
446                cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
447                (named && target_or_name == 0))) {
448      _mesa_glthread_finish_before(ctx, func);
449      if (named) {
450         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
451                                 (target_or_name, offset, size, data));
452      } else {
453         CALL_BufferSubData(ctx->CurrentServerDispatch,
454                            (target_or_name, offset, size, data));
455      }
456      return;
457   }
458
459   struct marshal_cmd_BufferSubData *cmd =
460      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
461                                      cmd_size);
462   cmd->target_or_name = target_or_name;
463   cmd->offset = offset;
464   cmd->size = size;
465   cmd->named = named;
466   cmd->ext_dsa = ext_dsa;
467
468   char *variable_data = (char *) (cmd + 1);
469   memcpy(variable_data, data, size);
470}
471
/** glBufferSubData entry point for the glthread marshal dispatch table. */
void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                            const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
                                      false, "BufferSubData");
}
479
/** glNamedBufferSubData entry point for the glthread marshal dispatch
 * table.
 */
void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
                                 GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      false, "NamedBufferSubData");
}
487
/** glNamedBufferSubDataEXT (EXT_direct_state_access) entry point for the
 * glthread marshal dispatch table.
 */
void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
                                    GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      true, "NamedBufferSubDataEXT");
}
495